diff --git a/basics.ipynb b/basics.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..76ff958 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# Day-2 +Flask +Flask-WTF +Flask-SQLAlchemy +beautifulsoup4 +pytz +python-dotenv \ No newline at end of file diff --git a/scaper.ipynb b/scaper.ipynb new file mode 100644 index 0000000..746be86 --- /dev/null +++ b/scaper.ipynb @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}\n", + "url = \"https://www.imdb.com/chart/top/?ref_=nv_mv_250\"\n", + "response = requests.get(url, headers=headers)\n", + "html_content = response.content\n", + "soup = BeautifulSoup(html_content, \"html.parser\")\n", + "movies = soup.find_all(\"div\", class_=\"ipc-title ipc-title--base ipc-title--title ipc-title-link-no-icon ipc-title--on-textPrimary sc-1e00898e-9 jQixeG cli-title\")\n", + "for x in movies:\n", + " print(x.h3.text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..ba9d5f5 --- /dev/null +++ b/scraper.py @@ -0,0 +1,11 @@ +import requests +from bs4 import BeautifulSoup + +headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'} +url = "https://www.imdb.com/chart/top/?ref_=nv_mv_250" +response = requests.get(url, headers=headers) +html_content = response.content +soup = BeautifulSoup(html_content, "html.parser") +movies = soup.find_all("div", class_="ipc-title ipc-title--base ipc-title--title ipc-title-link-no-icon ipc-title--on-textPrimary sc-1e00898e-9 jQixeG cli-title") +for x in movies: + print(x.h3.text) \ No newline at end of file diff --git a/webscraping.ipynb b/webscraping.ipynb new file mode 100644 index 0000000..4cc56d2 --- /dev/null +++ b/webscraping.ipynb @@ -0,0 +1,71 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import requests as rq" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "True\n", + "200\n", + "b'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n\\n \\n \\n \\n \\n \\n \\n\\n\\n \\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n GitHub - ascendantaditya/pfolio: Thats My Portfolio\\n\\n\\n\\n \\n\\n \\n \\n\\n\\n \\n\\n\\n \\n\\n\\n \\n \\n\\n \\n \\n\\n \\n \\n \\n \\n \\n\\n\\n\\n \\n\\n \\n\\n\\n\\n\\n \\n\\n \\n\\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n\\n\\n \\n\\n\\n\\n \\n\\n\\n \\n \\n \\n \\n\\n \\n\\n \\n\\n \\n\\n \\n\\n\\n\\n \\n \\n\\n\\n \\n\\n \\n\\n \\n\\n \\n \\n \\n\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n
\\n \\n\\n\\n
\\n Skip to content\\n \\n \\n \\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n
\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n\\n\\n\\n
\\n \\n\\n
\\n
\\n \\n \\n \\n\\n \\n\\n \\n\\n
\\n \\n
\\n
\\n\\n\\n
\\n
\\n \\n\\n
\\n \\n\\n\\n\">\\n \\n \\n
\\n \\n \\n\\n
\\n Search or jump to...\\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n
\\n \\n

Search code, repositories, users, issues, pull requests...

\\n
\\n \\n
\\n
\\n \\n
\\n \\n \\n \\n \\n \\n\\n \\n
\\n
\\n
\\n
\\n \\n
\\n
\\n Clear\\n \\n\\n
\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n \\n
\\n
\\n
\\n\\n \\n
\\n
\\n\\n
\\n
\\n
\\n \\n
\\n \\n\\n \\n
\\n
\\n
\\n

\\n Provide feedback\\n

\\n
\\n
\\n \\n
\\n
\\n
\\n \\n
\\n

We read every piece of feedback, and take your input very seriously.

\\n \\n \\n \\n
\\n
\\n \\n
\\n\\n \\n \\n\\n \\n
\\n
\\n
\\n

\\n Saved searches\\n

\\n

Use saved searches to filter your results more quickly

\\n
\\n
\\n \\n
\\n
\\n
\\n \\n
\\n\\n \\n\\n
\\n
\\n
\\n\\n
\\n
\\n \\n
\\n
\\n
\\n\\n\\n\\n \\n\\n \\n Sign up\\n \\n
\\n
\\n
\\n \\n\\n\\n \\n \\n\\n
\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n\\n\\n \\n\\n\\n\\n \\n
\\n\\n\\n \\n \\n\\n\\n\\n\\n\\n\\n \\n
\\n
\\n \\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n \\n
\\n\\n
\\n\\n
\\n \\n
\\n \\n \\n\\n \\n \\n \\n ascendantaditya\\n \\n /\\n \\n pfolio\\n \\n\\n Public\\n
\\n\\n\\n
\\n\\n
\\n \\n\\n
\\n
\\n\\n
\\n
\\n

\\n Thats My Portfolio\\n

\\n
\\n \\n \\n\\n \\n ascendantaditya.github.io/pfolio/\\n \\n
\\n\\n

License

\\n \\n\\n\\n \\n\\n
\\n
\\n \\n
\\n \\n \\n \\n\\n \\n
\\n
\\n\\n
\\n\\n\\n \\n\\n
\\n\\n \\n\\n\\n\\n\\n
\\n \\n\\n\\n \\n \\n

ascendantaditya/pfolio

\\n
\\n
\\n\\n \\n\\n \\n\\n
\\n
\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n
\\n\\n\\n \\n
\\n
\\n\\n
\\n
\\n
\\n
\\n

About

\\n\\n

\\n Thats My Portfolio\\n

\\n
\\n \\n \\n\\n \\n ascendantaditya.github.io/pfolio/\\n \\n
\\n\\n\\n

Resources

\\n \\n\\n

License

\\n \\n\\n\\n\\n\\n \\n \\n\\n \\n\\n\\n

Stars

\\n \\n\\n

Watchers

\\n \\n\\n

Forks

\\n \\n\\n \\n
\\n\\n
\\n
\\n\\n \\n
\\n
\\n

\\n \\n Releases\\n

\\n\\n
No releases published
\\n\\n
\\n
\\n\\n \\n \\n
\\n
\\n

\\n \\n Packages\\n \\n

\\n\\n\\n
\\n No packages published
\\n
\\n\\n\\n\\n
\\n
\\n\\n \\n \\n\\n \\n \\n \\n
\\n
\\n

Languages

\\n
\\n \\n \\n \\n \\n
\\n\\n\\n
\\n
\\n\\n
\\n
\\n \\n
\\n\\n
\\n\\n\\n
\\n\\n
\\n\\n\\n
\\n
\\n\\n \\n\\n
\\n

Footer

\\n\\n \\n\\n\\n
\\n
\\n \\n \\n \\n\\n\\n \\n © 2024 GitHub, Inc.\\n \\n
\\n\\n \\n
\\n
\\n\\n\\n\\n\\n \\n\\n\\n \\n\\n \\n\\n
\\n
\\n
\\n
\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n
\\n
\\n \\n\\n\\n'\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "\n", + "url=\"https://github.com/ascendantaditya/pfolio\"\n", + "response=requests.get(url)\n", + "print(response) \n", + "print(response.ok) #if it is a successful response or not\n", + "print(response.status_code) #gives status code of response. 200 means successful request and 404 means request unsuccessful\n", + "print(response.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}