Skip to content

Commit

Permalink
commmmmmmmmmmmmmmmmmmmmmmmmmmitted
Browse files Browse the repository at this point in the history
  • Loading branch information
Jhwach authored Jan 30, 2024
1 parent 410de79 commit f9b6e4d
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 0 deletions.
Empty file added basics.ipynb
Empty file.
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Day-2
Flask
Flask-WTF
Flask-SQLAlchemy
beautifulsoup4
pytz
python-dotenv
51 changes: 51 additions & 0 deletions scaper.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}\n",
"url = \"https://www.imdb.com/chart/top/?ref_=nv_mv_250\"\n",
"response = requests.get(url, headers=headers)\n",
"html_content = response.content\n",
"soup = BeautifulSoup(html_content, \"html.parser\")\n",
"movies = soup.find_all(\"div\", class_=\"ipc-title ipc-title--base ipc-title--title ipc-title-link-no-icon ipc-title--on-textPrimary sc-1e00898e-9 jQixeG cli-title\")\n",
"for x in movies:\n",
" print(x.h3.text)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
11 changes: 11 additions & 0 deletions scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import requests
from bs4 import BeautifulSoup

# Fetch the IMDb Top 250 chart page and print the title text of each movie
# entry found in the returned HTML.

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably needed because IMDb rejects the default
# python-requests agent -- confirm before removing.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
url = "https://www.imdb.com/chart/top/?ref_=nv_mv_250"

# Without a timeout requests waits indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page and
# silently printing nothing.
response.raise_for_status()
html_content = response.content
soup = BeautifulSoup(html_content, "html.parser")

# Search on the single class "cli-title" only. Passing the whole multi-class
# string makes BeautifulSoup compare against the exact attribute value, so any
# change to the other class tokens (e.g. "sc-1e00898e-9 jQixeG") would make
# the search return no results.
movies = soup.find_all("div", class_="cli-title")
for x in movies:
    # A matched container without an <h3> would make x.h3 None; skip it
    # rather than crash with AttributeError.
    if x.h3 is not None:
        print(x.h3.text)
71 changes: 71 additions & 0 deletions webscraping.ipynb

Large diffs are not rendered by default.

0 comments on commit f9b6e4d

Please sign in to comment.