-
Notifications
You must be signed in to change notification settings - Fork 20
/
Movies & Series Scraper.py
137 lines (108 loc) · 5.48 KB
/
Movies & Series Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import requests
from bs4 import BeautifulSoup
import webbrowser
import sys
# Ask which mode to run and keep asking until the answer is 1, 2 or 3.
# The chosen value is stored in `option` for the dispatch that follows.
while True:
    try:
        option = int(input("What do you want to watch ?(enter 1 or 2 or 3)\n1- Series\n2- Movie\n3- Download a whole season\n"))
    except ValueError:
        # Non-numeric input (including an empty line) makes int() raise;
        # treat it like any other invalid choice and ask again.
        print("please enter 1 or 2 or 3")
        continue
    if 1 <= option <= 3:
        break
    print("please enter 1 or 2 or 3")
def _abort(message):
    """Show *message*, wait for Enter so the console stays open, then exit."""
    input(message + "\nPress any key to exit")
    sys.exit()


def _ask_number(prompt, low, high=None):
    """Prompt until the user types an integer >= *low* (and <= *high* if given).

    Returns the validated integer. Non-numeric or out-of-range answers are
    rejected with a hint instead of raising ValueError / IndexError later.
    """
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            print("please enter a number")
            continue
        if value >= low and (high is None or value <= high):
            return value
        if high is None:
            print("please enter a number not less than " + str(low))
        else:
            print("please enter a number between " + str(low) + " and " + str(high))


def _get_soup(url):
    """Download *url* and return its body parsed with the lxml parser."""
    return BeautifulSoup(requests.get(url).content, "lxml")


def _pick_search_result(soup, question, prompt):
    """List the search hits found in *soup* and return the href the user picks.

    Hits are the first <a> of every div with class "Thumb--GridItem"; entries
    without an anchor or href are skipped. Exits via _abort() when the search
    returned nothing to choose from.
    """
    hits = []
    for item in soup.find_all("div", {"class": "Thumb--GridItem"}):
        anchor = item.find("a")
        if anchor is not None and anchor.get("href"):
            hits.append(anchor)
    if not hits:
        _abort("No results were found.")
    print(question)
    for number, anchor in enumerate(hits, start=1):
        # Fall back to the URL when the anchor carries no title attribute.
        print(str(number) + "- " + (anchor.get("title") or anchor.get("href")))
    choice = _ask_number(prompt, 1, len(hits))
    return hits[choice - 1].get("href")


def _watch_link(soup):
    """Return the "data-lazy-src" of the iframe named "watch", or None."""
    frame = soup.find("iframe", {"name": "watch"})
    if frame is None:
        return None
    return frame.get("data-lazy-src")


if option == 1:  # Watch a series
    # str methods return a new string, so the stripped value must be kept.
    series_name = input("Enter series name: ").strip()
    soup = _get_soup("https://mycima.tube/search/" + series_name + "/list/series")
    series_link = _pick_search_result(
        soup,
        "Which series do you want to watch?",
        "Enter the number corresponding to your wanted series: ",
    )
    soup = _get_soup(series_link)
    # A series page either lists its seasons or, when that list is absent,
    # exposes the episodes directly.
    seasons_box = soup.find("div", {"class": "List--Seasons--Episodes"})
    if seasons_box is None:
        print("There is only one season available")
        episodes_box = soup.find("div", {"class": "Seasons--Episodes"})
    else:
        seasons_links = seasons_box.find_all("a")
        if not seasons_links:
            _abort("No seasons were found for this series.")
        season = _ask_number(
            "There are " + str(len(seasons_links)) + " Seasons, Enter your desired season number: ",
            1,
            len(seasons_links),
        )
        soup = _get_soup(seasons_links[season - 1].get("href"))
        episodes_box = soup.find("div", {"class": "Episodes--Seasons--Episodes"})
    if episodes_box is None:
        _abort("No episodes were found for this series.")
    episodes_links = episodes_box.find_all("a")
    if not episodes_links:
        _abort("No episodes were found for this series.")
    # NOTE(review): presumably the page lists the newest episode first, so
    # reversing makes index N-1 correspond to episode N - confirm on the site.
    episodes_links.reverse()
    print("There are " + str(len(episodes_links)) + " episodes")
    ep1 = _ask_number("Enter episode number (from): ", 1, len(episodes_links))
    ep2 = _ask_number("Enter episode number (to): ", ep1, len(episodes_links))
    # Open from the last requested episode down to the first, as before.
    for episode_number in range(ep2, ep1 - 1, -1):
        soup = _get_soup(episodes_links[episode_number - 1].get("href"))
        watching_link = _watch_link(soup)
        if watching_link is None:
            print("No watch link found for episode " + str(episode_number))
            continue
        # The player page itself is not downloaded here; only the browser
        # needs it (the old extra requests.get() result was never used).
        webbrowser.open(watching_link)
elif option == 2:  # Watch a movie
    movie_name = input("Enter movie name: ").strip()
    soup = _get_soup("https://mycima.tube/search/" + movie_name)
    movie_link = _pick_search_result(
        soup,
        "Which Movie do you want to watch?",
        "Enter the number corresponding to your wanted movie: ",
    )
    watching_link = _watch_link(_get_soup(movie_link))
    if watching_link is None:
        _abort("No watch link was found for this movie.")
    webbrowser.open(watching_link)
else:  # Download a whole season
    series_name = input("Enter series name: ").strip().replace(" ", "-")
    season = _ask_number("Enter Season number: ", 1)
    season_text = str(season)
    base_url = "https://mycima.tube/series/"
    # The site uses several slug layouts for season pages. The percent-encoded
    # parts are the Arabic words for "series" (%d9%85%d8%b3%d9%84%d8%b3%d9%84)
    # and "season" (%d9%85%d9%88%d8%b3%d9%85). Try each layout in turn.
    candidate_urls = [
        base_url + "%d9%85%d8%b3%d9%84%d8%b3%d9%84-" + series_name + "-%d9%85%d9%88%d8%b3%d9%85-" + season_text + "-",
        base_url + "%D9%85%D9%88%D8%B3%D9%85-" + season_text + "-%D9%85%D8%B3%D9%84%D8%B3%D9%84-" + series_name,
        base_url + "%d9%85%d9%88%d8%b3%d9%85-" + season_text + "-" + series_name,
        base_url + series_name + "-%d9%85%d9%88%d8%b3%d9%85-" + season_text,
    ]
    for url in candidate_urls:
        html_page = requests.get(url)
        if html_page.status_code != 404:
            break  # first layout that is not "not found" wins
    soup = BeautifulSoup(html_page.content, "lxml")
    download_list = soup.find("ul", {"class": "Season--Download--Mycima--Single"})
    unavailable = "This feature is not available for " + series_name + " season " + season_text + "! "
    if download_list is None:
        _abort(unavailable)
    links = [anchor.get("href") for anchor in download_list.find_all("a")]
    if not links:
        _abort(unavailable)
    qualities = soup.find_all("resolution")
    # Number the choices by download link so every printed number is valid,
    # even if the page has fewer <resolution> labels than links.
    for number, link in enumerate(links, start=1):
        if number <= len(qualities):
            label = qualities[number - 1].text
        else:
            label = str(link)
        print(str(number) + "- " + label)
    quality = _ask_number("Enter your desired quality: ", 1, len(links))
    webbrowser.open(links[quality - 1])
input("Enter any key to exit: ")