-
Notifications
You must be signed in to change notification settings - Fork 0
/
subtitle_scraper.py
36 lines (29 loc) · 1.12 KB
/
subtitle_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import requests
import urllib.parse
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display
from subprocess import call
def start_browser(directory):
    """Start a Chrome WebDriver that saves downloads into *directory*.

    A virtual display (pyvirtualdisplay) is started first and Chrome is then
    launched with download preferences that disable the save prompt.

    Args:
        directory: Path used as Chrome's default download directory.

    Returns:
        The ``selenium.webdriver.Chrome`` instance.
    """
    # NOTE: the virtual display is started here and never stopped by this
    # function; it stays up for the lifetime of the process.
    display = Display(visible=0, size=(800, 600))
    display.start()

    options = Options()
    options.add_experimental_option("prefs", {
        "download.default_directory": directory,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    })
    # ``chrome_options=`` was deprecated in Selenium 3.8 and later removed;
    # ``options=`` is the supported keyword.
    return webdriver.Chrome(options=options)
def get_available_subs(query, browser):
    """Search the subtitles site for *query* and list the results found.

    The search page is loaded in *browser*, its rendered source is parsed
    with BeautifulSoup, and the first link inside each ``<table>`` element
    is collected.

    Args:
        query: Search text, sent as the ``src`` query-string parameter.
        browser: WebDriver-like object providing ``get`` and ``page_source``.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dicts, one per table that
        contains a link with an ``href``. ``name`` is the link text with
        newlines removed and surrounding whitespace stripped; ``url`` is the
        raw ``href`` value.
    """
    url = ("http://www.rentanadviser.com/en/subtitles/subtitles4songs.aspx?"
           + urllib.parse.urlencode({'src': query}))
    browser.get(url)
    soup = BeautifulSoup(browser.page_source, 'html5lib')

    subs = []
    for table in soup.find_all('table'):
        link = table.a
        # Tables without a usable link would otherwise raise
        # AttributeError/KeyError and abort the whole lookup.
        if link is None or not link.has_attr('href'):
            continue
        subs.append({
            'name': link.text.replace('\n', '').strip(),
            'url': link['href'],
        })
    return subs
def download_sub(sub, browser):
    """Open a subtitle's page in *browser* and trigger its download postback.

    Args:
        sub: Dict with a ``'url'`` entry holding the subtitle page href
            (relative to the site's subtitles directory, or absolute).
        browser: WebDriver-like object providing ``get`` and
            ``execute_script``.

    Raises:
        KeyError: If *sub* has no ``'url'`` entry.
    """
    # urljoin resolves the href the way a browser would: plain relative
    # hrefs are appended to the base, while root-relative or absolute hrefs
    # are not blindly concatenated into a malformed address.
    url = urllib.parse.urljoin(
        "http://www.rentanadviser.com/en/subtitles/", sub['url'])
    browser.get(url)
    # Fire the page's ASP.NET postback for the subtitle download button.
    browser.execute_script("__doPostBack('ctl00$ContentPlaceHolder1$btnSub','')")