script.py

import sqlite3

import requests
from bs4 import BeautifulSoup


def parse_url(url):
    """Parse the page at the given URL and return a list of result strings."""
    results = []
    response = requests.get(url)
    if response.status_code == 200:
        html_code = response.text
        soup = BeautifulSoup(html_code, 'html.parser')
        target_tag_1 = soup.find('div', class_='TT9eCd')  # average rating
        target_tag_2 = soup.find('div', class_='g1rdde')  # number of reviews
        target_tag_3 = soup.find_all(
            'div', class_='ClM7O'
        )  # number of downloads
        if target_tag_1:
            text = target_tag_1.get('aria-label')
            rating = text.split()[1]
            results.append(f"Average rating: {rating}")
        if target_tag_2:
            text = target_tag_2.get_text()
            number = text.split()[0]
            results.append(f"Number of reviews: {number}")
        if target_tag_3:
            for tag in target_tag_3:
                if '+' in tag.get_text():
                    results.append(f"Number of downloads: {tag.get_text()}")
    else:
        print(f"Error while fetching the page. "
              f"Status code: {response.status_code}")
    return results


# Create the SQLite database and the table that stores the parsing results.
db_connection = sqlite3.connect('parse.db')
db_cursor = db_connection.cursor()
db_cursor.execute('''CREATE TABLE IF NOT EXISTS parse (
    domain TEXT,
    link TEXT,
    output TEXT
)''')

# Read the tab-separated input file (the first line is assumed to be a header)
# and write the parsing results for every URL into the database.
file_path = "gplay_urls.txt"
with open(file_path, "r") as file:
    lines = file.read().splitlines()

for line in lines[1:]:
    domain, link = line.split('\t')
    data = parse_url(link)
    string_data = ', '.join(data)
    db_cursor.execute(
        'INSERT INTO parse (domain, link, output) '
        'VALUES (?, ?, ?)', (domain, link, string_data)
    )

db_connection.commit()
db_connection.close()
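

# A minimal verification sketch (an addition, not part of the original script):
# re-open parse.db and print the stored rows to confirm that the inserts worked.
check_connection = sqlite3.connect('parse.db')
check_cursor = check_connection.cursor()
for row in check_cursor.execute('SELECT domain, link, output FROM parse'):
    print(row)  # each row is a (domain, link, output) tuple
check_connection.close()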