yande.py
# -*- coding: utf-8 -*-
__author__ = 'Eason'

import os
import urllib.parse
from io import BytesIO
from sys import argv

import requests
from PIL import Image
from bs4 import BeautifulSoup

# Config
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}
proxies = {
    "http": "http://127.0.0.1:1080",
    "https": "http://127.0.0.1:1080",
}

def check_proxy(proxy):
    global proxies  # declare global so a failed check can disable the proxy
    print("[Initialization]Checking Proxy...")
    try:
        requests.get(url='https://www.google.com', proxies=proxy, timeout=5)
        print("[Initialization]Proxy test passed...")
    except requests.exceptions.RequestException:
        proxies = None
        print("[Initialization]Proxy test failed...Direct download")

def get_tag(url):
    # pull the 'tags' query parameter out of the search URL
    params = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
    tag = params['tags'][0]
    return tag

def check_tag_dir(tag):
    tag = tag.strip()
    try:
        print("[Initialization]Checking download directory...")
        os.makedirs('./download', mode=0o777, exist_ok=False)
    except FileExistsError:
        print("[Initialization]Download directory check Success")
    try:
        print("[Initialization]Creating folder using tag...")
        os.makedirs(f'./download/{tag}', mode=0o777, exist_ok=False)
    except FileExistsError:
        print("[Initialization]Folder already exists...Start downloading...")
    path = f'./download/{tag}/'
    path = path.strip()  # strip whitespace to avoid path errors
    return path

def get_link(url):
    r = requests.get(url, headers=headers)  # fetch the gallery page
    soup = BeautifulSoup(r.text, features="lxml")
    url_list = soup.find_all(class_="directlink largeimg")
    return url_list

def main():
    if len(argv) == 2:  # use the URL passed on the command line if present
        url = argv[1]
    else:
        url = input('URL:')
    print("Initializing...")
    url = url.strip()  # strip surrounding whitespace from the URL
    check_proxy(proxies)
    url_list = get_link(url)
    tag = get_tag(url)
    path = check_tag_dir(tag)  # make sure the tag folder exists
    count = 0
    total = len(url_list)
    for seq in url_list:
        try:
            response = requests.get(seq.attrs['href'], proxies=proxies)  # download image via proxy
        except requests.exceptions.RequestException:
            response = requests.get(seq.attrs['href'])  # download image directly
        image = Image.open(BytesIO(response.content))
        filename = seq.attrs['href'].split('/')[-1]
        raw_name = urllib.parse.unquote(filename)  # decode the percent-encoded filename
        image.save(path + raw_name)
        count += 1
        process_percentage = '{:.2%}'.format(count / total)
        print("[Downloading", process_percentage, "]:", "Saved", count, 'image(s),', "Total", total, 'image(s) on page')
    print("Done")


if __name__ == '__main__':
    main()
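
# Usage sketch (assumptions: the script is run from a terminal and the page URL
# carries a "tags" query parameter, which get_tag() uses to name the folder):
#   python yande.py "https://yande.re/post?tags=<some_tag>"
# Running it with no argument prompts for the URL instead.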