From f5073d13a5c6769a9fcb5b424b666529e1c64364 Mon Sep 17 00:00:00 2001 From: bzorn Date: Tue, 23 Jul 2024 20:47:01 +0300 Subject: [PATCH] nhentai parser now uses the base mirror --- nlightreader/consts/urls.py | 3 +- .../parsers/hentai_manga/nhentai_hmanga.py | 56 ++++++++----------- 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/nlightreader/consts/urls.py b/nlightreader/consts/urls.py index 5d0434c..08393fc 100644 --- a/nlightreader/consts/urls.py +++ b/nlightreader/consts/urls.py @@ -21,8 +21,7 @@ URL_REMANGA = "https://remanga.org" URL_REMANGA_API = "https://remanga.org/api" -URL_NHENTAI = "https://nhentai.to" -URL_NHENTAI_API = "https://nhentai.net/api" +URL_NHENTAI = "https://nhentai.net" URL_ALLHENTAI = "https://20.allhen.online" URL_ALLHENTAI_API = "" diff --git a/nlightreader/parsers/hentai_manga/nhentai_hmanga.py b/nlightreader/parsers/hentai_manga/nhentai_hmanga.py index 2bc9712..12c7869 100644 --- a/nlightreader/parsers/hentai_manga/nhentai_hmanga.py +++ b/nlightreader/parsers/hentai_manga/nhentai_hmanga.py @@ -1,4 +1,5 @@ -from bs4 import BeautifulSoup +import validators +from bs4 import BeautifulSoup, element from nlightreader.consts.urls import URL_NHENTAI from nlightreader.models import Chapter, Image, Manga @@ -13,7 +14,6 @@ class NHentai(AbstractHentaiMangaCatalog): def __init__(self): super().__init__() self.url = URL_NHENTAI - self.url_api = URL_NHENTAI_API def search_manga(self, form): url = f"{self.url}/search" @@ -35,19 +35,26 @@ def search_manga(self, form): caption_tag = i.find("div", class_="caption") if caption_tag is not None: name = i.find("div", class_="caption").text - cover_tag = i.find("a", {"class": "cover"}) + cover_tag: element.Tag = i.find("a", {"class": "cover"}) if cover_tag is not None: manga_id = cover_tag["href"].split("/")[-2] if not manga_id: continue - mangas.append( - Manga( - manga_id, - self.CATALOG_ID, - name, - "", - ), + + manga = Manga( + manga_id, + self.CATALOG_ID, + name, + "", ) + + if (noscript_img_tag := cover_tag.find("noscript")) and ( + img_tag := noscript_img_tag.find("img") + ): + src = img_tag.get("src") + if validators.url(src): + manga.preview_url = src + mangas.append(manga) return mangas def get_chapters(self, manga: Manga): @@ -71,14 +78,8 @@ def get_images(self, manga: Manga, chapter: Chapter): for i in html_items: img_tag = i.find("img", class_="") img_url: str = img_tag["src"] - for img_format in ["png", "jpg", "gif"]: - if img_url.endswith(f"t.{img_format}"): - img_url = img_url.replace( - f"t.{img_format}", - f".{img_format}", - 1, - ) - break + if not validators.url(img_url): + continue images.append(Image("", html_items.index(i) + 1, img_url)) return images @@ -93,24 +94,11 @@ def get_image(self, image: Image): ) def get_preview(self, manga: Manga): - url = f"{self.url}/g/{manga.content_id}" - response = get_html( - url, + return get_html( + manga.preview_url, headers=self.headers, - content_type="text", + content_type="content", ) - if response: - soup = BeautifulSoup(response, "html.parser") - if html_item := soup.find("div", id="cover"): - if img_tag := html_item.find("img"): - img_request_headers = self.headers | { - "Referer": URL_NHENTAI, - } - return get_html( - img_tag["src"], - content_type="content", - headers=img_request_headers, - ) def get_manga_url(self, manga: Manga) -> str: return f"{self.url}/g/{manga.content_id}"