Skip to content

Commit

Permalink
Patch MD
Browse files: browse the repository at this point in the history
  • Loading branch information
stucka committed Oct 8, 2024
1 parent 18a3066 commit e65e1c8
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions warn/scrapers/md.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import re
from pathlib import Path
from time import sleep

from bs4 import BeautifulSoup

Expand All @@ -16,6 +17,8 @@

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Pause length, in seconds, passed to sleep() after each page is saved to the
# cache; the delay is meant to keep the scraper from having connections blocked.
naptime = 3


def scrape(
data_dir: Path = utils.WARN_DATA_DIR,
Expand All @@ -42,6 +45,8 @@ def scrape(
# Save it to the cache
cache.write("md/source.html", html)

sleep(naptime) # Try to stop blocked connections by being less aggressive

# Parse the list of links
soup = BeautifulSoup(html, "html.parser")
a_list = soup.find_all("a", {"class": "sub"})
Expand All @@ -61,6 +66,8 @@ def scrape(
# Save it to the cache
cache.write(f"md/{href}.html", html)

sleep(naptime) # Try to stop blocked connections by being less aggressive

# Add it to the list
html_list.append(html)

Expand Down

0 comments on commit e65e1c8

Please sign in to comment.