From e65e1c8d2903e2952354365cf6c9455ad6604979 Mon Sep 17 00:00:00 2001 From: Mike Stucka Date: Tue, 8 Oct 2024 15:49:48 -0400 Subject: [PATCH] Patch MD --- warn/scrapers/md.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/warn/scrapers/md.py b/warn/scrapers/md.py index d637089..2803064 100644 --- a/warn/scrapers/md.py +++ b/warn/scrapers/md.py @@ -1,6 +1,7 @@ import logging import re from pathlib import Path +from time import sleep from bs4 import BeautifulSoup @@ -16,6 +17,8 @@ logger = logging.getLogger(__name__) +naptime = 3 + def scrape( data_dir: Path = utils.WARN_DATA_DIR, @@ -42,6 +45,8 @@ def scrape( # Save it to the cache cache.write("md/source.html", html) + sleep(naptime) # Try to stop blocked connections by being less aggressive + # Parse the list of links soup = BeautifulSoup(html, "html.parser") a_list = soup.find_all("a", {"class": "sub"}) @@ -61,6 +66,8 @@ def scrape( # Save it to the cache cache.write(f"md/{href}.html", html) + sleep(naptime) # Try to stop blocked connections by being less aggressive + # Add it to the list html_list.append(html)