Skip to content

Commit

Permalink
Ignore rewriting errors of special libretexts.org pages
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Nov 22, 2024
1 parent 2c3810d commit 56f998d
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions scraper/src/mindtouch2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,18 +514,28 @@ def _process_page(
# 3A_An_Introduction_to_Geology_(Johnson_Affolter_Inkenbrandt_and_Mosher)/zz
# %3A_Back_Matter/20%3A_Glossary
# same kind of pattern works for glossary, index, ... pages
if re.match(r"^.*\/zz:_[^\/]*?\/10:_[^\/]*$", page.path):
rewriten = rewrite_index(
rewriter=rewriter,
jinja2_template=self.libretexts_index_template,
mindtouch_client=self.mindtouch_client,
page=page,
)
elif re.match(r"^.*\/zz:_[^\/]*?\/20:_[^\/]*$", page.path):
rewriten = rewrite_glossary(
jinja2_template=self.libretexts_glossary_template,
original_content=page_content.html_body,
try:

Check warning on line 517 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L517

Added line #L517 was not covered by tests
if re.match(r"^.*\/zz:_[^\/]*?\/10:_[^\/]*$", page.path):
rewriten = rewrite_index(

Check warning on line 519 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L519

Added line #L519 was not covered by tests
rewriter=rewriter,
jinja2_template=self.libretexts_index_template,
mindtouch_client=self.mindtouch_client,
page=page,
)
elif re.match(r"^.*\/zz:_[^\/]*?\/20:_[^\/]*$", page.path):
rewriten = rewrite_glossary(

Check warning on line 526 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L526

Added line #L526 was not covered by tests
jinja2_template=self.libretexts_glossary_template,
original_content=page_content.html_body,
)
except Exception as exc:

Check warning on line 530 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L530

Added line #L530 was not covered by tests
# code has been tested to work in general, but many edge cases occur,
# and since these pages are absolutely not essential, we just display a
# warning and store an empty page
logger.warning(

Check warning on line 534 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L534

Added line #L534 was not covered by tests
f"Problem processing special page ID {page.id} ({page.encoded_url})"
f", page is probably empty, storing empty page: {exc}"
)
return ""

Check warning on line 538 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L538

Added line #L538 was not covered by tests
if not rewriten:
# Default rewriting for 'normal' pages
rewriten = rewriter.rewrite(page_content.html_body).content
Expand Down

0 comments on commit 56f998d

Please sign in to comment.