Skip to content

Commit

Permalink
Rework exception handling to properly capture the exception and backo…
Browse files Browse the repository at this point in the history
…ff only when needed
  • Loading branch information
benoit74 committed Nov 22, 2024
1 parent 2267330 commit 1d4b766
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
34 changes: 19 additions & 15 deletions scraper/src/mindtouch2zim/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from kiwixstorage import KiwixStorage, NotFoundError
from pif import get_public_ip
from PIL import Image
from requests.exceptions import HTTPError, RequestException
from urllib3.exceptions import HTTPError as BaseHTTPError
from requests.exceptions import RequestException
from zimscraperlib.download import stream_file
from zimscraperlib.image.optimization import optimize_webp
from zimscraperlib.image.presets import WebpMedium
from zimscraperlib.rewriting.url_rewriting import HttpUrl, ZimPath
from zimscraperlib.zim import Creator

from mindtouch2zim.constants import KNOWN_BAD_ASSETS_REGEX, logger, web_session
from mindtouch2zim.errors import KnownBadAssetFailedError
from mindtouch2zim.utils import backoff_hdlr

SUPPORTED_IMAGE_MIME_TYPES = {
Expand Down Expand Up @@ -77,12 +77,6 @@ def process_asset(
asset_path=asset_path, asset_details=asset_details, creator=creator
)

@backoff.on_exception(
backoff.expo,
RequestException,
max_time=16,
on_backoff=backoff_hdlr,
)
def _process_asset_internal(
self,
asset_path: ZimPath,
Expand All @@ -104,12 +98,9 @@ def _process_asset_internal(
content=asset_content.getvalue(),
)
break # file found and added
except (HTTPError, BaseHTTPError) as exc:
if self.bad_assets_regex and self.bad_assets_regex.findall(
asset_url.value
):
logger.debug(f"Ignoring asset for {asset_url.value}: {exc}")
continue
except KnownBadAssetFailedError as exc:
logger.debug(f"Ignoring known bad asset for {asset_url.value}: {exc}")
except RequestException as exc:

Check warning on line 103 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L102-L103

Added lines #L102 - L103 were not covered by tests
with self.lock:
self.bad_assets_count += 1

Check warning on line 105 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L105

Added line #L105 was not covered by tests
if (
Expand Down Expand Up @@ -239,6 +230,12 @@ def _download_from_online(self, asset_url: HttpUrl) -> BytesIO:
)
return asset_content

@backoff.on_exception(
backoff.expo,
RequestException,
max_time=2, # secs
on_backoff=backoff_hdlr,
)
def get_asset_content(
self, asset_path: ZimPath, asset_url: HttpUrl, *, always_fetch_online: bool
) -> BytesIO:
Expand All @@ -257,7 +254,14 @@ def get_asset_content(
else:
logger.debug(f"Not optimizing, unsupported mime type: {mime_type}")

return self._download_from_online(asset_url=asset_url)
try:
return self._download_from_online(asset_url=asset_url)
except RequestException as exc:

Check warning on line 259 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L257-L259

Added lines #L257 - L259 were not covered by tests
# check this early and raise custom exception to escape backoff (not need to
# loose time on assets which are expected to be bad)
if self.bad_assets_regex and self.bad_assets_regex.findall(asset_url.value):
raise KnownBadAssetFailedError() from exc
raise

Check warning on line 264 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L263-L264

Added lines #L263 - L264 were not covered by tests

def _setup_s3(self):
if not self.s3_url_with_credentials:
Expand Down
6 changes: 6 additions & 0 deletions scraper/src/mindtouch2zim/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ class NoIllustrationFoundError(Exception):
"""An exception raised when no suitable illustration has been found"""

pass


class KnownBadAssetFailedError(Exception):
"""An exception raised when an asset known to be failing, failed as expected"""

pass

0 comments on commit 1d4b766

Please sign in to comment.