Skip to content

Commit

Permalink
Added session creation and reuse in stream_file
Browse files Browse the repository at this point in the history
  • Loading branch information
rgaudin committed Aug 5, 2022
1 parent c9ca07b commit 619066b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).

## [Unreleased]

### Added

- `download.get_session()` to build a new requests Session

### Changed

- `download.stream_file()` accepts a `session` param to use instead of creating one

## [1.7.0] - 2022-08-02

### Added
Expand Down
2 changes: 1 addition & 1 deletion src/zimscraperlib/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.0
1.8.0
17 changes: 13 additions & 4 deletions src/zimscraperlib/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ def _get_retry_adapter(max_retries: Optional[int] = 5) -> requests.adapters.Base
return requests.adapters.HTTPAdapter(max_retries=retries)


def get_session(max_retries: Optional[int] = 5) -> requests.Session:
    """Session to hold cookies and connection pool together

    Arguments -
        max_retries - Forwarded to the retry adapter that serves both http://
                      and https:// URLs of the returned session
    Returns a new requests.Session with that retry adapter mounted"""
    session = requests.Session()
    retry_adapter = _get_retry_adapter(max_retries)
    # A fresh Session already has default adapters on "http://" and "https://",
    # and requests selects the first matching prefix, longest first. A shorter
    # "http" prefix would thus never be picked: replace both defaults instead.
    for prefix in ("http://", "https://"):
        session.mount(prefix, retry_adapter)
    return session


def stream_file(
url: str,
fpath: Optional[pathlib.Path] = None,
Expand All @@ -166,6 +173,7 @@ def stream_file(
only_first_block: Optional[bool] = False,
max_retries: Optional[int] = 5,
headers: Optional[Dict[str, str]] = None,
session: Optional[requests.Session] = None,
) -> Union[int, requests.structures.CaseInsensitiveDict]:
"""Stream data from a URL to either a BytesIO object or a file
Arguments -
Expand All @@ -175,16 +183,17 @@ def stream_file(
proxies - A dict of proxies to be used
https://requests.readthedocs.io/en/master/user/advanced/#proxies
only_first_block - Whether to download only one (first) block
max_retries - Maximum number of retries after which error is raised
max_retries - Maximum number of retries after which error is raised. Does not
apply if using your own session
session - Session object to make the request with. A new one created otherwise
Returns the total number of bytes downloaded and the response headers"""

# if no output option is supplied
if fpath is None and byte_stream is None:
raise ValueError("Either file path or a bytesIO object is needed")

session = requests.Session()
retry_adapter = _get_retry_adapter(max_retries)
session.mount("http", retry_adapter) # tied to http and https
if not session:
session = get_session(max_retries)
resp = session.get(
url,
stream=True,
Expand Down

0 comments on commit 619066b

Please sign in to comment.