diff --git a/.flake8 b/.flake8 index 96c8f44..dcd9c96 100644 --- a/.flake8 +++ b/.flake8 @@ -1,14 +1,2 @@ [flake8] -ignore = - # Refers to the max-line length. Let's suppress the error and simply - # let black take care on how it wants to format the lines. - E501, - - # Refers to "line break before/after binary operator". - # Similar to above, let black take care of the formatting. - W503, - W504, - - # black disagrees with flake8, and inserts whitespace - # E203: whitespace before ':' - E203, +ignore = E203, E501, E701, E704, W503, W504 diff --git a/mypy.ini b/mypy.ini index d4c7c85..ca6fba9 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,5 @@ [mypy] exclude = .*flycheck_.* -show_error_codes = True check_untyped_defs = True [mypy-w3lib.*] diff --git a/tox.ini b/tox.ini index e97b62f..1db37bc 100644 --- a/tox.ini +++ b/tox.ini @@ -19,9 +19,9 @@ commands = [testenv:typing] basepython = python3 deps = - # mypy would error if pytest (or its sub) not found + # mypy would error if pytest (or its stub) not found pytest - mypy==1.0.0 + mypy==1.10.0 commands = mypy --strict {posargs: w3lib tests} diff --git a/w3lib/html.py b/w3lib/html.py index 760c0da..1a4bc21 100644 --- a/w3lib/html.py +++ b/w3lib/html.py @@ -4,7 +4,7 @@ import re from html.entities import name2codepoint -from typing import AnyStr, Iterable, Match, Optional, Pattern, Tuple, Union +from typing import Iterable, Match, Optional, Pattern, Tuple, Union from urllib.parse import urljoin from w3lib._types import StrOrBytes @@ -34,7 +34,7 @@ def replace_entities( - text: AnyStr, + text: StrOrBytes, keep: Iterable[str] = (), remove_illegal: bool = True, encoding: str = "utf-8", @@ -99,11 +99,13 @@ def convert_entity(m: Match[str]) -> str: return _ent_re.sub(convert_entity, to_unicode(text, encoding)) -def has_entities(text: AnyStr, encoding: Optional[str] = None) -> bool: +def has_entities(text: StrOrBytes, encoding: Optional[str] = None) -> bool: return bool(_ent_re.search(to_unicode(text, encoding))) -def 
replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None) -> str: +def replace_tags( + text: StrOrBytes, token: str = "", encoding: Optional[str] = None +) -> str: """Replace all markup tags found in the given `text` by the given token. By default `token` is an empty string so it just removes all tags. @@ -129,7 +131,7 @@ def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None) _REMOVECOMMENTS_RE = re.compile("<!--.*?(?:-->|$)", re.DOTALL) -def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str: +def remove_comments(text: StrOrBytes, encoding: Optional[str] = None) -> str: """Remove HTML Comments. >>> import w3lib.html @@ -144,7 +146,7 @@ def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str: def remove_tags( - text: AnyStr, + text: StrOrBytes, which_ones: Iterable[str] = (), keep: Iterable[str] = (), encoding: Optional[str] = None, @@ -216,7 +218,7 @@ def remove_tag(m: Match[str]) -> str: def remove_tags_with_content( - text: AnyStr, which_ones: Iterable[str] = (), encoding: Optional[str] = None + text: StrOrBytes, which_ones: Iterable[str] = (), encoding: Optional[str] = None ) -> str: """Remove tags and their content. @@ -240,7 +242,7 @@ def remove_tags_with_content( def replace_escape_chars( - text: AnyStr, + text: StrOrBytes, which_ones: Iterable[str] = ("\n", "\t", "\r"), replace_by: StrOrBytes = "", encoding: Optional[str] = None, @@ -262,7 +264,7 @@ def replace_escape_chars( def unquote_markup( - text: AnyStr, + text: StrOrBytes, keep: Iterable[str] = (), remove_illegal: bool = True, encoding: Optional[str] = None, @@ -304,7 +306,7 @@ def _get_fragments( def get_base_url( - text: AnyStr, baseurl: StrOrBytes = "", encoding: str = "utf-8" + text: StrOrBytes, baseurl: StrOrBytes = "", encoding: str = "utf-8" ) -> str: """Return the base url if declared in the given HTML `text`, relative to the given base url. 
@@ -324,7 +326,7 @@ def get_base_url( def get_meta_refresh( - text: AnyStr, + text: StrOrBytes, baseurl: str = "", encoding: str = "utf-8", ignore_tags: Iterable[str] = ("script", "noscript"), diff --git a/w3lib/http.py b/w3lib/http.py index bdb3f66..8409d86 100644 --- a/w3lib/http.py +++ b/w3lib/http.py @@ -1,12 +1,30 @@ from base64 import b64encode -from typing import Any, AnyStr, List, Mapping, MutableMapping, Optional, Sequence, Union - +from typing import ( + Any, + List, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + overload, +) + +from w3lib._types import StrOrBytes from w3lib.util import to_bytes, to_unicode HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]] HeadersDictOutput = MutableMapping[bytes, List[bytes]] +@overload +def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ... + + +@overload +def headers_raw_to_dict(headers_raw: None) -> None: ... + + def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOutput]: r""" Convert raw headers (single multi-line bytestring) @@ -52,6 +70,14 @@ def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOut return result_dict +@overload +def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ... + + +@overload +def headers_dict_to_raw(headers_dict: None) -> None: ... 
+ + def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[bytes]: r""" Returns a raw HTTP headers representation of headers @@ -85,7 +111,7 @@ def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[by def basic_auth_header( - username: AnyStr, password: AnyStr, encoding: str = "ISO-8859-1" + username: StrOrBytes, password: StrOrBytes, encoding: str = "ISO-8859-1" ) -> bytes: """ Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_ diff --git a/w3lib/url.py b/w3lib/url.py index 52cf6ad..28e70cb 100644 --- a/w3lib/url.py +++ b/w3lib/url.py @@ -19,6 +19,7 @@ Tuple, Union, cast, + overload, ) from urllib.parse import _coerce_args # type: ignore from urllib.parse import ( @@ -221,6 +222,24 @@ def is_url(text: str) -> bool: return text.partition("://")[0] in ("file", "http", "https") +@overload +def url_query_parameter( + url: StrOrBytes, + parameter: str, + default: None = None, + keep_blank_values: Union[bool, int] = 0, +) -> Optional[str]: ... + + +@overload +def url_query_parameter( + url: StrOrBytes, + parameter: str, + default: str, + keep_blank_values: Union[bool, int] = 0, +) -> str: ... + + def url_query_parameter( url: StrOrBytes, parameter: str,