Merge pull request #226 from scrapy/typing-improvements
Typing improvements.
wRAR authored Jun 5, 2024
2 parents f41b216 + 3b600b1 commit a48cbc7
Showing 6 changed files with 63 additions and 29 deletions.
14 changes: 1 addition & 13 deletions .flake8
@@ -1,14 +1,2 @@
[flake8]
ignore =
# Refers to the max-line length. Let's suppress the error and simply
# let black take care on how it wants to format the lines.
E501,

# Refers to "line break before/after binary operator".
# Similar to above, let black take care of the formatting.
W503,
W504,

# black disagrees with flake8, and inserts whitespace
# E203: whitespace before ':'
E203,
ignore = E203, E501, E701, E704, W503, W504
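Worth noting: compared with the old block, the consolidated `ignore` line adds E701 ("multiple statements on one line (colon)") and E704 ("statement on same line as def"). Black's 2024 style collapses functions whose body is just `...` onto a single line, which is exactly the shape of the `@overload` stubs introduced later in this commit, so that is presumably why those two codes join the list. A minimal sketch of the pattern (hypothetical names, not part of this commit):

```python
from typing import Optional, overload


# Black keeps these stub definitions on one line; without E704 (and E701)
# in the ignore list, flake8 would flag each of them.
@overload
def decode_or_none(value: bytes) -> str: ...
@overload
def decode_or_none(value: None) -> None: ...
def decode_or_none(value: Optional[bytes]) -> Optional[str]:
    return value.decode("utf-8") if value is not None else None
```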
1 change: 0 additions & 1 deletion mypy.ini
@@ -1,6 +1,5 @@
[mypy]
exclude = .*flycheck_.*
show_error_codes = True
check_untyped_defs = True

[mypy-w3lib.*]
2 changes: 1 addition & 1 deletion tox.ini
@@ -19,7 +19,7 @@ commands =
[testenv:typing]
basepython = python3
deps =
# mypy would error if pytest (or its sub) not found
# mypy would error if pytest (or its stub) not found
pytest
mypy==1.10.0
commands =
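For context on the corrected comment: mypy stops with an import error when it cannot find a module (or stubs for it) that the checked code imports, and the test suite imports `pytest`, so the typing environment installs pytest itself alongside mypy. A small illustration of the kind of test module mypy has to resolve (file and test names are made up; the asserted behaviour follows `replace_tags`'s documented semantics):

```python
# e.g. tests/test_html.py -- illustrative only
import pytest

from w3lib.html import replace_tags


@pytest.mark.parametrize("token", ["", " -- "])
def test_replace_tags(token: str) -> None:
    # Without pytest installed in the typing env, mypy would stop at the
    # `import pytest` line above instead of checking this function.
    result = replace_tags("<b>Click</b> here", token=token)
    assert result == f"{token}Click{token} here"
```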
24 changes: 13 additions & 11 deletions w3lib/html.py
@@ -4,7 +4,7 @@

import re
from html.entities import name2codepoint
from typing import AnyStr, Iterable, Match, Optional, Pattern, Tuple, Union
from typing import Iterable, Match, Optional, Pattern, Tuple, Union
from urllib.parse import urljoin

from w3lib._types import StrOrBytes
@@ -34,7 +34,7 @@


def replace_entities(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: str = "utf-8",
@@ -99,11 +99,13 @@ def convert_entity(m: Match[str]) -> str:
return _ent_re.sub(convert_entity, to_unicode(text, encoding))


def has_entities(text: AnyStr, encoding: Optional[str] = None) -> bool:
def has_entities(text: StrOrBytes, encoding: Optional[str] = None) -> bool:
return bool(_ent_re.search(to_unicode(text, encoding)))


def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None) -> str:
def replace_tags(
text: StrOrBytes, token: str = "", encoding: Optional[str] = None
) -> str:
"""Replace all markup tags found in the given `text` by the given token.
By default `token` is an empty string so it just removes all tags.
@@ -129,7 +131,7 @@ def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None)
_REMOVECOMMENTS_RE = re.compile("<!--.*?(?:-->|$)", re.DOTALL)


def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:
def remove_comments(text: StrOrBytes, encoding: Optional[str] = None) -> str:
"""Remove HTML Comments.
>>> import w3lib.html
@@ -144,7 +146,7 @@ def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:


def remove_tags(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = (),
keep: Iterable[str] = (),
encoding: Optional[str] = None,
@@ -216,7 +218,7 @@ def remove_tag(m: Match[str]) -> str:


def remove_tags_with_content(
text: AnyStr, which_ones: Iterable[str] = (), encoding: Optional[str] = None
text: StrOrBytes, which_ones: Iterable[str] = (), encoding: Optional[str] = None
) -> str:
"""Remove tags and their content.
@@ -240,7 +242,7 @@ def remove_tags_with_content(


def replace_escape_chars(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = ("\n", "\t", "\r"),
replace_by: StrOrBytes = "",
encoding: Optional[str] = None,
@@ -262,7 +264,7 @@ def replace_escape_chars(


def unquote_markup(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: Optional[str] = None,
@@ -304,7 +306,7 @@ def _get_fragments(


def get_base_url(
text: AnyStr, baseurl: StrOrBytes = "", encoding: str = "utf-8"
text: StrOrBytes, baseurl: StrOrBytes = "", encoding: str = "utf-8"
) -> str:
"""Return the base url if declared in the given HTML `text`,
relative to the given base url.
@@ -324,7 +326,7 @@ def get_base_url(


def get_meta_refresh(
text: AnyStr,
text: StrOrBytes,
baseurl: str = "",
encoding: str = "utf-8",
ignore_tags: Iterable[str] = ("script", "noscript"),
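A note on the recurring `AnyStr` → `StrOrBytes` change in this file, assuming `StrOrBytes` is the plain `Union[str, bytes]` alias exported by `w3lib._types`: `AnyStr` is a constrained TypeVar, which only earns its keep when it appears at least twice in a signature, tying one argument to another or to the return type. These helpers decode their input and return `str` no matter what was passed in, so the TypeVar was never reused and a plain union states the contract more directly. A sketch of the difference (the function is a stand-in, not the real implementation):

```python
from typing import AnyStr, Optional, Union

StrOrBytes = Union[str, bytes]  # assumed to mirror w3lib._types.StrOrBytes


# Old style: AnyStr binds to either str or bytes for a given call, but no
# other part of the signature refers to it, so the binding buys nothing.
def remove_comments_old(text: AnyStr, encoding: Optional[str] = None) -> str:
    return ""


# New style: the union says directly that str or bytes is accepted and that
# the result is always str, with no type-variable machinery involved.
def remove_comments_new(text: StrOrBytes, encoding: Optional[str] = None) -> str:
    return ""
```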
32 changes: 29 additions & 3 deletions w3lib/http.py
@@ -1,12 +1,30 @@
from base64 import b64encode
from typing import Any, AnyStr, List, Mapping, MutableMapping, Optional, Sequence, Union

from typing import (
Any,
List,
Mapping,
MutableMapping,
Optional,
Sequence,
Union,
overload,
)

from w3lib._types import StrOrBytes
from w3lib.util import to_bytes, to_unicode

HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]


@overload
def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ...


@overload
def headers_raw_to_dict(headers_raw: None) -> None: ...


def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOutput]:
r"""
Convert raw headers (single multi-line bytestring)
@@ -52,6 +70,14 @@ def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOut
return result_dict


@overload
def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ...


@overload
def headers_dict_to_raw(headers_dict: None) -> None: ...


def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[bytes]:
r"""
Returns a raw HTTP headers representation of headers
@@ -85,7 +111,7 @@ def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[by


def basic_auth_header(
username: AnyStr, password: AnyStr, encoding: str = "ISO-8859-1"
username: StrOrBytes, password: StrOrBytes, encoding: str = "ISO-8859-1"
) -> bytes:
"""
Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_
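Two caller-visible effects of this file's changes, sketched below on the assumption that runtime behaviour is untouched. First, each `@overload` pair lets a type checker pick the non-`Optional` return type whenever the argument is known not to be `None`. Second, `basic_auth_header` no longer forces `username` and `password` to be the same type: `AnyStr` binds both parameters to a single type per call, while `StrOrBytes` is an ordinary union.

```python
from w3lib.http import basic_auth_header, headers_raw_to_dict

raw = b"Content-Type: text/html\r\nContent-Length: 42"
headers = headers_raw_to_dict(raw)
# The bytes -> HeadersDictOutput overload applies, so mypy no longer
# requires an `is not None` check before the dict is indexed.
content_type = headers[b"Content-Type"]  # [b"text/html"]

# The None -> None overload keeps the passthrough case precisely typed.
nothing = headers_raw_to_dict(None)

# Under AnyStr, mixing a str username with a bytes password was rejected
# by the type checker; under StrOrBytes the call below type-checks.
auth = basic_auth_header("someuser", b"somepass")
```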
19 changes: 19 additions & 0 deletions w3lib/url.py
@@ -19,6 +19,7 @@
Tuple,
Union,
cast,
overload,
)
from urllib.parse import _coerce_args # type: ignore
from urllib.parse import (
@@ -221,6 +222,24 @@ def is_url(text: str) -> bool:
return text.partition("://")[0] in ("file", "http", "https")


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: None = None,
keep_blank_values: Union[bool, int] = 0,
) -> Optional[str]: ...


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: str,
keep_blank_values: Union[bool, int] = 0,
) -> str: ...


def url_query_parameter(
url: StrOrBytes,
parameter: str,
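The new `url_query_parameter` overloads encode the usual default-dependent contract: with no `default` (or an explicit `None`) the result is `Optional[str]`, while a `str` default guarantees a `str`. A usage sketch (the URL is made up; the blank-value behaviour follows the function's documented `keep_blank_values` handling):

```python
from w3lib.url import url_query_parameter

url = "https://example.com/search?q=w3lib&page="

q = url_query_parameter(url, "q")        # Optional[str]; here "w3lib"
lang = url_query_parameter(url, "lang")  # Optional[str]; here None

# With a str default the second overload applies and mypy infers plain str.
# "page=" is blank and blank values are dropped unless keep_blank_values
# is true, so the default is what comes back here.
page = url_query_parameter(url, "page", default="1")
```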
