Merge pull request #226 from scrapy/typing-improvements
Typing improvements.
wRAR authored Jun 5, 2024
2 parents f41b216 + 3b600b1 commit a48cbc7
Showing 6 changed files with 63 additions and 29 deletions.
14 changes: 1 addition & 13 deletions .flake8
@@ -1,14 +1,2 @@
[flake8]
ignore =
# Refers to the max-line length. Let's suppress the error and simply
# let black take care on how it wants to format the lines.
E501,

# Refers to "line break before/after binary operator".
# Similar to above, let black take care of the formatting.
W503,
W504,

# black disagrees with flake8, and inserts whitespace
# E203: whitespace before ':'
E203,
ignore = E203, E501, E701, E704, W503, W504
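Worth noting: compared with the old block, the consolidated `ignore` line adds E701 ("multiple statements on one line (colon)") and E704 ("statement on same line as def"). Black's 2024 style collapses functions whose body is just `...` onto a single line, which is exactly the shape of the `@overload` stubs introduced later in this commit, so that is presumably why those two codes join the list. A minimal sketch of the pattern (hypothetical names, not part of this commit):

```python
from typing import Optional, overload


# Black keeps these stub definitions on one line; without E704 (and E701)
# in the ignore list, flake8 would flag each of them.
@overload
def decode_or_none(value: bytes) -> str: ...
@overload
def decode_or_none(value: None) -> None: ...
def decode_or_none(value: Optional[bytes]) -> Optional[str]:
    return value.decode("utf-8") if value is not None else None
```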
1 change: 0 additions & 1 deletion mypy.ini
@@ -1,6 +1,5 @@
[mypy]
exclude = .*flycheck_.*
show_error_codes = True
check_untyped_defs = True

[mypy-w3lib.*]
2 changes: 1 addition & 1 deletion tox.ini
@@ -19,7 +19,7 @@ commands =
[testenv:typing]
basepython = python3
deps =
# mypy would error if pytest (or its sub) not found
# mypy would error if pytest (or its stub) not found
pytest
mypy==1.10.0
commands =
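For context on the corrected comment: mypy stops with an import error when it cannot find a module (or stubs for it) that the checked code imports, and the test suite imports `pytest`, so the typing environment installs pytest itself alongside mypy. A small illustration of the kind of test module mypy has to resolve (file and test names are made up; the asserted behaviour follows `replace_tags`'s documented semantics):

```python
# e.g. tests/test_html.py -- illustrative only
import pytest

from w3lib.html import replace_tags


@pytest.mark.parametrize("token", ["", " -- "])
def test_replace_tags(token: str) -> None:
    # Without pytest installed in the typing env, mypy would stop at the
    # `import pytest` line above instead of checking this function.
    result = replace_tags("<b>Click</b> here", token=token)
    assert result == f"{token}Click{token} here"
```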
24 changes: 13 additions & 11 deletions w3lib/html.py
@@ -4,7 +4,7 @@

import re
from html.entities import name2codepoint
from typing import AnyStr, Iterable, Match, Optional, Pattern, Tuple, Union
from typing import Iterable, Match, Optional, Pattern, Tuple, Union
from urllib.parse import urljoin

from w3lib._types import StrOrBytes
@@ -34,7 +34,7 @@


def replace_entities(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: str = "utf-8",
@@ -99,11 +99,13 @@ def convert_entity(m: Match[str]) -> str:
return _ent_re.sub(convert_entity, to_unicode(text, encoding))


def has_entities(text: AnyStr, encoding: Optional[str] = None) -> bool:
def has_entities(text: StrOrBytes, encoding: Optional[str] = None) -> bool:
return bool(_ent_re.search(to_unicode(text, encoding)))


def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None) -> str:
def replace_tags(
text: StrOrBytes, token: str = "", encoding: Optional[str] = None
) -> str:
"""Replace all markup tags found in the given `text` by the given token.
By default `token` is an empty string so it just removes all tags.
@@ -129,7 +131,7 @@ def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None)
_REMOVECOMMENTS_RE = re.compile("<!--.*?(?:-->|$)", re.DOTALL)


def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:
def remove_comments(text: StrOrBytes, encoding: Optional[str] = None) -> str:
"""Remove HTML Comments.
>>> import w3lib.html
@@ -144,7 +146,7 @@ def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:


def remove_tags(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = (),
keep: Iterable[str] = (),
encoding: Optional[str] = None,
@@ -216,7 +218,7 @@ def remove_tag(m: Match[str]) -> str:


def remove_tags_with_content(
text: AnyStr, which_ones: Iterable[str] = (), encoding: Optional[str] = None
text: StrOrBytes, which_ones: Iterable[str] = (), encoding: Optional[str] = None
) -> str:
"""Remove tags and their content.
@@ -240,7 +242,7 @@ def remove_tags_with_content(


def replace_escape_chars(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = ("\n", "\t", "\r"),
replace_by: StrOrBytes = "",
encoding: Optional[str] = None,
@@ -262,7 +264,7 @@ def replace_escape_chars(


def unquote_markup(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: Optional[str] = None,
@@ -304,7 +306,7 @@ def _get_fragments(


def get_base_url(
text: AnyStr, baseurl: StrOrBytes = "", encoding: str = "utf-8"
text: StrOrBytes, baseurl: StrOrBytes = "", encoding: str = "utf-8"
) -> str:
"""Return the base url if declared in the given HTML `text`,
relative to the given base url.
@@ -324,7 +326,7 @@ def get_base_url(


def get_meta_refresh(
text: AnyStr,
text: StrOrBytes,
baseurl: str = "",
encoding: str = "utf-8",
ignore_tags: Iterable[str] = ("script", "noscript"),
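A note on the recurring `AnyStr` → `StrOrBytes` change in this file, assuming `StrOrBytes` is the plain `Union[str, bytes]` alias exported by `w3lib._types`: `AnyStr` is a constrained TypeVar, which only earns its keep when it appears at least twice in a signature, tying one argument to another or to the return type. These helpers decode their input and return `str` no matter what was passed in, so the TypeVar was never reused and a plain union states the contract more directly. A sketch of the difference (the function is a stand-in, not the real implementation):

```python
from typing import AnyStr, Optional, Union

StrOrBytes = Union[str, bytes]  # assumed to mirror w3lib._types.StrOrBytes


# Old style: AnyStr binds to either str or bytes for a given call, but no
# other part of the signature refers to it, so the binding buys nothing.
def remove_comments_old(text: AnyStr, encoding: Optional[str] = None) -> str:
    return ""


# New style: the union says directly that str or bytes is accepted and that
# the result is always str, with no type-variable machinery involved.
def remove_comments_new(text: StrOrBytes, encoding: Optional[str] = None) -> str:
    return ""
```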
32 changes: 29 additions & 3 deletions w3lib/http.py
@@ -1,12 +1,30 @@
from base64 import b64encode
from typing import Any, AnyStr, List, Mapping, MutableMapping, Optional, Sequence, Union

from typing import (
Any,
List,
Mapping,
MutableMapping,
Optional,
Sequence,
Union,
overload,
)

from w3lib._types import StrOrBytes
from w3lib.util import to_bytes, to_unicode

HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]


@overload
def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ...


@overload
def headers_raw_to_dict(headers_raw: None) -> None: ...


def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOutput]:
r"""
Convert raw headers (single multi-line bytestring)
@@ -52,6 +70,14 @@ def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOut
return result_dict


@overload
def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ...


@overload
def headers_dict_to_raw(headers_dict: None) -> None: ...


def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[bytes]:
r"""
Returns a raw HTTP headers representation of headers
@@ -85,7 +111,7 @@ def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[by


def basic_auth_header(
username: AnyStr, password: AnyStr, encoding: str = "ISO-8859-1"
username: StrOrBytes, password: StrOrBytes, encoding: str = "ISO-8859-1"
) -> bytes:
"""
Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_
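Two caller-visible effects of this file's changes, sketched below on the assumption that runtime behaviour is untouched. First, each `@overload` pair lets a type checker pick the non-`Optional` return type whenever the argument is known not to be `None`. Second, `basic_auth_header` no longer forces `username` and `password` to be the same type: `AnyStr` binds both parameters to a single type per call, while `StrOrBytes` is an ordinary union.

```python
from w3lib.http import basic_auth_header, headers_raw_to_dict

raw = b"Content-Type: text/html\r\nContent-Length: 42"
headers = headers_raw_to_dict(raw)
# The bytes -> HeadersDictOutput overload applies, so mypy no longer
# requires an `is not None` check before the dict is indexed.
content_type = headers[b"Content-Type"]  # [b"text/html"]

# The None -> None overload keeps the passthrough case precisely typed.
nothing = headers_raw_to_dict(None)

# Under AnyStr, mixing a str username with a bytes password was rejected
# by the type checker; under StrOrBytes the call below type-checks.
auth = basic_auth_header("someuser", b"somepass")
```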
19 changes: 19 additions & 0 deletions w3lib/url.py
@@ -19,6 +19,7 @@
Tuple,
Union,
cast,
overload,
)
from urllib.parse import _coerce_args # type: ignore
from urllib.parse import (
@@ -221,6 +222,24 @@ def is_url(text: str) -> bool:
return text.partition("://")[0] in ("file", "http", "https")


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: None = None,
keep_blank_values: Union[bool, int] = 0,
) -> Optional[str]: ...


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: str,
keep_blank_values: Union[bool, int] = 0,
) -> str: ...


def url_query_parameter(
url: StrOrBytes,
parameter: str,
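The new `url_query_parameter` overloads encode the usual default-dependent contract: with no `default` (or an explicit `None`) the result is `Optional[str]`, while a `str` default guarantees a `str`. A usage sketch (the URL is made up; the blank-value behaviour follows the function's documented `keep_blank_values` handling):

```python
from w3lib.url import url_query_parameter

url = "https://example.com/search?q=w3lib&page="

q = url_query_parameter(url, "q")        # Optional[str]; here "w3lib"
lang = url_query_parameter(url, "lang")  # Optional[str]; here None

# With a str default the second overload applies and mypy infers plain str.
# "page=" is blank and blank values are dropped unless keep_blank_values
# is true, so the default is what comes back here.
page = url_query_parameter(url, "page", default="1")
```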
