From 178c7ce23e7865af8bcb468e6c1f419ccfaa80a5 Mon Sep 17 00:00:00 2001 From: Alan Du Date: Fri, 20 Oct 2023 13:11:16 -0400 Subject: [PATCH 1/5] Update mypy to 1.6.1 --- ci/environment-typecheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/environment-typecheck.yml b/ci/environment-typecheck.yml index 1b7c482f5..688fe9cc3 100644 --- a/ci/environment-typecheck.yml +++ b/ci/environment-typecheck.yml @@ -2,7 +2,7 @@ name: test_env channels: - conda-forge dependencies: - - mypy=1.4.1 + - mypy=1.6.1 - pyarrow - python=3.8 - pip From c5a0d8c9314cfa384c5d9927283c5bc47e0cac74 Mon Sep 17 00:00:00 2001 From: Alan Du Date: Fri, 20 Oct 2023 13:27:29 -0400 Subject: [PATCH 2/5] Add typehints for fsspec.mapping --- fsspec/mapping.py | 87 ++++++++++++++++++++++++++++++++++------------- setup.cfg | 3 ++ 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/fsspec/mapping.py b/fsspec/mapping.py index b9822ae17..be9346f3a 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -1,13 +1,29 @@ +from __future__ import annotations +from typing import ( + TYPE_CHECKING, + MutableMapping, + Literal, + Iterable, + Iterator, + overload, + TypeVar, +) + import array import posixpath import warnings -from collections.abc import MutableMapping from functools import cached_property from .core import url_to_fs +if TYPE_CHECKING: + from .spec import AbstractFileSystem + from .implementations.dirfs import DirFileSystem + + T = TypeVar("T") + -class FSMap(MutableMapping): +class FSMap(MutableMapping[str, bytes]): """Wrap a FileSystem instance as a mutable wrapping. The keys of the mapping become files under the given root, and the @@ -35,7 +51,14 @@ class FSMap(MutableMapping): b'Hello World' """ - def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): + def __init__( + self, + root: str, + fs: AbstractFileSystem, + check: bool = False, + create: bool = False, + missing_exceptions: tuple[type[Exception], ...] | None = None, + ): self.fs = fs self.root = fs._strip_protocol(root).rstrip("/") self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] @@ -61,13 +84,13 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None) self.fs.rm(root + "/a") @cached_property - def dirfs(self): + def dirfs(self) -> DirFileSystem: """dirfs instance that can be used with the same keys as the mapper""" from .implementations.dirfs import DirFileSystem return DirFileSystem(path=self._root_key_to_str, fs=self.fs) - def clear(self): + def clear(self) -> None: """Remove all keys below root - empties out mapping""" try: self.fs.rm(self.root, True) @@ -75,14 +98,30 @@ def clear(self): except: # noqa: E722 pass - def getitems(self, keys, on_error="raise"): + @overload + def getitems( + self, keys: Iterable[str], on_error: Literal["raise", "omit"] = ... + ) -> dict[str, bytes]: + pass + + @overload + def getitems( + self, keys: Iterable[str], on_error: Literal["return"] + ) -> dict[str, bytes | Exception]: + pass + + def getitems( + self, + keys: Iterable[str], + on_error: Literal["raise", "omit", "return"] = "raise", + ) -> dict[str, bytes | Exception] | dict[str, bytes]: """Fetch multiple items from the store If the backend is async-able, this might proceed concurrently Parameters ---------- - keys: list(str) + keys: iterable(str) They keys to be fetched on_error : "raise", "omit", "return" If raise, an underlying exception will be raised (converted to KeyError @@ -113,7 +152,7 @@ def getitems(self, keys, on_error="raise"): if on_error == "return" or not isinstance(out[k2], BaseException) } - def setitems(self, values_dict): + def setitems(self, values_dict: dict[str, bytes]) -> None: """Set the values of multiple items in the store Parameters @@ -123,11 +162,11 @@ def setitems(self, values_dict): values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()} self.fs.pipe(values) - def delitems(self, keys): + def delitems(self, keys: Iterable[str]) -> None: """Remove multiple keys from the store""" self.fs.rm([self._key_to_str(k) for k in keys]) - def _key_to_str(self, key): + def _key_to_str(self, key: str) -> str: """Generate full path for the key""" if not isinstance(key, str): # raise TypeError("key must be of type `str`, got `{type(key).__name__}`" @@ -140,11 +179,11 @@ def _key_to_str(self, key): key = str(key) return f"{self._root_key_to_str}{key}" - def _str_to_key(self, s): + def _str_to_key(self, s: str) -> str: """Strip path of to leave key name""" return s[len(self.root) :].lstrip("/") - def __getitem__(self, key, default=None): + def __getitem__(self, key: str, default: bytes | None = None) -> bytes: """Retrieve data""" k = self._key_to_str(key) try: @@ -155,7 +194,7 @@ def __getitem__(self, key, default=None): raise KeyError(key) return result - def pop(self, key, default=None): + def pop(self, key: str, default: bytes | None = None) -> bytes: # type: ignore[override] """Pop data""" result = self.__getitem__(key, default) try: @@ -164,26 +203,26 @@ def pop(self, key, default=None): pass return result - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: bytes) -> None: """Store value in key""" key = self._key_to_str(key) self.fs.mkdirs(self.fs._parent(key), exist_ok=True) self.fs.pipe_file(key, maybe_convert(value)) - def __iter__(self): + def __iter__(self) -> Iterator[str]: return (self._str_to_key(x) for x in self.fs.find(self.root)) - def __len__(self): + def __len__(self) -> int: return len(self.fs.find(self.root)) - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: """Remove key""" try: self.fs.rm(self._key_to_str(key)) except: # noqa: E722 raise KeyError - def __contains__(self, key): + def __contains__(self, key: str) -> bool: # type: ignore[override] """Does key exist in mapping?""" path = self._key_to_str(key) return self.fs.exists(path) and self.fs.isfile(path) @@ -204,13 +243,13 @@ def maybe_convert(value): def get_mapper( - url="", - check=False, - create=False, - missing_exceptions=None, - alternate_root=None, + url: str = "", + check: bool = False, + create: bool = False, + missing_exceptions: tuple[type[Exception], ...] | None = None, + alternate_root: str | None = None, **kwargs, -): +) -> FSMap: """Create key-value interface for given URL and options The URL will be of the form "protocol://location" and point to the root diff --git a/setup.cfg b/setup.cfg index 67467cd20..7da0ca87e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,5 +57,8 @@ exclude = (test.*|conftest)\.py$ [mypy-fsspec.caching] check_untyped_defs = True +[mypy-fsspec.mapping] +check_untyped_defs = True + [mypy-fsspec.utils] check_untyped_defs = True From fe3efa8c5e1aee27cd91cd7745f7c24842b813cf Mon Sep 17 00:00:00 2001 From: Alan Du Date: Fri, 20 Oct 2023 16:44:09 -0400 Subject: [PATCH 3/5] Add mypy to dircache --- fsspec/dircache.py | 40 ++++++++++++++++++++++++---------------- setup.cfg | 3 +++ 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/fsspec/dircache.py b/fsspec/dircache.py index eca19566b..90b66f893 100644 --- a/fsspec/dircache.py +++ b/fsspec/dircache.py @@ -1,9 +1,17 @@ +from __future__ import annotations import time -from collections.abc import MutableMapping +from typing import MutableMapping, List, TypedDict, Iterator, Any + from functools import lru_cache -class DirCache(MutableMapping): +class DirEntry(TypedDict): + name: str + size: int + type: str + + +class DirCache(MutableMapping[str, List[DirEntry]]): """ Caching of directory listings, in a structure like:: @@ -26,10 +34,10 @@ class DirCache(MutableMapping): def __init__( self, - use_listings_cache=True, - listings_expiry_time=None, - max_paths=None, - **kwargs, + use_listings_cache: bool = True, + listings_expiry_time: float | None = None, + max_paths: int | None = None, + **kwargs: Any, ): """ @@ -45,15 +53,15 @@ def __init__( The number of most recent listings that are considered valid; 'recent' refers to when the entry was set. """ - self._cache = {} - self._times = {} + self._cache: dict[str, list[DirEntry]] = {} + self._times: dict[str, float] = {} if max_paths: self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None)) self.use_listings_cache = use_listings_cache self.listings_expiry_time = listings_expiry_time self.max_paths = max_paths - def __getitem__(self, item): + def __getitem__(self, item: str) -> list[DirEntry]: if self.listings_expiry_time is not None: if self._times.get(item, 0) - time.time() < -self.listings_expiry_time: del self._cache[item] @@ -61,20 +69,20 @@ def __getitem__(self, item): self._q(item) return self._cache[item] # maybe raises KeyError - def clear(self): + def clear(self) -> None: self._cache.clear() - def __len__(self): + def __len__(self) -> int: return len(self._cache) - def __contains__(self, item): + def __contains__(self, item: object) -> bool: try: - self[item] + self[item] # type: ignore[index] return True except KeyError: return False - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: List[DirEntry]) -> None: if not self.use_listings_cache: return if self.max_paths: @@ -83,10 +91,10 @@ def __setitem__(self, key, value): if self.listings_expiry_time is not None: self._times[key] = time.time() - def __delitem__(self, key): + def __delitem__(self, key) -> None: del self._cache[key] - def __iter__(self): + def __iter__(self) -> Iterator[str]: entries = list(self._cache) return (k for k in entries if k in self) diff --git a/setup.cfg b/setup.cfg index 7da0ca87e..a8c2e7503 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,6 +57,9 @@ exclude = (test.*|conftest)\.py$ [mypy-fsspec.caching] check_untyped_defs = True +[mypy-fsspec.dircache] +check_untyped_defs = True + [mypy-fsspec.mapping] check_untyped_defs = True From 4e66956dcc1be3ee5be67d898fe5a8ca93f27ed4 Mon Sep 17 00:00:00 2001 From: Alan Du Date: Fri, 20 Oct 2023 16:58:23 -0400 Subject: [PATCH 4/5] Fix lint errors --- fsspec/dircache.py | 4 ++-- fsspec/mapping.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fsspec/dircache.py b/fsspec/dircache.py index 90b66f893..ae88464fe 100644 --- a/fsspec/dircache.py +++ b/fsspec/dircache.py @@ -1,8 +1,8 @@ from __future__ import annotations -import time -from typing import MutableMapping, List, TypedDict, Iterator, Any +import time from functools import lru_cache +from typing import Any, Iterator, List, MutableMapping, TypedDict class DirEntry(TypedDict): diff --git a/fsspec/mapping.py b/fsspec/mapping.py index be9346f3a..a8bbaa7e4 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -1,19 +1,19 @@ from __future__ import annotations + +import array +import posixpath +import warnings +from functools import cached_property from typing import ( TYPE_CHECKING, - MutableMapping, - Literal, Iterable, Iterator, - overload, + Literal, + MutableMapping, TypeVar, + overload, ) -import array -import posixpath -import warnings -from functools import cached_property - from .core import url_to_fs if TYPE_CHECKING: @@ -114,7 +114,7 @@ def getitems( self, keys: Iterable[str], on_error: Literal["raise", "omit", "return"] = "raise", - ) -> dict[str, bytes | Exception] | dict[str, bytes]: + ) -> dict[str, bytes | Exception] | dict[str, bytes]: """Fetch multiple items from the store If the backend is async-able, this might proceed concurrently From 50f9c6dbfa325d3fbb7793424c3c5797a06512c4 Mon Sep 17 00:00:00 2001 From: Alan Du Date: Fri, 10 Nov 2023 10:25:26 -0500 Subject: [PATCH 5/5] Respond to code review --- fsspec/dircache.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fsspec/dircache.py b/fsspec/dircache.py index ae88464fe..33ecf44b7 100644 --- a/fsspec/dircache.py +++ b/fsspec/dircache.py @@ -2,16 +2,14 @@ import time from functools import lru_cache -from typing import Any, Iterator, List, MutableMapping, TypedDict +from typing import Any, Iterator, MutableMapping, TypedDict, TypeAlias, TYPE_CHECKING +if TYPE_CHECKING: + # TODO: consider a more-precise type using TypedDict + DirEntry: TypeAlias = dict[str, Any] -class DirEntry(TypedDict): - name: str - size: int - type: str - -class DirCache(MutableMapping[str, List[DirEntry]]): +class DirCache(MutableMapping[str, list[DirEntry]]): """ Caching of directory listings, in a structure like:: @@ -91,7 +89,7 @@ def __setitem__(self, key: str, value: List[DirEntry]) -> None: if self.listings_expiry_time is not None: self._times[key] = time.time() - def __delitem__(self, key) -> None: + def __delitem__(self, key: str) -> None: del self._cache[key] def __iter__(self) -> Iterator[str]: