diff --git a/ci/environment-typecheck.yml b/ci/environment-typecheck.yml index 1b7c482f5..688fe9cc3 100644 --- a/ci/environment-typecheck.yml +++ b/ci/environment-typecheck.yml @@ -2,7 +2,7 @@ name: test_env channels: - conda-forge dependencies: - - mypy=1.4.1 + - mypy=1.6.1 - pyarrow - python=3.8 - pip diff --git a/fsspec/dircache.py b/fsspec/dircache.py index eca19566b..33ecf44b7 100644 --- a/fsspec/dircache.py +++ b/fsspec/dircache.py @@ -1,9 +1,15 @@ +from __future__ import annotations + import time -from collections.abc import MutableMapping from functools import lru_cache +from typing import Any, Iterator, MutableMapping, TypedDict, TypeAlias, TYPE_CHECKING + +if TYPE_CHECKING: + # TODO: consider a more-precise type using TypedDict + DirEntry: TypeAlias = dict[str, Any] -class DirCache(MutableMapping): +class DirCache(MutableMapping[str, list[DirEntry]]): """ Caching of directory listings, in a structure like:: @@ -26,10 +32,10 @@ class DirCache(MutableMapping): def __init__( self, - use_listings_cache=True, - listings_expiry_time=None, - max_paths=None, - **kwargs, + use_listings_cache: bool = True, + listings_expiry_time: float | None = None, + max_paths: int | None = None, + **kwargs: Any, ): """ @@ -45,15 +51,15 @@ def __init__( The number of most recent listings that are considered valid; 'recent' refers to when the entry was set. """ - self._cache = {} - self._times = {} + self._cache: dict[str, list[DirEntry]] = {} + self._times: dict[str, float] = {} if max_paths: self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None)) self.use_listings_cache = use_listings_cache self.listings_expiry_time = listings_expiry_time self.max_paths = max_paths - def __getitem__(self, item): + def __getitem__(self, item: str) -> list[DirEntry]: if self.listings_expiry_time is not None: if self._times.get(item, 0) - time.time() < -self.listings_expiry_time: del self._cache[item] @@ -61,20 +67,20 @@ def __getitem__(self, item): self._q(item) return self._cache[item] # maybe raises KeyError - def clear(self): + def clear(self) -> None: self._cache.clear() - def __len__(self): + def __len__(self) -> int: return len(self._cache) - def __contains__(self, item): + def __contains__(self, item: object) -> bool: try: - self[item] + self[item] # type: ignore[index] return True except KeyError: return False - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: List[DirEntry]) -> None: if not self.use_listings_cache: return if self.max_paths: @@ -83,10 +89,10 @@ def __setitem__(self, key, value): if self.listings_expiry_time is not None: self._times[key] = time.time() - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: del self._cache[key] - def __iter__(self): + def __iter__(self) -> Iterator[str]: entries = list(self._cache) return (k for k in entries if k in self) diff --git a/fsspec/mapping.py b/fsspec/mapping.py index b9822ae17..a8bbaa7e4 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -1,13 +1,29 @@ +from __future__ import annotations + import array import posixpath import warnings -from collections.abc import MutableMapping from functools import cached_property +from typing import ( + TYPE_CHECKING, + Iterable, + Iterator, + Literal, + MutableMapping, + TypeVar, + overload, +) from .core import url_to_fs +if TYPE_CHECKING: + from .spec import AbstractFileSystem + from .implementations.dirfs import DirFileSystem + + T = TypeVar("T") + -class FSMap(MutableMapping): +class FSMap(MutableMapping[str, bytes]): """Wrap a FileSystem instance as a mutable wrapping. The keys of the mapping become files under the given root, and the @@ -35,7 +51,14 @@ class FSMap(MutableMapping): b'Hello World' """ - def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): + def __init__( + self, + root: str, + fs: AbstractFileSystem, + check: bool = False, + create: bool = False, + missing_exceptions: tuple[type[Exception], ...] | None = None, + ): self.fs = fs self.root = fs._strip_protocol(root).rstrip("/") self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] @@ -61,13 +84,13 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None) self.fs.rm(root + "/a") @cached_property - def dirfs(self): + def dirfs(self) -> DirFileSystem: """dirfs instance that can be used with the same keys as the mapper""" from .implementations.dirfs import DirFileSystem return DirFileSystem(path=self._root_key_to_str, fs=self.fs) - def clear(self): + def clear(self) -> None: """Remove all keys below root - empties out mapping""" try: self.fs.rm(self.root, True) @@ -75,14 +98,30 @@ def clear(self): except: # noqa: E722 pass - def getitems(self, keys, on_error="raise"): + @overload + def getitems( + self, keys: Iterable[str], on_error: Literal["raise", "omit"] = ... + ) -> dict[str, bytes]: + pass + + @overload + def getitems( + self, keys: Iterable[str], on_error: Literal["return"] + ) -> dict[str, bytes | Exception]: + pass + + def getitems( + self, + keys: Iterable[str], + on_error: Literal["raise", "omit", "return"] = "raise", + ) -> dict[str, bytes | Exception] | dict[str, bytes]: """Fetch multiple items from the store If the backend is async-able, this might proceed concurrently Parameters ---------- - keys: list(str) + keys: iterable(str) They keys to be fetched on_error : "raise", "omit", "return" If raise, an underlying exception will be raised (converted to KeyError @@ -113,7 +152,7 @@ def getitems(self, keys, on_error="raise"): if on_error == "return" or not isinstance(out[k2], BaseException) } - def setitems(self, values_dict): + def setitems(self, values_dict: dict[str, bytes]) -> None: """Set the values of multiple items in the store Parameters @@ -123,11 +162,11 @@ def setitems(self, values_dict): values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()} self.fs.pipe(values) - def delitems(self, keys): + def delitems(self, keys: Iterable[str]) -> None: """Remove multiple keys from the store""" self.fs.rm([self._key_to_str(k) for k in keys]) - def _key_to_str(self, key): + def _key_to_str(self, key: str) -> str: """Generate full path for the key""" if not isinstance(key, str): # raise TypeError("key must be of type `str`, got `{type(key).__name__}`" @@ -140,11 +179,11 @@ def _key_to_str(self, key): key = str(key) return f"{self._root_key_to_str}{key}" - def _str_to_key(self, s): + def _str_to_key(self, s: str) -> str: """Strip path of to leave key name""" return s[len(self.root) :].lstrip("/") - def __getitem__(self, key, default=None): + def __getitem__(self, key: str, default: bytes | None = None) -> bytes: """Retrieve data""" k = self._key_to_str(key) try: @@ -155,7 +194,7 @@ def __getitem__(self, key, default=None): raise KeyError(key) return result - def pop(self, key, default=None): + def pop(self, key: str, default: bytes | None = None) -> bytes: # type: ignore[override] """Pop data""" result = self.__getitem__(key, default) try: @@ -164,26 +203,26 @@ def pop(self, key, default=None): pass return result - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: bytes) -> None: """Store value in key""" key = self._key_to_str(key) self.fs.mkdirs(self.fs._parent(key), exist_ok=True) self.fs.pipe_file(key, maybe_convert(value)) - def __iter__(self): + def __iter__(self) -> Iterator[str]: return (self._str_to_key(x) for x in self.fs.find(self.root)) - def __len__(self): + def __len__(self) -> int: return len(self.fs.find(self.root)) - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: """Remove key""" try: self.fs.rm(self._key_to_str(key)) except: # noqa: E722 raise KeyError - def __contains__(self, key): + def __contains__(self, key: str) -> bool: # type: ignore[override] """Does key exist in mapping?""" path = self._key_to_str(key) return self.fs.exists(path) and self.fs.isfile(path) @@ -204,13 +243,13 @@ def maybe_convert(value): def get_mapper( - url="", - check=False, - create=False, - missing_exceptions=None, - alternate_root=None, + url: str = "", + check: bool = False, + create: bool = False, + missing_exceptions: tuple[type[Exception], ...] | None = None, + alternate_root: str | None = None, **kwargs, -): +) -> FSMap: """Create key-value interface for given URL and options The URL will be of the form "protocol://location" and point to the root diff --git a/setup.cfg b/setup.cfg index 67467cd20..a8c2e7503 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,5 +57,11 @@ exclude = (test.*|conftest)\.py$ [mypy-fsspec.caching] check_untyped_defs = True +[mypy-fsspec.dircache] +check_untyped_defs = True + +[mypy-fsspec.mapping] +check_untyped_defs = True + [mypy-fsspec.utils] check_untyped_defs = True