Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add typehints to fsspec.mapping and fsspec.dircache #1396

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/environment-typecheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: test_env
channels:
- conda-forge
dependencies:
- mypy=1.4.1
- mypy=1.6.1
- pyarrow
- python=3.8
- pip
Expand Down
38 changes: 22 additions & 16 deletions fsspec/dircache.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from __future__ import annotations

import time
from collections.abc import MutableMapping
from functools import lru_cache
from typing import Any, Iterator, MutableMapping, TypedDict, TypeAlias, TYPE_CHECKING

if TYPE_CHECKING:
# TODO: consider a more-precise type using TypedDict
DirEntry: TypeAlias = dict[str, Any]


class DirCache(MutableMapping):
class DirCache(MutableMapping[str, list[DirEntry]]):
"""
Caching of directory listings, in a structure like::

Expand All @@ -26,10 +32,10 @@ class DirCache(MutableMapping):

def __init__(
self,
use_listings_cache=True,
listings_expiry_time=None,
max_paths=None,
**kwargs,
use_listings_cache: bool = True,
listings_expiry_time: float | None = None,
max_paths: int | None = None,
**kwargs: Any,
):
"""

Expand All @@ -45,36 +51,36 @@ def __init__(
The number of most recent listings that are considered valid; 'recent'
refers to when the entry was set.
"""
self._cache = {}
self._times = {}
self._cache: dict[str, list[DirEntry]] = {}
self._times: dict[str, float] = {}
if max_paths:
self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
self.use_listings_cache = use_listings_cache
self.listings_expiry_time = listings_expiry_time
self.max_paths = max_paths

def __getitem__(self, item):
def __getitem__(self, item: str) -> list[DirEntry]:
if self.listings_expiry_time is not None:
if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
del self._cache[item]
if self.max_paths:
self._q(item)
return self._cache[item] # maybe raises KeyError

def clear(self):
def clear(self) -> None:
self._cache.clear()

def __len__(self):
def __len__(self) -> int:
return len(self._cache)

def __contains__(self, item):
def __contains__(self, item: object) -> bool:
try:
self[item]
self[item] # type: ignore[index]
Comment on lines +76 to +78
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little unfortunate -- the MutableMappign typehints say that __contrains__ should take all objects, not just the key type, hence the type ignore here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hate this kind of inconsistency :|

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I don't love it either.. it's not great.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can around this by adding an assert isinstance here.

return True
except KeyError:
return False

def __setitem__(self, key, value):
def __setitem__(self, key: str, value: List[DirEntry]) -> None:
if not self.use_listings_cache:
return
if self.max_paths:
Expand All @@ -83,10 +89,10 @@ def __setitem__(self, key, value):
if self.listings_expiry_time is not None:
self._times[key] = time.time()

def __delitem__(self, key):
def __delitem__(self, key: str) -> None:
del self._cache[key]

def __iter__(self):
def __iter__(self) -> Iterator[str]:
entries = list(self._cache)

return (k for k in entries if k in self)
Expand Down
87 changes: 63 additions & 24 deletions fsspec/mapping.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
from __future__ import annotations

import array
import posixpath
import warnings
from collections.abc import MutableMapping
from functools import cached_property
from typing import (
TYPE_CHECKING,
Iterable,
Iterator,
Literal,
MutableMapping,
TypeVar,
overload,
)

from .core import url_to_fs

if TYPE_CHECKING:
from .spec import AbstractFileSystem
from .implementations.dirfs import DirFileSystem

T = TypeVar("T")


class FSMap(MutableMapping):
class FSMap(MutableMapping[str, bytes]):
"""Wrap a FileSystem instance as a mutable wrapping.

The keys of the mapping become files under the given root, and the
Expand Down Expand Up @@ -35,7 +51,14 @@ class FSMap(MutableMapping):
b'Hello World'
"""

def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
def __init__(
self,
root: str,
fs: AbstractFileSystem,
check: bool = False,
create: bool = False,
missing_exceptions: tuple[type[Exception], ...] | None = None,
):
self.fs = fs
self.root = fs._strip_protocol(root).rstrip("/")
self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
Expand All @@ -61,28 +84,44 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None)
self.fs.rm(root + "/a")

@cached_property
def dirfs(self):
def dirfs(self) -> DirFileSystem:
"""dirfs instance that can be used with the same keys as the mapper"""
from .implementations.dirfs import DirFileSystem

return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

def clear(self):
def clear(self) -> None:
"""Remove all keys below root - empties out mapping"""
try:
self.fs.rm(self.root, True)
self.fs.mkdir(self.root)
except: # noqa: E722
pass

def getitems(self, keys, on_error="raise"):
@overload
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit of ugliness to make the output type of getitems dynamic based on on_error

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not just use str and ignore the overload? I don't think this kind of construct is worth it.

def getitems(
self, keys: Iterable[str], on_error: Literal["raise", "omit"] = ...
) -> dict[str, bytes]:
pass

@overload
def getitems(
self, keys: Iterable[str], on_error: Literal["return"]
) -> dict[str, bytes | Exception]:
pass

def getitems(
self,
keys: Iterable[str],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could keep this as a list if we want to add that restriction.

on_error: Literal["raise", "omit", "return"] = "raise",
) -> dict[str, bytes | Exception] | dict[str, bytes]:
"""Fetch multiple items from the store

If the backend is async-able, this might proceed concurrently

Parameters
----------
keys: list(str)
keys: iterable(str)
They keys to be fetched
on_error : "raise", "omit", "return"
If raise, an underlying exception will be raised (converted to KeyError
Expand Down Expand Up @@ -113,7 +152,7 @@ def getitems(self, keys, on_error="raise"):
if on_error == "return" or not isinstance(out[k2], BaseException)
}

def setitems(self, values_dict):
def setitems(self, values_dict: dict[str, bytes]) -> None:
"""Set the values of multiple items in the store

Parameters
Expand All @@ -123,11 +162,11 @@ def setitems(self, values_dict):
values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
self.fs.pipe(values)

def delitems(self, keys):
def delitems(self, keys: Iterable[str]) -> None:
"""Remove multiple keys from the store"""
self.fs.rm([self._key_to_str(k) for k in keys])

def _key_to_str(self, key):
def _key_to_str(self, key: str) -> str:
"""Generate full path for the key"""
if not isinstance(key, str):
# raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
Expand All @@ -140,11 +179,11 @@ def _key_to_str(self, key):
key = str(key)
return f"{self._root_key_to_str}{key}"

def _str_to_key(self, s):
def _str_to_key(self, s: str) -> str:
"""Strip path of to leave key name"""
return s[len(self.root) :].lstrip("/")

def __getitem__(self, key, default=None):
def __getitem__(self, key: str, default: bytes | None = None) -> bytes:
"""Retrieve data"""
k = self._key_to_str(key)
try:
Expand All @@ -155,7 +194,7 @@ def __getitem__(self, key, default=None):
raise KeyError(key)
return result

def pop(self, key, default=None):
def pop(self, key: str, default: bytes | None = None) -> bytes: # type: ignore[override]
"""Pop data"""
result = self.__getitem__(key, default)
try:
Expand All @@ -164,26 +203,26 @@ def pop(self, key, default=None):
pass
return result

def __setitem__(self, key, value):
def __setitem__(self, key: str, value: bytes) -> None:
"""Store value in key"""
key = self._key_to_str(key)
self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
self.fs.pipe_file(key, maybe_convert(value))

def __iter__(self):
def __iter__(self) -> Iterator[str]:
return (self._str_to_key(x) for x in self.fs.find(self.root))

def __len__(self):
def __len__(self) -> int:
return len(self.fs.find(self.root))

def __delitem__(self, key):
def __delitem__(self, key: str) -> None:
"""Remove key"""
try:
self.fs.rm(self._key_to_str(key))
except: # noqa: E722
raise KeyError

def __contains__(self, key):
def __contains__(self, key: str) -> bool: # type: ignore[override]
"""Does key exist in mapping?"""
path = self._key_to_str(key)
return self.fs.exists(path) and self.fs.isfile(path)
Expand All @@ -204,13 +243,13 @@ def maybe_convert(value):


def get_mapper(
url="",
check=False,
create=False,
missing_exceptions=None,
alternate_root=None,
url: str = "",
check: bool = False,
create: bool = False,
missing_exceptions: tuple[type[Exception], ...] | None = None,
alternate_root: str | None = None,
**kwargs,
):
) -> FSMap:
"""Create key-value interface for given URL and options

The URL will be of the form "protocol://location" and point to the root
Expand Down
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,11 @@ exclude = (test.*|conftest)\.py$
[mypy-fsspec.caching]
check_untyped_defs = True

[mypy-fsspec.dircache]
check_untyped_defs = True

[mypy-fsspec.mapping]
check_untyped_defs = True

[mypy-fsspec.utils]
check_untyped_defs = True
Loading