Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DirFileSystem #745

Merged
merged 41 commits into from
Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
31db3d5
Add PrefixFileSystem
lucmos Sep 8, 2021
e42d5a4
Refactor test_directories to remove linux-specific paths
lucmos Sep 22, 2021
d60f884
Fix test_isdir to avoid (failing) folder access in prefix
lucmos Sep 22, 2021
63ef3b6
Assume empty prefix indicates the root
lucmos Sep 22, 2021
f054009
Refactor the `_add_fs_prefix` without using os.path.join
lucmos Sep 22, 2021
f97f9d1
Remove `Union[str, Path]` typing in favor of `str`
lucmos Sep 22, 2021
2a8ffb4
Refactor the `_remove_fs_prefix` without using os.path.relpath
lucmos Sep 22, 2021
507745a
Remove unused import
lucmos Sep 22, 2021
7b938dd
Add prefix on open
lucmos Sep 24, 2021
1685e6b
Add PrefixFileSystem behaviour on open() docstring
lucmos Oct 5, 2021
799da25
Remove checks for Path
lucmos Oct 5, 2021
687d7e8
Add PrefixFileSystem to API docs
lucmos Oct 5, 2021
1b31851
Default to root_marker if prefix is not specified
lucmos Oct 5, 2021
91b09ec
Rename filesystem variable to fs
lucmos Oct 5, 2021
b219d33
Delegate to `fs.makedirs` when overriding `makedirs`
lucmos Oct 5, 2021
24ea21f
Fix `fs.sign` delegation
lucmos Oct 5, 2021
4f78a17
Fix `open` to match parent signature
lucmos Oct 5, 2021
7ee0107
Update fsspec/implementations/prefix.py
lucmos Oct 5, 2021
7d0bbcd
Raise error on `prefix` ill defined
lucmos Oct 6, 2021
72061ba
Remove the `root_marker` not `sep` from user-paths to force them to b…
lucmos Oct 6, 2021
573a41b
Add prefix sanity checks, remove trailing sep
lucmos Oct 6, 2021
5f540b2
Use `sep` and `root_marker` of the wrapped fs
lucmos Oct 6, 2021
b62589f
Refactor utility functions and add tests
lucmos Oct 6, 2021
5c4c7cf
Add test for prefix root
lucmos Oct 6, 2021
52c1482
Fix comment
lucmos Oct 6, 2021
dc2d378
Fix docstring
lucmos Oct 6, 2021
bc2d5bc
Fix pytest parametrization
lucmos Oct 6, 2021
5be29bb
Add tests for current ls behaviour
lucmos Oct 6, 2021
9fc0545
prefixfs: make helpers private
efiop Nov 27, 2021
4d6a788
tests: prefixfs: no root_marker on windows
efiop Nov 28, 2021
b3b80f8
prefixfs: remove incomplete/broken type annotations
efiop Nov 28, 2021
2e85d5d
prefixfs: note that it is experimental
efiop Nov 28, 2021
7b81d65
prefixfs: don't handle protocol
efiop Nov 28, 2021
ac4ffd1
prefixfs: remove unused PrefixBufferedFile
efiop Nov 28, 2021
b1590d4
prefixfs: preserve method signature
efiop Nov 28, 2021
99b0452
prefixfs: open: remove redundant docstring
efiop Nov 28, 2021
15c927c
tests: prefixfs: use posixpath
efiop Nov 28, 2021
c49b112
tests: prefix: remove redundant import
efiop Nov 29, 2021
9398f1f
prefixfs -> dirfs
efiop Dec 16, 2021
47dd357
remove tests
efiop Dec 19, 2021
8b6512c
tests: dirfs: add unit tests
efiop Jan 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ci/environment-win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ dependencies:
- pyftpdlib
- cloudpickle
- pytest
- pytest-asyncio
- pytest-benchmark
- pytest-cov
- pytest-mock
- pytest-vcr
- python-libarchive-c
- numpy
Expand Down
4 changes: 4 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Built-in Implementations
fsspec.implementations.libarchive.LibArchiveFileSystem
fsspec.implementations.dbfs.DatabricksFileSystem
fsspec.implementations.reference.ReferenceFileSystem
fsspec.implementations.dirfs.DirFileSystem

.. autoclass:: fsspec.implementations.ftp.FTPFileSystem
:members: __init__
Expand Down Expand Up @@ -183,6 +184,9 @@ Built-in Implementations
.. autoclass:: fsspec.implementations.reference.ReferenceFileSystem
:members: __init__

.. autoclass:: fsspec.implementations.dirfs.DirFileSystem
:members: __init__

Other Known Implementations
---------------------------

Expand Down
Binary file added fsspec/implementations/.dirfs.py.swp
Binary file not shown.
325 changes: 325 additions & 0 deletions fsspec/implementations/dirfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
from ..asyn import AsyncFileSystem


class DirFileSystem(AsyncFileSystem):
    """Directory prefix filesystem.

    Presents the directory ``path`` of a wrapped filesystem ``fs`` as if it
    were the root: every path passed in is joined onto ``path`` before being
    handed to ``fs``, and paths reported back by ``fs`` are made relative to
    ``path`` again.
    """

    def __init__(self, path, fs, *args, **storage_options):
        """
        Parameters
        ----------
        path: str
            Path to the directory.
        fs: AbstractFileSystem
            An instantiated filesystem to wrap.

        Raises
        ------
        ValueError
            If this instance is asynchronous but ``fs`` has no async
            implementation, or if ``fs`` is async-capable and its
            ``asynchronous`` flag differs from this instance's.
        """
        super().__init__(*args, **storage_options)

        if self.asynchronous and not fs.async_impl:
            raise ValueError("can't use asynchronous with non-async fs")

        if fs.async_impl and self.asynchronous != fs.asynchronous:
            raise ValueError("both dirfs and fs should be in the same sync/async mode")

        self.path = fs._strip_protocol(path)
        self.fs = fs

    # Path translation helpers

    def _join(self, path):
        """Prefix ``path`` with the wrapped directory.

        Parameters
        ----------
        path: str or iterable of str
            Path(s) relative to the wrapped directory.

        Returns
        -------
        str or list of str
            The path(s) as understood by the wrapped filesystem. An empty
            ``path`` maps to the wrapped directory itself; when the wrapped
            directory is empty the path is returned untouched.
        """
        if isinstance(path, str):
            if not self.path:
                return path
            if not path:
                return self.path
            return self.fs.sep.join((self.path, path))
        return [self._join(_path) for _path in path]

    def _relpath(self, path):
        """Strip the wrapped directory prefix from ``path``.

        Parameters
        ----------
        path: str or iterable of str
            Path(s) as reported by the wrapped filesystem.

        Returns
        -------
        str or list of str
            The path(s) relative to the wrapped directory; the wrapped
            directory itself maps to ``""``.

        Raises
        ------
        AssertionError
            If a path does not live under the wrapped directory.
        """
        if isinstance(path, str):
            if not self.path:
                return path
            if path == self.path:
                return ""
            prefix = self.path + self.fs.sep
            assert path.startswith(prefix), f"{path!r} is not under {self.path!r}"
            return path[len(prefix) :]
        return [self._relpath(_path) for _path in path]

    def _relpath_keys(self, mapping):
        """Return a new dict with every key of ``mapping`` made relative."""
        return {self._relpath(key): value for key, value in mapping.items()}

    def _relpath_result(self, ret):
        """Relativize a path collection returned by the wrapped filesystem.

        Dispatches on the type of ``ret`` rather than on the caller's
        ``detail`` flag, so the flag is honoured even when it was passed
        positionally: a dict has its keys relativized, anything else is
        treated as a path or an iterable of paths.
        """
        if isinstance(ret, dict):
            return self._relpath_keys(ret)
        return self._relpath(ret)

    def _relative_entries(self, entries):
        """Return copies of listing ``entries`` with relative ``"name"``.

        The entries are copied before being edited: they belong to the
        wrapped filesystem, which may hand out the very same dicts again
        (e.g. from a listing cache), and must not be modified in place.
        """
        out = []
        for entry in entries:
            entry = entry.copy()
            entry["name"] = self._relpath(entry["name"])
            out.append(entry)
        return out

    def _relative_info(self, info):
        """Return a copy of ``info`` with a relative ``"name"``.

        Anything that is not a dict carrying a ``"name"`` key is returned
        unchanged.
        """
        if not isinstance(info, dict) or "name" not in info:
            return info
        info = info.copy()
        info["name"] = self._relpath(info["name"])
        return info

    # Wrappers below
    #
    # Each method forwards to the method of the same name on ``self.fs``
    # after translating remote paths with ``_join``; results that contain
    # paths are translated back with ``_relpath``. Local paths (``lpath``)
    # are passed through untouched.

    @property
    def sep(self):
        """Path separator of the wrapped filesystem."""
        return self.fs.sep

    async def set_session(self, *args, **kwargs):
        return await self.fs.set_session(*args, **kwargs)

    async def _rm_file(self, path, **kwargs):
        return await self.fs._rm_file(self._join(path), **kwargs)

    def rm_file(self, path, **kwargs):
        return self.fs.rm_file(self._join(path), **kwargs)

    async def _rm(self, path, *args, **kwargs):
        return await self.fs._rm(self._join(path), *args, **kwargs)

    def rm(self, path, *args, **kwargs):
        return self.fs.rm(self._join(path), *args, **kwargs)

    async def _cp_file(self, path1, path2, **kwargs):
        return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs)

    def cp_file(self, path1, path2, **kwargs):
        return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs)

    async def _copy(
        self,
        path1,
        path2,
        *args,
        **kwargs,
    ):
        return await self.fs._copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    def copy(self, path1, path2, *args, **kwargs):
        return self.fs.copy(
            self._join(path1),
            self._join(path2),
            *args,
            **kwargs,
        )

    async def _pipe(self, path, *args, **kwargs):
        return await self.fs._pipe(self._join(path), *args, **kwargs)

    def pipe(self, path, *args, **kwargs):
        return self.fs.pipe(self._join(path), *args, **kwargs)

    async def _cat_file(self, path, *args, **kwargs):
        return await self.fs._cat_file(self._join(path), *args, **kwargs)

    def cat_file(self, path, *args, **kwargs):
        return self.fs.cat_file(self._join(path), *args, **kwargs)

    async def _cat(self, path, *args, **kwargs):
        ret = await self.fs._cat(
            self._join(path),
            *args,
            **kwargs,
        )

        # A dict result is keyed by path; any other result is returned as is.
        if isinstance(ret, dict):
            return self._relpath_keys(ret)

        return ret

    def cat(self, path, *args, **kwargs):
        ret = self.fs.cat(
            self._join(path),
            *args,
            **kwargs,
        )

        # A dict result is keyed by path; any other result is returned as is.
        if isinstance(ret, dict):
            return self._relpath_keys(ret)

        return ret

    async def _put_file(self, lpath, rpath, **kwargs):
        return await self.fs._put_file(lpath, self._join(rpath), **kwargs)

    def put_file(self, lpath, rpath, **kwargs):
        return self.fs.put_file(lpath, self._join(rpath), **kwargs)

    async def _put(
        self,
        lpath,
        rpath,
        *args,
        **kwargs,
    ):
        return await self.fs._put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    def put(self, lpath, rpath, *args, **kwargs):
        return self.fs.put(
            lpath,
            self._join(rpath),
            *args,
            **kwargs,
        )

    async def _get_file(self, rpath, lpath, **kwargs):
        return await self.fs._get_file(self._join(rpath), lpath, **kwargs)

    def get_file(self, rpath, lpath, **kwargs):
        return self.fs.get_file(self._join(rpath), lpath, **kwargs)

    async def _get(self, rpath, *args, **kwargs):
        return await self.fs._get(self._join(rpath), *args, **kwargs)

    def get(self, rpath, *args, **kwargs):
        return self.fs.get(self._join(rpath), *args, **kwargs)

    async def _isfile(self, path):
        return await self.fs._isfile(self._join(path))

    def isfile(self, path):
        return self.fs.isfile(self._join(path))

    async def _isdir(self, path):
        return await self.fs._isdir(self._join(path))

    def isdir(self, path):
        return self.fs.isdir(self._join(path))

    async def _size(self, path):
        return await self.fs._size(self._join(path))

    def size(self, path):
        return self.fs.size(self._join(path))

    async def _exists(self, path):
        return await self.fs._exists(self._join(path))

    def exists(self, path):
        return self.fs.exists(self._join(path))

    async def _info(self, path, **kwargs):
        # Report the name relative to the wrapped directory, like ``_ls``.
        return self._relative_info(await self.fs._info(self._join(path), **kwargs))

    def info(self, path, **kwargs):
        # Report the name relative to the wrapped directory, like ``ls``.
        return self._relative_info(self.fs.info(self._join(path), **kwargs))

    async def _ls(self, path, detail=True, **kwargs):
        ret = await self.fs._ls(self._join(path), detail=detail, **kwargs)
        if detail:
            return self._relative_entries(ret)

        return self._relpath(ret)

    def ls(self, path, detail=True, **kwargs):
        ret = self.fs.ls(self._join(path), detail=detail, **kwargs)
        if detail:
            return self._relative_entries(ret)

        return self._relpath(ret)

    async def _walk(self, path, *args, **kwargs):
        # Only ``root`` is translated; ``dirs`` and ``files`` are yielded
        # exactly as the wrapped filesystem produced them.
        async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    def walk(self, path, *args, **kwargs):
        # Only ``root`` is translated; ``dirs`` and ``files`` are yielded
        # exactly as the wrapped filesystem produced them.
        for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs):
            yield self._relpath(root), dirs, files

    async def _glob(self, path, **kwargs):
        return self._relpath_result(await self.fs._glob(self._join(path), **kwargs))

    def glob(self, path, **kwargs):
        return self._relpath_result(self.fs.glob(self._join(path), **kwargs))

    async def _du(self, path, *args, **kwargs):
        ret = await self.fs._du(self._join(path), *args, **kwargs)
        # A per-path breakdown comes back as a dict keyed by path; anything
        # else (the grand total) is returned unchanged. Checking the result
        # type also covers ``total`` being passed positionally.
        if isinstance(ret, dict):
            return self._relpath_keys(ret)

        return ret

    def du(self, path, *args, **kwargs):
        ret = self.fs.du(self._join(path), *args, **kwargs)
        # A per-path breakdown comes back as a dict keyed by path; anything
        # else (the grand total) is returned unchanged. Checking the result
        # type also covers ``total`` being passed positionally.
        if isinstance(ret, dict):
            return self._relpath_keys(ret)

        return ret

    async def _find(self, path, *args, **kwargs):
        return self._relpath_result(
            await self.fs._find(self._join(path), *args, **kwargs)
        )

    def find(self, path, *args, **kwargs):
        return self._relpath_result(self.fs.find(self._join(path), *args, **kwargs))

    async def _expand_path(self, path, *args, **kwargs):
        return self._relpath(
            await self.fs._expand_path(self._join(path), *args, **kwargs)
        )

    def expand_path(self, path, *args, **kwargs):
        return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs))

    async def _mkdir(self, path, *args, **kwargs):
        return await self.fs._mkdir(self._join(path), *args, **kwargs)

    def mkdir(self, path, *args, **kwargs):
        return self.fs.mkdir(self._join(path), *args, **kwargs)

    async def _makedirs(self, path, *args, **kwargs):
        return await self.fs._makedirs(self._join(path), *args, **kwargs)

    def makedirs(self, path, *args, **kwargs):
        return self.fs.makedirs(self._join(path), *args, **kwargs)

    def rmdir(self, path):
        return self.fs.rmdir(self._join(path))

    def mv_file(self, path1, path2, **kwargs):
        return self.fs.mv_file(
            self._join(path1),
            self._join(path2),
            **kwargs,
        )

    def touch(self, path, **kwargs):
        return self.fs.touch(self._join(path), **kwargs)

    def created(self, path):
        return self.fs.created(self._join(path))

    def modified(self, path):
        return self.fs.modified(self._join(path))

    def sign(self, path, *args, **kwargs):
        return self.fs.sign(self._join(path), *args, **kwargs)

    def __repr__(self):
        return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})"

    def open(
        self,
        path,
        *args,
        **kwargs,
    ):
        return self.fs.open(
            self._join(path),
            *args,
            **kwargs,
        )
Loading