Skip to content

Commit

Permalink
Consistent glob behaviour for "**" patterns (#143)
Browse files Browse the repository at this point in the history
* tests: parametrize glob tests

* upath: fix glob behavior

* upath.core: move fsspec glob check to rglob
  • Loading branch information
ap-- authored Sep 20, 2023
1 parent 2c350bb commit 0102132
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 7 deletions.
36 changes: 32 additions & 4 deletions upath/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@
"UPath",
]

# Cached result of the fsspec glob probe: None until the probe has run,
# afterwards the bool stored by _check_fsspec_has_working_glob().
_FSSPEC_HAS_WORKING_GLOB = None


def _check_fsspec_has_working_glob():
    """Probe fsspec's glob and cache the outcome in the module-level flag.

    Two empty files are created on a throwaway in-memory filesystem, one at
    the top level and one inside a sub-directory, and ``**/*.txt`` is
    globbed. The glob is considered working when exactly both files match.

    Returns:
        bool: the value that was stored in ``_FSSPEC_HAS_WORKING_GLOB``.
    """
    global _FSSPEC_HAS_WORKING_GLOB
    from fsspec.implementations.memory import MemoryFileSystem

    # A one-off subclass carrying its own ``store`` / ``pseudo_dirs``
    # attributes -- presumably so the probe files stay out of the storage
    # used by regular MemoryFileSystem instances (verify against fsspec).
    probe_cls = type(
        "_M",
        (MemoryFileSystem,),
        {"store": {}, "pseudo_dirs": [""]},
    )
    probe_fs = probe_cls()
    for probe_file in ("a.txt", "f/b.txt"):
        probe_fs.touch(probe_file)

    matches = probe_fs.glob("**/*.txt")
    _FSSPEC_HAS_WORKING_GLOB = len(matches) == 2
    return _FSSPEC_HAS_WORKING_GLOB


class _FSSpecAccessor:
__slots__ = ("_fs",)
Expand Down Expand Up @@ -377,14 +390,29 @@ def glob(self: PT, pattern: str) -> Generator[PT, None, None]:
yield self._make_child(name)

def rglob(self: PT, pattern: str) -> Generator[PT, None, None]:
# Recursively glob `pattern` below this path, yielding child path objects.
# NOTE(review): this is a rendered diff without +/- markers; the next four
# lines appear to be the previous implementation that the commit removes
# (the header reports 4 deletions) -- confirm against the repository.
path_pattern = self.joinpath(pattern)
r_path_pattern = self.joinpath("**", pattern)
for p in (path_pattern, r_path_pattern):
for name in self._accessor.glob(self, p):
# Run the fsspec glob probe only once; it stores its result in the
# module-level flag, which is None until then.
if _FSSPEC_HAS_WORKING_GLOB is None:
_check_fsspec_has_working_glob()

# Probe succeeded: a single "**"-prefixed pattern is globbed.
if _FSSPEC_HAS_WORKING_GLOB:
r_path_pattern = self.joinpath("**", pattern)
for name in self._accessor.glob(self, r_path_pattern):
name = self._sub_path(name)
name = name.split(self._flavour.sep)
yield self._make_child(name)

# Probe failed: glob both the direct pattern and the "**"-prefixed one,
# and track already-yielded paths by their `parts` so that a path matched
# by both patterns is yielded only once.
else:
path_pattern = self.joinpath(pattern)
r_path_pattern = self.joinpath("**", pattern)
seen = set()
for p in (path_pattern, r_path_pattern):
for name in self._accessor.glob(self, p):
name = self._sub_path(name)
name = name.split(self._flavour.sep)
pth = self._make_child(name)
if pth.parts not in seen:
yield pth
seen.add(pth.parts)

def _sub_path(self, name):
# only want the path name with iterdir
sp = self._path
Expand Down
21 changes: 18 additions & 3 deletions upath/tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from pathlib import Path

import pytest
from fsspec import __version__ as fsspec_version
from fsspec import filesystem
from packaging.version import Version

from upath import UPath

Expand Down Expand Up @@ -41,9 +43,22 @@ def test_expanduser(self):
with pytest.raises(NotImplementedError):
self.path.expanduser()

def test_glob(self, pathlib_base):
mock_glob = list(self.path.glob("**.txt"))
path_glob = list(pathlib_base.glob("**/*.txt"))
@pytest.mark.parametrize(
"pattern",
(
"*.txt",
"*",
pytest.param(
"**/*.txt",
marks=pytest.mark.xfail()
if Version(fsspec_version) < Version("2023.9.0")
else (),
),
),
)
def test_glob(self, pathlib_base, pattern):
mock_glob = list(self.path.glob(pattern))
path_glob = list(pathlib_base.glob(pattern))

_mock_start = len(self.path.parts)
mock_glob_normalized = sorted([a.parts[_mock_start:] for a in mock_glob])
Expand Down

0 comments on commit 0102132

Please sign in to comment.