diff --git a/upath/core312plus.py b/upath/core312plus.py index 828052a7..c477238c 100644 --- a/upath/core312plus.py +++ b/upath/core312plus.py @@ -131,6 +131,7 @@ class UPath(Path): _fs_cached: AbstractFileSystem pathmod = _flavour = fsspecpathmod + _supports_empty_parts = False def __new__( cls, *args, protocol: str | None = None, **storage_options: Any @@ -274,6 +275,20 @@ def with_segments(self, *pathsegments): **self._storage_options, ) + @classmethod + def _parse_path(cls, path): + if cls._supports_empty_parts: + drv, root, rel = cls._flavour.splitroot(path) + if not root: + parsed = [] + else: + parsed = list(map(sys.intern, rel.split(cls._flavour.sep))) + if parsed[-1] == ".": + parsed[-1] = "" + parsed = [x for x in parsed if x != "."] + return drv, root, parsed + return super()._parse_path(path) + def __str__(self): if self._protocol: return f"{self._protocol}://{self.path}" @@ -367,6 +382,10 @@ def open(self, mode="r", buffering=-1, encoding=None, errors=None, newline=None) return self.fs.open(self.path, mode) # fixme def iterdir(self): + if self._supports_empty_parts and self.parts[-1:] == ("",): + base = self.with_segments(self.anchor, *self._tail[:-1]) + else: + base = self for name in self.fs.listdir(self.path): # fsspec returns dictionaries if isinstance(name, dict): @@ -375,8 +394,8 @@ def iterdir(self): # Yielding a path object for these makes little sense continue # only want the path name with iterdir - _, _, name = name.rpartition(self._flavour.sep) - yield self._make_child_relpath(name) + _, _, name = name.removesuffix("/").rpartition(self._flavour.sep) + yield base._make_child_relpath(name) def _scandir(self): # return os.scandir(self) @@ -427,10 +446,13 @@ def resolve(self, strict: bool = False) -> Self: resolved: list[str] = [] resolvable_parts = _parts[1:] - for part in resolvable_parts: + last_idx = len(resolvable_parts) - 1 + for idx, part in enumerate(resolvable_parts): if part == "..": if resolved: resolved.pop() + if 
self._supports_empty_parts and idx == last_idx: + resolved.append("") elif part != ".": resolved.append(part) diff --git a/upath/implementations/http.py b/upath/implementations/http.py index 9489fb3d..a5ba2013 100644 --- a/upath/implementations/http.py +++ b/upath/implementations/http.py @@ -1,6 +1,5 @@ from __future__ import annotations -import posixpath import sys from urllib.parse import urlunsplit @@ -96,7 +95,10 @@ def path(self) -> str: return urlunsplit(self._url) -if sys.version_info >= (3, 12): +if sys.version_info >= (3, 12): # noqa + from itertools import chain + from urllib.parse import urlsplit + from upath.core312plus import PathOrStr from upath.core312plus import fsspecpathmod from upath.core312plus import strip_upath_protocol @@ -107,17 +109,102 @@ class httppathmod(fsspecpathmod): @staticmethod def join(__path: PathOrStr, *paths: PathOrStr) -> str: - return posixpath.join(*map(strip_upath_protocol, [__path, *paths])) + path = strip_upath_protocol(__path).removesuffix("/") + paths = map(strip_upath_protocol, paths) + sep = httppathmod.sep + for b in paths: + if b.startswith(sep): + path = b + elif not path: + path += b + else: + path += sep + b + return path @staticmethod def splitroot(__path: PathOrStr) -> tuple[str, str, str]: - path = strip_upath_protocol(__path) - return posixpath.splitroot(path) + # path = strip_upath_protocol(__path) + url = urlsplit(__path) + drive = urlunsplit(url._replace(path="", query="", fragment="")) + path = urlunsplit(url._replace(scheme="", netloc="")) + root = "/" if path.startswith("/") else "" + return drive, root, path.removeprefix("/") @staticmethod def splitdrive(__path: PathOrStr) -> tuple[str, str]: path = strip_upath_protocol(__path) - return posixpath.splitdrive(path) - - class HTTPPath(upath.core.UPath): # noqa - pathmod = httppathmod + url = urlsplit(path) + path = urlunsplit(url._replace(scheme="", netloc="")) + drive = urlunsplit(url._replace(path="", query="", fragment="")) + return drive, path + + 
class HTTPPath(upath.core312plus.UPath):  # noqa
    """UPath subclass for HTTP URLs, using ``httppathmod`` as its flavour.

    File/directory checks are derived from the base-class listing: a
    listing that yields at least one entry is treated as a directory,
    an empty or "not a directory" listing as a file.
    """

    pathmod = _flavour = httppathmod
    # Opt in to the empty-part handling guarded by this flag in the base class.
    _supports_empty_parts = True

    @property
    def root(self) -> str:
        """Return the base-class root, or ``"/"`` when that root is empty."""
        base_root = super().root
        return base_root if base_root else "/"

    def __str__(self):
        """Return the string form from the class that follows ``UPath`` in the MRO."""
        # Start the lookup *after* UPath so that UPath.__str__ is skipped.
        beyond_upath = super(upath.core312plus.UPath, self)
        return beyond_upath.__str__()

    def is_file(self):
        """Return True when the base-class listing marks this URL as a file.

        The listing is probed for its first entry only:

        * no entries, or ``NotADirectoryError`` -> True
        * ``FileNotFoundError``                 -> False
        * at least one entry                    -> False
        """
        try:
            next(super().iterdir())
        except FileNotFoundError:
            return False
        except (StopIteration, NotADirectoryError):
            return True
        return False

    def is_dir(self):
        """Return True only when the base-class listing yields an entry.

        An empty listing, ``NotADirectoryError`` and ``FileNotFoundError``
        all result in False.
        """
        try:
            next(super().iterdir())
        except (StopIteration, NotADirectoryError, FileNotFoundError):
            return False
        return True

    def iterdir(self):
        """Yield the entries of the base-class listing.

        Raises:
            NotADirectoryError: the listing is empty or the base class
                raised ``NotADirectoryError``.
            FileNotFoundError: the base class raised ``FileNotFoundError``.

        Both errors are re-raised with ``str(self)`` as their argument.
        """
        entries = iter(super().iterdir())
        try:
            # Pull the first entry eagerly so the errors above surface
            # before anything is yielded.
            first = next(entries)
        except FileNotFoundError:
            raise FileNotFoundError(str(self))
        except (StopIteration, NotADirectoryError):
            raise NotADirectoryError(str(self))
        yield from chain((first,), entries)

    def resolve(
        self: HTTPPath,
        strict: bool = False,
        follow_redirects: bool = True,
    ) -> HTTPPath:
        """Normalise the path and, optionally, follow HTTP redirects.

        Args:
            strict: forwarded unchanged to the base-class ``resolve``.
            follow_redirects: when true, request the URL and return a path
                built from the final response URL.

        Returns:
            The normalised path, or an ``HTTPPath`` for the URL reached
            after redirects.

        Raises:
            FileNotFoundError: both the HEAD and the GET request failed
                their status check.
        """
        # Normalisation always runs first, even when redirects are followed.
        normalized = super().resolve(strict=strict)
        if not follow_redirects:
            return normalized

        # The fsspec filesystem supplies the event loop and the session.
        fs = self.fs
        url = str(self)
        session = sync(fs.loop, fs.set_session)

        # Prefer HEAD if the server allows it; fall back to GET.
        for request in (session.head, session.get):
            response = sync(fs.loop, request, url, allow_redirects=True)
            try:
                response.raise_for_status()
            except Exception as exc:
                # Only a failure of the final (GET) attempt is fatal.
                if request == session.get:
                    raise FileNotFoundError(self) from exc
                continue
            # NOTE(review): the new path is built from the URL string alone.
            return HTTPPath(str(response.url))

        return normalized