Skip to content

Commit

Permalink
upath: working http implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
ap-- committed Oct 11, 2023
1 parent e9380c3 commit 9549db3
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 12 deletions.
28 changes: 25 additions & 3 deletions upath/core312plus.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ class UPath(Path):
_fs_cached: AbstractFileSystem

pathmod = _flavour = fsspecpathmod
_supports_empty_parts = False

def __new__(
cls, *args, protocol: str | None = None, **storage_options: Any
Expand Down Expand Up @@ -274,6 +275,20 @@ def with_segments(self, *pathsegments):
**self._storage_options,
)

@classmethod
def _parse_path(cls, path):
    """Split *path* into a ``(drive, root, parts)`` triple.

    When the class sets ``_supports_empty_parts``, the path is split with
    the class flavour's ``splitroot`` and the remainder is split on the
    flavour's separator. A trailing ``"."`` segment becomes an empty
    string, and every other ``"."`` segment is dropped. If ``splitroot``
    reports no root, the parts list is empty.

    Classes that do not set the flag defer to the parent implementation.
    """
    if not cls._supports_empty_parts:
        return super()._parse_path(path)

    flavour = cls._flavour
    drv, root, rel = flavour.splitroot(path)
    if root:
        segments = [sys.intern(seg) for seg in rel.split(flavour.sep)]
        # A final "." is recorded as an empty part instead of being
        # discarded with the other "." segments below.
        if segments[-1] == ".":
            segments[-1] = ""
        parsed = [seg for seg in segments if seg != "."]
    else:
        parsed = []
    return drv, root, parsed

def __str__(self):
if self._protocol:
return f"{self._protocol}://{self.path}"
Expand Down Expand Up @@ -367,6 +382,10 @@ def open(self, mode="r", buffering=-1, encoding=None, errors=None, newline=None)
return self.fs.open(self.path, mode) # fixme

def iterdir(self):
if self._supports_empty_parts and self.parts[-1:] == ("",):
base = self.with_segments(self.anchor, *self._tail[:-1])
else:
base = self
for name in self.fs.listdir(self.path):
# fsspec returns dictionaries
if isinstance(name, dict):
Expand All @@ -375,8 +394,8 @@ def iterdir(self):
# Yielding a path object for these makes little sense
continue
# only want the path name with iterdir
_, _, name = name.rpartition(self._flavour.sep)
yield self._make_child_relpath(name)
_, _, name = name.removesuffix("/").rpartition(self._flavour.sep)
yield base._make_child_relpath(name)

def _scandir(self):
# return os.scandir(self)
Expand Down Expand Up @@ -427,10 +446,13 @@ def resolve(self, strict: bool = False) -> Self:

resolved: list[str] = []
resolvable_parts = _parts[1:]
for part in resolvable_parts:
last_idx = len(resolvable_parts) - 1
for idx, part in enumerate(resolvable_parts):
if part == "..":
if resolved:
resolved.pop()
if self._supports_empty_parts and idx == last_idx:
resolved.append("")
elif part != ".":
resolved.append(part)

Expand Down
105 changes: 96 additions & 9 deletions upath/implementations/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import posixpath
import sys
from urllib.parse import urlunsplit

Expand Down Expand Up @@ -96,7 +95,10 @@ def path(self) -> str:
return urlunsplit(self._url)


if sys.version_info >= (3, 12):
if sys.version_info >= (3, 12): # noqa
from itertools import chain
from urllib.parse import urlsplit

from upath.core312plus import PathOrStr
from upath.core312plus import fsspecpathmod
from upath.core312plus import strip_upath_protocol
Expand All @@ -107,17 +109,102 @@ class httppathmod(fsspecpathmod):

@staticmethod
def join(__path: PathOrStr, *paths: PathOrStr) -> str:
    """Join path components using ``httppathmod.sep``.

    Every component is first passed through ``strip_upath_protocol``.
    One trailing ``"/"`` is removed from the first component. Each
    following component is appended after a separator, except that a
    component starting with the separator replaces everything joined so
    far, and a component joined onto an empty prefix is used as-is.

    Returns:
        The joined path string.
    """
    path = strip_upath_protocol(__path).removesuffix("/")
    sep = httppathmod.sep
    for segment in map(strip_upath_protocol, paths):
        if segment.startswith(sep) or not path:
            # Absolute segment, or nothing accumulated yet: start over
            # from this segment without inserting a separator.
            path = segment
        else:
            path += sep + segment
    return path

@staticmethod
def splitroot(__path: PathOrStr) -> tuple[str, str, str]:
    """Split a URL into a ``(drive, root, tail)`` triple.

    * ``drive`` is the URL rebuilt from its scheme and netloc only.
    * ``root`` is ``"/"`` when the remaining part starts with a slash,
      otherwise ``""``.
    * ``tail`` is the remaining part (path plus any query and fragment)
      without that leading slash.
    """
    # NOTE(review): the value goes to urlsplit unchanged (it is not
    # passed through strip_upath_protocol) -- presumably callers always
    # hand in a plain string; confirm.
    url = urlsplit(__path)
    drive = urlunsplit(url._replace(path="", query="", fragment=""))
    rest = urlunsplit(url._replace(scheme="", netloc=""))
    if rest.startswith("/"):
        return drive, "/", rest[1:]
    return drive, "", rest

@staticmethod
def splitdrive(__path: PathOrStr) -> tuple[str, str]:
    """Split a URL into a ``(drive, path)`` pair.

    The input is passed through ``strip_upath_protocol`` and then parsed
    with ``urlsplit``. ``drive`` is rebuilt from the scheme and netloc
    only; ``path`` is rebuilt from everything else (path, query and
    fragment).
    """
    # NOTE(review): splitroot parses its argument without calling
    # strip_upath_protocol, while this method strips first -- confirm the
    # difference is intentional.
    url = urlsplit(strip_upath_protocol(__path))
    drive = urlunsplit(url._replace(path="", query="", fragment=""))
    path = urlunsplit(url._replace(scheme="", netloc=""))
    return drive, path

class HTTPPath(upath.core312plus.UPath):  # noqa
    """Path class for HTTP URLs built on ``upath.core312plus.UPath``.

    Parsing is delegated to ``httppathmod`` and the class opts into the
    base class's empty-parts handling via ``_supports_empty_parts``.
    """

    pathmod = _flavour = httppathmod
    _supports_empty_parts = True

    @property
    def root(self) -> str:
        """Return the inherited root, or ``"/"`` when that is empty."""
        inherited = super().root
        return inherited if inherited else "/"

    def __str__(self):
        """Return the string form produced by the class after UPath in the MRO."""
        # Deliberately skips UPath.__str__ by starting the lookup past it.
        return super(upath.core312plus.UPath, self).__str__()

    def is_file(self):
        """Report whether this URL behaves like a file.

        The inherited directory listing is probed for one entry: an empty
        listing or ``NotADirectoryError`` means "file"; a missing resource
        or a non-empty listing means "not a file".
        """
        try:
            next(super().iterdir())
        except FileNotFoundError:
            return False
        except (StopIteration, NotADirectoryError):
            return True
        return False

    def is_dir(self):
        """Report whether this URL behaves like a directory.

        True only when the inherited directory listing yields at least
        one entry; empty listings and lookup errors give False.
        """
        try:
            next(super().iterdir())
        except (StopIteration, NotADirectoryError, FileNotFoundError):
            return False
        return True

    def iterdir(self):
        """Yield the children of this URL.

        Raises:
            NotADirectoryError: the inherited listing is empty or itself
                raises ``NotADirectoryError``.
            FileNotFoundError: the inherited listing raises
                ``FileNotFoundError``.
        """
        entries = iter(super().iterdir())
        # Pull the first entry eagerly so that an empty listing can be
        # reported as "not a directory" instead of yielding nothing.
        try:
            first = next(entries)
        except (StopIteration, NotADirectoryError):
            raise NotADirectoryError(str(self))
        except FileNotFoundError:
            raise FileNotFoundError(str(self))
        yield from chain([first], entries)

    def resolve(
        self: HTTPPath,
        strict: bool = False,
        follow_redirects: bool = True,
    ) -> HTTPPath:
        """Normalize the path and, optionally, follow HTTP redirects.

        Args:
            strict: forwarded to the inherited ``resolve``.
            follow_redirects: when true, request ``str(self)`` and return
                a path for the final response URL.

        Raises:
            FileNotFoundError: both the HEAD and the GET request fail
                ``raise_for_status``.
        """
        resolved_path = super().resolve(strict=strict)
        if not follow_redirects:
            return resolved_path

        fs = self.fs
        url = str(self)
        # Make sure the filesystem has a session to issue requests with.
        session = sync(fs.loop, fs.set_session)
        # Prefer HEAD; fall back to GET when the server rejects HEAD.
        for request in (session.head, session.get):
            response = sync(fs.loop, request, url, allow_redirects=True)
            try:
                response.raise_for_status()
            except Exception as exc:
                if request == session.get:
                    raise FileNotFoundError(self) from exc
                continue
            resolved_path = HTTPPath(str(response.url))
            break

        return resolved_path

0 comments on commit 9549db3

Please sign in to comment.