Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use variable for client schemes, allowing override #467

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# cloudpathlib Changelog

## UNRELEASED

- Added support for custom schemes in CloudPath and Client subclases. (Issue [#466](https://github.com/drivendataorg/cloudpathlib/issues/466), PR [#467](https://github.com/drivendataorg/cloudpathlib/pull/467))

## v0.19.0 (2024-08-29)

- Fixed an error that occurred when loading and dumping `CloudPath` objects using pickle multiple times. (Issue [#450](https://github.com/drivendataorg/cloudpathlib/issues/450), PR [#454](https://github.com/drivendataorg/cloudpathlib/pull/454), thanks to [@kujenga](https://github.com/kujenga))
Expand Down
14 changes: 10 additions & 4 deletions cloudpathlib/azure/azblobclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,14 @@
) -> Iterable[Tuple[AzureBlobPath, bool]]:
if not cloud_path.container:
for container in self.service_client.list_containers():
yield self.CloudPath(f"az://{container.name}"), True
yield self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), True

if not recursive:
continue

yield from self._list_dir(self.CloudPath(f"az://{container.name}"), recursive=True)
yield from self._list_dir(

Check warning on line 284 in cloudpathlib/azure/azblobclient.py

View check run for this annotation

Codecov / codecov/patch

cloudpathlib/azure/azblobclient.py#L284

Added line #L284 was not covered by tests
self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), recursive=True
)
return

container_client = self.service_client.get_container_client(cloud_path.container)
Expand All @@ -295,7 +297,9 @@
paths = file_system_client.get_paths(path=cloud_path.blob, recursive=recursive)

for path in paths:
yield self.CloudPath(f"az://{cloud_path.container}/{path.name}"), path.is_directory
yield self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.container}/{path.name}"
), path.is_directory

else:
if not recursive:
Expand All @@ -306,7 +310,9 @@
for blob in blobs:
# walk_blobs returns folders with a trailing slash
blob_path = blob.name.rstrip("/")
blob_cloud_path = self.CloudPath(f"az://{cloud_path.container}/{blob_path}")
blob_cloud_path = self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.container}/{blob_path}"
)

yield blob_cloud_path, (
isinstance(blob, BlobPrefix)
Expand Down
16 changes: 11 additions & 5 deletions cloudpathlib/gs/gsclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
)

yield from (
(self.CloudPath(f"gs://{str(b)}"), True) for b in self.client.list_buckets()
(self.CloudPath(f"{cloud_path.cloud_prefix}{str(b)}"), True)
for b in self.client.list_buckets()
)
return

Expand All @@ -200,25 +201,30 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
# if we haven't surfaced this directory already
if parent not in yielded_dirs and str(parent) != ".":
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{prefix}{parent}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
),
True, # is a directory
)
yielded_dirs.add(parent)
yield (self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}"), False) # is a file
yield (
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{o.name}"),
False,
) # is a file
else:
iterator = bucket.list_blobs(delimiter="/", prefix=prefix)

# files must be iterated first for `.prefixes` to be populated:
# see: https://github.com/googleapis/python-storage/issues/863
for file in iterator:
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{file.name}"),
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{file.name}"),
False, # is a file
)

for directory in iterator.prefixes:
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{directory}"),
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{directory}"),
True, # is a directory
)

Expand Down
18 changes: 13 additions & 5 deletions cloudpathlib/s3/s3client.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
)

yield from (
(self.CloudPath(f"s3://{b['Name']}"), True)
(self.CloudPath(f"{cloud_path.cloud_prefix}{b['Name']}"), True)
for b in self.client.list_buckets().get("Buckets", [])
)
return
Expand All @@ -241,7 +241,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
canonical = result_prefix.get("Prefix").rstrip("/") # keep a canonical form
if canonical not in yielded_dirs:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
),
True,
)
yielded_dirs.add(canonical)
Expand All @@ -254,7 +256,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
parent_canonical = prefix + str(parent).rstrip("/")
if parent_canonical not in yielded_dirs and str(parent) != ".":
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{parent_canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{parent_canonical}"
),
True,
)
yielded_dirs.add(parent_canonical)
Expand All @@ -267,15 +271,19 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
# s3 fake directories have 0 size and end with "/"
if result_key.get("Key").endswith("/") and result_key.get("Size") == 0:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
),
True,
)
yielded_dirs.add(canonical)

# yield object as file
else:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{result_key.get('Key')}"
),
False,
)

Expand Down
53 changes: 52 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import mimetypes
import os
from pathlib import Path
import random
import string
from pathlib import Path

import pytest

from cloudpathlib import CloudPath
from cloudpathlib.client import register_client_class
from cloudpathlib.cloudpath import implementation_registry, register_path_class
from cloudpathlib.s3.s3client import S3Client
from cloudpathlib.s3.s3path import S3Path


def test_default_client_instantiation(rig):
Expand Down Expand Up @@ -116,3 +121,49 @@ def my_content_type(path):

for suffix, content_type in mimes:
_test_write_content_type(suffix, content_type, rig)


@pytest.fixture
def custom_s3_path():
# A fixture isolates these classes as they modify the global registry of
# implementations.
@register_path_class("mys3")
class MyS3Path(S3Path):
cloud_prefix: str = "mys3://"

@register_client_class("mys3")
class MyS3Client(S3Client):
pass

yield (MyS3Path, MyS3Client)

# cleanup after use
implementation_registry.pop("mys3")


def test_custom_mys3path_instantiation(custom_s3_path):
CustomPath, _ = custom_s3_path

path = CustomPath("mys3://bucket/dir/file.txt")
assert isinstance(path, CustomPath)
assert path.cloud_prefix == "mys3://"
assert path.bucket == "bucket"
assert path.key == "dir/file.txt"


def test_custom_mys3client_instantiation(custom_s3_path):
_, CustomClient = custom_s3_path

client = CustomClient()
assert isinstance(client, CustomClient)
assert client.CloudPath("mys3://bucket/dir/file.txt").cloud_prefix == "mys3://"


def test_custom_mys3client_default_client(custom_s3_path):
_, CustomClient = custom_s3_path

CustomClient().set_as_default_client()

path = CloudPath("mys3://bucket/dir/file.txt")
assert isinstance(path.client, CustomClient)
assert path.cloud_prefix == "mys3://"
Loading