From 0d43eb38d2d16ceb94639fc20b49018a2424d549 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 22 Nov 2024 04:12:04 -0800 Subject: [PATCH] Backport PR #1774: (fix): python debugger dask h5 meta array (#1775) Co-authored-by: Ilan Gold --- src/anndata/_io/specs/lazy_methods.py | 6 ++++-- tests/test_io_elementwise.py | 27 +++++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index a34f627e7..0b35c34da 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -150,7 +150,7 @@ def read_h5_array( c if c not in {None, -1} else s for c, s in zip(chunks, shape, strict=True) ) if chunks is not None - else (_DEFAULT_STRIDE,) * len(shape) + else tuple(min(_DEFAULT_STRIDE, s) for s in shape) ) chunk_layout = tuple( @@ -159,7 +159,9 @@ def read_h5_array( ) make_chunk = partial(make_dask_chunk, path, elem_name) - return da.map_blocks(make_chunk, dtype=dtype, chunks=chunk_layout) + return da.map_blocks( + make_chunk, dtype=dtype, chunks=chunk_layout, meta=np.array([]) + ) @_LAZY_REGISTRY.register_read(ZarrArray, IOSpec("array", "0.2.0")) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 3ca5324b8..91a7e3425 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -66,6 +66,7 @@ def store(request, tmp_path) -> H5Group | ZarrGroup: sparse_formats = ["csr", "csc"] SIZE = 2500 +DEFAULT_SHAPE = (SIZE, SIZE * 2) @pytest.fixture(params=sparse_formats) @@ -73,15 +74,17 @@ def sparse_format(request): return request.param -def create_dense_store(store, n_dims: int = 2): - X = np.random.randn(*[SIZE * (i + 1) for i in range(n_dims)]) +def create_dense_store( + store: str, *, shape: tuple[int, ...] = DEFAULT_SHAPE +) -> H5Group | ZarrGroup: + X = np.random.randn(*shape) write_elem(store, "X", X) return store def create_sparse_store( - sparse_format: Literal["csc", "csr"], store: G, shape=(SIZE, SIZE * 2) + sparse_format: Literal["csc", "csr"], store: G, shape=DEFAULT_SHAPE ) -> G: """Returns a store @@ -289,7 +292,7 @@ def test_read_lazy_2d_dask(sparse_format, store): ], ) def test_read_lazy_subsets_nd_dask(store, n_dims, chunks): - arr_store = create_dense_store(store, n_dims) + arr_store = create_dense_store(store, shape=DEFAULT_SHAPE[:n_dims]) X_dask_from_disk = read_elem_as_dask(arr_store["X"], chunks=chunks) X_from_disk = read_elem(arr_store["X"]) assert_equal(X_from_disk, X_dask_from_disk) @@ -317,6 +320,14 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path): assert_equal(X_from_disk, X_dask_from_disk) +def test_undersized_shape_to_default(store: H5Group | ZarrGroup): + shape = (3000, 50) + arr_store = create_dense_store(store, shape=shape) + X_dask_from_disk = read_elem_as_dask(arr_store["X"]) + assert (c < s for c, s in zip(X_dask_from_disk.chunksize, shape)) + assert X_dask_from_disk.shape == shape + + @pytest.mark.parametrize( ("arr_type", "chunks", "expected_chunksize"), [ @@ -329,10 +340,10 @@ def test_read_lazy_h5_cluster(sparse_format, tmp_path): ("csc", (-1, 10), (SIZE, 10)), ("csr", (10, None), (10, SIZE * 2)), ("csc", (None, 10), (SIZE, 10)), - ("csc", (None, None), (SIZE, SIZE * 2)), - ("csr", (None, None), (SIZE, SIZE * 2)), - ("csr", (-1, -1), (SIZE, SIZE * 2)), - ("csc", (-1, -1), (SIZE, SIZE * 2)), + ("csc", (None, None), DEFAULT_SHAPE), + ("csr", (None, None), DEFAULT_SHAPE), + ("csr", (-1, -1), DEFAULT_SHAPE), + ("csc", (-1, -1), DEFAULT_SHAPE), ], ) def test_read_lazy_2d_chunk_kwargs(