Skip to content

Commit

Permalink
Merge pull request #72 from blowekamp/zarr_rechunk_compressor
Browse files Browse the repository at this point in the history
Add compressor option to rechunking
  • Loading branch information
blowekamp authored Aug 31, 2023
2 parents 6432824 + da06e08 commit ae81ec8
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
10 changes: 8 additions & 2 deletions pytools/HedwigZarrImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ def shape(self) -> Tuple[int]:
"""
return self._ome_ngff_multiscale_get_array(0).shape

def rechunk(self, chunk_size: int) -> None:
def rechunk(self, chunk_size: int, compressor=None) -> None:
"""
Change the chunk size of each ZARR array inplace in the pyramid.
The chunk_size is applied to all spatial dimensions, and the other dimensions (CT) are the full size.
The ImageZarrImage needs write access to the ZARR.
:param chunk_size: The size as an integer to resize the chunk sizes.
:param compressor: The output arrays will be written with the provided compressor; if None, the compressor
of the input arrays will be used.
"""

logger.info(f'Processing group: "{self.zarr_group.name}"...')
Expand All @@ -96,13 +100,15 @@ def rechunk(self, chunk_size: int) -> None:
logger.info("Chunks already requested size")
continue

if compressor is None:
compressor = arr.compressor
# copy array to a temp zarr array on file
zarr.copy(
arr,
self.zarr_group,
name=arr_name + ".temp",
chunks=chunks,
compressor=arr.compressor,
compressor=arr.compressor if compressor is None else compressor,
dimension_separator=arr._dimension_separator,
filters=arr.filters,
overwrite=False,
Expand Down
13 changes: 11 additions & 2 deletions pytools/zarr_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
from pytools import __version__
from pytools.HedwigZarrImages import HedwigZarrImages
from numcodecs import Blosc


@click.command()
Expand All @@ -17,14 +18,22 @@
type=click.IntRange(min=1),
help="The size of zarr chunks stored in spatial dimensions.",
)
@click.option(
"--recompress",
is_flag=True,
show_default=True,
default=False,
help="Use the preferred compressor when recompressing.",
)
@click.version_option(__version__)
def main(input_zarr, log_level, chunk_size):
def main(input_zarr, log_level, chunk_size, recompress):
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.getLevelName(log_level))

compressor = Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE)
z = HedwigZarrImages(input_zarr, read_only=False)

for k in z.get_series_keys():
z[k].rechunk(chunk_size)
z[k].rechunk(chunk_size, compressor=compressor if recompress else None)


if __name__ == "__main__":
Expand Down

0 comments on commit ae81ec8

Please sign in to comment.