Skip to content

Commit

Permalink
Add compressor option to rechunking
Browse files Browse the repository at this point in the history
Enables using the preferred compressor when writing the new chunks.
  • Loading branch information
blowekamp committed Aug 31, 2023
1 parent 6432824 commit da06e08
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
10 changes: 8 additions & 2 deletions pytools/HedwigZarrImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ def shape(self) -> Tuple[int]:
"""
return self._ome_ngff_multiscale_get_array(0).shape

def rechunk(self, chunk_size: int) -> None:
def rechunk(self, chunk_size: int, compressor=None) -> None:
"""
Change the chunk size of each ZARR array inplace in the pyramid.
The chunk_size is applied to all spacial dimension, and other dimension (CT) are the full size.
The ImageZarrImage need write access to the ZARR.
:param chunk_size: The size as an integer to resize the chunk sizes.
:param compressor: The output arrays will be written with the provided compressor, if None then the compressor
of the input arrays will be used.
"""

logger.info(f'Processing group: "{self.zarr_group.name}"...')
Expand All @@ -96,13 +100,15 @@ def rechunk(self, chunk_size: int) -> None:
logger.info("Chunks already requested size")
continue

if compressor is None:
compressor = arr.compressor
# copy array to a temp zarr array on file
zarr.copy(
arr,
self.zarr_group,
name=arr_name + ".temp",
chunks=chunks,
compressor=arr.compressor,
compressor=arr.compressor if compressor is None else compressor,
dimension_separator=arr._dimension_separator,
filters=arr.filters,
overwrite=False,
Expand Down
13 changes: 11 additions & 2 deletions pytools/zarr_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
from pytools import __version__
from pytools.HedwigZarrImages import HedwigZarrImages
from numcodecs import Blosc


@click.command()
Expand All @@ -17,14 +18,22 @@
type=click.IntRange(min=1),
help="The size of zarr chunks stored in spatial dimensions.",
)
@click.option(
"--recompress",
is_flag=True,
show_default=True,
default=False,
help="Use the preferred compressor when recompressing.",
)
@click.version_option(__version__)
def main(input_zarr, log_level, chunk_size):
def main(input_zarr, log_level, chunk_size, recompress):
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.getLevelName(log_level))

compressor = Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE)
z = HedwigZarrImages(input_zarr, read_only=False)

for k in z.get_series_keys():
z[k].rechunk(chunk_size)
z[k].rechunk(chunk_size, compressor=compressor if recompress else None)


if __name__ == "__main__":
Expand Down

0 comments on commit da06e08

Please sign in to comment.