diff --git a/CHANGELOG.md b/CHANGELOG.md index b251f3f5..18aeefef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove the `ndarray` dev dependency - Remove the `ndarray` dependency for the `sharding` feature - Replace deprecated `tempdir` with `tempfile` for tests + - **Breaking**: Change `ByteRange` enum to have `FromStart` and `FromEnd` variants ## [0.2.0] - 2023-09-25 diff --git a/Cargo.toml b/Cargo.toml index db2ef94e..1bc2f0e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "zarrs" description = "A library for the Zarr V3 storage format for multidimensional arrays and metadata" -version = "0.2.1" +version = "0.3.0" authors = ["Lachlan Deakin "] edition = "2021" license = "MIT OR Apache-2.0" diff --git a/src/array/codec.rs b/src/array/codec.rs index e0f488e6..a626481c 100644 --- a/src/array/codec.rs +++ b/src/array/codec.rs @@ -189,7 +189,7 @@ pub trait BytesPartialDecoderTraits: Send + Sync { /// Returns [`CodecError`] if a codec fails. fn decode(&self, decoded_representation: &BytesRepresentation) -> Result, CodecError> { Ok(self - .partial_decode(decoded_representation, &[ByteRange::All])? + .partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])? .remove(0)) } @@ -203,7 +203,7 @@ pub trait BytesPartialDecoderTraits: Send + Sync { decoded_representation: &BytesRepresentation, ) -> Result, CodecError> { Ok(self - .par_partial_decode(decoded_representation, &[ByteRange::All])? + .par_partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])? .remove(0)) } } @@ -453,26 +453,26 @@ fn extract_byte_ranges_rs( let mut out = Vec::with_capacity(byte_ranges.len()); for byte_range in byte_ranges { let data: Vec = match byte_range { - ByteRange::All => { - bytes.seek(SeekFrom::Start(0))?; + ByteRange::FromStart(offset, None) => { + bytes.seek(SeekFrom::Start(*offset as u64))?; let mut data = vec![0; len]; bytes.read_exact(&mut data)?; data } - ByteRange::FromStart(length) => { - bytes.seek(SeekFrom::Start(0))?; + ByteRange::FromStart(offset, Some(length)) => { + bytes.seek(SeekFrom::Start(*offset as u64))?; let mut data = vec![0; *length]; bytes.read_exact(&mut data)?; data } - ByteRange::FromEnd(length) => { - bytes.seek(SeekFrom::End(-i64::try_from(*length).unwrap()))?; - let mut data = vec![0; *length]; + ByteRange::FromEnd(offset, None) => { + bytes.seek(SeekFrom::Start(0))?; + let mut data = vec![0; len - offset]; bytes.read_exact(&mut data)?; data } - ByteRange::Interval(offset, length) => { - bytes.seek(SeekFrom::Start(u64::try_from(*offset).unwrap()))?; + ByteRange::FromEnd(offset, Some(length)) => { + bytes.seek(SeekFrom::End(-i64::try_from(*offset + *length).unwrap()))?; let mut data = vec![0; *length]; bytes.read_exact(&mut data)?; data diff --git a/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs b/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs index 30329e87..4ff8a53a 100644 --- a/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs +++ b/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs @@ -73,12 +73,12 @@ impl<'a> ShardingPartialDecoder<'a> { let encoded_shard_index = if parallel { self.input_handle.par_partial_decode( &BytesRepresentation::VariableSize, - &[ByteRange::FromEnd(index_encoded_size)], + &[ByteRange::FromEnd(0, Some(index_encoded_size))], ) } else { self.input_handle.partial_decode( &BytesRepresentation::VariableSize, - &[ByteRange::FromEnd(index_encoded_size)], + &[ByteRange::FromEnd(0, Some(index_encoded_size))], ) }? .remove(0); diff --git a/src/array/codec/byte_interval_partial_decoder.rs b/src/array/codec/byte_interval_partial_decoder.rs index 2514fb00..39f604f9 100644 --- a/src/array/codec/byte_interval_partial_decoder.rs +++ b/src/array/codec/byte_interval_partial_decoder.rs @@ -38,14 +38,19 @@ impl<'a> BytesPartialDecoderTraits for ByteIntervalPartialDecoder<'a> { let byte_ranges: Vec = byte_ranges .iter() .map(|byte_range| match byte_range { - ByteRange::All => ByteRange::Interval(self.byte_offset, self.byte_length), - ByteRange::FromStart(length) => ByteRange::Interval(self.byte_offset, *length), - ByteRange::FromEnd(length) => { - ByteRange::Interval(self.byte_offset + self.byte_length - *length, *length) + ByteRange::FromStart(offset, None) => { + ByteRange::FromStart(self.byte_offset + offset, Some(self.byte_length)) } - ByteRange::Interval(start, length) => { - ByteRange::Interval(self.byte_offset + start, *length) + ByteRange::FromStart(offset, Some(length)) => { + ByteRange::FromStart(self.byte_offset + offset, Some(*length)) } + ByteRange::FromEnd(offset, None) => { + ByteRange::FromStart(self.byte_offset, Some(self.byte_length - *offset)) + } + ByteRange::FromEnd(offset, Some(length)) => ByteRange::FromEnd( + self.byte_offset + self.byte_length - offset - *length, + Some(*length), + ), }) .collect(); self.inner @@ -54,7 +59,7 @@ impl<'a> BytesPartialDecoderTraits for ByteIntervalPartialDecoder<'a> { fn decode(&self, decoded_representation: &BytesRepresentation) -> Result, CodecError> { Ok(self - .partial_decode(decoded_representation, &[ByteRange::All])? + .partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])? .remove(0)) } } diff --git a/src/array/codec/bytes_to_bytes/crc32c.rs b/src/array/codec/bytes_to_bytes/crc32c.rs index c2544eea..cf883dd3 100644 --- a/src/array/codec/bytes_to_bytes/crc32c.rs +++ b/src/array/codec/bytes_to_bytes/crc32c.rs @@ -59,7 +59,7 @@ mod tests { let codec = Crc32cCodec::new_with_configuration(&codec_configuration); let encoded = codec.encode(bytes.clone()).unwrap(); - let decoded_regions = [ByteRange::Interval(3, 2)]; + let decoded_regions = [ByteRange::FromStart(3, Some(2))]; let input_handle = Box::new(std::io::Cursor::new(encoded)); let partial_decoder = codec.partial_decoder(input_handle); let decoded_partial_chunk = partial_decoder diff --git a/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs b/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs index 94ac6ad6..781ea6c1 100644 --- a/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs +++ b/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs @@ -33,10 +33,15 @@ impl BytesPartialDecoderTraits for Crc32cPartialDecoder<'_> { // Drop trailing checksum for (bytes, byte_range) in bytes.iter_mut().zip(decoded_regions) { match byte_range { - ByteRange::All | ByteRange::FromEnd(_) => { + ByteRange::FromStart(_, Some(_)) => {} + ByteRange::FromStart(_, None) => { bytes.resize(bytes.len() - CHECKSUM_SIZE, 0); } - _ => {} + ByteRange::FromEnd(offset, _) => { + if *offset < CHECKSUM_SIZE { + bytes.resize(bytes.len() - (CHECKSUM_SIZE - offset), 0); + } + } }; } diff --git a/src/array/codec/bytes_to_bytes/gzip.rs b/src/array/codec/bytes_to_bytes/gzip.rs index 903eb376..8dd89a0c 100644 --- a/src/array/codec/bytes_to_bytes/gzip.rs +++ b/src/array/codec/bytes_to_bytes/gzip.rs @@ -73,7 +73,10 @@ mod tests { let codec = GzipCodec::new_with_configuration(&configuration); let encoded = codec.encode(bytes.clone()).unwrap(); - let decoded_regions = [ByteRange::Interval(4, 4), ByteRange::Interval(10, 2)]; + let decoded_regions = [ + ByteRange::FromStart(4, Some(4)), + ByteRange::FromStart(10, Some(2)), + ]; let input_handle = Box::new(std::io::Cursor::new(encoded)); let partial_decoder = codec.partial_decoder(input_handle); diff --git a/src/array/codec/bytes_to_bytes/zstd.rs b/src/array/codec/bytes_to_bytes/zstd.rs index 6c41b2f5..fe52171e 100644 --- a/src/array/codec/bytes_to_bytes/zstd.rs +++ b/src/array/codec/bytes_to_bytes/zstd.rs @@ -53,7 +53,10 @@ mod tests { let codec = ZstdCodec::new_with_configuration(&configuration); let encoded = codec.encode(bytes.clone()).unwrap(); - let decoded_regions = [ByteRange::Interval(4, 4), ByteRange::Interval(10, 2)]; + let decoded_regions = [ + ByteRange::FromStart(4, Some(4)), + ByteRange::FromStart(10, Some(2)), + ]; let input_handle = Box::new(std::io::Cursor::new(encoded)); let partial_decoder = codec.partial_decoder(input_handle); diff --git a/src/array_subset.rs b/src/array_subset.rs index 25ef3b2d..9defe739 100644 --- a/src/array_subset.rs +++ b/src/array_subset.rs @@ -196,11 +196,7 @@ impl ArraySubset { { let byte_index = array_index * element_size; let byte_length = element_size * contiguous_elements; - if byte_index == 0 { - byte_ranges.push(ByteRange::FromStart(byte_length)); - } else { - byte_ranges.push(ByteRange::Interval(byte_index, byte_length)); - } + byte_ranges.push(ByteRange::FromStart(byte_index, Some(byte_length))); } byte_ranges } diff --git a/src/byte_range.rs b/src/byte_range.rs index 036c6da3..c24c0046 100644 --- a/src/byte_range.rs +++ b/src/byte_range.rs @@ -16,14 +16,14 @@ pub type ByteLength = usize; /// A byte range. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ByteRange { - /// All bytes. - All, - /// A byte interval. - Interval(ByteOffset, ByteLength), - /// A length of bytes from the start. - FromStart(ByteLength), - /// A length of bytes from the end. - FromEnd(ByteLength), + /// A byte range from the start. + /// + /// If the byte length is [`None`], reads to the end of the value. + FromStart(ByteOffset, Option), + /// A byte range from the end. + /// + /// If the byte length is [`None`], reads to the start of the value. + FromEnd(ByteOffset, Option), } impl ByteRange { @@ -31,10 +31,11 @@ impl ByteRange { #[must_use] pub fn start(&self, size: usize) -> usize { match self { - ByteRange::All => 0, - ByteRange::FromStart(_offset) => 0, - ByteRange::FromEnd(length) => size - *length, - ByteRange::Interval(start, _length) => *start, + ByteRange::FromStart(offset, _) => *offset, + ByteRange::FromEnd(offset, length) => match length { + Some(length) => size - *offset - *length, + None => 0, + }, } } @@ -42,10 +43,11 @@ impl ByteRange { #[must_use] pub fn end(&self, size: usize) -> usize { match self { - ByteRange::All => size, - ByteRange::FromStart(offset) => *offset, - ByteRange::FromEnd(_length) => size, - ByteRange::Interval(start, length) => start + length, + ByteRange::FromStart(offset, length) => match length { + Some(length) => offset + length, + None => size, + }, + ByteRange::FromEnd(offset, _) => size - offset, } } @@ -53,9 +55,8 @@ impl ByteRange { #[must_use] pub fn length(&self, size: usize) -> usize { match self { - ByteRange::All => size, - ByteRange::FromStart(length) | ByteRange::FromEnd(length) => *length, - ByteRange::Interval(_start, length) => *length, + ByteRange::FromStart(offset, None) | ByteRange::FromEnd(offset, None) => size - offset, + ByteRange::FromStart(_, Some(length)) | ByteRange::FromEnd(_, Some(length)) => *length, } } } @@ -68,9 +69,9 @@ pub struct InvalidByteRangeError; fn validate_byte_ranges(byte_ranges: &[ByteRange], bytes_len: usize) -> bool { for byte_range in byte_ranges { let valid = match byte_range { - ByteRange::All => true, - ByteRange::FromStart(length) | ByteRange::FromEnd(length) => *length <= bytes_len, - ByteRange::Interval(offset, length) => offset + length <= bytes_len, + ByteRange::FromStart(offset, length) | ByteRange::FromEnd(offset, length) => { + offset + length.unwrap_or(0) <= bytes_len + } }; if !valid { return false; @@ -109,10 +110,14 @@ pub unsafe fn extract_byte_ranges_unchecked( for byte_range in byte_ranges { out.push( match byte_range { - ByteRange::All => bytes, - ByteRange::FromStart(length) => &bytes[0..*length], - ByteRange::FromEnd(length) => &bytes[bytes.len() - length..], - ByteRange::Interval(offset, length) => &bytes[*offset..offset + length], + ByteRange::FromStart(offset, length) => match length { + Some(length) => &bytes[*offset..offset + length], + None => &bytes[*offset..], + }, + ByteRange::FromEnd(offset, length) => match length { + Some(length) => &bytes[bytes.len() - offset - length..bytes.len() - offset], + None => &bytes[..bytes.len() - offset], + }, } .to_vec(), ); diff --git a/src/storage/store/filesystem.rs b/src/storage/store/filesystem.rs index a512751f..9c6f5016 100644 --- a/src/storage/store/filesystem.rs +++ b/src/storage/store/filesystem.rs @@ -171,25 +171,25 @@ impl FilesystemStore { let buffer = { // Seek match byte_range { - ByteRange::All | ByteRange::FromStart(_) => file.seek(SeekFrom::Start(0)), - ByteRange::Interval(start, _) => file.seek(SeekFrom::Start(*start as u64)), - ByteRange::FromEnd(length) => file.seek(SeekFrom::End( - -(i64::try_from(*length).map_err(|_| InvalidByteRangeError)?), + ByteRange::FromStart(offset, _) => file.seek(SeekFrom::Start(*offset as u64)), + ByteRange::FromEnd(_, None) => file.seek(SeekFrom::Start(0u64)), + ByteRange::FromEnd(offset, Some(length)) => file.seek(SeekFrom::End( + -(i64::try_from(*offset + *length).map_err(|_| InvalidByteRangeError)?), )), }?; // Read match byte_range { - ByteRange::FromStart(length) | ByteRange::Interval(_, length) => { - let mut buffer = vec![0; *length]; - file.read_exact(&mut buffer)?; - buffer - } - ByteRange::All | ByteRange::FromEnd(_) => { + ByteRange::FromStart(_, None) | ByteRange::FromEnd(_, None) => { let mut buffer = Vec::new(); file.read_to_end(&mut buffer)?; buffer } + ByteRange::FromStart(_, Some(length)) | ByteRange::FromEnd(_, Some(length)) => { + let mut buffer = vec![0; *length]; + file.read_exact(&mut buffer)?; + buffer + } } }; @@ -237,7 +237,7 @@ impl FilesystemStore { impl ReadableStorageTraits for FilesystemStore { fn get(&self, key: &StoreKey) -> Result, StorageError> { - self.get_impl(key, &ByteRange::All) + self.get_impl(key, &ByteRange::FromStart(0, None)) } fn get_partial_values( diff --git a/src/storage/store/memory.rs b/src/storage/store/memory.rs index 905e80f9..feb2286f 100644 --- a/src/storage/store/memory.rs +++ b/src/storage/store/memory.rs @@ -88,7 +88,7 @@ impl MemoryStore { impl ReadableStorageTraits for MemoryStore { fn get(&self, key: &StoreKey) -> Result, StorageError> { - self.get_impl(key, &ByteRange::All) + self.get_impl(key, &ByteRange::FromStart(0, None)) } fn get_partial_values(