Skip to content

Commit

Permalink
Change ByteRange enum to FromStart/End variants
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Sep 26, 2023
1 parent 52d1aae commit 2f4d4d1
Show file tree
Hide file tree
Showing 13 changed files with 87 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Remove the `ndarray` dev dependency
- Remove the `ndarray` dependency for the `sharding` feature
- Replace deprecated `tempdir` with `tempfile` for tests
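- **Breaking**: Change the `ByteRange` enum to have only `FromStart` and `FromEnd` variants, each taking a byte offset and an optional byte length (replaces the `All` and `Interval` variants)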

## [0.2.0] - 2023-09-25

Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "zarrs"
description = "A library for the Zarr V3 storage format for multidimensional arrays and metadata"
version = "0.2.1"
version = "0.3.0"
authors = ["Lachlan Deakin <ljdgit@gmail.com>"]
edition = "2021"
license = "MIT OR Apache-2.0"
Expand Down
22 changes: 11 additions & 11 deletions src/array/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ pub trait BytesPartialDecoderTraits: Send + Sync {
/// Returns [`CodecError`] if a codec fails.
fn decode(&self, decoded_representation: &BytesRepresentation) -> Result<Vec<u8>, CodecError> {
Ok(self
.partial_decode(decoded_representation, &[ByteRange::All])?
.partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])?
.remove(0))
}

Expand All @@ -203,7 +203,7 @@ pub trait BytesPartialDecoderTraits: Send + Sync {
decoded_representation: &BytesRepresentation,
) -> Result<Vec<u8>, CodecError> {
Ok(self
.par_partial_decode(decoded_representation, &[ByteRange::All])?
.par_partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])?
.remove(0))
}
}
Expand Down Expand Up @@ -453,26 +453,26 @@ fn extract_byte_ranges_rs<T: Read + Seek>(
let mut out = Vec::with_capacity(byte_ranges.len());
for byte_range in byte_ranges {
let data: Vec<u8> = match byte_range {
ByteRange::All => {
bytes.seek(SeekFrom::Start(0))?;
ByteRange::FromStart(offset, None) => {
bytes.seek(SeekFrom::Start(*offset as u64))?;
let mut data = vec![0; len];
bytes.read_exact(&mut data)?;
data
}
ByteRange::FromStart(length) => {
bytes.seek(SeekFrom::Start(0))?;
ByteRange::FromStart(offset, Some(length)) => {
bytes.seek(SeekFrom::Start(*offset as u64))?;
let mut data = vec![0; *length];
bytes.read_exact(&mut data)?;
data
}
ByteRange::FromEnd(length) => {
bytes.seek(SeekFrom::End(-i64::try_from(*length).unwrap()))?;
let mut data = vec![0; *length];
ByteRange::FromEnd(offset, None) => {
bytes.seek(SeekFrom::Start(0))?;
let mut data = vec![0; len - offset];
bytes.read_exact(&mut data)?;
data
}
ByteRange::Interval(offset, length) => {
bytes.seek(SeekFrom::Start(u64::try_from(*offset).unwrap()))?;
ByteRange::FromEnd(offset, Some(length)) => {
bytes.seek(SeekFrom::End(-i64::try_from(*offset + *length).unwrap()))?;
let mut data = vec![0; *length];
bytes.read_exact(&mut data)?;
data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ impl<'a> ShardingPartialDecoder<'a> {
let encoded_shard_index = if parallel {
self.input_handle.par_partial_decode(
&BytesRepresentation::VariableSize,
&[ByteRange::FromEnd(index_encoded_size)],
&[ByteRange::FromEnd(0, Some(index_encoded_size))],
)
} else {
self.input_handle.partial_decode(
&BytesRepresentation::VariableSize,
&[ByteRange::FromEnd(index_encoded_size)],
&[ByteRange::FromEnd(0, Some(index_encoded_size))],
)
}?
.remove(0);
Expand Down
19 changes: 12 additions & 7 deletions src/array/codec/byte_interval_partial_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,19 @@ impl<'a> BytesPartialDecoderTraits for ByteIntervalPartialDecoder<'a> {
let byte_ranges: Vec<ByteRange> = byte_ranges
.iter()
.map(|byte_range| match byte_range {
ByteRange::All => ByteRange::Interval(self.byte_offset, self.byte_length),
ByteRange::FromStart(length) => ByteRange::Interval(self.byte_offset, *length),
ByteRange::FromEnd(length) => {
ByteRange::Interval(self.byte_offset + self.byte_length - *length, *length)
ByteRange::FromStart(offset, None) => {
ByteRange::FromStart(self.byte_offset + offset, Some(self.byte_length))
}
ByteRange::Interval(start, length) => {
ByteRange::Interval(self.byte_offset + start, *length)
ByteRange::FromStart(offset, Some(length)) => {
ByteRange::FromStart(self.byte_offset + offset, Some(*length))
}
ByteRange::FromEnd(offset, None) => {
ByteRange::FromStart(self.byte_offset, Some(self.byte_length - *offset))
}
ByteRange::FromEnd(offset, Some(length)) => ByteRange::FromEnd(
self.byte_offset + self.byte_length - offset - *length,
Some(*length),
),
})
.collect();
self.inner
Expand All @@ -54,7 +59,7 @@ impl<'a> BytesPartialDecoderTraits for ByteIntervalPartialDecoder<'a> {

fn decode(&self, decoded_representation: &BytesRepresentation) -> Result<Vec<u8>, CodecError> {
Ok(self
.partial_decode(decoded_representation, &[ByteRange::All])?
.partial_decode(decoded_representation, &[ByteRange::FromStart(0, None)])?
.remove(0))
}
}
2 changes: 1 addition & 1 deletion src/array/codec/bytes_to_bytes/crc32c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ mod tests {
let codec = Crc32cCodec::new_with_configuration(&codec_configuration);

let encoded = codec.encode(bytes.clone()).unwrap();
let decoded_regions = [ByteRange::Interval(3, 2)];
let decoded_regions = [ByteRange::FromStart(3, Some(2))];
let input_handle = Box::new(std::io::Cursor::new(encoded));
let partial_decoder = codec.partial_decoder(input_handle);
let decoded_partial_chunk = partial_decoder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@ impl BytesPartialDecoderTraits for Crc32cPartialDecoder<'_> {
// Drop trailing checksum
for (bytes, byte_range) in bytes.iter_mut().zip(decoded_regions) {
match byte_range {
ByteRange::All | ByteRange::FromEnd(_) => {
ByteRange::FromStart(_, Some(_)) => {}
ByteRange::FromStart(_, None) => {
bytes.resize(bytes.len() - CHECKSUM_SIZE, 0);
}
_ => {}
ByteRange::FromEnd(offset, _) => {
if *offset < CHECKSUM_SIZE {
bytes.resize(bytes.len() - (CHECKSUM_SIZE - offset), 0);
}
}
};
}

Expand Down
5 changes: 4 additions & 1 deletion src/array/codec/bytes_to_bytes/gzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ mod tests {
let codec = GzipCodec::new_with_configuration(&configuration);

let encoded = codec.encode(bytes.clone()).unwrap();
let decoded_regions = [ByteRange::Interval(4, 4), ByteRange::Interval(10, 2)];
let decoded_regions = [
ByteRange::FromStart(4, Some(4)),
ByteRange::FromStart(10, Some(2)),
];

let input_handle = Box::new(std::io::Cursor::new(encoded));
let partial_decoder = codec.partial_decoder(input_handle);
Expand Down
5 changes: 4 additions & 1 deletion src/array/codec/bytes_to_bytes/zstd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ mod tests {
let codec = ZstdCodec::new_with_configuration(&configuration);

let encoded = codec.encode(bytes.clone()).unwrap();
let decoded_regions = [ByteRange::Interval(4, 4), ByteRange::Interval(10, 2)];
let decoded_regions = [
ByteRange::FromStart(4, Some(4)),
ByteRange::FromStart(10, Some(2)),
];

let input_handle = Box::new(std::io::Cursor::new(encoded));
let partial_decoder = codec.partial_decoder(input_handle);
Expand Down
6 changes: 1 addition & 5 deletions src/array_subset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,7 @@ impl ArraySubset {
{
let byte_index = array_index * element_size;
let byte_length = element_size * contiguous_elements;
if byte_index == 0 {
byte_ranges.push(ByteRange::FromStart(byte_length));
} else {
byte_ranges.push(ByteRange::Interval(byte_index, byte_length));
}
byte_ranges.push(ByteRange::FromStart(byte_index, Some(byte_length)));
}
byte_ranges
}
Expand Down
57 changes: 31 additions & 26 deletions src/byte_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,46 +16,47 @@ pub type ByteLength = usize;
/// A byte range.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ByteRange {
/// All bytes.
All,
/// A byte interval.
Interval(ByteOffset, ByteLength),
/// A length of bytes from the start.
FromStart(ByteLength),
/// A length of bytes from the end.
FromEnd(ByteLength),
/// A byte range from the start.
///
/// If the byte length is [`None`], reads to the end of the value.
FromStart(ByteOffset, Option<ByteLength>),
/// A byte range from the end.
///
/// If the byte length is [`None`], reads to the start of the value.
FromEnd(ByteOffset, Option<ByteLength>),
}

impl ByteRange {
/// Return the start of a byte range. `size` is the size of the entire bytes.
#[must_use]
pub fn start(&self, size: usize) -> usize {
match self {
ByteRange::All => 0,
ByteRange::FromStart(_offset) => 0,
ByteRange::FromEnd(length) => size - *length,
ByteRange::Interval(start, _length) => *start,
ByteRange::FromStart(offset, _) => *offset,
ByteRange::FromEnd(offset, length) => match length {
Some(length) => size - *offset - *length,
None => 0,
},
}
}

/// Return the exclusive end of a byte range. `size` is the size of the entire bytes.
#[must_use]
pub fn end(&self, size: usize) -> usize {
match self {
ByteRange::All => size,
ByteRange::FromStart(offset) => *offset,
ByteRange::FromEnd(_length) => size,
ByteRange::Interval(start, length) => start + length,
ByteRange::FromStart(offset, length) => match length {
Some(length) => offset + length,
None => size,
},
ByteRange::FromEnd(offset, _) => size - offset,
}
}

/// Return the length of a byte range. `size` is the size of the entire bytes.
#[must_use]
pub fn length(&self, size: usize) -> usize {
match self {
ByteRange::All => size,
ByteRange::FromStart(length) | ByteRange::FromEnd(length) => *length,
ByteRange::Interval(_start, length) => *length,
ByteRange::FromStart(offset, None) | ByteRange::FromEnd(offset, None) => size - offset,
ByteRange::FromStart(_, Some(length)) | ByteRange::FromEnd(_, Some(length)) => *length,
}
}
}
Expand All @@ -68,9 +69,9 @@ pub struct InvalidByteRangeError;
fn validate_byte_ranges(byte_ranges: &[ByteRange], bytes_len: usize) -> bool {
for byte_range in byte_ranges {
let valid = match byte_range {
ByteRange::All => true,
ByteRange::FromStart(length) | ByteRange::FromEnd(length) => *length <= bytes_len,
ByteRange::Interval(offset, length) => offset + length <= bytes_len,
ByteRange::FromStart(offset, length) | ByteRange::FromEnd(offset, length) => {
offset + length.unwrap_or(0) <= bytes_len
}
};
if !valid {
return false;
Expand Down Expand Up @@ -109,10 +110,14 @@ pub unsafe fn extract_byte_ranges_unchecked(
for byte_range in byte_ranges {
out.push(
match byte_range {
ByteRange::All => bytes,
ByteRange::FromStart(length) => &bytes[0..*length],
ByteRange::FromEnd(length) => &bytes[bytes.len() - length..],
ByteRange::Interval(offset, length) => &bytes[*offset..offset + length],
ByteRange::FromStart(offset, length) => match length {
Some(length) => &bytes[*offset..offset + length],
None => &bytes[*offset..],
},
ByteRange::FromEnd(offset, length) => match length {
Some(length) => &bytes[bytes.len() - offset - length..bytes.len() - offset],
None => &bytes[..bytes.len() - offset],
},
}
.to_vec(),
);
Expand Down
22 changes: 11 additions & 11 deletions src/storage/store/filesystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,25 +171,25 @@ impl FilesystemStore {
let buffer = {
// Seek
match byte_range {
ByteRange::All | ByteRange::FromStart(_) => file.seek(SeekFrom::Start(0)),
ByteRange::Interval(start, _) => file.seek(SeekFrom::Start(*start as u64)),
ByteRange::FromEnd(length) => file.seek(SeekFrom::End(
-(i64::try_from(*length).map_err(|_| InvalidByteRangeError)?),
ByteRange::FromStart(offset, _) => file.seek(SeekFrom::Start(*offset as u64)),
ByteRange::FromEnd(_, None) => file.seek(SeekFrom::Start(0u64)),
ByteRange::FromEnd(offset, Some(length)) => file.seek(SeekFrom::End(
-(i64::try_from(*offset + *length).map_err(|_| InvalidByteRangeError)?),
)),
}?;

// Read
match byte_range {
ByteRange::FromStart(length) | ByteRange::Interval(_, length) => {
let mut buffer = vec![0; *length];
file.read_exact(&mut buffer)?;
buffer
}
ByteRange::All | ByteRange::FromEnd(_) => {
ByteRange::FromStart(_, None) | ByteRange::FromEnd(_, None) => {
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
buffer
}
ByteRange::FromStart(_, Some(length)) | ByteRange::FromEnd(_, Some(length)) => {
let mut buffer = vec![0; *length];
file.read_exact(&mut buffer)?;
buffer
}
}
};

Expand Down Expand Up @@ -237,7 +237,7 @@ impl FilesystemStore {

impl ReadableStorageTraits for FilesystemStore {
fn get(&self, key: &StoreKey) -> Result<Vec<u8>, StorageError> {
self.get_impl(key, &ByteRange::All)
self.get_impl(key, &ByteRange::FromStart(0, None))
}

fn get_partial_values(
Expand Down
2 changes: 1 addition & 1 deletion src/storage/store/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl MemoryStore {

impl ReadableStorageTraits for MemoryStore {
fn get(&self, key: &StoreKey) -> Result<Vec<u8>, StorageError> {
self.get_impl(key, &ByteRange::All)
self.get_impl(key, &ByteRange::FromStart(0, None))
}

fn get_partial_values(
Expand Down

0 comments on commit 2f4d4d1

Please sign in to comment.