diff --git a/CHANGELOG.md b/CHANGELOG.md index d28eab8c..2a01840b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added default implementation for `WritableStorageTraits::set_partial_values` - `WritableStorageTraits` now requires `ReadableStorageTraits` - Added `From<&[u8]>` for `FillValue` + - Added `FillValue::equals_all` and fill value benchmark + - Implements a much faster fill value test ### Changed - **Breaking**: `array::data_type::DataType` is now marked `#[non_exhaustive]` diff --git a/Cargo.toml b/Cargo.toml index a587bca5..413e9c29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,3 +71,7 @@ harness = false [[bench]] name = "codecs" harness = false + +[[bench]] +name = "fill_value" +harness = false diff --git a/benches/fill_value.rs b/benches/fill_value.rs new file mode 100644 index 00000000..7c1ae761 --- /dev/null +++ b/benches/fill_value.rs @@ -0,0 +1,28 @@ +use criterion::{ + criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration, + Throughput, +}; +use zarrs::array::FillValue; + +fn fill_value(c: &mut Criterion) { + for element_size in [1, 2, 4, 8, 16] { + let plot_config = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic); + let mut group = c.benchmark_group(format!("fill_value_{element_size}")); + group.plot_config(plot_config); + + for size in [32, 64, 128].iter() { + let size3 = size * size * size; + let num_elements = size3 / element_size; + let fill_value: FillValue = FillValue::new(vec![0; element_size]); + + let data = vec![0u8; (num_elements * element_size).try_into().unwrap()]; + group.throughput(Throughput::Bytes((num_elements * element_size) as u64)); + group.bench_function(BenchmarkId::new("equals_all", size3), |b| { + b.iter(|| fill_value.equals_all(&data)); + }); + } + } +} + +criterion_group!(benches, fill_value); +criterion_main!(benches); diff --git a/src/array.rs b/src/array.rs index 
171c1965..0208bd84 100644 --- a/src/array.rs +++ b/src/array.rs @@ -898,11 +898,11 @@ impl Array { )); } - let fill_value = self.fill_value().as_ne_bytes(); - let any_non_fill_value = chunk_bytes - .chunks_exact(fill_value.len()) - .any(|f| f != fill_value); - if any_non_fill_value { + let all_fill_value = self.fill_value().equals_all(chunk_bytes); + if all_fill_value { + self.erase_chunk(chunk_indices)?; + Ok(()) + } else { let storage_handle = Arc::new(StorageHandle::new(&*self.storage)); let storage_transformer = self .storage_transformers() @@ -923,9 +923,6 @@ impl Array { &chunk_encoded, ) .map_err(ArrayError::StorageError) - } else { - self.erase_chunk(chunk_indices)?; - Ok(()) } } diff --git a/src/array/codec/array_to_bytes/sharding/sharding_codec.rs b/src/array/codec/array_to_bytes/sharding/sharding_codec.rs index eeba7e41..b721412d 100644 --- a/src/array/codec/array_to_bytes/sharding/sharding_codec.rs +++ b/src/array/codec/array_to_bytes/sharding/sharding_codec.rs @@ -127,7 +127,6 @@ impl ArrayCodecTraits for ShardingCodec { // Iterate over chunk indices let mut shard_inner_chunks = Vec::new(); let mut encoded_shard_offset: usize = 0; - let fill_value_bytes = chunk_representation.fill_value().as_ne_bytes(); for (chunk_index, (_chunk_indices, chunk_subset)) in unsafe { ArraySubset::new_with_shape(shard_representation.shape().to_vec()) .iter_chunks_unchecked(&self.chunk_shape) @@ -141,10 +140,8 @@ impl ArrayCodecTraits for ShardingCodec { shard_representation.element_size(), ) }; - if bytes - .chunks_exact(fill_value_bytes.len()) - .any(|b| b != fill_value_bytes) - { + let all_fill_value = chunk_representation.fill_value().equals_all(&bytes); + if !all_fill_value { // Encode chunk let chunk_encoded = self.inner_codecs.encode(bytes, &chunk_representation)?; @@ -197,7 +194,6 @@ impl ArrayCodecTraits for ShardingCodec { .map_err(|e| CodecError::Other(e.to_string()))?; // Iterate over chunk indices - let fill_value_bytes = 
chunk_representation.fill_value().as_ne_bytes(); let shard_inner_chunks = unsafe { ArraySubset::new_with_shape(shard_representation.shape().to_vec()) .iter_chunks_unchecked(&self.chunk_shape) @@ -212,15 +208,13 @@ impl ArrayCodecTraits for ShardingCodec { shard_representation.element_size(), ) }; - if bytes - .chunks_exact(fill_value_bytes.len()) - .any(|b| b != fill_value_bytes) - { - // Encode chunk + let all_fill_value = chunk_representation.fill_value().equals_all(&bytes); + if all_fill_value { + Ok((chunk_index, None)) + } else { + // let chunk_encoded = self.inner_codecs.par_encode(bytes, &chunk_representation)?; let chunk_encoded = self.inner_codecs.encode(bytes, &chunk_representation)?; Ok((chunk_index, Some(chunk_encoded))) - } else { - Ok((chunk_index, None)) } }) .collect::, CodecError>>()?; diff --git a/src/array/fill_value.rs b/src/array/fill_value.rs index 296d7a77..98770ee1 100644 --- a/src/array/fill_value.rs +++ b/src/array/fill_value.rs @@ -140,4 +140,51 @@ impl FillValue { pub fn as_ne_bytes(&self) -> &[u8] { &self.0 } + + /// Check if the bytes are equal to a sequence of the fill value. 
///
/// `bytes` is read as consecutive elements, each as long as the fill value,
/// and the result is `true` only if every complete element equals the fill
/// value. Trailing bytes that do not form a complete element are ignored,
/// so an empty `bytes` yields `true`. A zero-length fill value only matches
/// an empty `bytes`.
///
/// Element sizes of 1, 2, 4, 8 and 16 bytes are compared 16 bytes at a time;
/// every other size is compared element by element.
#[must_use]
pub fn equals_all(&self, bytes: &[u8]) -> bool {
    let element: &[u8] = &self.0;
    let size = element.len();
    if size == 0 {
        // `chunks_exact(0)` panics, so the degenerate fill value is handled here.
        return bytes.is_empty();
    }

    // Only whole elements take part in the comparison (as with `chunks_exact`).
    let bytes = &bytes[..bytes.len() - bytes.len() % size];

    if 16 % size != 0 {
        // The element does not tile a `u128`: compare element by element,
        // chunking by the element size rather than the buffer length.
        return bytes.chunks_exact(size).all(|chunk| chunk == element);
    }

    // SAFETY: every bit pattern is a valid `u128`, and `align_to` only puts
    // correctly aligned, in-bounds memory into the middle slice.
    let (prefix, aligned, suffix) = unsafe { bytes.align_to::<u128>() };

    // `bytes` need not be 16-byte aligned, so the prefix may stop part-way
    // through an element. Rotate the repeated pattern by that offset so each
    // aligned word is compared against what the buffer must hold there.
    // `size` divides 16, so the same rotation applies to every word and to
    // the suffix.
    let phase = prefix.len() % size;
    let mut pattern = [0u8; 16];
    for (i, byte) in pattern.iter_mut().enumerate() {
        *byte = element[(phase + i) % size];
    }
    let pattern = u128::from_ne_bytes(pattern);

    prefix
        .iter()
        .zip(element.iter().cycle())
        .all(|(actual, expected)| actual == expected)
        && suffix
            .iter()
            .zip(element.iter().cycle().skip(phase))
            .all(|(actual, expected)| actual == expected)
        && aligned.iter().all(|&word| word == pattern)
}
} // closes the enclosing `impl FillValue`