From 483355a903c7072122cddec336d5b14cc2b2fd5e Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Oct 2023 19:33:46 +1100 Subject: [PATCH] refactor(datatypes)!: major revisions Data type extensions removed, r*/float16/bfloat16 now core and the half crate is a required dependency. Fixes for the bytes codec. See changelog. --- CHANGELOG.md | 16 ++ Cargo.toml | 10 +- src/array/array_errors.rs | 4 +- src/array/codec/array_to_bytes/bytes.rs | 26 ++- .../codec/array_to_bytes/bytes/bytes_codec.rs | 4 +- .../bytes/bytes_partial_decoder.rs | 2 +- src/array/data_type.rs | 155 ++++++++++++------ src/array/data_type/bfloat16.rs | 115 ------------- src/array/data_type/float16.rs | 115 ------------- src/array/data_type/raw_bits.rs | 83 ---------- src/array/fill_value.rs | 2 - src/array/fill_value_metadata.rs | 61 +++++++ src/lib.rs | 3 +- src/plugin.rs | 7 +- 14 files changed, 217 insertions(+), 386 deletions(-) delete mode 100644 src/array/data_type/bfloat16.rs delete mode 100644 src/array/data_type/float16.rs delete mode 100644 src/array/data_type/raw_bits.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fc80fe8..113183b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + - **Breaking**: Added `UnsupportedDataTypeError` + +### Changed + - **Breaking**: `array::data_type::DataType` is now marked `#[non_exhaustive]` + - **Breaking**: Promote the `r*` (raw bits), `float16` and `bfloat16` data types to standard data types in `array::data_type::DataType`, rather than extension data types + - **Breaking**: Remove the crate features: `raw_bits`, `float16`, `bfloat16` + - **Breaking**: Removes `array::data_type::RawBitsDataType/Bfloat16DataType/Float16DataType` + - **Breaking**: `half` is now a required dependency + +### Fixed + - Bytes codec handling of complex and raw bits data types + +### Removed + - **Breaking**: Disabled data type extensions 
`array::data_type::DataType::Extension`. + ## [0.5.1] - 2023-10-10 ### Added diff --git a/Cargo.toml b/Cargo.toml index fe923de3..a98c76f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "zarrs" description = "A library for the Zarr V3 storage format for multidimensional arrays and metadata" -version = "0.5.1" +version = "0.6.0" authors = ["Lachlan Deakin "] edition = "2021" license = "MIT OR Apache-2.0" @@ -11,7 +11,7 @@ categories = ["encoding"] exclude = [".dockerignore", ".github", ".editorconfig", "Dockerfile", "coverage.sh", "TODO.md"] [features] -default = ["transpose", "blosc", "gzip", "sharding", "crc32c", "zstd", "raw_bits", "float16", "bfloat16", "http", "zip", "ndarray"] +default = ["transpose", "blosc", "gzip", "sharding", "crc32c", "zstd", "http", "zip", "ndarray"] # Codecs transpose = ["dep:ndarray"] blosc = ["dep:blosc-sys"] @@ -19,10 +19,6 @@ gzip = ["dep:flate2"] sharding = [] crc32c = ["dep:crc32fast"] zstd = ["dep:zstd"] -# Optional/extension data types -raw_bits = [] -float16 = ["dep:half"] -bfloat16 = ["dep:half"] # Stores http = ["dep:reqwest", "dep:url"] zip = ["dep:zip"] @@ -38,7 +34,7 @@ crc32fast = { version = "1.3", optional = true } derive_more = "0.99" dyn-clone = "1" flate2 = { version = "1", optional = true } -half = { version = "2", optional = true } +half = "2" inventory = "0.3" itertools = "0.11" ndarray = { version = "0.15", optional = true } diff --git a/src/array/array_errors.rs b/src/array/array_errors.rs index 25e34bcd..60d1c0a1 100644 --- a/src/array/array_errors.rs +++ b/src/array/array_errors.rs @@ -11,7 +11,7 @@ use crate::{ use super::{ chunk_grid::{InvalidArrayIndicesError, InvalidChunkGridIndicesError}, codec::CodecError, - data_type::IncompatibleFillValueErrorMetadataError, + data_type::{IncompatibleFillValueErrorMetadataError, UnsupportedDataTypeError}, ArrayIndices, ArrayShape, }; @@ -32,7 +32,7 @@ pub enum ArrayCreateError { UnsupportedAdditionalFieldError(#[from] 
UnsupportedAdditionalFieldError), /// Unsupported data type. #[error(transparent)] - DataTypeCreateError(PluginCreateError), + DataTypeCreateError(UnsupportedDataTypeError), /// Invalid fill value. #[error(transparent)] InvalidFillValue(#[from] IncompatibleFillValueErrorMetadataError), diff --git a/src/array/codec/array_to_bytes/bytes.rs b/src/array/codec/array_to_bytes/bytes.rs index b733eb6c..2d73b63a 100644 --- a/src/array/codec/array_to_bytes/bytes.rs +++ b/src/array/codec/array_to_bytes/bytes.rs @@ -14,6 +14,8 @@ pub use bytes_codec::BytesCodec; use derive_more::Display; +use crate::array::DataType; + /// The endianness of each element in an array, either `big` or `little`. #[derive(Copy, Clone, Eq, PartialEq, Debug, Display)] pub enum Endianness { @@ -60,10 +62,26 @@ const NATIVE_ENDIAN: Endianness = Endianness::Big; #[cfg(target_endian = "little")] const NATIVE_ENDIAN: Endianness = Endianness::Little; -fn reverse_endianness(v: &mut [u8], bytes_per_element: usize) { - if bytes_per_element > 1 { - v.chunks_exact_mut(bytes_per_element) - .for_each(<[u8]>::reverse); +fn reverse_endianness(v: &mut [u8], data_type: &DataType) { + match data_type { + DataType::Bool | DataType::Int8 | DataType::UInt8 | DataType::RawBits(_) => {} + DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 + | DataType::BFloat16 => { + v.chunks_exact_mut(data_type.size()) + .for_each(<[u8]>::reverse); + } + DataType::Complex64 | DataType::Complex128 => { + v.chunks_exact_mut(data_type.size() / 2) + .for_each(<[u8]>::reverse); + } } } diff --git a/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/src/array/codec/array_to_bytes/bytes/bytes_codec.rs index efc232dc..a8d4840e 100644 --- a/src/array/codec/array_to_bytes/bytes/bytes_codec.rs +++ b/src/array/codec/array_to_bytes/bytes/bytes_codec.rs @@ -112,7 +112,7 @@ impl ArrayCodecTraits for BytesCodec { if let 
Some(endian) = &self.endian { if !endian.is_native() { - reverse_endianness(&mut decoded_value, decoded_representation.element_size()); + reverse_endianness(&mut decoded_value, decoded_representation.data_type()); } } Ok(decoded_value) @@ -125,7 +125,7 @@ impl ArrayCodecTraits for BytesCodec { ) -> Result, CodecError> { if let Some(endian) = &self.endian { if !endian.is_native() { - reverse_endianness(&mut encoded_value, decoded_representation.element_size()); + reverse_endianness(&mut encoded_value, decoded_representation.data_type()); } } Ok(encoded_value) diff --git a/src/array/codec/array_to_bytes/bytes/bytes_partial_decoder.rs b/src/array/codec/array_to_bytes/bytes/bytes_partial_decoder.rs index 6f8c9ebc..8d4fb683 100644 --- a/src/array/codec/array_to_bytes/bytes/bytes_partial_decoder.rs +++ b/src/array/codec/array_to_bytes/bytes/bytes_partial_decoder.rs @@ -61,7 +61,7 @@ impl ArrayPartialDecoderTraits for BytesPartialDecoder<'_> { if !endian.is_native() { reverse_endianness( &mut bytes_subset, - decoded_representation.element_size(), + decoded_representation.data_type(), ); } } diff --git a/src/array/data_type.rs b/src/array/data_type.rs index 319cf940..b230a559 100644 --- a/src/array/data_type.rs +++ b/src/array/data_type.rs @@ -2,27 +2,11 @@ //! //! See . -#[cfg(feature = "raw_bits")] -mod raw_bits; -#[cfg(feature = "raw_bits")] -pub use raw_bits::RawBitsDataType; - -#[cfg(feature = "float16")] -mod float16; -#[cfg(feature = "float16")] -pub use float16::Float16DataType; - -#[cfg(feature = "bfloat16")] -mod bfloat16; -#[cfg(feature = "bfloat16")] -pub use bfloat16::Bfloat16DataType; - +use derive_more::From; +use half::{bf16, f16}; use thiserror::Error; -use crate::{ - metadata::Metadata, - plugin::{Plugin, PluginCreateError}, -}; +use crate::metadata::Metadata; use super::{ fill_value_metadata::{FillValueFloat, FillValueFloatStringNonFinite, FillValueMetadata}, @@ -31,6 +15,7 @@ use super::{ /// A data type. 
#[derive(Clone, Debug)] +#[non_exhaustive] pub enum DataType { /// `bool` Boolean. Bool, @@ -50,18 +35,30 @@ pub enum DataType { UInt32, /// `uint64` Integer in `[0, 2^64-1]`. UInt64, + /// `float16` IEEE 754 half-precision floating point: sign bit, 5 bits exponent, 10 bits mantissa. + Float16, /// `float32` IEEE 754 single-precision floating point: sign bit, 8 bits exponent, 23 bits mantissa. Float32, /// `float64` IEEE 754 double-precision floating point: sign bit, 11 bits exponent, 52 bits mantissa. Float64, + /// `bfloat16` brain floating point data type: sign bit, 8 bits exponent, 7 bits mantissa. + BFloat16, /// `complex64` real and complex components are each IEEE 754 single-precision floating point. Complex64, /// `complex128` real and complex components are each IEEE 754 double-precision floating point. Complex128, - /// An optional or extension data type. - Extension(Box), + /// `r*` raw bits, variable size given by *, limited to be a multiple of 8. + RawBits(usize), // the stored usize is the size in bytes + + // /// An extension data type. + // Extension(Box), } +/// An unsupported data type error. +#[derive(Debug, Error, From)] +#[error("data type {_0} is unsupported")] +pub struct UnsupportedDataTypeError(String); + impl PartialEq for DataType { fn eq(&self, other: &Self) -> bool { self.name() == other.name() @@ -70,9 +67,9 @@ impl PartialEq for DataType { impl Eq for DataType {} -/// A data type plugin. -pub type DataTypePlugin = Plugin>; -inventory::collect!(DataTypePlugin); +// /// A data type plugin. +// pub type DataTypePlugin = Plugin>; +// inventory::collect!(DataTypePlugin); /// A fill value metadata incompatibility error.
#[derive(Debug, Error)] @@ -137,11 +134,14 @@ impl DataType { Self::UInt16 => "uint16", Self::UInt32 => "uint32", Self::UInt64 => "uint64", + Self::Float16 => "float16", Self::Float32 => "float32", Self::Float64 => "float64", + Self::BFloat16 => "bfloat16", Self::Complex64 => "complex64", Self::Complex128 => "complex128", - Self::Extension(extension) => extension.identifier(), + Self::RawBits(_usize) => "r*", + // Self::Extension(extension) => extension.identifier(), } } @@ -149,7 +149,8 @@ impl DataType { #[must_use] pub fn name(&self) -> String { match self { - Self::Extension(extension) => extension.name(), + Self::RawBits(size) => format!("r{}", size * 8), + // Self::Extension(extension) => extension.name(), _ => self.identifier().to_string(), } } @@ -157,10 +158,11 @@ impl DataType { /// Returns the metadata. #[must_use] pub fn metadata(&self) -> Metadata { - match self { - Self::Extension(extension) => extension.metadata(), - _ => Metadata::new(self.identifier()), - } + Metadata::new(&self.name()) + // match self { + // // Self::Extension(extension) => extension.metadata(), + // _ => Metadata::new(&self.name()), + // } } /// Returns the size in bytes. @@ -168,11 +170,12 @@ impl DataType { pub fn size(&self) -> usize { match self { Self::Bool | Self::Int8 | Self::UInt8 => 1, - Self::Int16 | Self::UInt16 => 2, + Self::Int16 | Self::UInt16 | Self::Float16 | Self::BFloat16 => 2, Self::Int32 | Self::UInt32 | Self::Float32 => 4, Self::Int64 | Self::UInt64 | Self::Float64 | Self::Complex64 => 8, Self::Complex128 => 16, - Self::Extension(extension) => extension.size(), + Self::RawBits(size) => *size, + // Self::Extension(extension) => extension.size(), } } @@ -180,8 +183,8 @@ impl DataType { /// /// # Errors /// - /// Returns [`PluginCreateError`] if the metadata is invalid or not associated with a registered data type plugin. 
- pub fn from_metadata(metadata: &Metadata) -> Result { + /// Returns [`UnsupportedDataTypeError`] if the metadata name does not identify a supported data type. + pub fn from_metadata(metadata: &Metadata) -> Result { let name = metadata.name(); match name { @@ -194,21 +197,34 @@ impl DataType { "uint16" => return Ok(Self::UInt16), "uint32" => return Ok(Self::UInt32), "uint64" => return Ok(Self::UInt64), + "float16" => return Ok(Self::Float16), "float32" => return Ok(Self::Float32), "float64" => return Ok(Self::Float64), + "bfloat16" => return Ok(Self::BFloat16), "complex64" => return Ok(Self::Complex64), "complex128" => return Ok(Self::Complex128), _ => {} }; - for plugin in inventory::iter:: { - if plugin.match_name(metadata.name()) { - return Ok(DataType::Extension(plugin.create(metadata)?)); + if name.starts_with('r') { + if let Ok(size_bits) = metadata.name()[1..].parse::() { + if size_bits % 8 == 0 { + let size_bytes = size_bits / 8; + return Ok(DataType::RawBits(size_bytes)); + } } } - Err(PluginCreateError::Unsupported { - name: metadata.name().to_string(), - }) + + Err(UnsupportedDataTypeError(name.to_string())) + + // for plugin in inventory::iter:: { + // if plugin.match_name(metadata.name()) { + // return Ok(DataType::Extension(plugin.create(metadata)?)); + // } + // } + // Err(PluginCreateError::Unsupported { + // name: metadata.name().to_string(), + // }) } /// Create a fill value from metadata.
@@ -233,8 +249,10 @@ impl DataType { Self::UInt16 => Ok(FV::from(fill_value.try_as_uint::().ok_or_else(err)?)), Self::UInt32 => Ok(FV::from(fill_value.try_as_uint::().ok_or_else(err)?)), Self::UInt64 => Ok(FV::from(fill_value.try_as_uint::().ok_or_else(err)?)), + Self::Float16 => Ok(FV::from(fill_value.try_as_float16().ok_or_else(err)?)), Self::Float32 => Ok(FV::from(fill_value.try_as_float::().ok_or_else(err)?)), Self::Float64 => Ok(FV::from(fill_value.try_as_float::().ok_or_else(err)?)), + Self::BFloat16 => Ok(FV::from(fill_value.try_as_bfloat16().ok_or_else(err)?)), Self::Complex64 => { let (re, im) = fill_value.try_as_float_pair::().ok_or_else(err)?; Ok(FV::from(num::complex::Complex32::new(re, im))) @@ -243,7 +261,17 @@ impl DataType { let (re, im) = fill_value.try_as_float_pair::().ok_or_else(err)?; Ok(FV::from(num::complex::Complex64::new(re, im))) } - Self::Extension(extension) => extension.fill_value_from_metadata(fill_value), + Self::RawBits(size) => { + if let FillValueMetadata::ByteArray(bytes) = fill_value { + if bytes.len() == *size { + return Ok(FillValue::new(bytes.clone())); + } + } + Err(IncompatibleFillValueErrorMetadataError( + self.name().to_string(), + fill_value.clone(), + )) + } // Self::Extension(extension) => extension.fill_value_from_metadata(fill_value), } } @@ -282,12 +310,20 @@ impl DataType { DataType::UInt64 => { FillValueMetadata::Uint(u64::from_ne_bytes(bytes.try_into().unwrap())) } + DataType::Float16 => { + let fill_value = f16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); + FillValueMetadata::Float(float16_to_fill_value_float(fill_value)) + } DataType::Float32 => FillValueMetadata::Float(float_to_fill_value(f32::from_ne_bytes( bytes.try_into().unwrap(), ))), DataType::Float64 => FillValueMetadata::Float(float_to_fill_value(f64::from_ne_bytes( bytes.try_into().unwrap(), ))), + DataType::BFloat16 => { + let fill_value = bf16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); + 
FillValueMetadata::Float(bfloat16_to_fill_value_float(fill_value)) + } DataType::Complex64 => { let re = f32::from_ne_bytes(bytes[0..4].try_into().unwrap()); let im = f32::from_ne_bytes(bytes[4..8].try_into().unwrap()); @@ -298,7 +334,10 @@ impl DataType { let im = f64::from_ne_bytes(bytes[8..16].try_into().unwrap()); FillValueMetadata::Complex(float_to_fill_value(re), float_to_fill_value(im)) } - DataType::Extension(extension) => extension.metadata_fill_value(fill_value), + DataType::RawBits(size) => { + debug_assert_eq!(fill_value.as_ne_bytes().len(), *size); + FillValueMetadata::ByteArray(fill_value.as_ne_bytes().to_vec()) + } // DataType::Extension(extension) => extension.metadata_fill_value(fill_value), } } } @@ -318,8 +357,32 @@ where } } +fn float16_to_fill_value_float(f: f16) -> FillValueFloat { + if f.is_infinite() && f.is_sign_positive() { + FillValueFloatStringNonFinite::PosInfinity.into() + } else if f.is_infinite() && f.is_sign_negative() { + FillValueFloatStringNonFinite::NegInfinity.into() + } else if f.is_nan() { + FillValueFloatStringNonFinite::NaN.into() + } else { + f64::from(f).into() + } +} + +fn bfloat16_to_fill_value_float(f: bf16) -> FillValueFloat { + if f.is_infinite() && f.is_sign_positive() { + FillValueFloatStringNonFinite::PosInfinity.into() + } else if f.is_infinite() && f.is_sign_negative() { + FillValueFloatStringNonFinite::NegInfinity.into() + } else if f.is_nan() { + FillValueFloatStringNonFinite::NaN.into() + } else { + f64::from(f).into() + } +} + impl TryFrom for DataType { - type Error = PluginCreateError; + type Error = UnsupportedDataTypeError; fn try_from(metadata: Metadata) -> Result { DataType::from_metadata(&metadata) @@ -521,7 +584,6 @@ mod tests { ); } - #[cfg(feature = "float16")] #[test] fn data_type_float16() { use half::f16; @@ -573,7 +635,6 @@ mod tests { ); } - #[cfg(feature = "bfloat16")] #[test] fn data_type_bfloat16() { use half::bf16; @@ -673,7 +734,6 @@ mod tests { ); } - #[cfg(feature = "raw_bits")] #[test] 
fn data_type_r8() { let json = r#""r8""#; @@ -695,7 +755,6 @@ mod tests { ); } - #[cfg(feature = "raw_bits")] #[test] fn data_type_r16() { let json = r#""r16""#; @@ -753,7 +812,6 @@ mod tests { assert!(serde_json::from_str::(json).is_err()); } - #[cfg(feature = "raw_bits")] #[test] pub fn data_type_raw_bits1() { let json = r#""r16""#; @@ -762,7 +820,6 @@ mod tests { assert_eq!(data_type.size(), 2); } - #[cfg(feature = "raw_bits")] #[test] pub fn data_type_raw_bits2() { let json = r#" diff --git a/src/array/data_type/bfloat16.rs b/src/array/data_type/bfloat16.rs deleted file mode 100644 index 92b8f71c..00000000 --- a/src/array/data_type/bfloat16.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! `bfloat16` brain floating point data type: sign bit, 5 bits exponent, 10 bits mantissa. - -use half::bf16; - -use crate::{ - array::{ - data_type::DataTypePlugin, - fill_value_metadata::{FillValueFloat, FillValueFloatStringNonFinite, FillValueMetadata}, - FillValue, - }, - metadata::{ConfigurationInvalidError, Metadata}, - plugin::PluginCreateError, -}; - -use super::{DataTypeExtension, IncompatibleFillValueErrorMetadataError}; - -const IDENTIFIER: &str = "bfloat16"; - -// Register the data type. -inventory::submit! { - DataTypePlugin::new(IDENTIFIER, is_name_bfloat16, create_data_type_bfloat16) -} - -fn is_name_bfloat16(name: &str) -> bool { - name.eq(IDENTIFIER) -} - -fn create_data_type_bfloat16( - metadata: &Metadata, -) -> Result, PluginCreateError> { - if metadata.configuration_is_none_or_empty() { - let data_type = Bfloat16DataType; - Ok(Box::new(data_type)) - } else { - Err(ConfigurationInvalidError::new(IDENTIFIER, metadata.configuration().cloned()).into()) - } -} - -/// The `bfloat16` data type. -/// Brain floating point: sign bit, 5 bits exponent, 10 bits mantissa. 
-#[derive(Clone, Debug)] -pub struct Bfloat16DataType; - -impl DataTypeExtension for Bfloat16DataType { - fn identifier(&self) -> &'static str { - IDENTIFIER - } - - fn name(&self) -> String { - IDENTIFIER.to_string() - } - - fn size(&self) -> usize { - 2 - } - - fn metadata(&self) -> Metadata { - Metadata::new(IDENTIFIER) - } - - fn fill_value_from_metadata( - &self, - fill_value: &FillValueMetadata, - ) -> Result { - let float = match fill_value { - FillValueMetadata::Float(float) => { - use FillValueFloat as F; - match float { - F::Float(float) => Some(bf16::from_f64(*float)), - F::HexString(hex_string) => { - let bytes = hex_string.as_bytes(); - if bytes.len() == core::mem::size_of::() { - Some(bf16::from_be_bytes(bytes.try_into().unwrap())) - } else { - None - } - } - F::NonFinite(nonfinite) => { - use FillValueFloatStringNonFinite as NF; - Some(match nonfinite { - NF::PosInfinity => bf16::INFINITY, - NF::NegInfinity => bf16::NEG_INFINITY, - NF::NaN => bf16::NAN, - }) - } - } - } - _ => None, - }; - Ok(float - .ok_or(IncompatibleFillValueErrorMetadataError( - self.name().to_string(), - fill_value.clone(), - ))? 
- .into()) - } - - fn metadata_fill_value(&self, fill_value: &FillValue) -> FillValueMetadata { - assert_eq!(self.size(), fill_value.size()); - let fill_value = bf16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); - FillValueMetadata::Float(bfloat16_to_fill_value_float(fill_value)) - } -} - -fn bfloat16_to_fill_value_float(f: bf16) -> FillValueFloat { - if f.is_infinite() && f.is_sign_positive() { - FillValueFloatStringNonFinite::PosInfinity.into() - } else if f.is_infinite() && f.is_sign_negative() { - FillValueFloatStringNonFinite::NegInfinity.into() - } else if f.is_nan() { - FillValueFloatStringNonFinite::NaN.into() - } else { - f64::from(f).into() - } -} diff --git a/src/array/data_type/float16.rs b/src/array/data_type/float16.rs deleted file mode 100644 index ce618407..00000000 --- a/src/array/data_type/float16.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! `float16` IEEE 754 half-precision floating point data type: sign bit, 5 bits exponent, 10 bits mantissa - -use half::f16; - -use crate::{ - array::{ - data_type::DataTypePlugin, - fill_value_metadata::{FillValueFloat, FillValueFloatStringNonFinite, FillValueMetadata}, - FillValue, - }, - metadata::{ConfigurationInvalidError, Metadata}, - plugin::PluginCreateError, -}; - -use super::{DataTypeExtension, IncompatibleFillValueErrorMetadataError}; - -const IDENTIFIER: &str = "float16"; - -// Register the data type. -inventory::submit! { - DataTypePlugin::new(IDENTIFIER, is_name_float16, create_data_type_float16) -} - -fn is_name_float16(name: &str) -> bool { - name.eq(IDENTIFIER) -} - -fn create_data_type_float16( - metadata: &Metadata, -) -> Result, PluginCreateError> { - if metadata.configuration_is_none_or_empty() { - let data_type = Float16DataType; - Ok(Box::new(data_type)) - } else { - Err(ConfigurationInvalidError::new(IDENTIFIER, metadata.configuration().cloned()).into()) - } -} - -/// The `float16` data type. 
-/// IEEE 754 half-precision floating point: sign bit, 5 bits exponent, 10 bits mantissa. -#[derive(Clone, Debug)] -pub struct Float16DataType; - -impl DataTypeExtension for Float16DataType { - fn identifier(&self) -> &'static str { - IDENTIFIER - } - - fn name(&self) -> String { - IDENTIFIER.to_string() - } - - fn size(&self) -> usize { - 2 - } - - fn metadata(&self) -> Metadata { - Metadata::new(IDENTIFIER) - } - - fn fill_value_from_metadata( - &self, - fill_value: &FillValueMetadata, - ) -> Result { - let float = match fill_value { - FillValueMetadata::Float(float) => { - use FillValueFloat as F; - match float { - F::Float(float) => Some(f16::from_f64(*float)), - F::HexString(hex_string) => { - let bytes = hex_string.as_bytes(); - if bytes.len() == core::mem::size_of::() { - Some(f16::from_be_bytes(bytes.try_into().unwrap())) - } else { - None - } - } - F::NonFinite(nonfinite) => { - use FillValueFloatStringNonFinite as NF; - Some(match nonfinite { - NF::PosInfinity => f16::INFINITY, - NF::NegInfinity => f16::NEG_INFINITY, - NF::NaN => f16::NAN, - }) - } - } - } - _ => None, - }; - Ok(float - .ok_or(IncompatibleFillValueErrorMetadataError( - self.name().to_string(), - fill_value.clone(), - ))? 
- .into()) - } - - fn metadata_fill_value(&self, fill_value: &FillValue) -> FillValueMetadata { - assert_eq!(self.size(), fill_value.size()); - let fill_value = f16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); - FillValueMetadata::Float(float16_to_fill_value_float(fill_value)) - } -} - -fn float16_to_fill_value_float(f: f16) -> FillValueFloat { - if f.is_infinite() && f.is_sign_positive() { - FillValueFloatStringNonFinite::PosInfinity.into() - } else if f.is_infinite() && f.is_sign_negative() { - FillValueFloatStringNonFinite::NegInfinity.into() - } else if f.is_nan() { - FillValueFloatStringNonFinite::NaN.into() - } else { - f64::from(f).into() - } -} diff --git a/src/array/data_type/raw_bits.rs b/src/array/data_type/raw_bits.rs deleted file mode 100644 index f190493c..00000000 --- a/src/array/data_type/raw_bits.rs +++ /dev/null @@ -1,83 +0,0 @@ -//! `r*` raw bits data type. Variable size in bits given by *. - -// TODO: Make this a standard part of DataType and don't lock behind a feature - -use crate::{ - array::{data_type::DataTypePlugin, FillValue, FillValueMetadata}, - metadata::{ConfigurationInvalidError, Metadata}, - plugin::PluginCreateError, -}; - -use super::{DataTypeExtension, IncompatibleFillValueErrorMetadataError}; - -const IDENTIFIER: &str = "r*"; - -// Register the data type. -inventory::submit! 
{ - DataTypePlugin::new(IDENTIFIER, is_name_raw_bits, create_data_type_raw_bits) -} - -fn is_name_raw_bits(name: &str) -> bool { - name.starts_with('r') && name[1..].parse::().is_ok() -} - -fn create_data_type_raw_bits( - metadata: &Metadata, -) -> Result, PluginCreateError> { - if metadata.configuration_is_none_or_empty() { - let size_bits = metadata.name()[1..].parse::().unwrap(); // Safe because if is_name_raw_bits - if size_bits % 8 == 0 { - let data_type = RawBitsDataType(size_bits / 8); - Ok(Box::new(data_type)) - } else { - Err(PluginCreateError::Unsupported { - name: metadata.name().to_string(), - }) - } - } else { - Err(ConfigurationInvalidError::new(IDENTIFIER, metadata.configuration().cloned()).into()) - } -} - -/// The `r*` raw bits data type. -/// Variable size in bits given by *. -/// -/// Variable size is limited to be a multiple of 8. -#[derive(Clone, Debug)] -pub struct RawBitsDataType(usize); - -impl DataTypeExtension for RawBitsDataType { - fn identifier(&self) -> &'static str { - IDENTIFIER - } - - fn name(&self) -> String { - format!("r{}", self.0 * 8) - } - - fn size(&self) -> usize { - self.0 - } - - fn metadata(&self) -> Metadata { - Metadata::new(&self.name()) - } - - fn fill_value_from_metadata( - &self, - fill_value: &FillValueMetadata, - ) -> Result { - match fill_value { - FillValueMetadata::ByteArray(bytes) => Ok(FillValue::new(bytes.clone())), - _ => Err(IncompatibleFillValueErrorMetadataError( - self.name().to_string(), - fill_value.clone(), - )), - } - } - - fn metadata_fill_value(&self, fill_value: &FillValue) -> FillValueMetadata { - assert_eq!(self.size(), fill_value.size()); - FillValueMetadata::ByteArray(fill_value.as_ne_bytes().to_vec()) - } -} diff --git a/src/array/fill_value.rs b/src/array/fill_value.rs index 60750cbf..8aa3b020 100644 --- a/src/array/fill_value.rs +++ b/src/array/fill_value.rs @@ -74,14 +74,12 @@ impl From for FillValue { } } -#[cfg(feature = "float16")] impl From for FillValue { fn from(value: half::f16) 
-> Self { FillValue(value.to_ne_bytes().to_vec()) } } -#[cfg(feature = "bfloat16")] impl From for FillValue { fn from(value: half::bf16) -> Self { FillValue(value.to_ne_bytes().to_vec()) diff --git a/src/array/fill_value_metadata.rs b/src/array/fill_value_metadata.rs index f397a7b1..ae403389 100644 --- a/src/array/fill_value_metadata.rs +++ b/src/array/fill_value_metadata.rs @@ -8,6 +8,7 @@ //! Fill value metadata is created with [`DataTypeExtension::metadata_fill_value`](crate::array::data_type::DataTypeExtension::metadata_fill_value). use derive_more::{Display, From}; +use half::{bf16, f16}; use num::traits::float::FloatCore; use serde::{Deserialize, Serialize}; @@ -238,6 +239,66 @@ impl FillValueMetadata { _ => None, } } + + /// Convert the fill value to a float16. + #[must_use] + pub fn try_as_float16(&self) -> Option { + match self { + FillValueMetadata::Float(float) => { + use FillValueFloat as F; + match float { + F::Float(float) => Some(f16::from_f64(*float)), + F::HexString(hex_string) => { + let bytes = hex_string.as_bytes(); + if let Ok(bytes) = bytes.try_into() { + Some(f16::from_be_bytes(bytes)) + } else { + None + } + } + F::NonFinite(nonfinite) => { + use FillValueFloatStringNonFinite as NF; + Some(match nonfinite { + NF::PosInfinity => f16::INFINITY, + NF::NegInfinity => f16::NEG_INFINITY, + NF::NaN => f16::NAN, + }) + } + } + } + _ => None, + } + } + + /// Convert the fill value to a bfloat16.
+ #[must_use] + pub fn try_as_bfloat16(&self) -> Option { + match self { + FillValueMetadata::Float(float) => { + use FillValueFloat as F; + match float { + F::Float(float) => Some(bf16::from_f64(*float)), + F::HexString(hex_string) => { + let bytes = hex_string.as_bytes(); + if let Ok(bytes) = bytes.try_into() { + Some(bf16::from_be_bytes(bytes)) + } else { + None + } + } + F::NonFinite(nonfinite) => { + use FillValueFloatStringNonFinite as NF; + Some(match nonfinite { + NF::PosInfinity => bf16::INFINITY, + NF::NegInfinity => bf16::NEG_INFINITY, + NF::NaN => bf16::NAN, + }) + } + } + } + _ => None, + } + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 15dc6455..80da180c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ //! - [x] [ZEP0002 - Sharding codec](https://zarr.dev/zeps/draft/ZEP0002.html) ([under review](https://github.com/zarr-developers/zarr-specs/issues/254)) //! - [x] [ZEP0003 - Variable chunking](https://zarr.dev/zeps/draft/ZEP0003.html) ([draft](https://github.com/orgs/zarr-developers/discussions/52)) //! - [x] Stores: [`filesystem`](crate::storage::store::FilesystemStore), [`memory`](crate::storage::store::MemoryStore), [`http`](crate::storage::store::HTTPStore), [`zip`](crate::storage::storage_adapter::ZipStorageAdapter) -//! - [x] Data types: [core data types](crate::array::data_type::DataType), [`raw bits`](crate::array::data_type::RawBitsDataType), [`float16`](crate::array::data_type::Float16DataType), [`bfloat16`](crate::array::data_type::Bfloat16DataType) [(spec issue)](https://github.com/zarr-developers/zarr-specs/issues/130) +//! - [x] Data types: [core data types](crate::array::data_type::DataType), [`raw bits`](crate::array::data_type::DataType::RawBits), [`float16`](crate::array::data_type::DataType::Float16), [`bfloat16`](crate::array::data_type::DataType::BFloat16) [(spec issue)](https://github.com/zarr-developers/zarr-specs/issues/130) //! 
- [x] Chunk grids: [`regular`](crate::array::chunk_grid::RegularChunkGrid), [`rectangular`](crate::array::chunk_grid::RectangularChunkGrid) ([draft](https://github.com/orgs/zarr-developers/discussions/52)) //! - [x] Chunk key encoding: [`default`](crate::array::chunk_key_encoding::DefaultChunkKeyEncoding), [`v2`](crate::array::chunk_key_encoding::V2ChunkKeyEncoding) //! - [x] Codecs: [`blosc`](crate::array::codec::BloscCodec), [`bytes`](crate::array::codec::BytesCodec) [(spec issue)](https://github.com/zarr-developers/zarr-specs/pull/263), [`gzip`](crate::array::codec::GzipCodec), [`transpose`](crate::array::codec::TransposeCodec), [`zstd`](crate::array::codec::ZstdCodec) [(spec issue)](https://github.com/zarr-developers/zarr-specs/pull/256), [`sharding`](crate::array::codec::ShardingCodec), [`crc32c checksum`](crate::array::codec::Crc32cCodec) @@ -28,7 +28,6 @@ //! ## Features //! All features are enabled by default. //! - Codecs: `blosc`, `gzip`, `transpose`, `zstd`, `sharding`, `crc32c`. -//! - Data types: `raw_bits`, `float16`, `bfloat16`. //! - Stores: `http`, `zip`. //! - `ndarray`: adds [`ndarray`] utility functions to [`Array`](crate::array::Array). //! diff --git a/src/plugin.rs b/src/plugin.rs index 1f80cf76..8c3b4144 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -1,14 +1,13 @@ //! Plugin utilities for supporting [Zarr extension points](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#extension-points). //! //! A [`Plugin`] creates objects from [`Metadata`] (consisting of a name and optional configuration). -//! It is used to implement [Zarr extension points](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#extension-points), such as [data types](`crate::array::data_type`), [chunk grids][`crate::array::chunk_grid`], [chunk key encodings](`crate::array::chunk_key_encoding`), [codecs](`crate::array::codec`), and [storage transformers](`crate::storage::storage_transformer`). +//! 
It is used to implement [Zarr extension points](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#extension-points), such as [chunk grids][`crate::array::chunk_grid`], [chunk key encodings](`crate::array::chunk_key_encoding`), [codecs](`crate::array::codec`), and [storage transformers](`crate::storage::storage_transformer`). +//! +//! [Data types](`crate::array::data_type`) are not currently supported as an extension point. //! //! Plugins are registered at compile time using the [inventory] crate. //! At runtime, a name matching function is applied to identify which registered plugin is associated with the metadata. //! If a match is found, the plugin is created from the metadata. -//! -//! For example, the raw bits data type [`RawBitsDataType`](crate::array::data_type::RawBitsDataType) has the `"r*"` identifier and matches any `"rX"` name where `X` is a positive integer that is a multiple of 8. -//! The size of the raw bits data type is derived from the metadata name, but most plugins are configured by the metadata configuration. use thiserror::Error;