Merge pull request #10 from awxkee/dev

AR30 fixes, improvements to YUV 4:0:0
awxkee · Nov 21, 2024 · e8bc878 · e8bc878
2 parents 9b5dd60 + 6714e8c
commit e8bc878
Show file tree

Hide file tree

Showing 13 changed files with 254 additions and 172 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -2,7 +2,7 @@ workspace = { members = ["app"] }
 
 [package]
 name = "yuvutils-rs"
-version = "0.5.4"
+version = "0.5.5"
 edition = "2021"
 description = "High performance utilities for YUV format handling and conversion."
 readme = "README.md"

diff --git a/app/src/main.rs b/app/src/main.rs
@@ -32,11 +32,11 @@ use std::fs::File;
 use std::io::Read;
 use std::time::Instant;
 use yuvutils_rs::{
-    ab30_to_rgb8, ar30_to_rgb8, ra30_to_rgb8, rgb8_to_ar30, rgb_to_sharp_yuv422, rgb_to_yuv420_p16,
-    rgb_to_yuv422_p16, rgb_to_yuv_nv12_p16, yuv422_p16_to_ab30, yuv422_p16_to_ar30,
-    yuv422_p16_to_ra30, yuv422_to_rgb, yuv444_p16_to_ar30, yuv_nv12_to_rgb_p16, Rgb30ByteOrder,
-    SharpYuvGammaTransfer, YuvBiPlanarImageMut, YuvBytesPacking, YuvChromaSubsampling,
-    YuvEndianness, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
+    ab30_to_rgb8, ar30_to_rgb8, ra30_to_rgb8, rgb8_to_ar30, rgb8_to_ra30, rgb_to_sharp_yuv422,
+    rgb_to_yuv420_p16, rgb_to_yuv422_p16, rgb_to_yuv_nv12_p16, yuv422_p16_to_ab30,
+    yuv422_p16_to_ar30, yuv422_p16_to_ra30, yuv422_to_rgb, yuv444_p16_to_ar30, yuv_nv12_to_rgb_p16,
+    Rgb30ByteOrder, SharpYuvGammaTransfer, YuvBiPlanarImageMut, YuvBytesPacking,
+    YuvChromaSubsampling, YuvEndianness, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
 };
 
 fn read_file_bytes(file_path: &str) -> Result<Vec<u8>, String> {
@@ -222,7 +222,7 @@ fn main() {
 
     let mut ar30 = vec![0u32; width as usize * height as usize];
 
-    rgb8_to_ar30(
+    rgb8_to_ra30(
         &mut ar30,
         width,
         Rgb30ByteOrder::Host,
@@ -246,7 +246,7 @@ fn main() {
     // )
     // .unwrap();
     rgba.fill(0);
-    ar30_to_rgb8(
+    ra30_to_rgb8(
         &ar30,
         width,
         Rgb30ByteOrder::Host,

diff --git a/src/images.rs b/src/images.rs
@@ -58,7 +58,7 @@ impl<T: Copy + Debug> BufferStoreMut<'_, T> {
 }
 
 #[derive(Debug, Clone)]
-/// Non representation of Bi-Planar YUV image
+/// Non-mutable representation of Bi-Planar YUV image
 pub struct YuvBiPlanarImage<'a, T>
 where
     T: Copy + Debug,

diff --git a/src/neon/yuv_to_rgba.rs b/src/neon/yuv_to_rgba.rs
@@ -72,7 +72,7 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
         match chroma_subsampling {
             YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => {
                 let u_values = vld1_u8(u_ptr.add(uv_x));
-                let v_values = vld1_u8(v_ptr.add( uv_x));
+                let v_values = vld1_u8(v_ptr.add(uv_x));
 
                 u_high_u8 = vzip2_u8(u_values, u_values);
                 v_high_u8 = vzip2_u8(v_values, v_values);
@@ -81,7 +81,7 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
             }
             YuvChromaSubsampling::Yuv444 => {
                 let u_values = vld1q_u8(u_ptr.add(uv_x));
-                let v_values = vld1q_u8(v_ptr.add( uv_x));
+                let v_values = vld1q_u8(v_ptr.add(uv_x));
 
                 u_high_u8 = vget_high_u8(u_values);
                 v_high_u8 = vget_high_u8(v_values);
@@ -197,17 +197,15 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
 
         match chroma_subsampling {
             YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => {
-                let u_values =
-                    vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32));
-                let v_values =
-                    vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add( uv_x) as *const u32));
+                let u_values = vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32));
+                let v_values = vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add(uv_x) as *const u32));
 
                 u_low_u8 = vzip1_u8(u_values, u_values);
                 v_low_u8 = vzip1_u8(v_values, v_values);
             }
             YuvChromaSubsampling::Yuv444 => {
                 let u_values = vld1_u8(u_ptr.add(uv_x));
-                let v_values = vld1_u8(v_ptr.add( uv_x));
+                let v_values = vld1_u8(v_ptr.add(uv_x));
 
                 u_low_u8 = u_values;
                 v_low_u8 = v_values;
@@ -286,4 +284,4 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
     }
 
     ProcessedOffset { cx, ux: uv_x }
-}
+}
diff --git a/src/neon/yuv_to_rgba_alpha.rs b/src/neon/yuv_to_rgba_alpha.rs
@@ -196,4 +196,4 @@ pub(crate) unsafe fn neon_yuv_to_rgba_alpha<const DESTINATION_CHANNELS: u8, cons
     }
 
     ProcessedOffset { cx, ux: uv_x }
-}
+}
diff --git a/src/rgb_ar30.rs b/src/rgb_ar30.rs
@@ -138,12 +138,12 @@ pub fn rgb8_to_ra30(
 ) -> Result<(), YuvError> {
     match byte_order {
         Rgb30ByteOrder::Host => rgb_to_ar30_impl::<
-            { Rgb30::Ar30 as usize },
+            { Rgb30::Ra30 as usize },
             { Rgb30ByteOrder::Host as usize },
             { YuvSourceChannels::Rgb as u8 },
         >(ar30, ar30_stride, rgb, rgb_stride, width, height),
         Rgb30ByteOrder::Network => rgb_to_ar30_impl::<
-            { Rgb30::Ar30 as usize },
+            { Rgb30::Ra30 as usize },
             { Rgb30ByteOrder::Network as usize },
             { YuvSourceChannels::Rgb as u8 },
         >(ar30, ar30_stride, rgb, rgb_stride, width, height),
@@ -208,12 +208,12 @@ pub fn rgba8_to_ra30(
 ) -> Result<(), YuvError> {
     match byte_order {
         Rgb30ByteOrder::Host => rgb_to_ar30_impl::<
-            { Rgb30::Ar30 as usize },
+            { Rgb30::Ra30 as usize },
             { Rgb30ByteOrder::Host as usize },
             { YuvSourceChannels::Rgba as u8 },
         >(ar30, ar30_stride, rgba, rgba_stride, width, height),
         Rgb30ByteOrder::Network => rgb_to_ar30_impl::<
-            { Rgb30::Ar30 as usize },
+            { Rgb30::Ra30 as usize },
             { Rgb30ByteOrder::Network as usize },
             { YuvSourceChannels::Rgba as u8 },
         >(ar30, ar30_stride, rgba, rgba_stride, width, height),

diff --git a/src/rgba_to_nv.rs b/src/rgba_to_nv.rs
@@ -89,7 +89,7 @@ fn rgbx_to_nv<const ORIGIN_CHANNELS: u8, const UV_ORDER: u8, const SAMPLING: u8>
     let _use_avx2 = std::arch::is_x86_feature_detected!("avx2");
     #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
     let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");
-    
+
     let width = bi_planar_image.width;
 
     #[allow(unused_variables)]

diff --git a/src/y_p16_to_rgb16.rs b/src/y_p16_to_rgb16.rs
@@ -53,12 +53,12 @@ fn yuv400_p16_to_rgbx<
     let max_colors = (1 << bit_depth) - 1;
 
     let channels = destination_channels.get_channels_count();
-    let range = get_yuv_range(bit_depth, range);
+    let chroma_range = get_yuv_range(bit_depth, range);
     let kr_kb = matrix.get_kr_kb();
     let transform = get_inverse_transform(
         max_colors,
-        range.range_y,
-        range.range_uv,
+        chroma_range.range_y,
+        chroma_range.range_uv,
         kr_kb.kr,
         kr_kb.kb,
     );
@@ -68,7 +68,7 @@ fn yuv400_p16_to_rgbx<
     let inverse_transform = transform.to_integers(PRECISION as u32);
     let y_coef = inverse_transform.y_coef;
 
-    let bias_y = range.bias_y as i32;
+    let bias_y = chroma_range.bias_y as i32;
 
     let iter;
     #[cfg(feature = "rayon")]
@@ -88,45 +88,63 @@ fn yuv400_p16_to_rgbx<
         );
     }
 
-    iter.for_each(|(rgba16, y_plane)| {
-        let mut _cx = 0usize;
+    match range {
+        YuvRange::Limited => {
+            iter.for_each(|(rgba16, y_plane)| {
+                let mut _cx = 0usize;
 
-        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
-        {
-            unsafe {
-                let offset = neon_y_p16_to_rgba16_row::<
-                    DESTINATION_CHANNELS,
-                    ENDIANNESS,
-                    BYTES_POSITION,
-                    PRECISION,
-                >(
-                    y_plane.as_ptr(),
-                    rgba16.as_mut_ptr(),
-                    gray_image.width,
-                    &range,
-                    &inverse_transform,
-                    0,
-                    bit_depth as usize,
-                );
-                _cx = offset.cx;
-            }
-        }
+                #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
+                {
+                    unsafe {
+                        let offset = neon_y_p16_to_rgba16_row::<
+                            DESTINATION_CHANNELS,
+                            ENDIANNESS,
+                            BYTES_POSITION,
+                            PRECISION,
+                        >(
+                            y_plane.as_ptr(),
+                            rgba16.as_mut_ptr(),
+                            gray_image.width,
+                            &chroma_range,
+                            &inverse_transform,
+                            0,
+                            bit_depth as usize,
+                        );
+                        _cx = offset.cx;
+                    }
+                }
 
-        for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
-            let y_value = (y_src as i32 - bias_y) * y_coef;
+                for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
+                    let y_value = (y_src as i32 - bias_y) * y_coef;
 
-            let r = ((y_value + ROUNDING_CONST) >> PRECISION)
-                .min(max_colors as i32)
-                .max(0);
+                    let r = ((y_value + ROUNDING_CONST) >> PRECISION)
+                        .min(max_colors as i32)
+                        .max(0);
 
-            dst[destination_channels.get_r_channel_offset()] = r as u16;
-            dst[destination_channels.get_g_channel_offset()] = r as u16;
-            dst[destination_channels.get_b_channel_offset()] = r as u16;
-            if destination_channels.has_alpha() {
-                dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
-            }
+                    dst[destination_channels.get_r_channel_offset()] = r as u16;
+                    dst[destination_channels.get_g_channel_offset()] = r as u16;
+                    dst[destination_channels.get_b_channel_offset()] = r as u16;
+                    if destination_channels.has_alpha() {
+                        dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
+                    }
+                }
+            });
         }
-    });
+        YuvRange::Full => {
+            iter.for_each(|(rgba16, y_plane)| {
+                for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane) {
+                    let r = y_src;
+
+                    dst[destination_channels.get_r_channel_offset()] = r;
+                    dst[destination_channels.get_g_channel_offset()] = r;
+                    dst[destination_channels.get_b_channel_offset()] = r;
+                    if destination_channels.has_alpha() {
+                        dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
+                    }
+                }
+            });
+        }
+    }
 
     Ok(())
 }

diff --git a/src/y_p16_with_alpha_to_rgb16.rs b/src/y_p16_with_alpha_to_rgb16.rs
@@ -62,12 +62,12 @@ fn yuv400_p16_with_alpha_to_rgbx<
         "YUV400 with alpha cannot be called on target image without alpha"
     );
 
-    let range = get_yuv_range(bit_depth, range);
+    let chroma_range = get_yuv_range(bit_depth, range);
     let kr_kb = matrix.get_kr_kb();
     let transform = get_inverse_transform(
         max_colors,
-        range.range_y,
-        range.range_uv,
+        chroma_range.range_y,
+        chroma_range.range_uv,
         kr_kb.kr,
         kr_kb.kb,
     );
@@ -77,7 +77,7 @@ fn yuv400_p16_with_alpha_to_rgbx<
     let inverse_transform = transform.to_integers(PRECISION as u32);
     let y_coef = inverse_transform.y_coef;
 
-    let bias_y = range.bias_y as i32;
+    let bias_y = chroma_range.bias_y as i32;
 
     let iter;
     let y_iter;
@@ -103,23 +103,44 @@ fn yuv400_p16_with_alpha_to_rgbx<
             .chunks_exact(gray_alpha_image.a_stride as usize);
     }
 
-    iter.zip(y_iter)
-        .zip(a_iter)
-        .for_each(|((rgba16, y_plane16), a_plane16)| {
-            for ((&y_src, &a_src), rgba) in y_plane16
-                .iter()
-                .zip(a_plane16)
-                .zip(rgba16.chunks_exact_mut(channels))
-            {
-                let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
-                    .min(max_colors as i32)
-                    .max(0);
-                rgba[destination_channels.get_r_channel_offset()] = r as u16;
-                rgba[destination_channels.get_g_channel_offset()] = r as u16;
-                rgba[destination_channels.get_b_channel_offset()] = r as u16;
-                rgba[destination_channels.get_a_channel_offset()] = a_src;
-            }
-        });
+    match range {
+        YuvRange::Limited => {
+            iter.zip(y_iter)
+                .zip(a_iter)
+                .for_each(|((rgba16, y_plane16), a_plane16)| {
+                    for ((&y_src, &a_src), rgba) in y_plane16
+                        .iter()
+                        .zip(a_plane16)
+                        .zip(rgba16.chunks_exact_mut(channels))
+                    {
+                        let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
+                            .min(max_colors as i32)
+                            .max(0);
+                        rgba[destination_channels.get_r_channel_offset()] = r as u16;
+                        rgba[destination_channels.get_g_channel_offset()] = r as u16;
+                        rgba[destination_channels.get_b_channel_offset()] = r as u16;
+                        rgba[destination_channels.get_a_channel_offset()] = a_src;
+                    }
+                });
+        }
+        YuvRange::Full => {
+            iter.zip(y_iter)
+                .zip(a_iter)
+                .for_each(|((rgba16, y_plane16), a_plane16)| {
+                    for ((&y_src, &a_src), rgba) in y_plane16
+                        .iter()
+                        .zip(a_plane16)
+                        .zip(rgba16.chunks_exact_mut(channels))
+                    {
+                        let r = y_src;
+                        rgba[destination_channels.get_r_channel_offset()] = r;
+                        rgba[destination_channels.get_g_channel_offset()] = r;
+                        rgba[destination_channels.get_b_channel_offset()] = r;
+                        rgba[destination_channels.get_a_channel_offset()] = a_src;
+                    }
+                });
+        }
+    }
     Ok(())
 }