Added YUV400

awxkee · May 18, 2024 · 778327c · 778327c
1 parent 4408dae
commit 778327c
Show file tree

Hide file tree

Showing 6 changed files with 516 additions and 9 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "yuvutils-rs"
-version = "0.1.7"
+version = "0.1.8"
 edition = "2021"
 description = "Rust utilities for YUV format handling and conversion."
 readme = "README.md"

diff --git a/src/lib.rs b/src/lib.rs
@@ -4,6 +4,8 @@ mod yuv_nv12_p10;
 mod yuv_support;
 mod yuv_to_rgba;
 mod yuv_to_rgba_alpha;
+mod rgb_to_y;
+mod y_to_rgb;
 
 pub use yuv_support::YuvStandardMatrix;
 pub use yuv_support::YuvRange;
@@ -47,4 +49,11 @@ pub use yuv_to_rgba_alpha::yuv420_with_alpha_to_bgra;
 pub use yuv_to_rgba_alpha::yuv422_with_alpha_to_rgba;
 pub use yuv_to_rgba_alpha::yuv422_with_alpha_to_bgra;
 pub use yuv_to_rgba_alpha::yuv444_with_alpha_to_rgba;
-pub use yuv_to_rgba_alpha::yuv444_with_alpha_to_bgra;
+pub use yuv_to_rgba_alpha::yuv444_with_alpha_to_bgra;
+
+pub use rgb_to_y::rgb_to_yuv400;
+pub use rgb_to_y::bgra_to_yuv400;
+pub use rgb_to_y::rgba_to_yuv400;
+pub use y_to_rgb::yuv400_to_rgb;
+pub use y_to_rgb::yuv400_to_rgba;
+pub use y_to_rgb::yuv400_to_bgra;
diff --git a/src/rgb_to_y.rs b/src/rgb_to_y.rs
@@ -0,0 +1,269 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    uint8x16_t, vcombine_u16, vcombine_u8, vdupq_n_s16, vdupq_n_s32, vget_low_s16, vget_low_u8,
+    vld3q_u8, vld4q_u8, vmaxq_s32, vmlal_high_s16, vmlal_s16, vmovl_high_u8, vmovl_u8, vqmovn_u16,
+    vqshrun_n_s32, vreinterpretq_s16_u16, vst1q_u8,
+};
+
+use crate::yuv_support::{
+    get_forward_transform, get_kr_kb, get_yuv_range, ToIntegerTransform, YuvRange,
+    YuvSourceChannels, YuvStandardMatrix,
+};
+
+// Chroma subsampling always assumed as YUV 400
+fn rgbx_to_y<const ORIGIN_CHANNELS: u8>(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
+    let channels = source_channels.get_channels_count();
+    let range = get_yuv_range(8, range);
+    let kr_kb = get_kr_kb(matrix);
+    let max_range_p8 = (2f32.powi(8) - 1f32) as u32;
+    let transform_precise = get_forward_transform(
+        max_range_p8,
+        range.range_y,
+        range.range_uv,
+        kr_kb.kr,
+        kr_kb.kb,
+    );
+    let transform = transform_precise.to_integers(8);
+    let precision_scale = (1 << 8) as f32;
+    let bias_y = ((range.bias_y as f32 + 0.5f32) * precision_scale) as i32;
+
+    let mut y_offset = 0usize;
+    let mut rgba_offset = 0usize;
+
+    for _ in 0..height as usize {
+        #[allow(unused_variables)]
+        #[allow(unused_mut)]
+        let mut cx = 0usize;
+
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let y_ptr = y_plane.as_mut_ptr();
+            let rgba_ptr = rgba.as_ptr();
+
+            let y_bias = vdupq_n_s32(bias_y);
+            let v_yr = vdupq_n_s16(transform.yr as i16);
+            let v_yg = vdupq_n_s16(transform.yg as i16);
+            let v_yb = vdupq_n_s16(transform.yb as i16);
+            let v_zeros = vdupq_n_s32(0i32);
+
+            while cx + 16 < width as usize {
+                let r_values_u8: uint8x16_t;
+                let g_values_u8: uint8x16_t;
+                let b_values_u8: uint8x16_t;
+
+                match source_channels {
+                    YuvSourceChannels::Rgb => {
+                        let rgb_values = vld3q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.2;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.0;
+                    }
+                }
+
+                let r_high = vreinterpretq_s16_u16(vmovl_high_u8(r_values_u8));
+                let g_high = vreinterpretq_s16_u16(vmovl_high_u8(g_values_u8));
+                let b_high = vreinterpretq_s16_u16(vmovl_high_u8(b_values_u8));
+
+                let r_h_low = vget_low_s16(r_high);
+                let g_h_low = vget_low_s16(g_high);
+                let b_h_low = vget_low_s16(b_high);
+
+                let mut y_h_high = vmlal_high_s16(y_bias, r_high, v_yr);
+                y_h_high = vmlal_high_s16(y_h_high, g_high, v_yg);
+                y_h_high = vmlal_high_s16(y_h_high, b_high, v_yb);
+                y_h_high = vmaxq_s32(y_h_high, v_zeros);
+
+                let mut y_h_low = vmlal_s16(y_bias, r_h_low, vget_low_s16(v_yr));
+                y_h_low = vmlal_s16(y_h_low, g_h_low, vget_low_s16(v_yg));
+                y_h_low = vmlal_s16(y_h_low, b_h_low, vget_low_s16(v_yb));
+                y_h_low = vmaxq_s32(y_h_low, v_zeros);
+
+                let y_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(y_h_low), vqshrun_n_s32::<8>(y_h_high));
+
+                let r_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(r_values_u8)));
+                let g_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(g_values_u8)));
+                let b_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b_values_u8)));
+
+                let r_l_low = vget_low_s16(r_low);
+                let g_l_low = vget_low_s16(g_low);
+                let b_l_low = vget_low_s16(b_low);
+
+                let mut y_l_high = vmlal_high_s16(y_bias, r_low, v_yr);
+                y_l_high = vmlal_high_s16(y_l_high, g_low, v_yg);
+                y_l_high = vmlal_high_s16(y_l_high, b_low, v_yb);
+                y_l_high = vmaxq_s32(y_l_high, v_zeros);
+
+                let mut y_l_low = vmlal_s16(y_bias, r_l_low, vget_low_s16(v_yr));
+                y_l_low = vmlal_s16(y_l_low, g_l_low, vget_low_s16(v_yg));
+                y_l_low = vmlal_s16(y_l_low, b_l_low, vget_low_s16(v_yb));
+                y_l_low = vmaxq_s32(y_l_low, v_zeros);
+
+                let y_low = vcombine_u16(vqshrun_n_s32::<8>(y_l_low), vqshrun_n_s32::<8>(y_l_high));
+
+                let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
+                vst1q_u8(y_ptr.add(y_offset + cx), y);
+
+                cx += 16;
+            }
+        }
+
+        for x in cx..width as usize {
+            let px = x * channels;
+            let r = rgba[rgba_offset + px + source_channels.get_r_channel_offset()] as i32;
+            let g = rgba[rgba_offset + px + source_channels.get_g_channel_offset()] as i32;
+            let b = rgba[rgba_offset + px + source_channels.get_b_channel_offset()] as i32;
+            let y = (r * transform.yr + g * transform.yg + b * transform.yb + bias_y) >> 8;
+            y_plane[y_offset + x] = y as u8;
+        }
+
+        y_offset += y_stride as usize;
+        rgba_offset += rgba_stride as usize;
+    }
+}
+
+/// Convert RGB image data to YUV 400 planar format.
+///
+/// This function performs RGB to YUV conversion and stores the result in YUV400 planar format,
+/// with Y (luminance) plane
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `rgb` - The input RGB image data slice.
+/// * `rgb_stride` - The stride (bytes per row) for the RGB image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input RGB data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn rgb_to_yuv400(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    rgb: &[u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_y::<{ YuvSourceChannels::Rgb as u8 }>(
+        y_plane, y_stride, rgb, rgb_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert RGBA image data to YUV 400 planar format.
+///
+/// This function performs RGBA to YUV conversion and stores the result in YUV400 planar format,
+/// with Y (luminance) plane
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `rgba` - The input RGBA image data slice.
+/// * `rgba_stride` - The stride (bytes per row) for the RGBA image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input RGBA data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn rgba_to_yuv400(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_y::<{ YuvSourceChannels::Rgba as u8 }>(
+        y_plane,
+        y_stride,
+        rgba,
+        rgba_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
+
+/// Convert BGRA image data to YUV 400 planar format.
+///
+/// This function performs BGRA to YUV conversion and stores the result in YUV420 planar format,
+/// with Y (luminance) plane
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `bgra` - The input BGRA image data slice.
+/// * `bgra_stride` - The stride (bytes per row) for the BGRA image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input BGRA data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn bgra_to_yuv400(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    bgra: &[u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_y::<{ YuvSourceChannels::Bgra as u8 }>(
+        y_plane,
+        y_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}