Skip to content

Commit

Permalink
Merge pull request #10 from awxkee/dev
Browse files — browse the repository at this point in the history
AR30 fixes, improvements to YUV 4:0:0
  • Loading branch information
awxkee authored Nov 21, 2024
2 parents 9b5dd60 + 6714e8c commit e8bc878
Show file tree
Hide file tree
Showing 13 changed files with 254 additions and 172 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["app"] }

[package]
name = "yuvutils-rs"
version = "0.5.4"
version = "0.5.5"
edition = "2021"
description = "High performance utilities for YUV format handling and conversion."
readme = "README.md"
Expand Down
14 changes: 7 additions & 7 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ use std::fs::File;
use std::io::Read;
use std::time::Instant;
use yuvutils_rs::{
ab30_to_rgb8, ar30_to_rgb8, ra30_to_rgb8, rgb8_to_ar30, rgb_to_sharp_yuv422, rgb_to_yuv420_p16,
rgb_to_yuv422_p16, rgb_to_yuv_nv12_p16, yuv422_p16_to_ab30, yuv422_p16_to_ar30,
yuv422_p16_to_ra30, yuv422_to_rgb, yuv444_p16_to_ar30, yuv_nv12_to_rgb_p16, Rgb30ByteOrder,
SharpYuvGammaTransfer, YuvBiPlanarImageMut, YuvBytesPacking, YuvChromaSubsampling,
YuvEndianness, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
ab30_to_rgb8, ar30_to_rgb8, ra30_to_rgb8, rgb8_to_ar30, rgb8_to_ra30, rgb_to_sharp_yuv422,
rgb_to_yuv420_p16, rgb_to_yuv422_p16, rgb_to_yuv_nv12_p16, yuv422_p16_to_ab30,
yuv422_p16_to_ar30, yuv422_p16_to_ra30, yuv422_to_rgb, yuv444_p16_to_ar30, yuv_nv12_to_rgb_p16,
Rgb30ByteOrder, SharpYuvGammaTransfer, YuvBiPlanarImageMut, YuvBytesPacking,
YuvChromaSubsampling, YuvEndianness, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
};

fn read_file_bytes(file_path: &str) -> Result<Vec<u8>, String> {
Expand Down Expand Up @@ -222,7 +222,7 @@ fn main() {

let mut ar30 = vec![0u32; width as usize * height as usize];

rgb8_to_ar30(
rgb8_to_ra30(
&mut ar30,
width,
Rgb30ByteOrder::Host,
Expand All @@ -246,7 +246,7 @@ fn main() {
// )
// .unwrap();
rgba.fill(0);
ar30_to_rgb8(
ra30_to_rgb8(
&ar30,
width,
Rgb30ByteOrder::Host,
Expand Down
2 changes: 1 addition & 1 deletion src/images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl<T: Copy + Debug> BufferStoreMut<'_, T> {
}

#[derive(Debug, Clone)]
/// Non representation of Bi-Planar YUV image
/// Non-mutable representation of Bi-Planar YUV image
pub struct YuvBiPlanarImage<'a, T>
where
T: Copy + Debug,
Expand Down
14 changes: 6 additions & 8 deletions src/neon/yuv_to_rgba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
match chroma_subsampling {
YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => {
let u_values = vld1_u8(u_ptr.add(uv_x));
let v_values = vld1_u8(v_ptr.add( uv_x));
let v_values = vld1_u8(v_ptr.add(uv_x));

u_high_u8 = vzip2_u8(u_values, u_values);
v_high_u8 = vzip2_u8(v_values, v_values);
Expand All @@ -81,7 +81,7 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
}
YuvChromaSubsampling::Yuv444 => {
let u_values = vld1q_u8(u_ptr.add(uv_x));
let v_values = vld1q_u8(v_ptr.add( uv_x));
let v_values = vld1q_u8(v_ptr.add(uv_x));

u_high_u8 = vget_high_u8(u_values);
v_high_u8 = vget_high_u8(v_values);
Expand Down Expand Up @@ -197,17 +197,15 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const

match chroma_subsampling {
YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => {
let u_values =
vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32));
let v_values =
vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add( uv_x) as *const u32));
let u_values = vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32));
let v_values = vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add(uv_x) as *const u32));

u_low_u8 = vzip1_u8(u_values, u_values);
v_low_u8 = vzip1_u8(v_values, v_values);
}
YuvChromaSubsampling::Yuv444 => {
let u_values = vld1_u8(u_ptr.add(uv_x));
let v_values = vld1_u8(v_ptr.add( uv_x));
let v_values = vld1_u8(v_ptr.add(uv_x));

u_low_u8 = u_values;
v_low_u8 = v_values;
Expand Down Expand Up @@ -286,4 +284,4 @@ pub(crate) unsafe fn neon_yuv_to_rgba_row<const DESTINATION_CHANNELS: u8, const
}

ProcessedOffset { cx, ux: uv_x }
}
}
2 changes: 1 addition & 1 deletion src/neon/yuv_to_rgba_alpha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,4 @@ pub(crate) unsafe fn neon_yuv_to_rgba_alpha<const DESTINATION_CHANNELS: u8, cons
}

ProcessedOffset { cx, ux: uv_x }
}
}
8 changes: 4 additions & 4 deletions src/rgb_ar30.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ pub fn rgb8_to_ra30(
) -> Result<(), YuvError> {
match byte_order {
Rgb30ByteOrder::Host => rgb_to_ar30_impl::<
{ Rgb30::Ar30 as usize },
{ Rgb30::Ra30 as usize },
{ Rgb30ByteOrder::Host as usize },
{ YuvSourceChannels::Rgb as u8 },
>(ar30, ar30_stride, rgb, rgb_stride, width, height),
Rgb30ByteOrder::Network => rgb_to_ar30_impl::<
{ Rgb30::Ar30 as usize },
{ Rgb30::Ra30 as usize },
{ Rgb30ByteOrder::Network as usize },
{ YuvSourceChannels::Rgb as u8 },
>(ar30, ar30_stride, rgb, rgb_stride, width, height),
Expand Down Expand Up @@ -208,12 +208,12 @@ pub fn rgba8_to_ra30(
) -> Result<(), YuvError> {
match byte_order {
Rgb30ByteOrder::Host => rgb_to_ar30_impl::<
{ Rgb30::Ar30 as usize },
{ Rgb30::Ra30 as usize },
{ Rgb30ByteOrder::Host as usize },
{ YuvSourceChannels::Rgba as u8 },
>(ar30, ar30_stride, rgba, rgba_stride, width, height),
Rgb30ByteOrder::Network => rgb_to_ar30_impl::<
{ Rgb30::Ar30 as usize },
{ Rgb30::Ra30 as usize },
{ Rgb30ByteOrder::Network as usize },
{ YuvSourceChannels::Rgba as u8 },
>(ar30, ar30_stride, rgba, rgba_stride, width, height),
Expand Down
2 changes: 1 addition & 1 deletion src/rgba_to_nv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ fn rgbx_to_nv<const ORIGIN_CHANNELS: u8, const UV_ORDER: u8, const SAMPLING: u8>
let _use_avx2 = std::arch::is_x86_feature_detected!("avx2");
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");

let width = bi_planar_image.width;

#[allow(unused_variables)]
Expand Down
94 changes: 56 additions & 38 deletions src/y_p16_to_rgb16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ fn yuv400_p16_to_rgbx<
let max_colors = (1 << bit_depth) - 1;

let channels = destination_channels.get_channels_count();
let range = get_yuv_range(bit_depth, range);
let chroma_range = get_yuv_range(bit_depth, range);
let kr_kb = matrix.get_kr_kb();
let transform = get_inverse_transform(
max_colors,
range.range_y,
range.range_uv,
chroma_range.range_y,
chroma_range.range_uv,
kr_kb.kr,
kr_kb.kb,
);
Expand All @@ -68,7 +68,7 @@ fn yuv400_p16_to_rgbx<
let inverse_transform = transform.to_integers(PRECISION as u32);
let y_coef = inverse_transform.y_coef;

let bias_y = range.bias_y as i32;
let bias_y = chroma_range.bias_y as i32;

let iter;
#[cfg(feature = "rayon")]
Expand All @@ -88,45 +88,63 @@ fn yuv400_p16_to_rgbx<
);
}

iter.for_each(|(rgba16, y_plane)| {
let mut _cx = 0usize;
match range {
YuvRange::Limited => {
iter.for_each(|(rgba16, y_plane)| {
let mut _cx = 0usize;

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
unsafe {
let offset = neon_y_p16_to_rgba16_row::<
DESTINATION_CHANNELS,
ENDIANNESS,
BYTES_POSITION,
PRECISION,
>(
y_plane.as_ptr(),
rgba16.as_mut_ptr(),
gray_image.width,
&range,
&inverse_transform,
0,
bit_depth as usize,
);
_cx = offset.cx;
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
unsafe {
let offset = neon_y_p16_to_rgba16_row::<
DESTINATION_CHANNELS,
ENDIANNESS,
BYTES_POSITION,
PRECISION,
>(
y_plane.as_ptr(),
rgba16.as_mut_ptr(),
gray_image.width,
&chroma_range,
&inverse_transform,
0,
bit_depth as usize,
);
_cx = offset.cx;
}
}

for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
let y_value = (y_src as i32 - bias_y) * y_coef;
for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
let y_value = (y_src as i32 - bias_y) * y_coef;

let r = ((y_value + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
let r = ((y_value + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);

dst[destination_channels.get_r_channel_offset()] = r as u16;
dst[destination_channels.get_g_channel_offset()] = r as u16;
dst[destination_channels.get_b_channel_offset()] = r as u16;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
dst[destination_channels.get_r_channel_offset()] = r as u16;
dst[destination_channels.get_g_channel_offset()] = r as u16;
dst[destination_channels.get_b_channel_offset()] = r as u16;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
}
});
}
});
YuvRange::Full => {
iter.for_each(|(rgba16, y_plane)| {
for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane) {
let r = y_src;

dst[destination_channels.get_r_channel_offset()] = r;
dst[destination_channels.get_g_channel_offset()] = r;
dst[destination_channels.get_b_channel_offset()] = r;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
}
});
}
}

Ok(())
}
Expand Down
63 changes: 42 additions & 21 deletions src/y_p16_with_alpha_to_rgb16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ fn yuv400_p16_with_alpha_to_rgbx<
"YUV400 with alpha cannot be called on target image without alpha"
);

let range = get_yuv_range(bit_depth, range);
let chroma_range = get_yuv_range(bit_depth, range);
let kr_kb = matrix.get_kr_kb();
let transform = get_inverse_transform(
max_colors,
range.range_y,
range.range_uv,
chroma_range.range_y,
chroma_range.range_uv,
kr_kb.kr,
kr_kb.kb,
);
Expand All @@ -77,7 +77,7 @@ fn yuv400_p16_with_alpha_to_rgbx<
let inverse_transform = transform.to_integers(PRECISION as u32);
let y_coef = inverse_transform.y_coef;

let bias_y = range.bias_y as i32;
let bias_y = chroma_range.bias_y as i32;

let iter;
let y_iter;
Expand All @@ -103,23 +103,44 @@ fn yuv400_p16_with_alpha_to_rgbx<
.chunks_exact(gray_alpha_image.a_stride as usize);
}

iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
rgba[destination_channels.get_r_channel_offset()] = r as u16;
rgba[destination_channels.get_g_channel_offset()] = r as u16;
rgba[destination_channels.get_b_channel_offset()] = r as u16;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
match range {
YuvRange::Limited => {
iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
rgba[destination_channels.get_r_channel_offset()] = r as u16;
rgba[destination_channels.get_g_channel_offset()] = r as u16;
rgba[destination_channels.get_b_channel_offset()] = r as u16;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
}
YuvRange::Full => {
iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = y_src;
rgba[destination_channels.get_r_channel_offset()] = r;
rgba[destination_channels.get_g_channel_offset()] = r;
rgba[destination_channels.get_b_channel_offset()] = r;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
}
}
Ok(())
}

Expand Down
Loading

0 comments on commit e8bc878

Please sign in to comment.