Skip to content

Commit

Permalink
YUV 4:0:0 full/limited range improvements
Browse files: browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 21, 2024
1 parent 96a224d commit 6714e8c
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 134 deletions.
94 changes: 56 additions & 38 deletions src/y_p16_to_rgb16.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,12 +53,12 @@ fn yuv400_p16_to_rgbx<
let max_colors = (1 << bit_depth) - 1;

let channels = destination_channels.get_channels_count();
let range = get_yuv_range(bit_depth, range);
let chroma_range = get_yuv_range(bit_depth, range);
let kr_kb = matrix.get_kr_kb();
let transform = get_inverse_transform(
max_colors,
range.range_y,
range.range_uv,
chroma_range.range_y,
chroma_range.range_uv,
kr_kb.kr,
kr_kb.kb,
);
Expand All @@ -68,7 +68,7 @@ fn yuv400_p16_to_rgbx<
let inverse_transform = transform.to_integers(PRECISION as u32);
let y_coef = inverse_transform.y_coef;

let bias_y = range.bias_y as i32;
let bias_y = chroma_range.bias_y as i32;

let iter;
#[cfg(feature = "rayon")]
Expand All @@ -88,45 +88,63 @@ fn yuv400_p16_to_rgbx<
);
}

iter.for_each(|(rgba16, y_plane)| {
let mut _cx = 0usize;
match range {
YuvRange::Limited => {
iter.for_each(|(rgba16, y_plane)| {
let mut _cx = 0usize;

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
unsafe {
let offset = neon_y_p16_to_rgba16_row::<
DESTINATION_CHANNELS,
ENDIANNESS,
BYTES_POSITION,
PRECISION,
>(
y_plane.as_ptr(),
rgba16.as_mut_ptr(),
gray_image.width,
&range,
&inverse_transform,
0,
bit_depth as usize,
);
_cx = offset.cx;
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
unsafe {
let offset = neon_y_p16_to_rgba16_row::<
DESTINATION_CHANNELS,
ENDIANNESS,
BYTES_POSITION,
PRECISION,
>(
y_plane.as_ptr(),
rgba16.as_mut_ptr(),
gray_image.width,
&chroma_range,
&inverse_transform,
0,
bit_depth as usize,
);
_cx = offset.cx;
}
}

for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
let y_value = (y_src as i32 - bias_y) * y_coef;
for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane).skip(_cx) {
let y_value = (y_src as i32 - bias_y) * y_coef;

let r = ((y_value + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
let r = ((y_value + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);

dst[destination_channels.get_r_channel_offset()] = r as u16;
dst[destination_channels.get_g_channel_offset()] = r as u16;
dst[destination_channels.get_b_channel_offset()] = r as u16;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
dst[destination_channels.get_r_channel_offset()] = r as u16;
dst[destination_channels.get_g_channel_offset()] = r as u16;
dst[destination_channels.get_b_channel_offset()] = r as u16;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
}
});
}
});
YuvRange::Full => {
iter.for_each(|(rgba16, y_plane)| {
for (dst, &y_src) in rgba16.chunks_exact_mut(channels).zip(y_plane) {
let r = y_src;

dst[destination_channels.get_r_channel_offset()] = r;
dst[destination_channels.get_g_channel_offset()] = r;
dst[destination_channels.get_b_channel_offset()] = r;
if destination_channels.has_alpha() {
dst[destination_channels.get_a_channel_offset()] = max_colors as u16;
}
}
});
}
}

Ok(())
}
Expand Down
63 changes: 42 additions & 21 deletions src/y_p16_with_alpha_to_rgb16.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,12 +62,12 @@ fn yuv400_p16_with_alpha_to_rgbx<
"YUV400 with alpha cannot be called on target image without alpha"
);

let range = get_yuv_range(bit_depth, range);
let chroma_range = get_yuv_range(bit_depth, range);
let kr_kb = matrix.get_kr_kb();
let transform = get_inverse_transform(
max_colors,
range.range_y,
range.range_uv,
chroma_range.range_y,
chroma_range.range_uv,
kr_kb.kr,
kr_kb.kb,
);
Expand All @@ -77,7 +77,7 @@ fn yuv400_p16_with_alpha_to_rgbx<
let inverse_transform = transform.to_integers(PRECISION as u32);
let y_coef = inverse_transform.y_coef;

let bias_y = range.bias_y as i32;
let bias_y = chroma_range.bias_y as i32;

let iter;
let y_iter;
Expand All @@ -103,23 +103,44 @@ fn yuv400_p16_with_alpha_to_rgbx<
.chunks_exact(gray_alpha_image.a_stride as usize);
}

iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
rgba[destination_channels.get_r_channel_offset()] = r as u16;
rgba[destination_channels.get_g_channel_offset()] = r as u16;
rgba[destination_channels.get_b_channel_offset()] = r as u16;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
match range {
YuvRange::Limited => {
iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = (((y_src as i32 - bias_y) * y_coef + ROUNDING_CONST) >> PRECISION)
.min(max_colors as i32)
.max(0);
rgba[destination_channels.get_r_channel_offset()] = r as u16;
rgba[destination_channels.get_g_channel_offset()] = r as u16;
rgba[destination_channels.get_b_channel_offset()] = r as u16;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
}
YuvRange::Full => {
iter.zip(y_iter)
.zip(a_iter)
.for_each(|((rgba16, y_plane16), a_plane16)| {
for ((&y_src, &a_src), rgba) in y_plane16
.iter()
.zip(a_plane16)
.zip(rgba16.chunks_exact_mut(channels))
{
let r = y_src;
rgba[destination_channels.get_r_channel_offset()] = r;
rgba[destination_channels.get_g_channel_offset()] = r;
rgba[destination_channels.get_b_channel_offset()] = r;
rgba[destination_channels.get_a_channel_offset()] = a_src;
}
});
}
}
Ok(())
}

Expand Down
134 changes: 79 additions & 55 deletions src/y_to_rgb.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,9 +67,15 @@ fn y_to_rgbx<const DESTINATION_CHANNELS: u8>(
)?;
gray_image.check_constraints()?;

let range = get_yuv_range(8, range);
let chroma_range = get_yuv_range(8, range);
let kr_kb = matrix.get_kr_kb();
let transform = get_inverse_transform(255, range.range_y, range.range_uv, kr_kb.kr, kr_kb.kb);
let transform = get_inverse_transform(
255,
chroma_range.range_y,
chroma_range.range_uv,
kr_kb.kr,
kr_kb.kb,
);

#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
const PRECISION: i32 = 6;
Expand All @@ -81,7 +87,7 @@ fn y_to_rgbx<const DESTINATION_CHANNELS: u8>(
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");

let bias_y = range.bias_y as i32;
let bias_y = chroma_range.bias_y as i32;

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
Expand All @@ -105,34 +111,51 @@ fn y_to_rgbx<const DESTINATION_CHANNELS: u8>(
y_iter = y_plane.chunks_exact(y_stride as usize);
}

iter.zip(y_iter).for_each(|(rgba, y_plane)| {
let mut _cx = 0usize;
if range == YuvRange::Limited {
iter.zip(y_iter).for_each(|(rgba, y_plane)| {
let mut _cx = 0usize;

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
feature = "nightly_avx512"
))]
unsafe {
if _use_avx512 {
let processed = avx512_y_to_rgb_row::<DESTINATION_CHANNELS>(
&range,
&inverse_transform,
y_plane,
rgba,
_cx,
0,
0,
gray_image.width as usize,
);
_cx = processed;
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
feature = "nightly_avx512"
))]
unsafe {
if _use_avx512 {
let processed = avx512_y_to_rgb_row::<DESTINATION_CHANNELS>(
&chroma_range,
&inverse_transform,
y_plane,
rgba,
_cx,
0,
0,
gray_image.width as usize,
);
_cx = processed;
}
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
if is_rdm_available {
let offset = neon_y_to_rgb_row::<DESTINATION_CHANNELS>(
&chroma_range,
&inverse_transform,
y_plane,
rgba,
_cx,
0,
0,
gray_image.width as usize,
);
_cx = offset;
}
}
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
if is_rdm_available {
let offset = neon_y_to_rgb_row::<DESTINATION_CHANNELS>(
&range,
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
unsafe {
let offset = wasm_y_to_rgb_row::<DESTINATION_CHANNELS>(
&chroma_range,
&inverse_transform,
y_plane,
rgba,
Expand All @@ -143,38 +166,39 @@ fn y_to_rgbx<const DESTINATION_CHANNELS: u8>(
);
_cx = offset;
}
}

#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
unsafe {
let offset = wasm_y_to_rgb_row::<DESTINATION_CHANNELS>(
&range,
&inverse_transform,
y_plane,
rgba,
_cx,
0,
0,
gray_image.width as usize,
);
_cx = offset;
}
let rgba_sliced = &mut rgba[(_cx * channels)..];
let y_sliced = &y_plane[_cx..];

let rgba_sliced = &mut rgba[(_cx * channels)..];
let y_sliced = &y_plane[_cx..];
for (y_src, rgba) in y_sliced.iter().zip(rgba_sliced.chunks_exact_mut(channels)) {
let y_value = (*y_src as i32 - bias_y) * y_coef;

for (y_src, rgba) in y_sliced.iter().zip(rgba_sliced.chunks_exact_mut(channels)) {
let y_value = (*y_src as i32 - bias_y) * y_coef;
let r = qrshr::<PRECISION, 8>(y_value);
rgba[destination_channels.get_r_channel_offset()] = r as u8;
rgba[destination_channels.get_g_channel_offset()] = r as u8;
rgba[destination_channels.get_b_channel_offset()] = r as u8;
if destination_channels.has_alpha() {
rgba[destination_channels.get_a_channel_offset()] = 255;
}
}
});
} else {
iter.zip(y_iter).for_each(|(rgba, y_plane)| {
let mut _cx = 0usize;
let rgba_sliced = &mut rgba[(_cx * channels)..];
let y_sliced = &y_plane[_cx..];

let r = qrshr::<PRECISION, 8>(y_value);
rgba[destination_channels.get_r_channel_offset()] = r as u8;
rgba[destination_channels.get_g_channel_offset()] = r as u8;
rgba[destination_channels.get_b_channel_offset()] = r as u8;
if destination_channels.has_alpha() {
rgba[destination_channels.get_a_channel_offset()] = 255;
for (y_src, rgba) in y_sliced.iter().zip(rgba_sliced.chunks_exact_mut(channels)) {
let r = *y_src;
rgba[destination_channels.get_r_channel_offset()] = r;
rgba[destination_channels.get_g_channel_offset()] = r;
rgba[destination_channels.get_b_channel_offset()] = r;
if destination_channels.has_alpha() {
rgba[destination_channels.get_a_channel_offset()] = 255;
}
}
}
});
});
}

Ok(())
}
Expand Down
Loading

0 comments on commit 6714e8c

Please sign in to comment.