Skip to content

Commit

Permalink
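Improvements (in the hunks shown below: removes the always-zero offset parameters from the YUY2 SIMD kernels and their call sites)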
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 6, 2024
1 parent db8cbc0 commit e455cd6
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 293 deletions.
17 changes: 3 additions & 14 deletions src/avx2/yuy2_to_rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,13 @@ pub fn yuy2_to_rgb_avx<const DST_CHANNELS: u8, const YUY2_TARGET: usize>(
range: &YuvChromaRange,
transform: &CbCrInverseTransform<i32>,
yuy2_store: &[u8],
yuy2_offset: usize,
rgb: &mut [u8],
rgb_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
unsafe {
yuy2_to_rgb_avx_impl::<DST_CHANNELS, YUY2_TARGET>(
range,
transform,
yuy2_store,
yuy2_offset,
rgb,
rgb_offset,
width,
nav,
range, transform, yuy2_store, rgb, width, nav,
)
}
}
Expand All @@ -68,9 +59,7 @@ unsafe fn yuy2_to_rgb_avx_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
range: &YuvChromaRange,
transform: &CbCrInverseTransform<i32>,
yuy2_store: &[u8],
yuy2_offset: usize,
rgb: &mut [u8],
rgb_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -95,8 +84,8 @@ unsafe fn yuy2_to_rgb_avx_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
let rounding_const = _mm256_set1_epi16(1 << 5);

for x in (_yuy2_x..max_x_32).step_by(32) {
let yuy2_offset = yuy2_offset + x * 4;
let dst_pos = rgb_offset + _cx * dst_chans.get_channels_count();
let yuy2_offset = x * 4;
let dst_pos = _cx * dst_chans.get_channels_count();
let dst_ptr = rgb.as_mut_ptr().add(dst_pos);

let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);
Expand Down
12 changes: 4 additions & 8 deletions src/avx2/yuy2_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,9 @@ use std::arch::x86_64::*;
#[target_feature(enable = "avx2")]
pub unsafe fn yuy2_to_yuv_avx<const SAMPLING: u8, const YUY2_TARGET: usize>(
y_plane: &mut [u8],
y_offset: usize,
u_plane: &mut [u8],
u_offset: usize,
v_plane: &mut [u8],
v_offset: usize,
yuy2_store: &[u8],
yuy2_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -59,10 +55,10 @@ pub unsafe fn yuy2_to_yuv_avx<const SAMPLING: u8, const YUY2_TARGET: usize>(
let max_x_32 = (width as usize / 2).saturating_sub(32);

for x in (_yuy2_x..max_x_32).step_by(32) {
let dst_offset = yuy2_offset + x * 4;
let u_pos = u_offset + _uv_x;
let v_pos = v_offset + _uv_x;
let y_pos = y_offset + _cx;
let dst_offset = x * 4;
let u_pos = _uv_x;
let v_pos = _uv_x;
let y_pos = _cx;

let yuy2_ptr = yuy2_store.as_ptr().add(dst_offset);

Expand Down
10 changes: 4 additions & 6 deletions src/neon/yuy2_to_rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ pub fn yuy2_to_rgb_neon<const DST_CHANNELS: u8, const YUY2_TARGET: usize>(
range: &YuvChromaRange,
transform: &CbCrInverseTransform<i32>,
yuy2_store: &[u8],
yuy2_offset: usize,
rgb: &mut [u8],
rgb_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -63,8 +61,8 @@ pub fn yuy2_to_rgb_neon<const DST_CHANNELS: u8, const YUY2_TARGET: usize>(
let v_alpha = vdupq_n_u8(255u8);

for x in (_yuy2_x..max_x_16).step_by(16) {
let dst_offset = yuy2_offset + x * 4;
let dst_pos = rgb_offset + _cx * dst_chans.get_channels_count();
let dst_offset = x * 4;
let dst_pos = _cx * dst_chans.get_channels_count();
let dst_ptr = rgb.as_mut_ptr().add(dst_pos);

let pixel_set = vld4q_u8(yuy2_store.as_ptr().add(dst_offset));
Expand Down Expand Up @@ -250,8 +248,8 @@ pub fn yuy2_to_rgb_neon<const DST_CHANNELS: u8, const YUY2_TARGET: usize>(
}

for x in (_yuy2_x..max_x_8).step_by(8) {
let dst_offset = yuy2_offset + x * 4;
let dst_pos = rgb_offset + _cx * dst_chans.get_channels_count();
let dst_offset = x * 4;
let dst_pos = _cx * dst_chans.get_channels_count();
let dst_ptr = rgb.as_mut_ptr().add(dst_pos);

let pixel_set = vld4_u8(yuy2_store.as_ptr().add(dst_offset));
Expand Down
20 changes: 8 additions & 12 deletions src/neon/yuy2_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,9 @@ use std::arch::aarch64::*;

pub fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
y_plane: &mut [u8],
y_offset: usize,
u_plane: &mut [u8],
u_offset: usize,
v_plane: &mut [u8],
v_offset: usize,
yuy2_store: &[u8],
yuy2_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -54,10 +50,10 @@ pub fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
let max_x_8 = (width as usize / 2).saturating_sub(8);

for x in (_yuy2_x..max_x_16).step_by(16) {
let dst_offset = yuy2_offset + x * 4;
let u_pos = u_offset + _uv_x;
let v_pos = v_offset + _uv_x;
let y_pos = y_offset + _cx;
let dst_offset = x * 4;
let u_pos = _uv_x;
let v_pos = _uv_x;
let y_pos = _cx;

let pixel_set = vld4q_u8(yuy2_store.as_ptr().add(dst_offset));
let mut y_first = match yuy2_source {
Expand Down Expand Up @@ -121,10 +117,10 @@ pub fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
}

for x in (_yuy2_x..max_x_8).step_by(8) {
let dst_offset = yuy2_offset + x * 4;
let u_pos = u_offset + _uv_x;
let v_pos = v_offset + _uv_x;
let y_pos = y_offset + _cx;
let dst_offset = x * 4;
let u_pos = _uv_x;
let v_pos = _uv_x;
let y_pos = _cx;

let pixel_set = vld4_u8(yuy2_store.as_ptr().add(dst_offset));
let mut y_first = match yuy2_source {
Expand Down
21 changes: 5 additions & 16 deletions src/sse/yuy2_to_rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,13 @@ pub fn yuy2_to_rgb_sse<const DST_CHANNELS: u8, const YUY2_TARGET: usize>(
range: &YuvChromaRange,
transform: &CbCrInverseTransform<i32>,
yuy2_store: &[u8],
yuy2_offset: usize,
rgb: &mut [u8],
rgb_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
unsafe {
yuy2_to_rgb_sse_impl::<DST_CHANNELS, YUY2_TARGET>(
range,
transform,
yuy2_store,
yuy2_offset,
rgb,
rgb_offset,
width,
nav,
range, transform, yuy2_store, rgb, width, nav,
)
}
}
Expand All @@ -65,9 +56,7 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
range: &YuvChromaRange,
transform: &CbCrInverseTransform<i32>,
yuy2_store: &[u8],
yuy2_offset: usize,
rgb: &mut [u8],
rgb_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -94,8 +83,8 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
let zeros = _mm_setzero_si128();

for x in (_yuy2_x..max_x_16).step_by(16) {
let yuy2_offset = yuy2_offset + x * 4;
let dst_pos = rgb_offset + _cx * dst_chans.get_channels_count();
let yuy2_offset = x * 4;
let dst_pos = _cx * dst_chans.get_channels_count();
let dst_ptr = rgb.as_mut_ptr().add(dst_pos);

let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);
Expand Down Expand Up @@ -346,8 +335,8 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
}

for x in (_yuy2_x..max_x_8).step_by(8) {
let yuy2_offset = yuy2_offset + x * 4;
let dst_pos = rgb_offset + _cx * dst_chans.get_channels_count();
let yuy2_offset = x * 4;
let dst_pos = _cx * dst_chans.get_channels_count();
let dst_ptr = rgb.as_mut_ptr().add(dst_pos);

let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);
Expand Down
20 changes: 8 additions & 12 deletions src/sse/yuy2_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,9 @@ use std::arch::x86_64::*;
#[target_feature(enable = "sse4.1")]
pub unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
y_plane: &mut [u8],
y_offset: usize,
u_plane: &mut [u8],
u_offset: usize,
v_plane: &mut [u8],
v_offset: usize,
yuy2_store: &[u8],
yuy2_offset: usize,
width: u32,
nav: YuvToYuy2Navigation,
) -> YuvToYuy2Navigation {
Expand All @@ -59,10 +55,10 @@ pub unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>
let max_x_8 = (width as usize / 2).saturating_sub(8);

for x in (_yuy2_x..max_x_16).step_by(16) {
let yuy2_offset = yuy2_offset + x * 4;
let u_pos = u_offset + _uv_x;
let v_pos = v_offset + _uv_x;
let y_pos = y_offset + _cx;
let yuy2_offset = x * 4;
let u_pos = _uv_x;
let v_pos = _uv_x;
let y_pos = _cx;

let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);

Expand Down Expand Up @@ -133,10 +129,10 @@ pub unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>
}

for x in (_yuy2_x..max_x_8).step_by(8) {
let yuy2_offset = yuy2_offset + x * 4;
let u_pos = u_offset + _uv_x;
let v_pos = v_offset + _uv_x;
let y_pos = y_offset + _cx;
let yuy2_offset = x * 4;
let u_pos = _uv_x;
let v_pos = _uv_x;
let y_pos = _cx;

let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);

Expand Down
9 changes: 0 additions & 9 deletions src/yuy2_to_rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
}

rgb_iter.zip(yuy2_iter).for_each(|(rgb_store, yuy2_store)| {
let rgb_offset = 0usize;
let yuy_offset = 0usize;

let mut _cx = 0usize;
let mut _yuy2_x = 0usize;

Expand All @@ -104,9 +101,7 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
&range,
&inverse_transform,
yuy2_store,
yuy_offset,
rgb_store,
rgb_offset,
width,
YuvToYuy2Navigation::new(_cx, 0, _yuy2_x),
);
Expand All @@ -118,9 +113,7 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
&range,
&inverse_transform,
yuy2_store,
yuy_offset,
rgb_store,
rgb_offset,
width,
YuvToYuy2Navigation::new(_cx, 0, _yuy2_x),
);
Expand All @@ -135,9 +128,7 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
&range,
&inverse_transform,
yuy2_store,
yuy_offset,
rgb_store,
rgb_offset,
width,
YuvToYuy2Navigation::new(_cx, 0, _yuy2_x),
);
Expand Down
2 changes: 1 addition & 1 deletion src/yuy2_to_rgb_p16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ fn yuy2_to_rgb_impl_p16<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize
rgb_stride: u32,
bit_depth: u32,
width: u32,
height: u32,
_: u32,
range: YuvRange,
matrix: YuvStandardMatrix,
) {
Expand Down
Loading

0 comments on commit e455cd6

Please sign in to comment.