Skip to content

Commit

Permalink
Manual clamp
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 25, 2024
1 parent eceee29 commit 6f10a64
Show file tree
Hide file tree
Showing 8 changed files with 682 additions and 63 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ Tests performed on the image 5763x3842

| | time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils RGB->YUV 4:2:0 | 4.09ms | 6.14ms |
| utils RGB->YUV 4:2:0 | 3.48ms | 6.14ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.88ms | 7.34ms |
| utils RGBA->YUV 4:2:0 | 4.32ms | 7.34ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms |
| utils RGBA->YUV 4:2:2 | 4.83ms | 7.08ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms |
Expand Down
12 changes: 6 additions & 6 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ fn main() {
// let mut bytes_16: Vec<u16> = src_bytes.iter().map(|&x| (x as u16) << 4).collect();

let start_time = Instant::now();
rgb_to_yuv_nv12(
&mut bi_planar_image,
rgb_to_yuv420(
&mut planar_image,
&src_bytes,
rgba_stride as u32,
YuvRange::Full,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
Expand Down Expand Up @@ -253,11 +253,11 @@ fn main() {
// let rgba_stride = width as usize * 4;
// let mut rgba = vec![0u8; height as usize * rgba_stride];

yuv_nv12_to_rgb(
&fixed_biplanar,
yuv420_to_rgb(
&fixed_planar,
&mut rgba,
rgba_stride as u32,
YuvRange::Full,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
Expand Down
2 changes: 2 additions & 0 deletions src/neon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mod rgb_to_ycgco_r;
mod rgb_to_yuv_p16;
mod rgba_to_nv;
mod rgba_to_yuv;
mod rgba_to_yuv420;
mod y_p16_to_rgba16;
mod y_to_rgb;
mod ycgco_to_rgb;
Expand All @@ -62,6 +63,7 @@ pub(crate) use rgb_to_ycgco_r::neon_rgb_to_ycgcor_row;
pub(crate) use rgb_to_yuv_p16::{neon_rgba_to_yuv_p16, neon_rgba_to_yuv_p16_rdm};
pub(crate) use rgba_to_nv::{neon_rgbx_to_nv_row, neon_rgbx_to_nv_row_rdm};
pub(crate) use rgba_to_yuv::{neon_rgba_to_yuv, neon_rgba_to_yuv_rdm};
pub(crate) use rgba_to_yuv420::{neon_rgba_to_yuv420, neon_rgba_to_yuv_rdm420};
pub(crate) use y_p16_to_rgba16::neon_y_p16_to_rgba16_row;
pub(crate) use y_to_rgb::{neon_y_to_rgb_row, neon_y_to_rgb_row_rdm};
pub(crate) use ycgco_to_rgb::neon_ycgco_to_rgb_row;
Expand Down
8 changes: 0 additions & 8 deletions src/neon/neon_simd_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,6 @@ pub(crate) unsafe fn vweight_laneq_x2<const LANE0: i32, const LANE1: i32>(
(lo, hi)
}

/// Widening multiply of two 8-lane `i16` vectors.
///
/// Multiplies `v` and `q` lane by lane, producing 32-bit products, and
/// returns them as a `(low, high)` pair: the first element holds the
/// products of the low four lanes, the second the products of the high
/// four lanes.
///
/// # Safety
///
/// This is an `unsafe fn` built directly on NEON intrinsics.
/// NOTE(review): presumably the caller must ensure NEON is available on the
/// executing CPU — confirm against the crate's dispatch code.
#[inline(always)]
pub(crate) unsafe fn vmullq_s16(v: int16x8_t, q: int16x8_t) -> (int32x4_t, int32x4_t) {
    // Low halves are extracted explicitly and multiplied with the 64-bit form.
    let low_products = vmull_s16(vget_low_s16(v), vget_low_s16(q));
    // The `_high` variant reads the upper four lanes of each operand directly.
    let high_products = vmull_high_s16(v, q);
    (low_products, high_products)
}

#[inline(always)]
pub(crate) unsafe fn vmullq_laneq_s16<const LANE: i32>(
v: int16x8_t,
Expand Down
6 changes: 2 additions & 4 deletions src/neon/rgba_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
start_cx: usize,
start_ux: usize,
width: usize,
compute_uv_row: bool,
) -> ProcessedOffset {
let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
Expand Down Expand Up @@ -192,7 +191,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
vst1q_u8(v_ptr.get_unchecked_mut(ux..).as_mut_ptr(), cr);

ux += 16;
} else if (chroma_subsampling == YuvChromaSubsampling::Yuv420 && compute_uv_row)
} else if (chroma_subsampling == YuvChromaSubsampling::Yuv420)
|| (chroma_subsampling == YuvChromaSubsampling::Yuv422)
{
let r1 = vreinterpretq_s16_u16(vshlq_n_u16::<V_SCALE>(vrshrq_n_u16::<1>(vpaddlq_u8(
Expand Down Expand Up @@ -250,7 +249,6 @@ pub(crate) unsafe fn neon_rgba_to_yuv<
start_cx: usize,
start_ux: usize,
width: usize,
compute_uv_row: bool,
) -> ProcessedOffset {
let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
Expand Down Expand Up @@ -460,7 +458,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv<

ux += 16;
} else if chroma_subsampling == YuvChromaSubsampling::Yuv422
|| (chroma_subsampling == YuvChromaSubsampling::Yuv420 && compute_uv_row)
|| (chroma_subsampling == YuvChromaSubsampling::Yuv420)
{
let r1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vpaddlq_u8(r_values_u8)));
let g1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vpaddlq_u8(g_values_u8)));
Expand Down
Loading

0 comments on commit 6f10a64

Please sign in to comment.