diff --git a/README.md b/README.md index d74e88d..3913e79 100644 --- a/README.md +++ b/README.md @@ -78,9 +78,9 @@ Tests performed on the image 5763x3842 |------------------------|:----------:|:---------:| | utils RGB->YUV 4:2:0 | 3.23ms | 3.53ms | | libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms | -| utils RGBA->YUV 4:2:0 | 4.10ms | 5.47ms | +| utils RGBA->YUV 4:2:0 | 4.09ms | 5.47ms | | libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms | -| utils RGBA->YUV 4:2:2 | 4.50ms | 7.08ms | +| utils RGBA->YUV 4:2:2 | 4.46ms | 7.08ms | | libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms | | utils RGBA->YUV 4:4:4 | 4.77ms | 7.97ms | @@ -90,13 +90,13 @@ Tests performed on the image 5763x3842 |------------------------|:----------:|:---------:| | utils YUV NV12->RGB | 3.86ms | 6.24ms | | libyuv YUV NV12->RGB | 5.20ms | 45.28ms | -| utils YUV 4:2:0->RGB | 3.28ms | 5.25ms | +| utils YUV 4:2:0->RGB | 3.26ms | 5.25ms | | libyuv YUV 4:2:0->RGB | 5.70ms | 44.95ms | | utils YUV 4:2:0->RGBA | 3.77ms | 5.98ms | | libyuv YUV 4:2:0->RGBA | 6.13ms | 6.88ms | | utils YUV 4:2:2->RGBA | 4.88ms | 6.91ms | | libyuv YUV 4:2:2->RGBA | 5.91ms | 6.91ms | -| utils YUV 4:4:4->RGBA | 4.80ms | 7.20ms | +| utils YUV 4:4:4->RGBA | 4.79ms | 7.20ms | | libyuv YUV 4:4:4->RGBA | 4.82ms | 7.30ms | This project is licensed under either of diff --git a/src/neon/rgb_to_y.rs b/src/neon/rgb_to_y.rs index 51a8755..8087185 100644 --- a/src/neon/rgb_to_y.rs +++ b/src/neon/rgb_to_y.rs @@ -98,10 +98,7 @@ pub(crate) unsafe fn neon_rgb_to_y_row( y_high = vqrdmlahq_s16(y_high, b_high, v_yb); y_high = vmaxq_s16(y_high, v_zeros); - let y_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)), - i_cap_y, - ); + let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y); let r_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(r_values_u8))); let g_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(g_values_u8))); @@ -112,7 +109,7 @@ pub(crate) unsafe fn neon_rgb_to_y_row( y_low = vqrdmlahq_s16(y_low, b_low, v_yb); y_low = vmaxq_s16(y_low, v_zeros); - let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y); + let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y); let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high)); vst1q_u8(y_ptr.add(cx), y); diff --git a/src/neon/rgba_to_nv.rs b/src/neon/rgba_to_nv.rs index 38d65ae..c21428e 100644 --- a/src/neon/rgba_to_nv.rs +++ b/src/neon/rgba_to_nv.rs @@ -126,10 +126,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< y_high = vqrdmlahq_laneq_s16::<1>(y_high, g_high, v_weights); y_high = vqrdmlahq_laneq_s16::<2>(y_high, b_high, v_weights); - let y_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)), - i_cap_y, - ); + let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y); let r_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(r_values_u8))); let g_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(g_values_u8))); @@ -139,7 +136,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< y_low = vqrdmlahq_laneq_s16::<1>(y_low, g_low, v_weights); y_low = vqrdmlahq_laneq_s16::<2>(y_low, b_low, v_weights); - let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y); + let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y); let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high)); vst1q_u8(y_ptr.add(cx), y); @@ -150,7 +147,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< cb_high = vqrdmlahq_laneq_s16::<5>(cb_high, b_high, v_weights); let cb_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cb_high), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cb_high, i_bias_y)), i_cap_uv, ); @@ -159,7 +156,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< cr_high = vqrdmlahq_laneq_s16::<0>(cr_high, b_high, v_cr_b); let cr_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cr_high), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cr_high, i_bias_y)), i_cap_uv, ); @@ -167,19 +164,13 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< cb_low = vqrdmlahq_laneq_s16::<4>(cb_low, g_low, v_weights); cb_low = vqrdmlahq_laneq_s16::<5>(cb_low, b_low, v_weights); - let cb_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cb_low), i_bias_y)), - i_cap_uv, - ); + let cb_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cb_low, i_bias_y)), i_cap_uv); let mut cr_low = vqrdmlahq_laneq_s16::<6>(uv_bias, r_low, v_weights); cr_low = vqrdmlahq_laneq_s16::<7>(cr_low, g_low, v_weights); cr_low = vqrdmlahq_laneq_s16::<0>(cr_low, b_low, v_cr_b); - let cr_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cr_low), i_bias_y)), - i_cap_uv, - ); + let cr_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cr_low, i_bias_y)), i_cap_uv); let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high)); let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high)); @@ -213,7 +204,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< cbl = vqrdmlahq_laneq_s16::<5>(cbl, b1, v_weights); let cb = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)), i_cap_uv, )); @@ -222,7 +213,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm< crl = vqrdmlahq_laneq_s16::<0>(crl, b1, v_cr_b); let cr = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)), i_cap_uv, )); diff --git a/src/neon/rgba_to_yuv.rs b/src/neon/rgba_to_yuv.rs index 3eac948..1f8a795 100644 --- a/src/neon/rgba_to_yuv.rs +++ b/src/neon/rgba_to_yuv.rs @@ -125,10 +125,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< y_high = vqrdmlahq_laneq_s16::<1>(y_high, g0hi, v_weights); y_high = vqrdmlahq_laneq_s16::<2>(y_high, b0hi, v_weights); - let y_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)), - i_cap_y, - ); + let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y); let r_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(r_values_u8))); let g_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(g_values_u8))); @@ -138,7 +135,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< y_low = vqrdmlahq_laneq_s16::<1>(y_low, g_low, v_weights); y_low = vqrdmlahq_laneq_s16::<2>(y_low, b_low, v_weights); - let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y); + let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y); let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high)); vst1q_u8(y_ptr.get_unchecked_mut(cx..).as_mut_ptr(), y); @@ -149,7 +146,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< cb_high = vqrdmlahq_laneq_s16::<5>(cb_high, b0hi, v_weights); let cb_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cb_high), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cb_high, i_bias_y)), i_cap_uv, ); @@ -158,7 +155,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< cr_high = vqrdmlahq_laneq_s16::<0>(cr_high, b0hi, v_cr_b); let cr_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cr_high), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cr_high, i_bias_y)), i_cap_uv, ); @@ -166,19 +163,13 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< cb_low = vqrdmlahq_laneq_s16::<4>(cb_low, g_low, v_weights); cb_low = vqrdmlahq_laneq_s16::<5>(cb_low, b_low, v_weights); - let cb_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cb_low), i_bias_y)), - i_cap_uv, - ); + let cb_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cb_low, i_bias_y)), i_cap_uv); let mut cr_low = vqrdmlahq_laneq_s16::<6>(uv_bias, r_low, v_weights); cr_low = vqrdmlahq_laneq_s16::<7>(cr_low, g_low, v_weights); cr_low = vqrdmlahq_laneq_s16::<0>(cr_low, b_low, v_cr_b); - let cr_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cr_low), i_bias_y)), - i_cap_uv, - ); + let cr_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cr_low, i_bias_y)), i_cap_uv); let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high)); let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high)); @@ -204,7 +195,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< cbl = vqrdmlahq_laneq_s16::<5>(cbl, b1, v_weights); let cb = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)), i_cap_uv, )); @@ -213,7 +204,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm< crl = vqrdmlahq_laneq_s16::<0>(crl, b1, v_cr_b); let cr = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)), i_cap_uv, )); diff --git a/src/neon/rgba_to_yuv420.rs b/src/neon/rgba_to_yuv420.rs index a907b0f..78da13f 100644 --- a/src/neon/rgba_to_yuv420.rs +++ b/src/neon/rgba_to_yuv420.rs @@ -147,19 +147,13 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420(y0_high, g0hi, v_weights); y0_high = vqrdmlahq_laneq_s16::<2>(y0_high, b0hi, v_weights); - let y0_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y0_high), i_bias_y)), - i_cap_y, - ); + let y0_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y0_high, i_bias_y)), i_cap_y); let mut y1_high = vqrdmlahq_laneq_s16::<0>(y_bias, r1hi, v_weights); y1_high = vqrdmlahq_laneq_s16::<1>(y1_high, g1hi, v_weights); y1_high = vqrdmlahq_laneq_s16::<2>(y1_high, b1hi, v_weights); - let y1_high = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y1_high), i_bias_y)), - i_cap_y, - ); + let y1_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y1_high, i_bias_y)), i_cap_y); let r0_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(r_values0))); let g0_low = vreinterpretq_s16_u16(vshll_n_u8::(vget_low_u8(g_values0))); @@ -173,19 +167,13 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420(y0_low, g0_low, v_weights); y0_low = vqrdmlahq_laneq_s16::<2>(y0_low, b0_low, v_weights); - let y0_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y0_low), i_bias_y)), - i_cap_y, - ); + let y0_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y0_low, i_bias_y)), i_cap_y); let mut y1_low = vqrdmlahq_laneq_s16::<0>(y_bias, r1_low, v_weights); y1_low = vqrdmlahq_laneq_s16::<1>(y1_low, g1_low, v_weights); y1_low = vqrdmlahq_laneq_s16::<2>(y1_low, b1_low, v_weights); - let y1_low = vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((y1_low), i_bias_y)), - i_cap_y, - ); + let y1_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y1_low, i_bias_y)), i_cap_y); let y0 = vcombine_u8(vqmovn_u16(y0_low), vqmovn_u16(y0_high)); vst1q_u8(y_plane0.get_unchecked_mut(cx..).as_mut_ptr(), y0); @@ -205,7 +193,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420(cbl, b1, v_weights); let cb = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)), i_cap_uv, )); @@ -214,7 +202,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420(crl, b1, v_cr_b); let cr = vqmovn_u16(vminq_u16( - vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)), + vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)), i_cap_uv, ));