Skip to content

Commit

Permalink
Improvements, clippy
Browse files — browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 25, 2024
1 parent b666b69 commit d59a463
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 61 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ Tests performed on the image 5763x3842
|------------------------|:----------:|:---------:|
| utils RGB->YUV 4:2:0 | 3.23ms | 3.53ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.10ms | 5.47ms |
| utils RGBA->YUV 4:2:0 | 4.09ms | 5.47ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms |
| utils RGBA->YUV 4:2:2 | 4.50ms | 7.08ms |
| utils RGBA->YUV 4:2:2 | 4.46ms | 7.08ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms |
| utils RGBA->YUV 4:4:4 | 4.77ms | 7.97ms |

Expand All @@ -90,13 +90,13 @@ Tests performed on the image 5763x3842
|------------------------|:----------:|:---------:|
| utils YUV NV12->RGB | 3.86ms | 6.24ms |
| libyuv YUV NV12->RGB | 5.20ms | 45.28ms |
| utils YUV 4:2:0->RGB | 3.28ms | 5.25ms |
| utils YUV 4:2:0->RGB | 3.26ms | 5.25ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 44.95ms |
| utils YUV 4:2:0->RGBA | 3.77ms | 5.98ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 6.88ms |
| utils YUV 4:2:2->RGBA | 4.88ms | 6.91ms |
| libyuv YUV 4:2:2->RGBA | 5.91ms | 6.91ms |
| utils YUV 4:4:4->RGBA | 4.80ms | 7.20ms |
| utils YUV 4:4:4->RGBA | 4.79ms | 7.20ms |
| libyuv YUV 4:4:4->RGBA | 4.82ms | 7.30ms |

This project is licensed under either of
Expand Down
7 changes: 2 additions & 5 deletions src/neon/rgb_to_y.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,7 @@ pub(crate) unsafe fn neon_rgb_to_y_row<const ORIGIN_CHANNELS: u8>(
y_high = vqrdmlahq_s16(y_high, b_high, v_yb);
y_high = vmaxq_s16(y_high, v_zeros);

let y_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)),
i_cap_y,
);
let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y);

let r_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(r_values_u8)));
let g_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(g_values_u8)));
Expand All @@ -112,7 +109,7 @@ pub(crate) unsafe fn neon_rgb_to_y_row<const ORIGIN_CHANNELS: u8>(
y_low = vqrdmlahq_s16(y_low, b_low, v_yb);
y_low = vmaxq_s16(y_low, v_zeros);

let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y);
let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y);

let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
vst1q_u8(y_ptr.add(cx), y);
Expand Down
25 changes: 8 additions & 17 deletions src/neon/rgba_to_nv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
y_high = vqrdmlahq_laneq_s16::<1>(y_high, g_high, v_weights);
y_high = vqrdmlahq_laneq_s16::<2>(y_high, b_high, v_weights);

let y_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)),
i_cap_y,
);
let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y);

let r_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(r_values_u8)));
let g_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(g_values_u8)));
Expand All @@ -139,7 +136,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
y_low = vqrdmlahq_laneq_s16::<1>(y_low, g_low, v_weights);
y_low = vqrdmlahq_laneq_s16::<2>(y_low, b_low, v_weights);

let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y);
let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y);

let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
vst1q_u8(y_ptr.add(cx), y);
Expand All @@ -150,7 +147,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
cb_high = vqrdmlahq_laneq_s16::<5>(cb_high, b_high, v_weights);

let cb_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cb_high), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cb_high, i_bias_y)),
i_cap_uv,
);

Expand All @@ -159,27 +156,21 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
cr_high = vqrdmlahq_laneq_s16::<0>(cr_high, b_high, v_cr_b);

let cr_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cr_high), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cr_high, i_bias_y)),
i_cap_uv,
);

let mut cb_low = vqrdmlahq_laneq_s16::<3>(uv_bias, r_low, v_weights);
cb_low = vqrdmlahq_laneq_s16::<4>(cb_low, g_low, v_weights);
cb_low = vqrdmlahq_laneq_s16::<5>(cb_low, b_low, v_weights);

let cb_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cb_low), i_bias_y)),
i_cap_uv,
);
let cb_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cb_low, i_bias_y)), i_cap_uv);

let mut cr_low = vqrdmlahq_laneq_s16::<6>(uv_bias, r_low, v_weights);
cr_low = vqrdmlahq_laneq_s16::<7>(cr_low, g_low, v_weights);
cr_low = vqrdmlahq_laneq_s16::<0>(cr_low, b_low, v_cr_b);

let cr_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cr_low), i_bias_y)),
i_cap_uv,
);
let cr_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cr_low, i_bias_y)), i_cap_uv);
let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high));
let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high));

Expand Down Expand Up @@ -213,7 +204,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
cbl = vqrdmlahq_laneq_s16::<5>(cbl, b1, v_weights);

let cb = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)),
i_cap_uv,
));

Expand All @@ -222,7 +213,7 @@ pub(crate) unsafe fn neon_rgbx_to_nv_row_rdm<
crl = vqrdmlahq_laneq_s16::<0>(crl, b1, v_cr_b);

let cr = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)),
i_cap_uv,
));

Expand Down
25 changes: 8 additions & 17 deletions src/neon/rgba_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
y_high = vqrdmlahq_laneq_s16::<1>(y_high, g0hi, v_weights);
y_high = vqrdmlahq_laneq_s16::<2>(y_high, b0hi, v_weights);

let y_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y_high), i_bias_y)),
i_cap_y,
);
let y_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_high, i_bias_y)), i_cap_y);

let r_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(r_values_u8)));
let g_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(g_values_u8)));
Expand All @@ -138,7 +135,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
y_low = vqrdmlahq_laneq_s16::<1>(y_low, g_low, v_weights);
y_low = vqrdmlahq_laneq_s16::<2>(y_low, b_low, v_weights);

let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16((y_low), i_bias_y)), i_cap_y);
let y_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y_low, i_bias_y)), i_cap_y);

let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
vst1q_u8(y_ptr.get_unchecked_mut(cx..).as_mut_ptr(), y);
Expand All @@ -149,7 +146,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
cb_high = vqrdmlahq_laneq_s16::<5>(cb_high, b0hi, v_weights);

let cb_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cb_high), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cb_high, i_bias_y)),
i_cap_uv,
);

Expand All @@ -158,27 +155,21 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
cr_high = vqrdmlahq_laneq_s16::<0>(cr_high, b0hi, v_cr_b);

let cr_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cr_high), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cr_high, i_bias_y)),
i_cap_uv,
);

let mut cb_low = vqrdmlahq_laneq_s16::<3>(uv_bias, r_low, v_weights);
cb_low = vqrdmlahq_laneq_s16::<4>(cb_low, g_low, v_weights);
cb_low = vqrdmlahq_laneq_s16::<5>(cb_low, b_low, v_weights);

let cb_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cb_low), i_bias_y)),
i_cap_uv,
);
let cb_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cb_low, i_bias_y)), i_cap_uv);

let mut cr_low = vqrdmlahq_laneq_s16::<6>(uv_bias, r_low, v_weights);
cr_low = vqrdmlahq_laneq_s16::<7>(cr_low, g_low, v_weights);
cr_low = vqrdmlahq_laneq_s16::<0>(cr_low, b_low, v_cr_b);

let cr_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cr_low), i_bias_y)),
i_cap_uv,
);
let cr_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(cr_low, i_bias_y)), i_cap_uv);
let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high));
let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high));

Expand All @@ -204,7 +195,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
cbl = vqrdmlahq_laneq_s16::<5>(cbl, b1, v_weights);

let cb = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)),
i_cap_uv,
));

Expand All @@ -213,7 +204,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm<
crl = vqrdmlahq_laneq_s16::<0>(crl, b1, v_cr_b);

let cr = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)),
i_cap_uv,
));

Expand Down
24 changes: 6 additions & 18 deletions src/neon/rgba_to_yuv420.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,13 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
y0_high = vqrdmlahq_laneq_s16::<1>(y0_high, g0hi, v_weights);
y0_high = vqrdmlahq_laneq_s16::<2>(y0_high, b0hi, v_weights);

let y0_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y0_high), i_bias_y)),
i_cap_y,
);
let y0_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y0_high, i_bias_y)), i_cap_y);

let mut y1_high = vqrdmlahq_laneq_s16::<0>(y_bias, r1hi, v_weights);
y1_high = vqrdmlahq_laneq_s16::<1>(y1_high, g1hi, v_weights);
y1_high = vqrdmlahq_laneq_s16::<2>(y1_high, b1hi, v_weights);

let y1_high = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y1_high), i_bias_y)),
i_cap_y,
);
let y1_high = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y1_high, i_bias_y)), i_cap_y);

let r0_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(r_values0)));
let g0_low = vreinterpretq_s16_u16(vshll_n_u8::<V_SCALE>(vget_low_u8(g_values0)));
Expand All @@ -173,19 +167,13 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
y0_low = vqrdmlahq_laneq_s16::<1>(y0_low, g0_low, v_weights);
y0_low = vqrdmlahq_laneq_s16::<2>(y0_low, b0_low, v_weights);

let y0_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y0_low), i_bias_y)),
i_cap_y,
);
let y0_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y0_low, i_bias_y)), i_cap_y);

let mut y1_low = vqrdmlahq_laneq_s16::<0>(y_bias, r1_low, v_weights);
y1_low = vqrdmlahq_laneq_s16::<1>(y1_low, g1_low, v_weights);
y1_low = vqrdmlahq_laneq_s16::<2>(y1_low, b1_low, v_weights);

let y1_low = vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((y1_low), i_bias_y)),
i_cap_y,
);
let y1_low = vminq_u16(vreinterpretq_u16_s16(vmaxq_s16(y1_low, i_bias_y)), i_cap_y);

let y0 = vcombine_u8(vqmovn_u16(y0_low), vqmovn_u16(y0_high));
vst1q_u8(y_plane0.get_unchecked_mut(cx..).as_mut_ptr(), y0);
Expand All @@ -205,7 +193,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
cbl = vqrdmlahq_laneq_s16::<5>(cbl, b1, v_weights);

let cb = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((cbl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(cbl, i_bias_y)),
i_cap_uv,
));

Expand All @@ -214,7 +202,7 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
crl = vqrdmlahq_laneq_s16::<0>(crl, b1, v_cr_b);

let cr = vqmovn_u16(vminq_u16(
vreinterpretq_u16_s16(vmaxq_s16((crl), i_bias_y)),
vreinterpretq_u16_s16(vmaxq_s16(crl, i_bias_y)),
i_cap_uv,
));

Expand Down

0 comments on commit d59a463

Please sign in to comment.