Skip to content

Commit

Permalink
NEON improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 25, 2024
1 parent 79008a5 commit 36fd2cd
Show file tree
Hide file tree
Showing 16 changed files with 805 additions and 721 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,26 @@ Tests performed on the image 5763x3842
| | Time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils RGB->YUV 4:2:0 | 4.09ms | 6.14ms |
| libyuv RGB->YUV 4:2:0 | 3.66ms | 33.87ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.88ms | 7.34ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms |
| utils RGBA->YUV 4:2:2 | 4.99ms | 7.08ms |
| utils RGBA->YUV 4:2:2 | 4.83ms | 7.08ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms |
| utils RGBA->YUV 4:4:4 | 5.37ms | 7.97ms |
| utils RGBA->YUV 4:4:4 | 5.34ms | 7.97ms |

### Decoding

| | Time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils YUV NV12->RGB | 3.92ms | 6.48ms |
| utils YUV NV12->RGB | 3.86ms | 6.48ms |
| libyuv YUV NV12->RGB | 5.20ms | 45.28ms |
| utils YUV 4:2:0->RGB | 3.28ms | 5.44ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 44.95ms |
| utils YUV 4:2:0->RGBA | 3.85ms | 5.98ms |
| utils YUV 4:2:0->RGBA | 3.82ms | 5.98ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 6.88ms |
| utils YUV 4:2:2->RGBA | 4.94ms | 6.91ms |
| utils YUV 4:2:2->RGBA | 4.88ms | 6.91ms |
| libyuv YUV 4:2:2->RGBA | 5.91ms | 6.91ms |
| utils YUV 4:4:4->RGBA | 4.83ms | 7.20ms |
| utils YUV 4:4:4->RGBA | 4.80ms | 7.20ms |
| libyuv YUV 4:4:4->RGBA | 4.82ms | 7.30ms |

This project is licensed under either of
Expand Down
336 changes: 168 additions & 168 deletions app/benches/yuv8/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,175 +78,175 @@ pub fn criterion_benchmark(c: &mut Criterion) {

let fixed_planar = planar_image.to_fixed();

// let rgba_image = img.to_rgba8();

// c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| {
// rgb_to_yuv420(
// &mut test_planar,
// &src_bytes,
// stride as u32,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGB -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| unsafe {
// rs_RGB24ToI420(
// src_bytes.as_ptr(),
// stride as i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| {
// rgba_to_yuv420(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| unsafe {
// rs_ABGRToI420(
// rgba_image.as_ptr(),
// dimensions.0 as i32 * 4i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv422,
// );
// b.iter(|| {
// rgba_to_yuv422(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv422,
// );
// b.iter(|| unsafe {
// rs_ABGRToJ422(
// rgba_image.as_ptr(),
// dimensions.0 as i32 * 4i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv444,
// );
// b.iter(|| {
// rgba_to_yuv444(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("yuvutils YUV NV12 -> RGB", |b| {
// let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
// b.iter(|| {
// yuv_nv12_to_rgba(
// &fixed_bi_planar,
// &mut rgb_bytes,
// dimensions.0 * 4u32,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
let rgba_image = img.to_rgba8();

c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| {
rgb_to_yuv420(
&mut test_planar,
&src_bytes,
stride as u32,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGB -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| unsafe {
rs_RGB24ToI420(
src_bytes.as_ptr(),
stride as i32,
test_planar.y_plane.borrow_mut().as_mut_ptr(),
test_planar.y_stride as i32,
test_planar.u_plane.borrow_mut().as_mut_ptr(),
test_planar.u_stride as i32,
test_planar.v_plane.borrow_mut().as_mut_ptr(),
test_planar.v_stride as i32,
test_planar.width as i32,
test_planar.height as i32,
);
})
});

c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| {
rgba_to_yuv420(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| unsafe {
rs_ABGRToI420(
rgba_image.as_ptr(),
dimensions.0 as i32 * 4i32,
test_planar.y_plane.borrow_mut().as_mut_ptr(),
test_planar.y_stride as i32,
test_planar.u_plane.borrow_mut().as_mut_ptr(),
test_planar.u_stride as i32,
test_planar.v_plane.borrow_mut().as_mut_ptr(),
test_planar.v_stride as i32,
test_planar.width as i32,
test_planar.height as i32,
);
})
});

c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv422,
);
b.iter(|| {
rgba_to_yuv422(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv422,
);
b.iter(|| unsafe {
rs_ABGRToJ422(
rgba_image.as_ptr(),
dimensions.0 as i32 * 4i32,
test_planar.y_plane.borrow_mut().as_mut_ptr(),
test_planar.y_stride as i32,
test_planar.u_plane.borrow_mut().as_mut_ptr(),
test_planar.u_stride as i32,
test_planar.v_plane.borrow_mut().as_mut_ptr(),
test_planar.v_stride as i32,
test_planar.width as i32,
test_planar.height as i32,
);
})
});

c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv444,
);
b.iter(|| {
rgba_to_yuv444(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("yuvutils YUV NV12 -> RGB", |b| {
let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
b.iter(|| {
yuv_nv12_to_rgba(
&fixed_bi_planar,
&mut rgb_bytes,
dimensions.0 * 4u32,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});
//
// c.bench_function("livyuv YUV NV12 -> RGB", |b| {
// let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
// b.iter(|| unsafe {
// rs_NV21ToABGR(
// fixed_bi_planar.y_plane.as_ptr(),
// fixed_bi_planar.y_stride as i32,
// fixed_bi_planar.uv_plane.as_ptr(),
// fixed_bi_planar.uv_stride as i32,
// rgb_bytes.as_mut_ptr(),
// dimensions.0 as i32 * 4,
// fixed_bi_planar.width as i32,
// fixed_bi_planar.height as i32,
// );
// })
// });
c.bench_function("libyuv YUV NV12 -> RGB", |b| {
let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
b.iter(|| unsafe {
rs_NV21ToABGR(
fixed_bi_planar.y_plane.as_ptr(),
fixed_bi_planar.y_stride as i32,
fixed_bi_planar.uv_plane.as_ptr(),
fixed_bi_planar.uv_stride as i32,
rgb_bytes.as_mut_ptr(),
dimensions.0 as i32 * 4,
fixed_bi_planar.width as i32,
fixed_bi_planar.height as i32,
);
})
});

c.bench_function("yuvutils YUV 4:2:0 -> RGB", |b| {
let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize];
Expand Down
Loading

0 comments on commit 36fd2cd

Please sign in to comment.