Skip to content

Commit

Permalink
Merge pull request #13 from awxkee/dev
Browse files Browse the repository at this point in the history
Speed improvements, refactor
  • Loading branch information
awxkee authored Nov 25, 2024
2 parents 1ca3167 + a3e0a2e commit 1964bb9
Show file tree
Hide file tree
Showing 49 changed files with 3,032 additions and 2,314 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,27 @@ Tests performed on the image 5763x3842

| | Time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils RGB->YUV 4:2:0 | 4.37ms | 6.14ms |
| libyuv RGB->YUV 4:2:0 | 3.66ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.88ms | 7.34ms |
| utils RGB->YUV 4:2:0 | 3.23ms | 3.53ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.09ms | 5.47ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms |
| utils RGBA->YUV 4:2:2 | 4.99ms | 7.08ms |
| utils RGBA->YUV 4:2:2 | 4.46ms | 7.08ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms |
| utils RGBA->YUV 4:4:4 | 5.37ms | 7.97ms |
| utils RGBA->YUV 4:4:4 | 4.77ms | 7.97ms |

### Decoding

| | Time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils YUV NV12->RGB | 4.08ms | 6.48ms |
| utils YUV NV12->RGB | 3.86ms | 6.24ms |
| libyuv YUV NV12->RGB | 5.20ms | 45.28ms |
| utils YUV 4:2:0->RGB | 3.49ms | 5.44ms |
| utils YUV 4:2:0->RGB | 3.26ms | 5.25ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 44.95ms |
| utils YUV 4:2:0->RGBA | 4.02ms | 5.98ms |
| utils YUV 4:2:0->RGBA | 3.77ms | 5.98ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 6.88ms |
| utils YUV 4:2:2->RGBA | 5.39ms | 6.91ms |
| utils YUV 4:2:2->RGBA | 4.88ms | 6.91ms |
| libyuv YUV 4:2:2->RGBA | 5.91ms | 6.91ms |
| utils YUV 4:4:4->RGBA | 5.04ms | 7.20ms |
| utils YUV 4:4:4->RGBA | 4.79ms | 7.20ms |
| libyuv YUV 4:4:4->RGBA | 4.82ms | 7.30ms |

This project is licensed under either of
Expand Down
314 changes: 172 additions & 142 deletions app/benches/yuv8/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

use criterion::{criterion_group, criterion_main, Criterion};
use image::{GenericImageView, ImageReader};
use std::alloc::Layout;
use yuv_sys::{
rs_ABGRToI420, rs_ABGRToJ422, rs_I420ToABGR, rs_I420ToRGB24, rs_I422ToABGR, rs_I444ToABGR,
rs_NV21ToABGR, rs_RGB24ToI420,
Expand Down Expand Up @@ -78,145 +78,175 @@ pub fn criterion_benchmark(c: &mut Criterion) {

let fixed_planar = planar_image.to_fixed();

// let rgba_image = img.to_rgba8();
//
// c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| {
// rgb_to_yuv420(
// &mut test_planar,
// &src_bytes,
// stride as u32,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGB -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| unsafe {
// rs_RGB24ToI420(
// src_bytes.as_ptr(),
// stride as i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| {
// rgba_to_yuv420(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv420,
// );
// b.iter(|| unsafe {
// rs_ABGRToI420(
// rgba_image.as_ptr(),
// dimensions.0 as i32 * 4i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv422,
// );
// b.iter(|| {
// rgba_to_yuv422(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
//
// c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv422,
// );
// b.iter(|| unsafe {
// rs_ABGRToJ422(
// rgba_image.as_ptr(),
// dimensions.0 as i32 * 4i32,
// test_planar.y_plane.borrow_mut().as_mut_ptr(),
// test_planar.y_stride as i32,
// test_planar.u_plane.borrow_mut().as_mut_ptr(),
// test_planar.u_stride as i32,
// test_planar.v_plane.borrow_mut().as_mut_ptr(),
// test_planar.v_stride as i32,
// test_planar.width as i32,
// test_planar.height as i32,
// );
// })
// });
//
// c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| {
// let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
// dimensions.0,
// dimensions.1,
// YuvChromaSubsampling::Yuv444,
// );
// b.iter(|| {
// rgba_to_yuv444(
// &mut test_planar,
// &rgba_image,
// dimensions.0 * 4,
// YuvRange::Limited,
// YuvStandardMatrix::Bt601,
// )
// .unwrap();
// })
// });
let rgba_image = img.to_rgba8();

c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| {
rgb_to_yuv420(
&mut test_planar,
&src_bytes,
stride as u32,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGB -> YUV 4:2:0", |b| unsafe {
let layout_rgb =
Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 3, 16).unwrap();
let layout_y =
Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap();
let layout_uv = Layout::from_size_align(
(dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2,
16,
)
.unwrap();
let target_y = std::alloc::alloc(layout_y);
let target_u = std::alloc::alloc(layout_uv);
let target_v = std::alloc::alloc(layout_uv);
let source_rgb = std::alloc::alloc(layout_rgb);
for (x, src) in src_bytes.iter().enumerate() {
*source_rgb.add(x) = *src;
}
b.iter(|| {
rs_RGB24ToI420(
source_rgb,
stride as i32,
target_y,
dimensions.0 as i32,
target_u,
(dimensions.0 as i32 + 1) / 2,
target_v,
(dimensions.0 as i32 + 1) / 2,
dimensions.0 as i32,
dimensions.1 as i32,
);
});
std::alloc::dealloc(target_y, layout_y);
std::alloc::dealloc(target_u, layout_uv);
std::alloc::dealloc(target_v, layout_uv);
std::alloc::dealloc(source_rgb, layout_rgb);
});

c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv420,
);
b.iter(|| {
rgba_to_yuv420(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| unsafe {
let layout_rgba =
Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 4, 16).unwrap();
let layout_y =
Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap();
let layout_uv = Layout::from_size_align(
(dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2,
16,
)
.unwrap();
let target_y = std::alloc::alloc(layout_y);
let target_u = std::alloc::alloc(layout_uv);
let target_v = std::alloc::alloc(layout_uv);
let source_rgb = std::alloc::alloc(layout_rgba);
for (x, src) in src_bytes.iter().enumerate() {
*source_rgb.add(x) = *src;
}
b.iter(|| {
rs_ABGRToI420(
source_rgb,
dimensions.0 as i32 * 4i32,
target_y,
dimensions.0 as i32,
target_u,
(dimensions.0 as i32 + 1) / 2,
target_v,
(dimensions.0 as i32 + 1) / 2,
dimensions.0 as i32,
dimensions.1 as i32,
);
});
std::alloc::dealloc(target_y, layout_y);
std::alloc::dealloc(target_u, layout_uv);
std::alloc::dealloc(target_v, layout_uv);
std::alloc::dealloc(source_rgb, layout_rgba);
});

c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv422,
);
b.iter(|| {
rgba_to_yuv422(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv422,
);
b.iter(|| unsafe {
rs_ABGRToJ422(
rgba_image.as_ptr(),
dimensions.0 as i32 * 4i32,
test_planar.y_plane.borrow_mut().as_mut_ptr(),
test_planar.y_stride as i32,
test_planar.u_plane.borrow_mut().as_mut_ptr(),
test_planar.u_stride as i32,
test_planar.v_plane.borrow_mut().as_mut_ptr(),
test_planar.v_stride as i32,
test_planar.width as i32,
test_planar.height as i32,
);
})
});

c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| {
let mut test_planar = YuvPlanarImageMut::<u8>::alloc(
dimensions.0,
dimensions.1,
YuvChromaSubsampling::Yuv444,
);
b.iter(|| {
rgba_to_yuv444(
&mut test_planar,
&rgba_image,
dimensions.0 * 4,
YuvRange::Limited,
YuvStandardMatrix::Bt601,
)
.unwrap();
})
});

c.bench_function("yuvutils YUV NV12 -> RGB", |b| {
let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
Expand All @@ -231,8 +261,8 @@ pub fn criterion_benchmark(c: &mut Criterion) {
.unwrap();
})
});

c.bench_function("livyuv YUV NV12 -> RGB", |b| {
//
c.bench_function("libyuv YUV NV12 -> RGB", |b| {
let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize];
b.iter(|| unsafe {
rs_NV21ToABGR(
Expand Down
Loading

0 comments on commit 1964bb9

Please sign in to comment.