Skip to content

Commit

Permalink
AVX, SSE encoding/decoding improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Nov 25, 2024
1 parent 8d360d9 commit 52ca84d
Show file tree
Hide file tree
Showing 23 changed files with 358 additions and 976 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Tests performed on the image 5763x3842
|------------------------|:----------:|:---------:|
| utils RGB->YUV 4:2:0 | 3.48ms | 3.64ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 33.87ms |
| utils RGBA->YUV 4:2:0 | 4.32ms | 5.74ms |
| utils RGBA->YUV 4:2:0 | 4.32ms | 5.47ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 23.48ms |
| utils RGBA->YUV 4:2:2 | 4.83ms | 7.08ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 35.23ms |
Expand All @@ -88,9 +88,9 @@ Tests performed on the image 5763x3842

| | Time(NEON) | Time(AVX) |
|------------------------|:----------:|:---------:|
| utils YUV NV12->RGB | 3.86ms | 6.48ms |
| utils YUV NV12->RGB | 3.86ms | 6.24ms |
| libyuv YUV NV12->RGB | 5.20ms | 45.28ms |
| utils YUV 4:2:0->RGB | 3.28ms | 5.34ms |
| utils YUV 4:2:0->RGB | 3.28ms | 5.25ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 44.95ms |
| utils YUV 4:2:0->RGBA | 3.82ms | 5.98ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 6.88ms |
Expand Down
132 changes: 70 additions & 62 deletions app/benches/yuv8/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use std::alloc::Layout;
use criterion::{criterion_group, criterion_main, Criterion};
use image::{GenericImageView, ImageReader};
use std::alloc::Layout;
use yuv_sys::{
rs_ABGRToI420, rs_ABGRToJ422, rs_I420ToABGR, rs_I420ToRGB24, rs_I422ToABGR, rs_I444ToABGR,
rs_NV21ToABGR, rs_RGB24ToI420,
Expand Down Expand Up @@ -98,37 +98,41 @@ pub fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("libyuv RGB -> YUV 4:2:0", |b| {
unsafe {
let layout_rgb = Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 3, 16).unwrap();
let layout_y = Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap();
let layout_uv = Layout::from_size_align((dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, 16).unwrap();
let target_y = std::alloc::alloc(layout_y);
let target_u = std::alloc::alloc(layout_uv);
let target_v = std::alloc::alloc(layout_uv);
let source_rgb = std::alloc::alloc(layout_rgb);
for (x, src) in src_bytes.iter().enumerate() {
*source_rgb.add(x) = *src;
}
b.iter(|| {
rs_RGB24ToI420(
source_rgb,
stride as i32,
target_y,
dimensions.0 as i32,
target_u,
(dimensions.0 as i32 + 1) / 2,
target_v,
(dimensions.0 as i32 + 1) / 2,
dimensions.0 as i32,
dimensions.1 as i32,
);
});
std::alloc::dealloc(target_y, layout_y);
std::alloc::dealloc(target_u, layout_uv);
std::alloc::dealloc(target_v, layout_uv);
std::alloc::dealloc(source_rgb, layout_rgb);
// Benchmarks the `rs_RGB24ToI420` binding (RGB24 -> planar YUV 4:2:0).
// All buffers are allocated and filled once, outside the timed `b.iter` closure,
// so only the conversion call itself is measured.
c.bench_function("libyuv RGB -> YUV 4:2:0", |b| unsafe {
    let width = dimensions.0 as usize;
    let height = dimensions.1 as usize;
    // Chroma planes are halved in both directions, rounding up for odd sizes.
    let chroma_width = (width + 1) / 2;
    let chroma_height = (height + 1) / 2;

    let layout_rgb = Layout::from_size_align(width * height * 3, 16).unwrap();
    let layout_y = Layout::from_size_align(width * height, 16).unwrap();
    // Each factor is computed separately: the unparenthesised form
    // `(w + 1) / 2 * (h + 1) / 2` evaluates as `(((w + 1) / 2) * (h + 1)) / 2`,
    // which is not the chroma plane size for odd dimensions.
    let layout_uv = Layout::from_size_align(chroma_width * chroma_height, 16).unwrap();

    // `alloc` reports failure by returning null; abort through the standard
    // handler instead of handing a null pointer to the FFI call.
    let alloc_checked = |layout: Layout| {
        let ptr = std::alloc::alloc(layout);
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        ptr
    };
    let target_y = alloc_checked(layout_y);
    let target_u = alloc_checked(layout_uv);
    let target_v = alloc_checked(layout_uv);
    let source_rgb = alloc_checked(layout_rgb);

    // The raw copy below is unchecked, so make sure the source fits first.
    assert!(
        src_bytes.len() <= layout_rgb.size(),
        "source image does not fit into the RGB benchmark buffer"
    );
    for (x, src) in src_bytes.iter().enumerate() {
        *source_rgb.add(x) = *src;
    }

    b.iter(|| {
        rs_RGB24ToI420(
            source_rgb,
            stride as i32,
            target_y,
            dimensions.0 as i32,
            target_u,
            (dimensions.0 as i32 + 1) / 2,
            target_v,
            (dimensions.0 as i32 + 1) / 2,
            dimensions.0 as i32,
            dimensions.1 as i32,
        );
    });

    // Release every buffer with the same layout it was allocated with.
    std::alloc::dealloc(target_y, layout_y);
    std::alloc::dealloc(target_u, layout_uv);
    std::alloc::dealloc(target_v, layout_uv);
    std::alloc::dealloc(source_rgb, layout_rgb);
});

c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| {
Expand All @@ -149,37 +153,41 @@ pub fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| {
unsafe {
let layout_rgba = Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 4, 16).unwrap();
let layout_y = Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap();
let layout_uv = Layout::from_size_align((dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, 16).unwrap();
let target_y = std::alloc::alloc(layout_y);
let target_u = std::alloc::alloc(layout_uv);
let target_v = std::alloc::alloc(layout_uv);
let source_rgb = std::alloc::alloc(layout_rgba);
for (x, src) in src_bytes.iter().enumerate() {
*source_rgb.add(x) = *src;
}
b.iter(|| {
rs_ABGRToI420(
source_rgb,
dimensions.0 as i32 * 4i32,
target_y,
dimensions.0 as i32,
target_u,
(dimensions.0 as i32 + 1) / 2,
target_v,
(dimensions.0 as i32 + 1) / 2,
dimensions.0 as i32,
dimensions.1 as i32,
);
});
std::alloc::dealloc(target_y, layout_y);
std::alloc::dealloc(target_u, layout_uv);
std::alloc::dealloc(target_v, layout_uv);
std::alloc::dealloc(source_rgb, layout_rgba);
}
// Benchmarks the `rs_ABGRToI420` binding (4-byte-per-pixel input -> planar YUV 4:2:0).
// All buffers are allocated and filled once, outside the timed `b.iter` closure,
// so only the conversion call itself is measured.
c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| unsafe {
    let width = dimensions.0 as usize;
    let height = dimensions.1 as usize;
    // Chroma planes are halved in both directions, rounding up for odd sizes.
    let chroma_width = (width + 1) / 2;
    let chroma_height = (height + 1) / 2;

    let layout_rgba = Layout::from_size_align(width * height * 4, 16).unwrap();
    let layout_y = Layout::from_size_align(width * height, 16).unwrap();
    // Each factor is computed separately: the unparenthesised form
    // `(w + 1) / 2 * (h + 1) / 2` evaluates as `(((w + 1) / 2) * (h + 1)) / 2`,
    // which is not the chroma plane size for odd dimensions.
    let layout_uv = Layout::from_size_align(chroma_width * chroma_height, 16).unwrap();

    // `alloc` reports failure by returning null; abort through the standard
    // handler instead of handing a null pointer to the FFI call.
    let alloc_checked = |layout: Layout| {
        let ptr = std::alloc::alloc(layout);
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        ptr
    };
    let target_y = alloc_checked(layout_y);
    let target_u = alloc_checked(layout_uv);
    let target_v = alloc_checked(layout_uv);
    let source_rgb = alloc_checked(layout_rgba);

    // The raw copy below is unchecked, so make sure the source fits first.
    // NOTE(review): if `src_bytes` holds fewer than width * height * 4 bytes, the
    // tail of the buffer stays uninitialised — confirm which image it refers to.
    assert!(
        src_bytes.len() <= layout_rgba.size(),
        "source image does not fit into the RGBA benchmark buffer"
    );
    for (x, src) in src_bytes.iter().enumerate() {
        *source_rgb.add(x) = *src;
    }

    b.iter(|| {
        rs_ABGRToI420(
            source_rgb,
            dimensions.0 as i32 * 4i32,
            target_y,
            dimensions.0 as i32,
            target_u,
            (dimensions.0 as i32 + 1) / 2,
            target_v,
            (dimensions.0 as i32 + 1) / 2,
            dimensions.0 as i32,
            dimensions.1 as i32,
        );
    });

    // Release every buffer with the same layout it was allocated with.
    std::alloc::dealloc(target_y, layout_y);
    std::alloc::dealloc(target_u, layout_uv);
    std::alloc::dealloc(target_v, layout_uv);
    std::alloc::dealloc(source_rgb, layout_rgba);
});

c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| {
Expand Down
4 changes: 2 additions & 2 deletions src/avx2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod rgb_to_nv;
mod rgb_to_y;
mod rgb_to_ycgco;
mod rgba_to_yuv;
mod rgba_to_yuv420;
mod ycgco_to_rgb;
mod ycgco_to_rgba_alpha;
mod yuv_nv_to_rgba;
Expand All @@ -45,12 +46,12 @@ mod yuv_to_rgba_alpha;
mod yuv_to_yuv2;
mod yuy2_to_rgb;
mod yuy2_to_yuv;
mod rgba_to_yuv420;

pub(crate) use rgb_to_nv::avx2_rgba_to_nv;
pub(crate) use rgb_to_y::avx2_rgb_to_y_row;
pub(crate) use rgb_to_ycgco::avx2_rgb_to_ycgco_row;
pub(crate) use rgba_to_yuv::avx2_rgba_to_yuv;
pub(crate) use rgba_to_yuv420::avx2_rgba_to_yuv420;
pub(crate) use ycgco_to_rgb::avx2_ycgco_to_rgb_row;
pub(crate) use ycgco_to_rgba_alpha::avx2_ycgco_to_rgba_alpha;
pub(crate) use yuv_nv_to_rgba::avx2_yuv_nv_to_rgba_row;
Expand All @@ -63,4 +64,3 @@ pub(crate) use yuv_to_rgba_alpha::avx2_yuv_to_rgba_alpha;
pub(crate) use yuv_to_yuv2::yuv_to_yuy2_avx2_row;
pub(crate) use yuy2_to_rgb::yuy2_to_rgb_avx;
pub(crate) use yuy2_to_yuv::yuy2_to_yuv_avx;
pub(crate) use rgba_to_yuv420::avx2_rgba_to_yuv420;
Loading

0 comments on commit 52ca84d

Please sign in to comment.