Skip to content

Commit

Permalink
AVX + SSE improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed May 20, 2024
1 parent 021e33b commit e8eaf06
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
8 changes: 8 additions & 0 deletions src/intel_simd_support.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Narrows the sixteen signed 16-bit lanes of a 256-bit register into sixteen
/// unsigned 8-bit lanes of a 128-bit register.
///
/// Each lane is clamped to `0..=255` by the unsigned-saturating pack. The low
/// 128-bit half of `data` fills output bytes 0 through 7 and the high half
/// fills bytes 8 through 15, so the element order of the input is preserved.
///
/// # Safety
///
/// Uses AVX2 intrinsics; the caller must ensure the executing CPU supports
/// AVX2 before calling this function.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub unsafe fn demote_to_avx256_to_128(data: __m256i) -> __m128i {
    // The cast only reinterprets the low 128 bits; the upper half needs an
    // explicit extract.
    let lower_half = _mm256_castsi256_si128(data);
    let upper_half = _mm256_extracti128_si256::<1>(data);
    _mm_packus_epi16(lower_half, upper_half)
}

#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub unsafe fn store_u8_rgb_avx2(ptr: *mut u8, r: __m256i, g: __m256i, b: __m256i, use_transient: bool) {
Expand Down
13 changes: 10 additions & 3 deletions src/yuv_to_rgba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,16 @@ unsafe fn avx2_process_row(
v_min_values,
));

let r_values = _mm256_packus_epi16(r_low, r_high);
let g_values = _mm256_packus_epi16(g_low, g_high);
let b_values = _mm256_packus_epi16(b_low, b_high);
let r_low_u8 = _mm256_castsi128_si256(demote_to_avx256_to_128(r_low));
let r_high_u8 = demote_to_avx256_to_128(r_high);
let g_low_u8 = _mm256_castsi128_si256(demote_to_avx256_to_128(g_low));
let g_high_u8 = demote_to_avx256_to_128(g_high);
let b_low_u8 = _mm256_castsi128_si256(demote_to_avx256_to_128(b_low));
let b_high_u8 = demote_to_avx256_to_128(b_high);

let r_values = _mm256_inserti128_si256::<1>(r_low_u8, r_high_u8);
let g_values = _mm256_inserti128_si256::<1>(g_low_u8, g_high_u8);
let b_values = _mm256_inserti128_si256::<1>(b_low_u8, b_high_u8);

let dst_shift = rgba_offset + cx * channels;

Expand Down

0 comments on commit e8eaf06

Please sign in to comment.