Skip to content

Commit

Permalink
Improve: Faster square roots on x86/Arm
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Nov 17, 2024
1 parent 5fed772 commit e09c611
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions include/simsimd/probability.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ SIMSIMD_PUBLIC void simsimd_js_f32_neon(simsimd_f32_t const *a, simsimd_f32_t co

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
- *result = SIMSIMD_SQRT(sum);
+ *result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -298,7 +298,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_neon(simsimd_f16_t const *a, simsimd_f16_t co

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
- *result = SIMSIMD_SQRT(sum);
+ *result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -404,8 +404,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_haswell(simsimd_f16_t const *a, simsimd_f16_t

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = _simsimd_reduce_f32x8_haswell(sum_vec);
- sum *= log2_normalizer;
- *result = sum / 2;
+ sum *= log2_normalizer / 2;
+ *result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -497,7 +497,9 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
if (n) goto simsimd_js_f32_skylake_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
- *result = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
+ simsimd_f32_t sum = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec));
+ sum *= log2_normalizer / 2;
+ *result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -587,7 +589,9 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
if (n) goto simsimd_js_f16_sapphire_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
- *result = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
+ simsimd_f32_t sum = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec));
+ sum *= log2_normalizer / 2;
+ *result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down

0 comments on commit e09c611

Please sign in to comment.