Skip to content

Commit

Permalink
Merge branch 'main' into main-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Aug 22, 2023
2 parents 8ddedc6 + db19ab4 commit 186be66
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
runs-on: ubuntu-20.04
environment:
name: pypi
url: https://pypi.org/p/stringzilla
url: https://pypi.org/p/simsimd
permissions:
id-token: write

Expand Down
42 changes: 21 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,27 @@ The signature defines the number of arguments:
The latter is intended for cases where the number of dimensions is hard-coded.
Constraints define the limitations on the number of dimensions an argument vector can have.

| Name | Signature | ISA Extesion | Constraints |
| :---------------------- | :-------: | :----------: | :------------: |
| `dot_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `dot_f32x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `cos_f16_sve` | ✳️✳️📏 | Arm SVE | |
| `cos_f16x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_i8x16_neon` | ✳️✳️📏 | Arm NEON | `d % 16 == 0` |
| `cos_f32x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_f16x16_avx512` | ✳️✳️📏 | x86 AVX-512 | `d % 16 == 0` |
| `cos_f32x4_avx2` | ✳️✳️📏 | x86 AVX2 | `d % 4 == 0` |
| `l2sq_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `l2sq_f16_sve` | ✳️✳️📏 | Arm SVE | |
| `hamming_b1x8_sve` | ✳️✳️📏 | Arm SVE | `d % 8 == 0` |
| `hamming_b1x128_sve` | ✳️✳️📏 | Arm SVE | `d % 128 == 0` |
| `hamming_b1x128_avx512` | ✳️✳️📏 | x86 AVX-512 | `d % 128 == 0` |
| `tanimoto_b1x8_naive` | ✳️✳️📏 | | `d % 8 == 0` |
| `tanimoto_maccs_naive` | ✳️✳️ | | `d == 166` |
| `tanimoto_maccs_neon` | ✳️✳️ | Arm NEON | `d == 166` |
| `tanimoto_maccs_sve` | ✳️✳️ | Arm SVE | `d == 166` |
| `tanimoto_maccs_avx512` | ✳️✳️ | x86 AVX-512 | `d == 166` |
| Name | Signature | ISA Extension | Constraints |
| :---------------------- | :-------: | :-----------: | :------------: |
| `dot_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `dot_f32x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `cos_f16_sve` | ✳️✳️📏 | Arm SVE | |
| `cos_f16x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_i8x16_neon` | ✳️✳️📏 | Arm NEON | `d % 16 == 0` |
| `cos_f32x4_neon` | ✳️✳️📏 | Arm NEON | `d % 4 == 0` |
| `cos_f16x16_avx512` | ✳️✳️📏 | x86 AVX-512 | `d % 16 == 0` |
| `cos_f32x4_avx2` | ✳️✳️📏 | x86 AVX2 | `d % 4 == 0` |
| `l2sq_f32_sve` | ✳️✳️📏 | Arm SVE | |
| `l2sq_f16_sve` | ✳️✳️📏 | Arm SVE | |
| `hamming_b1x8_sve` | ✳️✳️📏 | Arm SVE | `d % 8 == 0` |
| `hamming_b1x128_sve` | ✳️✳️📏 | Arm SVE | `d % 128 == 0` |
| `hamming_b1x128_avx512` | ✳️✳️📏 | x86 AVX-512 | `d % 128 == 0` |
| `tanimoto_b1x8_naive` | ✳️✳️📏 | | `d % 8 == 0` |
| `tanimoto_maccs_naive` | ✳️✳️ | | `d == 166` |
| `tanimoto_maccs_neon` | ✳️✳️ | Arm NEON | `d == 166` |
| `tanimoto_maccs_sve` | ✳️✳️ | Arm SVE | `d == 166` |
| `tanimoto_maccs_avx512` | ✳️✳️ | x86 AVX-512 | `d == 166` |

## Benchmarks

Expand Down
1 change: 1 addition & 0 deletions bench.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ int main(int argc, char** argv) {
register_<simsimd_f32_t>("dot_f32x4_neon", simsimd_dot_f32x4_neon);
register_<std::int16_t>("cos_f16x4_neon", simsimd_cos_f16x4_neon);
register_<std::int8_t>("cos_i8x16_neon", simsimd_cos_i8x16_neon);
register_<std::int8_t>("l2sq_i8x16_neon", simsimd_l2sq_i8x16_neon);
register_<simsimd_f32_t>("cos_f32x4_neon", simsimd_cos_f32x4_neon);
register_<std::uint8_t, 21>("tanimoto_maccs_neon", simsimd_tanimoto_maccs_neon);
#endif
Expand Down
21 changes: 21 additions & 0 deletions include/simsimd/simsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,27 @@ inline static simsimd_f32_t simsimd_cos_i8x16_neon(int8_t const* a, int8_t const
#endif
}

/**
 *  @brief  Distance between two vectors of signed 8-bit integers, using Arm NEON.
 *
 *  Accumulates the squared element-wise differences of `a` and `b` over `d`
 *  dimensions in 32-bit integers and returns the square root of that sum.
 *
 *  NOTE(review): the `l2sq` in the name suggests the *squared* distance, yet the
 *  square root is taken before returning. That behavior is preserved here so that
 *  existing results do not change - confirm which of the two is intended.
 *
 *  @param a  First vector, at least `d` elements.
 *  @param b  Second vector, at least `d` elements.
 *  @param d  Number of dimensions. Eight are processed per NEON step; any
 *            remainder is processed one element at a time.
 *  @return   Square root of the sum of squared differences,
 *            or -1 when compiled without `__ARM_NEON`.
 */
inline static simsimd_f32_t simsimd_l2sq_i8x16_neon(int8_t const* a, int8_t const* b, size_t d) {
#if defined(__ARM_NEON)
    int32x4_t d2_vec = vdupq_n_s32(0);
    size_t i = 0;
    // `i + 8 <= d` instead of `i != d`: never reads past the end of the inputs
    // and always terminates, even when `d` is not a multiple of eight.
    for (; i + 8 <= d; i += 8) {
        // Widening subtract: int8 - int8 lies in [-255, 255] and fits in int16.
        int16x8_t diff_vec = vsubl_s8(vld1_s8(a + i), vld1_s8(b + i));
        // The square can reach 65025, which does NOT fit in int16, so multiply
        // with widening to int32 and accumulate directly into the 32-bit lanes.
        d2_vec = vmlal_s16(d2_vec, vget_low_s16(diff_vec), vget_low_s16(diff_vec));
        d2_vec = vmlal_s16(d2_vec, vget_high_s16(diff_vec), vget_high_s16(diff_vec));
    }

    // Horizontal reduction of the four 32-bit partial sums into a scalar.
    int32x2_t d2_part = vadd_s32(vget_high_s32(d2_vec), vget_low_s32(d2_vec));
    int32_t d2 = vget_lane_s32(vpadd_s32(d2_part, d2_part), 0);

    // Scalar tail for the last `d % 8` dimensions.
    for (; i < d; ++i) {
        int32_t diff = (int32_t)a[i] - (int32_t)b[i];
        d2 += diff * diff;
    }
    return sqrt(d2);
#else
    (void)a, (void)b, (void)d;
    return -1;
#endif
}

inline static simsimd_f32_t simsimd_cos_f32x4_neon(simsimd_f32_t const* a, simsimd_f32_t const* b, size_t d) {
#if defined(__ARM_NEON)
float32x4_t ab_vec = vdupq_n_f32(0);
Expand Down

0 comments on commit 186be66

Please sign in to comment.