Skip to content

Commit

Permalink
Merge pull request #234 from ashvardanian/main-dev
Browse files Browse the repository at this point in the history
Fix: Jensen Shannon square roots (#233)
  • Loading branch information
ashvardanian authored Nov 18, 2024
2 parents c124410 + d14b654 commit 6e19cd8
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 198 deletions.
50 changes: 40 additions & 10 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,35 +168,65 @@ open target/criterion/report/index.html
## JavaScript
If you don't have NPM installed:
### NodeJS
If you don't have the environment configured, here are the [installation options](https://github.com/nvm-sh/nvm?tab=readme-ov-file#install--update-script) with different tools:
```sh
wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # Linux
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # macOS
```
Install dependencies:
```sh
wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
nvm install 20
npm install -g typescript # Install the TypeScript compiler globally
npm install --save-dev @types/node # Install the Node.js type definitions as a dev dependency
```
Testing and benchmarking:
```sh
npm install -g typescript
npm run build-js
npm test
npm run bench
npm run build-js # Build the JavaScript code using TypeScript configurations
npm test # Run the test suite
npm run bench # Run the benchmark script
```
### Deno
If you don't have the environment configured, here are [installation options](https://docs.deno.com/runtime/getting_started/installation/) with different tools:
```sh
wget -qO- https://deno.land/x/install/install.sh | sh # Linux
curl -fsSL https://deno.land/install.sh | sh # macOS
irm https://deno.land/install.ps1 | iex # Windows
```
Running with Deno:
Testing:
```sh
deno test --allow-read
```
Running with Bun:
### Bun
If you don't have the environment configured, here are the [installation options](https://bun.sh/docs/installation) with different tools:
```sh
npm install -g bun
bun test
wget -qO- https://bun.sh/install | bash # for Linux
curl -fsSL https://bun.sh/install | bash # for macOS and WSL
```
Testing:
```sh
bun install
bun test ./scripts/test.mjs
```
... wouldn't work for now.
## Swift
```sh
Expand Down
23 changes: 14 additions & 9 deletions include/simsimd/probability.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const* a, simsimd_f16_
d += ai * SIMSIMD_LOG((ai + epsilon) / (mi + epsilon)); \
d += bi * SIMSIMD_LOG((bi + epsilon) / (mi + epsilon)); \
} \
*result = (simsimd_distance_t)d / 2; \
*result = SIMSIMD_SQRT(((simsimd_distance_t)d / 2)); \
}

SIMSIMD_MAKE_KL(serial, f64, f64, SIMSIMD_DEREFERENCE, SIMSIMD_F32_DIVISION_EPSILON) // simsimd_kl_f64_serial
Expand Down Expand Up @@ -219,12 +219,13 @@ SIMSIMD_PUBLIC void simsimd_js_f32_neon(simsimd_f32_t const *a, simsimd_f32_t co
float32x4_t log_ratio_b_vec = _simsimd_log2_f32_neon(ratio_b_vec);
float32x4_t prod_a_vec = vmulq_f32(a_vec, log_ratio_a_vec);
float32x4_t prod_b_vec = vmulq_f32(b_vec, log_ratio_b_vec);

sum_vec = vaddq_f32(sum_vec, vaddq_f32(prod_a_vec, prod_b_vec));
if (n != 0) goto simsimd_js_f32_neon_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer;
*result = sum / 2;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
*result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -296,8 +297,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_neon(simsimd_f16_t const *a, simsimd_f16_t co
if (n) goto simsimd_js_f16_neon_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer;
*result = sum / 2;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
*result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -403,8 +404,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_haswell(simsimd_f16_t const *a, simsimd_f16_t

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = _simsimd_reduce_f32x8_haswell(sum_vec);
sum *= log2_normalizer;
*result = sum / 2;
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -496,7 +497,9 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
if (n) goto simsimd_js_f32_skylake_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
*result = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
simsimd_f32_t sum = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec));
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -586,7 +589,9 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
if (n) goto simsimd_js_f16_sapphire_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
*result = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
simsimd_f32_t sum = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec));
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down
41 changes: 26 additions & 15 deletions javascript/fallback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ export const jaccard = (a: Uint8Array, b: Uint8Array): number => {
};

/**
* @brief Computes the kullbackleibler similarity coefficient between two vectors.
* @brief Computes the Kullback-Leibler divergence between two probability distributions.
* @param {Float64Array|Float32Array} a - The first vector.
* @param {Float64Array|Float32Array} b - The second vector.
* @returns {number} The Kullback-Leibler divergence between distributions a and b.
Expand All @@ -182,38 +182,49 @@ export const kullbackleibler = (a: Float64Array | Float32Array, b: Float64Array
}

let divergence = 0.0;

for (let i = 0; i < a.length; i++) {
if (a[i] > 0) {
if (b[i] === 0) {
throw new Error(
"Division by zero encountered in KL divergence calculation"
);
}
divergence += a[i] * Math.log(a[i] / b[i]);
if (a[i] < 0 || b[i] < 0) {
throw new Error("Negative values are not allowed in probability distributions");
}
if (b[i] === 0) {
throw new Error(
"Division by zero encountered in KL divergence calculation"
);
}
divergence += a[i] * Math.log(a[i] / b[i]);
}

return divergence;
};

/**
* @brief Computes the jensenshannon similarity coefficient between two vectors.
* @param {Float64Array|Float32Array} a - The first vector.
* @param {Float64Array|Float32Array} b - The second vector.
* @returns {number} The Jaccard similarity coefficient between vectors a and b.
* @brief Computes the Jensen-Shannon distance between two probability distributions.
* @param {Float64Array|Float32Array} a - The first probability distribution.
* @param {Float64Array|Float32Array} b - The second probability distribution.
* @returns {number} The Jensen-Shannon distance between distributions a and b.
*/
export const jensenshannon = (a: Float64Array | Float32Array, b: Float64Array | Float32Array): number => {
if (a.length !== b.length) {
throw new Error("Arrays must be of the same length");
}

const m = a.map((value, index) => (value + b[index]) / 2);
let divergence = 0;
for (let i = 0; i < a.length; i++) {
if (a[i] < 0 || b[i] < 0) {
throw new Error("Negative values are not allowed in probability distributions");
}
const m = (a[i] + b[i]) / 2;
if (m > 0) {
if (a[i] > 0) divergence += a[i] * Math.log(a[i] / m);
if (b[i] > 0) divergence += b[i] * Math.log(b[i] / m);
}
}

const divergence = 0.5 * kullbackleibler(a, m) + 0.5 * kullbackleibler(b, m);
divergence /= 2;
return Math.sqrt(divergence);
};


export default {
sqeuclidean,
euclidean,
Expand Down
19 changes: 10 additions & 9 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
},
"devDependencies": {
"@types/bindings": "^1.5.5",
"@types/node": "^20.17.1",
"@types/node": "^20.17.6",
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"typescript": "^5.3.3"
Expand Down
10 changes: 5 additions & 5 deletions rust/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -877,19 +877,19 @@ mod tests {
// Adding new tests for probability similarities
#[test]
fn test_js_f32() {
let a = &[0.1, 0.9, 0.0];
let b = &[0.2, 0.8, 0.0];
let a: &[f32; 3] = &[0.1, 0.9, 0.0];
let b: &[f32; 3] = &[0.2, 0.8, 0.0];

if let Some(result) = ProbabilitySimilarity::jensenshannon(a, b) {
println!("The result of js_f32 is {:.8}", result);
assert_almost_equal(0.01, result, 0.01); // Example value
assert_almost_equal(0.099, result, 0.01); // Example value
}
}

#[test]
fn test_kl_f32() {
let a = &[0.1, 0.9, 0.0];
let b = &[0.2, 0.8, 0.0];
let a: &[f32; 3] = &[0.1, 0.9, 0.0];
let b: &[f32; 3] = &[0.2, 0.8, 0.0];

if let Some(result) = ProbabilitySimilarity::kullbackleibler(a, b) {
println!("The result of kl_f32 is {:.8}", result);
Expand Down
Loading

0 comments on commit 6e19cd8

Please sign in to comment.