Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bloom filter #10

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ build/Release
# Dependency directory
# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
node_modules

test/data/
test/utils/
78 changes: 78 additions & 0 deletions bloomFilter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
'use strict';

/**
 * Abstract base class for Bloom filter back-ends.
 *
 * Concrete filters override `add`, `has` and `dispose` (the versions here
 * throw) and may override `initialize`. The shared 32-bit `MurmurHash`
 * helper is provided for deriving bit positions from a key.
 */
class BloomFilter {

    /**
     * Prepare the filter for use. The base implementation has nothing to set up.
     *
     * @return {Promise} an already-resolved promise
     */
    initialize() {
        return Promise.resolve();
    }

    /**
     * Release the resources held by the filter.
     *
     * @throws {Error} always; subclasses must override
     */
    dispose() {
        throw new Error('`dispose` not implemented');
    }

    /**
     * Record a key in the filter.
     *
     * @param {*} key
     * @throws {Error} always; subclasses must override
     */
    add(key) {
        throw new Error('`add` not implemented');
    }

    /**
     * Test whether a key may have been recorded.
     *
     * @param {*} key
     * @throws {Error} always; subclasses must override
     */
    has(key) {
        throw new Error('`has` not implemented');
    }

    /**
     * 32-bit MurmurHash2.
     * From http://murmurhash.googlepages.com/
     *
     * `data` may be a string or any indexable sequence of byte values
     * (Array, Buffer, Uint8Array). Strings are read through `charCodeAt`
     * and each code unit is truncated to its low 8 bits, so the hash is
     * only meaningful for single-byte (e.g. ASCII / hex) strings.
     *
     * @param {string|Array<number>|Uint8Array} data - bytes to hash
     * @param {number} [offset=0] - index of the first byte to read
     * @param {number} [seed=0] - hash seed
     * @return {number} the hash as a signed 32-bit integer
     */
    MurmurHash(data, offset = 0, seed = 0) {
        const M = 0x5bd1e995;
        const R = 24;
        const len = data.length;

        // Bitwise operators coerce a one-character string such as 'a' to 0,
        // so string input has to be converted to char codes explicitly.
        const byteAt = typeof data === 'string'
            ? (idx) => data.charCodeAt(idx) & 0xff
            : (idx) => data[idx] & 0xff;

        let h = seed ^ len;
        const wordCount = len >> 2;

        for (let w = 0; w < wordCount; w++) {
            const base = (w << 2) + offset;

            // Assemble a little-endian 32-bit word.
            let k = byteAt(base)
                | (byteAt(base + 1) << 8)
                | (byteAt(base + 2) << 16)
                | (byteAt(base + 3) << 24);

            // Math.imul keeps the exact low 32 bits of the product; a plain
            // `*` goes through a double and drops them for large operands.
            k = Math.imul(k, M);
            k ^= k >>> R;
            k = Math.imul(k, M);
            h = Math.imul(h, M) ^ k;
        }

        // avoid calculating modulo
        const consumed = wordCount << 2;
        const left = len - consumed;
        const tail = consumed + offset;

        if (left !== 0) {
            if (left >= 3) {
                h ^= byteAt(tail + 2) << 16;
            }
            if (left >= 2) {
                h ^= byteAt(tail + 1) << 8;
            }
            if (left >= 1) {
                h ^= byteAt(tail);
            }

            h = Math.imul(h, M);
        }

        // Final avalanche.
        h ^= h >>> 13;
        h = Math.imul(h, M);
        h ^= h >>> 15;

        return h;
    }

}


module.exports = BloomFilter;
86 changes: 70 additions & 16 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,50 @@ const URL = require('node-url-utils');
*/

function seenreq(options) {
let Repo = null;
const Normalizers = [];

options = options || {};
if(!options.repo || options.repo==='default' || options.repo==='memory'){
Repo = require('./lib/repo/default.js');
}else{
const moduleName = `seenreq-repo-${options.repo}`;
try{
Repo = require(moduleName);
}catch(e){
console.error(`\nCannot load module ${moduleName}, please run 'npm install ${moduleName}' and retry\n`);
throw e;
if(!options.type || options.type ==='default' || options.type === 'key-value'){
let Repo = null;

if(!options.repo || options.repo==='default' || options.repo==='memory'){
Repo = require('./lib/repo/default.js');
}else{
const moduleName = `seenreq-repo-${options.repo}`;
try{
Repo = require(moduleName);
}catch(e){
console.error(`\nCannot load module ${moduleName}, please run 'npm install ${moduleName}' and retry\n`);
throw e;
}
}
this.repo = new Repo(options);
}
else if( options.type==='bloomFilter'){

const BloomFilter = require('./lib/bloomFilter/default.js');

// default values
let maxKeys = 100000000;
let errorRate = 0.000001;

if(options.bloomFilter){
const tempMaxKeys = Number(options.bloomFilter.maxKeys);
const tempErrorRate = Number(options.bloomFilter.errorRate);

if(tempMaxKeys <= 0 || tempErrorRate >= 1 || tempErrorRate <= 0){
throw new Error(`Wrong setting for the Bloom Filter!`);
}
maxKeys = tempMaxKeys;
maxKey = tempErrorRate;
}

this.bloomFilter = new BloomFilter(maxKeys,errorRate);

}else{
throw new Error(`Cannot find type ${options.type}, please choose 'bloomFilter' or 'key-value'.`);
}


this.repo = new Repo(options);

const Normalizers = [];
if(!options.normalizer){
Normalizers.push(require('./lib/normalizer/default.js'));
}else{
Expand Down Expand Up @@ -56,6 +82,9 @@ function seenreq(options) {
* @return Promise if there is no callback
*/
seenreq.prototype.initialize = function(){
    // In 'bloomFilter' mode the state lives in `this.bloomFilter`;
    // every other mode delegates to the configured repo.
    const backend = this.globalOptions.type === 'bloomFilter'
        ? this.bloomFilter
        : this.repo;

    return backend.initialize();
};

Expand Down Expand Up @@ -89,7 +118,8 @@ seenreq.prototype.normalize = function(req, options) {
[normalizedRequest.method, URL.normalize(normalizedRequest.uri, options)].join(' '), normalizedRequest.body
].join('\r\n');

const requestArgsSet = new Set(['uri','url','qs','method','headers','body','form','json','multipart','followRedirect','followAllRedirects', 'maxRedirects','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever']);
const requestArgsSet =
new Set(['uri','url','qs','method','headers','body','form','json','multipart','followRedirect','followAllRedirects', 'maxRedirects','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever']);

Object.keys(normalizedRequest).filter(key => !requestArgsSet.has(key) ).forEach(key=>options[key]=normalizedRequest[key]);
return {sign,options};
Expand All @@ -103,12 +133,36 @@ seenreq.prototype.exists = function(req, options) {
if (!(req instanceof Array)) {
req = [req];
}

const rs = req.map(r=>this.normalize(r,options));
if(this.globalOptions.type === 'bloomFilter'){
const result = [];
rs.forEach(item => {

const rupdate = item.options.rupdate;
delete item.options.rupdate;

if(this.bloomFilter.has(JSON.stringify(item))){
result.push(true);
}else{
result.push(false);
if(rupdate !== false){
this.bloomFilter.add(JSON.stringify(item));
}

}
});

return result.length == 1 ? result[0] : result;
}

return this.repo.exists(rs, options).then( rst => rst.length == 1 ? rst[0] : rst);
};

seenreq.prototype.dispose = function() {
    // Dispose whichever backend this instance was configured with.
    const usesBloomFilter = this.globalOptions.type === 'bloomFilter';
    const backend = usesBloomFilter ? this.bloomFilter : this.repo;

    return backend.dispose();
};

Expand Down
84 changes: 84 additions & 0 deletions lib/bloomFilter/default.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
'use strict';
const crypto = require('crypto');
const BloomFilter = require('../../bloomFilter.js');

/**
 * In-memory Bloom filter.
 *
 * Bits are stored in `bitMap`, an array of integers of which only the low
 * 31 bits of each entry are used. Keys are MD5-hashed to a hex string and
 * then mapped to `hashCount` bit positions by combining two MurmurHash
 * values (`hash1 + i * hash2`).
 */
class DefaultBloomFilter extends BloomFilter {
    /**
     * @param {number} maxKeys - number of keys the filter is sized for
     * @param {number} errorRate - target false-positive rate; not validated
     *   here, callers are expected to pass a value strictly between 0 and 1
     */
    constructor(maxKeys, errorRate) {
        super();
        this.bitMap = [];
        this.maxKeys = maxKeys;
        this.errorRate = errorRate;

        /* https://developer.aliyun.com/article/3607 */
        // bitSize = -n * ln(p) / (ln 2)^2, hashCount = ln 2 * bitSize / n
        this.bitSize = Math.ceil(maxKeys * (-Math.log(errorRate) / (Math.log(2) * Math.log(2))));
        this.hashCount = Math.ceil(Math.log(2) * (this.bitSize / maxKeys));

        this.keyCount = 0;
    }

    /**
     * Turn on one bit.
     *
     * @param {number} bit - zero-based bit position
     * @return {Promise} an already-resolved promise
     */
    setBit(bit) {
        const word = Math.floor(bit / 31);
        const shift = Math.floor(bit % 31);
        // A missing entry is `undefined`, which `|` treats as 0.
        this.bitMap[word] |= (1 << shift);

        return Promise.resolve();
    }

    /**
     * Read one bit without modifying the bitmap.
     *
     * @param {number} bit - zero-based bit position
     * @return {number} non-zero when the bit is set, 0 otherwise
     */
    getBit(bit) {
        const word = Math.floor(bit / 31);
        const shift = Math.floor(bit % 31);
        // Plain `&`, not `&=`: a lookup must never write back to the word,
        // otherwise it would clear every other bit stored in it.
        return this.bitMap[word] & (1 << shift);
    }

    /**
     * Compute the bit positions that represent a key.
     *
     * @param {string|Buffer} _key - raw (un-hashed) key
     * @return {Array<number>} `hashCount` positions derived from the key
     */
    _bitPositions(_key) {
        const key = this.transformKey(_key);
        const hash1 = this.MurmurHash(key, 0, 0);
        const hash2 = this.MurmurHash(key, 0, hash1);

        const positions = [];
        for (let i = 0; i < this.hashCount; i++) {
            // The hashes are signed, so the remainder may be negative.
            positions.push(Math.abs(Math.floor((hash1 + i * hash2) % (this.bitSize))));
        }
        return positions;
    }

    /**
     * Test whether a key may have been added.
     *
     * @param {string|Buffer} _key - raw key
     * @return {boolean} false when the key was definitely never added,
     *   true when it was added or is a false positive
     */
    has(_key) {
        return this._bitPositions(_key).every((bit) => this.getBit(bit));
    }

    /**
     * Add a key to the filter.
     *
     * The presence check and the insertion use the same positions, computed
     * from the raw key exactly once (the key must not be hashed twice).
     *
     * @param {string|Buffer} _key - raw key
     * @return {number|undefined} -1 when the key already appears to be
     *   present (nothing is changed), otherwise undefined
     */
    add(_key) {
        const positions = this._bitPositions(_key);
        if (positions.every((bit) => this.getBit(bit))) {
            return -1;
        }

        positions.forEach((bit) => {
            this.setBit(bit);
        });

        this.keyCount++;
    }

    /**
     * Normalise a key to a fixed-length string before hashing.
     *
     * @param {string|Buffer} key
     * @return {string} hex-encoded MD5 digest of the key
     */
    transformKey(key) {
        const hashFn = crypto.createHash('md5');
        hashFn.update(key);
        return hashFn.digest('hex');
    }

    /**
     * Drop the bitmap. The filter must not be used afterwards.
     *
     * @param {Function} [callback] - invoked with no arguments once done
     * @return {Promise|undefined} a resolved promise when no callback is given
     */
    dispose(callback) {
        this.bitMap = null;
        if (callback) {
            callback();
        } else {
            return Promise.resolve();
        }
    }

}

module.exports = DefaultBloomFilter;
Loading