bloomfilter
Advanced tools
Comparing version
@@ -24,3 +24,3 @@ (function(exports) { | ||
if (typedArrays) { | ||
const kbytes = 1 << Math.ceil(Math.log(Math.ceil(Math.log(m) / Math.LN2 / 8)) / Math.LN2); | ||
const kbytes = 1 << Math.ceil(Math.log2(Math.ceil(Math.log2(m) / 8))); | ||
const array = kbytes === 1 ? Uint8Array : kbytes === 2 ? Uint16Array : Uint32Array; | ||
@@ -57,4 +57,4 @@ const kbuffer = new ArrayBuffer(kbytes * k); | ||
const r = this._locations; | ||
let x; | ||
let y; | ||
let a; | ||
let b; | ||
@@ -80,17 +80,20 @@ // FNV-1a hash (64-bit). | ||
x = (v3 << 16) | v2; | ||
y = (v1 << 16) | v0; | ||
a = (v3 << 16) | v2; | ||
b = (v1 << 16) | v0; | ||
} | ||
x = (x % m); | ||
if (x < 0) x += m; | ||
y = (y % m); | ||
if (y < 0) y += m; | ||
a = (a % m); | ||
if (a < 0) a += m; | ||
b = (b % m); | ||
if (b < 0) b += m; | ||
// Force y to be odd to avoid catastrophic collisions where y is zero or divides m exactly (assuming m is a power of two). | ||
y |= 1; | ||
for (let i = 0; i < k; ++i) { | ||
r[i] = x; | ||
x = (x + y) % m; | ||
y = (y + i + 1) % m; | ||
// Use enhanced double hashing, i.e. r[i] = h1(v) + i*h2(v) + (i*i*i - i)/6 | ||
// Reference: | ||
// Dillinger, Peter C., and Panagiotis Manolios. "Bloom filters in probabilistic verification." | ||
// https://www.khoury.northeastern.edu/~pete/pub/bloom-filters-verification.pdf | ||
r[0] = a; | ||
for (let i = 1; i < k; ++i) { | ||
a = (a + b) % m; | ||
b = (b + i) % m; | ||
r[i] = a; | ||
} | ||
@@ -124,2 +127,6 @@ return r; | ||
BloomFilter.prototype.size = function() { | ||
return -this.m * Math.log(1 - this.countBits() / this.m) / this.k; | ||
}; | ||
BloomFilter.prototype.countBits = function() { | ||
const buckets = this.buckets; | ||
@@ -130,5 +137,39 @@ let bits = 0; | ||
} | ||
return -this.m * Math.log(1 - bits / this.m) / this.k; | ||
return bits; | ||
}; | ||
BloomFilter.prototype.error = function() { | ||
return Math.pow(this.countBits() / this.m, this.k); | ||
}; | ||
BloomFilter.union = function(a, b) { | ||
if (a.m === b.m && a.k === b.k) { | ||
const l = a.m >> 5; | ||
const c = typedArrays ? new Int32Array(l) : new Array(l); | ||
for (let i = 0; i < l; ++i) { | ||
c[i] = a.buckets[i] | b.buckets[i]; | ||
} | ||
return new BloomFilter(c, a.k); | ||
} | ||
throw new Error("Bloom filters must have identical {m, k}."); | ||
} | ||
BloomFilter.intersection = function(a, b) { | ||
if (a.m === b.m && a.k === b.k) { | ||
const l = a.m >> 5; | ||
const c = typedArrays ? new Int32Array(l) : new Array(l); | ||
for (let i = 0; i < l; ++i) { | ||
c[i] = a.buckets[i] & b.buckets[i]; | ||
} | ||
return new BloomFilter(c, a.k); | ||
} | ||
throw new Error("Bloom filters must have identical {m, k}."); | ||
} | ||
BloomFilter.withTargetError = function(n, error) { | ||
const m = Math.ceil(-n * Math.log2(error) / Math.LN2); | ||
const k = Math.ceil(Math.LN2 * m / n); | ||
return new BloomFilter(m, k); | ||
}; | ||
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel | ||
@@ -135,0 +176,0 @@ function popcnt(v) { |
{ | ||
"name": "bloomfilter", | ||
"version": "0.0.20", | ||
"version": "0.0.21", | ||
"license": "BSD-3-Clause", | ||
@@ -5,0 +5,0 @@ "description": "Fast bloom filter in JavaScript.", |
@@ -11,3 +11,3 @@ Bloom Filter | ||
```javascript | ||
var bloom = new BloomFilter( | ||
const bloom = new BloomFilter( | ||
32 * 256, // number of bits to allocate. | ||
@@ -30,4 +30,4 @@ 16 // number of hash functions. | ||
// so we convert to a normal array first. | ||
var array = [].slice.call(bloom.buckets), | ||
json = JSON.stringify(array); | ||
const array = [].slice.call(bloom.buckets); | ||
const json = JSON.stringify(array); | ||
@@ -37,3 +37,7 @@ // Deserialisation. Note that the any array-like object is supported, but | ||
// performance. | ||
var bloom = new BloomFilter(array, 16); | ||
const bloom = new BloomFilter(array, 16); | ||
// Automatically pick {m, k} based on number of elements and target false | ||
// positive error rate. | ||
const bloom = BloomFilter.withTargetError(1_000_000, 1e-6); | ||
``` | ||
@@ -45,5 +49,5 @@ | ||
Although the bloom filter requires *k* hash functions, we can simulate this | ||
using only *three* hash functions. In fact, we get the second and third hash | ||
functions almost for free by iterating once more on the first hash using the | ||
FNV hash algorithm, and another time to get the third. | ||
using enhanced double hashing with a single 64-bit FNV-1a hash computation for | ||
performance. The 64-bit hash is split into two 32-bit halves to obtain the two | ||
independent hash functions required for enhanced double hashing. | ||
@@ -50,0 +54,0 @@ Thanks to Will Fitzgerald for his [help and inspiration][2] with the hashing |
8997
19.23%157
29.75%55
7.84%