@ably/bloomit
Advanced tools
Comparing version 1.3.1 to 1.4.0
@@ -16,3 +16,2 @@ import { HashableInput } from './utils'; | ||
private _length; | ||
private _seed; | ||
/** | ||
@@ -22,5 +21,4 @@ * Constructor | ||
* @param nbHashes - The number of hash functions used | ||
* @param seed - The filter seed. | ||
*/ | ||
constructor(size: number, nbHashes: number, seed?: number); | ||
constructor(size: number, nbHashes: number); | ||
/** | ||
@@ -32,3 +30,3 @@ * Create an optimal bloom filter providing the maximum of elements stored and the error rate desired | ||
*/ | ||
static create(nbItems: number, errorRate: number, seed?: number): BloomFilter; | ||
static create(nbItems: number, errorRate: number): BloomFilter; | ||
/** | ||
@@ -43,3 +41,3 @@ * Build a new Bloom Filter from an existing iterable with a fixed error rate | ||
*/ | ||
static from(items: Iterable<HashableInput>, errorRate: number, seed?: number): BloomFilter; | ||
static from(items: Iterable<HashableInput>, errorRate: number): BloomFilter; | ||
/** | ||
@@ -63,7 +61,2 @@ * Generate a bloom filter from a binary export. | ||
/** | ||
* Get the seed of the filter | ||
* @return The filter seed | ||
*/ | ||
get seed(): number; | ||
/** | ||
* Get the hash count of the filter | ||
@@ -70,0 +63,0 @@ * @return The filter hash count |
@@ -43,7 +43,4 @@ /* file : bloom-filter.ts | ||
* @param nbHashes - The number of hash functions used | ||
* @param seed - The filter seed. | ||
*/ | ||
constructor(size, nbHashes, seed = 0x1234567890) { | ||
const positiveIntSeed = Math.abs(Math.ceil(seed)); | ||
this._seed = positiveIntSeed; | ||
constructor(size, nbHashes) { | ||
this._size = size; | ||
@@ -60,6 +57,6 @@ this._nbHashes = nbHashes; | ||
*/ | ||
static create(nbItems, errorRate, seed = 0x1234567890) { | ||
static create(nbItems, errorRate) { | ||
const size = formulas_1.optimalFilterSize(nbItems, errorRate); | ||
const hashes = formulas_1.optimalHashes(size, nbItems); | ||
return new BloomFilter(size, hashes, seed); | ||
return new BloomFilter(size, hashes); | ||
} | ||
@@ -75,5 +72,5 @@ /** | ||
*/ | ||
static from(items, errorRate, seed = 0x1234567890) { | ||
static from(items, errorRate) { | ||
const array = Array.from(items); | ||
const filter = BloomFilter.create(array.length, errorRate, seed); | ||
const filter = BloomFilter.create(array.length, errorRate); | ||
array.forEach(element => filter.add(element)); | ||
@@ -89,8 +86,7 @@ return filter; | ||
static import(binaryBloomFilter) { | ||
const seedArray = binaryBloomFilter.slice(0, 8); | ||
const nbHashesArray = binaryBloomFilter.slice(8, 16); | ||
const lengthArray = binaryBloomFilter.slice(16, 24); | ||
const sizeArray = binaryBloomFilter.slice(24, 32); | ||
const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length); | ||
const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray), encoding_1.uint8ArrayToInt64(seedArray)); | ||
const nbHashesArray = binaryBloomFilter.slice(0, 8); | ||
const lengthArray = binaryBloomFilter.slice(8, 16); | ||
const sizeArray = binaryBloomFilter.slice(16, 24); | ||
const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length); | ||
const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray)); | ||
bloomFilter._length = encoding_1.uint8ArrayToInt64(lengthArray); | ||
@@ -115,9 +111,2 @@ bloomFilter._filter = filterArray; | ||
/** | ||
* Get the seed of the filter | ||
* @return The filter seed | ||
*/ | ||
get seed() { | ||
return this._seed; | ||
} | ||
/** | ||
* Get the hash count of the filter | ||
@@ -137,3 +126,3 @@ * @return The filter hash count | ||
add(element) { | ||
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed); | ||
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes); | ||
for (let i = 0; i < indexes.length; i++) { | ||
@@ -158,3 +147,3 @@ const indexToEdit = utils_1.getByteIndexInArray(indexes[i]); | ||
has(element) { | ||
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed); | ||
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes); | ||
for (let i = 0; i < indexes.length; i++) { | ||
@@ -196,8 +185,7 @@ if (!utils_1.getBitAtIndex(this._filter, indexes[i])) { | ||
export() { | ||
const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters | ||
exportArray.set(encoding_1.int64ToUint8Array(this.seed), 0); | ||
exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 8); | ||
exportArray.set(encoding_1.int64ToUint8Array(this.length), 16); | ||
exportArray.set(encoding_1.int64ToUint8Array(this.size), 24); | ||
const exportArrayStartIndex = 32; | ||
const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters | ||
exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 0); | ||
exportArray.set(encoding_1.int64ToUint8Array(this.length), 8); | ||
exportArray.set(encoding_1.int64ToUint8Array(this.size), 16); | ||
const exportArrayStartIndex = 24; | ||
for (let index = 0; index < this._filter.length; index += 1) { | ||
@@ -204,0 +192,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index]; |
@@ -23,3 +23,2 @@ /// <reference types="node" /> | ||
* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values. | ||
* @param seed the seed used for hashing | ||
* @return The results of the hash functions applied to the value (in hex or integer) | ||
@@ -31,16 +30,2 @@ * @memberof Utils | ||
/** | ||
* Apply Double Hashing to produce a n-hash | ||
* | ||
* This implementation used directly the value produced by the two hash functions instead of the functions themselves. | ||
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing. | ||
* @param n - The indice of the hash function we want to produce | ||
* @param hashA - The result of the first hash function applied to a value. | ||
* @param hashB - The result of the second hash function applied to a value. | ||
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter) | ||
* @return The result of hash_n applied to a value. | ||
* @memberof Utils | ||
* @author Thomas Minier | ||
*/ | ||
export declare function doubleHashing(n: number, hashA: number, hashB: number, size: number): number; | ||
/** | ||
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique | ||
@@ -50,7 +35,9 @@ * @param element - The element to hash | ||
* @param number - The number of indexes desired | ||
* @param seed - The seed used | ||
* @param maxIterations - throw if we exceed this without finding a result of the | ||
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults | ||
* to size*100 | ||
* @return A array of indexes | ||
* @author Arnaud Grall | ||
* @author Arnaud Grall, Simon Woolf | ||
*/ | ||
export declare function getDistinctIndices(element: HashableInput, size: number, number: number, seed?: number): Array<number>; | ||
export declare function getDistinctIndices(element: HashableInput, size: number, number: number, maxIterations?: number): Array<number>; | ||
/** | ||
@@ -57,0 +44,0 @@ * Return the amount of bytes needed to fit the input bits |
/* file : utils.ts | ||
MIT License | ||
Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall | ||
Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall | ||
Bloomit changes: Copyright 2021 Kolja Blauhut | ||
Ably-forks/bloomit changes: Copyright 2021 Simon Woolf | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
@@ -45,4 +49,6 @@ of this software and associated documentation files (the "Software"), to deal | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.doubleHashing = exports.hashTwice = void 0; | ||
exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.hashTwice = void 0; | ||
const farmhash = __importStar(require("farmhash")); | ||
const PRIME_STR = "'"; | ||
const PRIME_BUF = Buffer.from(PRIME_STR); | ||
/** | ||
@@ -52,3 +58,2 @@ * (64-bits only) Hash a value into two values (in hex or integer format) | ||
* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values. | ||
* @param seed the seed used for hashing | ||
* @return The results of the hash functions applied to the value (in hex or integer) | ||
@@ -69,21 +74,2 @@ * @memberof Utils | ||
/** | ||
* Apply Double Hashing to produce a n-hash | ||
* | ||
* This implementation used directly the value produced by the two hash functions instead of the functions themselves. | ||
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing. | ||
* @param n - The indice of the hash function we want to produce | ||
* @param hashA - The result of the first hash function applied to a value. | ||
* @param hashB - The result of the second hash function applied to a value. | ||
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter) | ||
* @return The result of hash_n applied to a value. | ||
* @memberof Utils | ||
* @author Thomas Minier | ||
*/ | ||
function doubleHashing(n, hashA, hashB, size) { | ||
// Cubic term avoids increased-probability-of-collision issue, see | ||
// http://peterd.org/pcd-diss.pdf s.6.5.4 | ||
return Math.abs((hashA + n * hashB + Math.floor((n ** 3 - n) / 6)) % size); | ||
} | ||
exports.doubleHashing = doubleHashing; | ||
/** | ||
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique | ||
@@ -93,16 +79,35 @@ * @param element - The element to hash | ||
* @param number - The number of indexes desired | ||
* @param seed - The seed used | ||
* @param maxIterations - throw if we exceed this without finding a result of the | ||
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults | ||
* to size*100 | ||
* @return A array of indexes | ||
* @author Arnaud Grall | ||
* @author Arnaud Grall, Simon Woolf | ||
*/ | ||
function getDistinctIndices(element, size, number, seed) { | ||
const { first, second } = hashTwice(element); | ||
function getDistinctIndices(element, size, number, maxIterations = size * 100) { | ||
let { first, second } = hashTwice(element); | ||
let index, i = 1; | ||
const indices = []; | ||
let n = 0; | ||
// Implements enhanced double hashing algorithm from | ||
// http://peterd.org/pcd-diss.pdf s.6.5.4 | ||
while (indices.length < number) { | ||
const index = doubleHashing(n, first, second, size); | ||
index = first % size; | ||
if (!indices.includes(index)) { | ||
indices.push(index); | ||
} | ||
n++; | ||
first = (first + second) % size; | ||
second = (second + i) % size; | ||
i++; | ||
if (i > size) { | ||
// Enhanced double hashing stops cycles of length less than `size` in the case where | ||
// size is coprime with the second hash. But you still get cycles of length `size`. | ||
// So if we reach there and haven't finished, append a prime to the input and | ||
// rehash. | ||
element = Buffer.isBuffer(element) | ||
? Buffer.concat([element, PRIME_BUF]) | ||
: element + PRIME_STR; | ||
({ first, second } = hashTwice(element)); | ||
} | ||
if (maxIterations && i > maxIterations) { | ||
throw new Error('max iterations exceeded'); | ||
} | ||
} | ||
@@ -109,0 +114,0 @@ return indices; |
{ | ||
"name": "@ably/bloomit", | ||
"version": "1.3.1", | ||
"version": "1.4.0", | ||
"description": "Space efficient bloom filter based on the bloom-filters npm package.", | ||
@@ -5,0 +5,0 @@ "main": "dist/index.js", |
@@ -54,3 +54,2 @@ /* file : bloom-filter.ts | ||
private _length: number; | ||
private _seed: number; | ||
@@ -61,7 +60,4 @@ /** | ||
* @param nbHashes - The number of hash functions used | ||
* @param seed - The filter seed. | ||
*/ | ||
constructor(size: number, nbHashes: number, seed: number = 0x1234567890) { | ||
const positiveIntSeed = Math.abs(Math.ceil(seed)); | ||
this._seed = positiveIntSeed; | ||
constructor(size: number, nbHashes: number) { | ||
this._size = size; | ||
@@ -82,7 +78,6 @@ this._nbHashes = nbHashes; | ||
errorRate: number, | ||
seed: number = 0x1234567890 | ||
): BloomFilter { | ||
const size = optimalFilterSize(nbItems, errorRate); | ||
const hashes = optimalHashes(size, nbItems); | ||
return new BloomFilter(size, hashes, seed); | ||
return new BloomFilter(size, hashes); | ||
} | ||
@@ -102,6 +97,5 @@ | ||
errorRate: number, | ||
seed: number = 0x1234567890 | ||
): BloomFilter { | ||
const array = Array.from(items); | ||
const filter = BloomFilter.create(array.length, errorRate, seed); | ||
const filter = BloomFilter.create(array.length, errorRate); | ||
array.forEach(element => filter.add(element)); | ||
@@ -118,12 +112,10 @@ return filter; | ||
static import(binaryBloomFilter: Uint8Array): BloomFilter { | ||
const seedArray = binaryBloomFilter.slice(0, 8); | ||
const nbHashesArray = binaryBloomFilter.slice(8, 16); | ||
const lengthArray = binaryBloomFilter.slice(16, 24); | ||
const sizeArray = binaryBloomFilter.slice(24, 32); | ||
const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length); | ||
const nbHashesArray = binaryBloomFilter.slice(0, 8); | ||
const lengthArray = binaryBloomFilter.slice(8, 16); | ||
const sizeArray = binaryBloomFilter.slice(16, 24); | ||
const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length); | ||
const bloomFilter = new BloomFilter( | ||
uint8ArrayToInt64(sizeArray), | ||
uint8ArrayToInt64(nbHashesArray), | ||
uint8ArrayToInt64(seedArray) | ||
uint8ArrayToInt64(nbHashesArray) | ||
); | ||
@@ -153,10 +145,2 @@ | ||
/** | ||
* Get the seed of the filter | ||
* @return The filter seed | ||
*/ | ||
get seed(): number { | ||
return this._seed; | ||
} | ||
/** | ||
* Get the hash count of the filter | ||
@@ -180,4 +164,3 @@ * @return The filter hash count | ||
this._size, | ||
this._nbHashes, | ||
this._seed | ||
this._nbHashes | ||
); | ||
@@ -208,4 +191,3 @@ | ||
this._size, | ||
this._nbHashes, | ||
this._seed | ||
this._nbHashes | ||
); | ||
@@ -256,9 +238,8 @@ for (let i = 0; i < indexes.length; i++) { | ||
export(): Uint8Array { | ||
const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters | ||
exportArray.set(int64ToUint8Array(this.seed), 0); | ||
exportArray.set(int64ToUint8Array(this.nbHashes), 8); | ||
exportArray.set(int64ToUint8Array(this.length), 16); | ||
exportArray.set(int64ToUint8Array(this.size), 24); | ||
const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters | ||
exportArray.set(int64ToUint8Array(this.nbHashes), 0); | ||
exportArray.set(int64ToUint8Array(this.length), 8); | ||
exportArray.set(int64ToUint8Array(this.size), 16); | ||
const exportArrayStartIndex = 32; | ||
const exportArrayStartIndex = 24; | ||
for (let index = 0; index < this._filter.length; index += 1) { | ||
@@ -265,0 +246,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index]; |
/* file : utils.ts | ||
MIT License | ||
Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall | ||
Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall | ||
Bloomit changes: Copyright 2021 Kolja Blauhut | ||
Ably-forks/bloomit changes: Copyright 2021 Simon Woolf | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
@@ -29,2 +33,5 @@ of this software and associated documentation files (the "Software"), to deal | ||
const PRIME_STR = "'"; | ||
const PRIME_BUF = Buffer.from(PRIME_STR); | ||
/** | ||
@@ -55,3 +62,2 @@ * Utilitaries functions | ||
* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values. | ||
* @param seed the seed used for hashing | ||
* @return The results of the hash functions applied to the value (in hex or integer) | ||
@@ -74,26 +80,2 @@ * @memberof Utils | ||
/** | ||
* Apply Double Hashing to produce a n-hash | ||
* | ||
* This implementation used directly the value produced by the two hash functions instead of the functions themselves. | ||
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing. | ||
* @param n - The indice of the hash function we want to produce | ||
* @param hashA - The result of the first hash function applied to a value. | ||
* @param hashB - The result of the second hash function applied to a value. | ||
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter) | ||
* @return The result of hash_n applied to a value. | ||
* @memberof Utils | ||
* @author Thomas Minier | ||
*/ | ||
export function doubleHashing( | ||
n: number, | ||
hashA: number, | ||
hashB: number, | ||
size: number | ||
): number { | ||
// Cubic term avoids increased-probability-of-collision issue, see | ||
// http://peterd.org/pcd-diss.pdf s.6.5.4 | ||
return Math.abs((hashA + n*hashB + Math.floor((n**3 - n)/6)) % size); | ||
} | ||
/** | ||
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique | ||
@@ -103,5 +85,7 @@ * @param element - The element to hash | ||
* @param number - The number of indexes desired | ||
* @param seed - The seed used | ||
* @param maxIterations - throw if we exceed this without finding a result of the | ||
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults | ||
* to size*100 | ||
* @return A array of indexes | ||
* @author Arnaud Grall | ||
* @author Arnaud Grall, Simon Woolf | ||
*/ | ||
@@ -112,15 +96,34 @@ export function getDistinctIndices( | ||
number: number, | ||
seed?: number | ||
maxIterations: number = size * 100 | ||
): Array<number> { | ||
const {first, second} = hashTwice(element); | ||
const indices: Array<number> = []; | ||
let n = 0; | ||
while(indices.length < number) { | ||
const index = doubleHashing(n, first, second, size); | ||
if(!indices.includes(index)) { | ||
indices.push(index); | ||
} | ||
n++; | ||
} | ||
return indices; | ||
let {first, second} = hashTwice(element); | ||
let index, i = 1; | ||
const indices: Array<number> = []; | ||
// Implements enhanced double hashing algorithm from | ||
// http://peterd.org/pcd-diss.pdf s.6.5.4 | ||
while(indices.length < number) { | ||
index = first % size; | ||
if(!indices.includes(index)) { | ||
indices.push(index); | ||
} | ||
first = (first + second) % size; | ||
second = (second + i) % size; | ||
i++; | ||
if(i > size) { | ||
// Enhanced double hashing stops cycles of length less than `size` in the case where | ||
// size is coprime with the second hash. But you still get cycles of length `size`. | ||
// So if we reach there and haven't finished, append a prime to the input and | ||
// rehash. | ||
element = Buffer.isBuffer(element) | ||
? Buffer.concat([element, PRIME_BUF]) | ||
: element + PRIME_STR; | ||
({first, second} = hashTwice(element)); | ||
} | ||
if(maxIterations && i > maxIterations) { | ||
throw new Error('max iterations exceeded'); | ||
} | ||
} | ||
return indices; | ||
} | ||
@@ -127,0 +130,0 @@ |
@@ -32,7 +32,6 @@ /* file : bloom-filter-test.js | ||
const targetRate = 0.1; | ||
const seed = Math.random(); | ||
describe('construction', () => { | ||
it('should add element to the filter with #add', () => { | ||
const filter = BloomFilter.create(15, targetRate, seed); | ||
const filter = BloomFilter.create(15, targetRate); | ||
filter.add('alice'); | ||
@@ -100,6 +99,6 @@ filter.add('bob'); | ||
describe('#export', () => { | ||
const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate, seed); | ||
const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate); | ||
it('should export a bloom filter to a Uint8Array', () => { | ||
const exported = filter.export(); | ||
exported.length.should.equal(filter._filter.length + 4 * 8); | ||
exported.length.should.equal(filter._filter.length + 3 * 8); | ||
}); | ||
@@ -110,3 +109,2 @@ | ||
const newFilter = BloomFilter.import(exported); | ||
newFilter._seed.should.equal(filter._seed); | ||
newFilter.size.should.equal(filter.size); | ||
@@ -113,0 +111,0 @@ newFilter.length.should.equal(filter.length); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
24
56597
1248