bloom-filters
Comparing version 2.0.0 to 3.0.0
@@ -17,1 +17,2 @@ export { default as BaseFilter } from './base-filter'; | ||
export { default as DeprecatedHashing } from './hashing/deprecated_hashing'; | ||
export { default as ScalableBloomFilter } from './bloom/scalable-bloom-filter'; |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : api.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
@@ -30,3 +30,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.DeprecatedHashing = exports.Hashing = exports.Cell = exports.InvertibleBloomFilter = exports.CuckooFilter = exports.MinHashFactory = exports.MinHash = exports.TopK = exports.HyperLogLog = exports.CountMinSketch = exports.PartitionedBloomFilter = exports.CountingBloomFilter = exports.XorFilter = exports.BitSet = exports.BloomFilter = exports.BaseFilter = void 0; | ||
exports.ScalableBloomFilter = exports.DeprecatedHashing = exports.Hashing = exports.Cell = exports.InvertibleBloomFilter = exports.CuckooFilter = exports.MinHashFactory = exports.MinHash = exports.TopK = exports.HyperLogLog = exports.CountMinSketch = exports.PartitionedBloomFilter = exports.CountingBloomFilter = exports.XorFilter = exports.BitSet = exports.BloomFilter = exports.BaseFilter = void 0; | ||
var base_filter_1 = require("./base-filter"); | ||
@@ -64,1 +64,3 @@ Object.defineProperty(exports, "BaseFilter", { enumerable: true, get: function () { return __importDefault(base_filter_1).default; } }); | ||
Object.defineProperty(exports, "DeprecatedHashing", { enumerable: true, get: function () { return __importDefault(deprecated_hashing_1).default; } }); | ||
var scalable_bloom_filter_1 = require("./bloom/scalable-bloom-filter"); | ||
Object.defineProperty(exports, "ScalableBloomFilter", { enumerable: true, get: function () { return __importDefault(scalable_bloom_filter_1).default; } }); |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : base-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
@@ -27,0 +27,0 @@ return (mod && mod.__esModule) ? mod : { "default": mod }; |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : bloom-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -212,5 +212,4 @@ var extendStatics = function (d, b) { | ||
(0, exportable_1.Field)(function (f) { return f.export(); }, function (data) { | ||
// create the bitset from new and old array-based exported structure | ||
if (Array.isArray(data)) { | ||
// Create the bitset from new and old exported structure | ||
// create a new BitSet from the specified array | ||
var bs_1 = new bit_set_1.default(data.length); | ||
@@ -217,0 +216,0 @@ data.forEach(function (val, index) { |
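This `@Field` deserializer is what keeps 2.x exports loadable after the switch to `BitSet` storage: an old array-based `_filter` is replayed bit by bit into a `BitSet`, and the non-array branch (visible in the partitioned filter's version of the same decorator further down) delegates to `BitSet.import`. A hedged round-trip sketch, assuming the `saveAsJSON()`/`fromJSON()` pair from the README's export/import section:

```javascript
const {BloomFilter} = require('bloom-filters')

const filter = BloomFilter.create(100, 0.01) // sized for ~100 items at 1% error
filter.add('alice')

// assumption: saveAsJSON()/fromJSON() are the documented export/import entry points;
// in 3.0.0 the _filter field serializes as {size, content} instead of a plain number array
const json = filter.saveAsJSON()
const restored = BloomFilter.fromJSON(json)
console.log(restored.has('alice')) // true
```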
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : counting-bloom-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -27,0 +27,0 @@ var extendStatics = function (d, b) { |
import BaseFilter from '../base-filter'; | ||
import ClassicFilter from '../interfaces/classic-filter'; | ||
import { HashableInput } from '../utils'; | ||
import BitSet from './bit-set'; | ||
/** | ||
@@ -22,5 +23,4 @@ * A Partitioned Bloom Filter is a variation of a classic Bloom filter. | ||
_m: number; | ||
_filter: Array<Array<number>>; | ||
_filter: Array<BitSet>; | ||
_capacity: number; | ||
_length: number; | ||
/** | ||
@@ -63,6 +63,2 @@ * Constructor | ||
/** | ||
* Get the number of elements currently in the filter | ||
*/ | ||
get length(): number; | ||
/** | ||
* Get the filter's load factor | ||
@@ -69,0 +65,0 @@ */ |
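The typings make the storage change explicit: each partition is now a `BitSet` instead of an `Array<number>`, and the derived `length` getter is gone. `BitSet` is itself exported from the package root, and every operation the filter relies on appears in this diff (`add`, `has`, `equals`, `bitCount`, `export`, `import`). A small illustrative sketch of that API:

```javascript
const {BitSet} = require('bloom-filters')

// one partition of 32 bits, all cleared
const bits = new BitSet(32)
bits.add(5)                    // set bit 5, as PartitionedBloomFilter.add() now does
console.log(bits.has(5))       // true
console.log(bits.has(6))       // false
console.log(bits.bitCount())   // 1, used when the filter estimates how full it is

// export() yields the {size, content} object consumed by the new @Field serializer,
// and BitSet.import() rebuilds an equal instance from it
const copy = BitSet.import(bits.export())
console.log(copy.equals(bits)) // true
```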
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : partitioned-bloom-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -60,2 +60,3 @@ var extendStatics = function (d, b) { | ||
var utils_1 = require("../utils"); | ||
var bit_set_1 = __importDefault(require("./bit-set")); | ||
/** | ||
@@ -123,5 +124,3 @@ * Return the optimal number of hashes needed for a given error rate and load factor | ||
_this._m = Math.ceil(_this._size / _this._nbHashes); | ||
_this._filter = (0, utils_1.allocateArray)(_this._nbHashes, function () { | ||
return (0, utils_1.allocateArray)(_this._m, 0); | ||
}); | ||
_this._filter = (0, utils_1.allocateArray)(_this._nbHashes, function () { return new bit_set_1.default(_this._m); }); | ||
_this._capacity = | ||
@@ -131,3 +130,2 @@ capacity !== undefined | ||
: computeNumberOfItems(_this._size, loadFactor, nbHashes); | ||
_this._length = 0; | ||
return _this; | ||
@@ -187,12 +185,2 @@ } | ||
}); | ||
Object.defineProperty(PartitionedBloomFilter.prototype, "length", { | ||
/** | ||
* Get the number of elements currently in the filter | ||
*/ | ||
get: function () { | ||
return this._length; | ||
}, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
Object.defineProperty(PartitionedBloomFilter.prototype, "loadFactor", { | ||
@@ -220,5 +208,4 @@ /** | ||
for (var i = 0; i < this._nbHashes; i++) { | ||
this._filter[i][indexes[i]] = 1; | ||
this._filter[i].add(indexes[i]); | ||
} | ||
this._length++; | ||
}; | ||
@@ -240,3 +227,3 @@ /** | ||
for (var i = 0; i < this._nbHashes; i++) { | ||
if (!this._filter[i][indexes[i]]) { | ||
if (!this._filter[i].has(indexes[i])) { | ||
return false; | ||
@@ -270,3 +257,2 @@ } | ||
this._nbHashes !== other._nbHashes || | ||
this._length !== other._length || | ||
this._loadFactor !== other._loadFactor) { | ||
@@ -276,3 +262,3 @@ return false; | ||
return this._filter.every(function (array, outerIndex) { | ||
return other._filter[outerIndex].every(function (item, innerIndex) { return array[innerIndex] === item; }); | ||
return other._filter[outerIndex].equals(array); | ||
}); | ||
@@ -286,3 +272,3 @@ }; | ||
var values = this._filter.map(function (bucket) { | ||
return bucket.reduce(function (a, b) { return a + b; }, 0); | ||
return bucket.bitCount(); | ||
}); | ||
@@ -310,3 +296,19 @@ var used = values.reduce(function (a, b) { return a + b; }, 0); | ||
__decorate([ | ||
(0, exportable_1.Field)(), | ||
(0, exportable_1.Field)(function (sets) { return sets.map(function (s) { return s.export(); }); }, function (array) { | ||
return array.map(function (data) { | ||
// create the bitset from new and old array-based exported structure | ||
if (Array.isArray(data)) { | ||
var bs_1 = new bit_set_1.default(data.length); | ||
data.forEach(function (val, index) { | ||
if (val !== 0) { | ||
bs_1.add(index); | ||
} | ||
}); | ||
return bs_1; | ||
} | ||
else { | ||
return bit_set_1.default.import(data); | ||
} | ||
}); | ||
}), | ||
__metadata("design:type", Array) | ||
@@ -318,6 +320,2 @@ ], PartitionedBloomFilter.prototype, "_filter", void 0); | ||
], PartitionedBloomFilter.prototype, "_capacity", void 0); | ||
__decorate([ | ||
(0, exportable_1.Field)(), | ||
__metadata("design:type", Number) | ||
], PartitionedBloomFilter.prototype, "_length", void 0); | ||
PartitionedBloomFilter = PartitionedBloomFilter_1 = __decorate([ | ||
@@ -324,0 +322,0 @@ (0, exportable_1.AutoExportable)('PartitionedBloomFilter', ['_seed']), |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : bucket.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
@@ -27,0 +27,0 @@ if (k2 === undefined) k2 = k; |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : cuckoo-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -27,0 +27,0 @@ var extendStatics = function (d, b) { |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : exportable.ts | ||
@@ -26,3 +27,2 @@ MIT License | ||
/* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-return, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-argument */ | ||
'use strict'; | ||
var __read = (this && this.__read) || function (o, n) { | ||
@@ -29,0 +29,0 @@ var m = typeof Symbol === "function" && o[Symbol.iterator]; |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : formulas.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
@@ -27,0 +27,0 @@ exports.optimalHashes = exports.optimalFilterSize = void 0; |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file: cell.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -27,0 +27,0 @@ var extendStatics = function (d, b) { |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : invertible-bloom-lookup-tables.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -27,0 +27,0 @@ var extendStatics = function (d, b) { |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file: classic-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
Object.defineProperty(exports, "__esModule", { value: true }); |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file: counting-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
Object.defineProperty(exports, "__esModule", { value: true }); |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file: writable-filter.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
Object.defineProperty(exports, "__esModule", { value: true }); |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : count-min-sketch.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __extends = (this && this.__extends) || (function () { | ||
@@ -27,0 +27,0 @@ var extendStatics = function (d, b) { |
@@ -52,27 +52,2 @@ "use strict"; | ||
}; | ||
var __read = (this && this.__read) || function (o, n) { | ||
var m = typeof Symbol === "function" && o[Symbol.iterator]; | ||
if (!m) return o; | ||
var i = m.call(o), r, ar = [], e; | ||
try { | ||
while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value); | ||
} | ||
catch (error) { e = { error: error }; } | ||
finally { | ||
try { | ||
if (r && !r.done && (m = i["return"])) m.call(i); | ||
} | ||
finally { if (e) throw e.error; } | ||
} | ||
return ar; | ||
}; | ||
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) { | ||
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { | ||
if (ar || !(i in from)) { | ||
if (!ar) ar = Array.prototype.slice.call(from, 0, i); | ||
ar[i] = from[i]; | ||
} | ||
} | ||
return to.concat(ar || Array.prototype.slice.call(from)); | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
@@ -86,3 +61,2 @@ return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
var utils_1 = require("../utils"); | ||
var lodash_1 = require("lodash"); | ||
/** | ||
@@ -154,3 +128,4 @@ * An error thrown when we try to compute the Jaccard Similarity with an empty MinHash | ||
for (var i = 0; i < this._nbHashes; i++) { | ||
this._signature[i] = Math.min(this._signature[i], applyHashFunction(value, this._hashFunctions[i])); | ||
var hash = applyHashFunction(value, this._hashFunctions[i]); | ||
this._signature[i] = Math.min(this._signature[i], hash); | ||
} | ||
@@ -168,3 +143,12 @@ }; | ||
}); | ||
this_1._signature[i] = Math.min.apply(Math, __spreadArray([this_1._signature[i]], __read(candidateSignatures), false)); | ||
// get the minimum of the candidate Signatures | ||
// dont supply too much parameters to Math.min or Math.max with risk of getting stack error | ||
// so we compute an iterative minimum | ||
var min = candidateSignatures[0]; | ||
for (var i_1 = 1; i_1 < candidateSignatures.length; i_1++) { | ||
if (min > candidateSignatures[i_1]) { | ||
min = candidateSignatures[i_1]; | ||
} | ||
} | ||
this_1._signature[i] = Math.min(this_1._signature[i], min); | ||
}; | ||
@@ -185,3 +169,11 @@ var this_1 = this; | ||
} | ||
return ((0, lodash_1.intersection)(this._signature, other._signature).length / this._nbHashes); | ||
// fix: we need to check for the number of equal signatures, not uniq equal signatures | ||
// lodash intersection ends with a uniq set of values | ||
var count = 0; | ||
for (var i = 0; i < this._nbHashes; i++) { | ||
if (this._signature[i] === other._signature[i]) { | ||
count++; | ||
} | ||
} | ||
return count / this._nbHashes; | ||
}; | ||
@@ -188,0 +180,0 @@ __decorate([ |
@@ -0,1 +1,2 @@ | ||
"use strict"; | ||
/* file : utils.ts | ||
@@ -24,3 +25,2 @@ MIT License | ||
*/ | ||
'use strict'; | ||
var __values = (this && this.__values) || function(o) { | ||
@@ -27,0 +27,0 @@ var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0; |
{ | ||
"name": "bloom-filters", | ||
"version": "2.0.0", | ||
"version": "3.0.0", | ||
"description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash", | ||
@@ -26,8 +26,14 @@ "main": "dist/api.js", | ||
"bloom filter", | ||
"invertible bloom filter", | ||
"probabilistic", | ||
"datastructure", | ||
"partitionned bloom filter", | ||
"scalable bloom filter", | ||
"counting bloom filter", | ||
"invertible bloom filter", | ||
"count min sketch", | ||
"cuckoo", | ||
"xor-filter" | ||
"xor", | ||
"minhash", | ||
"top-k", | ||
"hyperloglog" | ||
], | ||
@@ -34,0 +40,0 @@ "author": "Thomas Minier <thomas.minier@protonmail.com>", |
README.md
@@ -10,7 +10,8 @@ # Bloom-Filters [![Master](https://github.com/callidon/bloom-filters/actions/workflows/npm_test_doc.yml/badge.svg)](https://github.com/Callidon/bloom-filters/actions) | ||
❗️**Compatibility**❗️ | ||
* Be carefull when migrating from a version to another. | ||
* Bug fixes were introduced in `1.3.7` and from `1.3.9` to `2.0.0` for hashing and indexing data. Then, you **must re-build completely your filters from start** to be compatible with the new versions. | ||
* To keep the `breaking changes` rule of npm versions we will make now new `majored versions` since 1.3.9 whenever a modification is done on the hashing/indexing system or breaks the current API. | ||
❗️**Compatibility**❗️ | ||
- Be carefull when migrating from a version to another. | ||
- Bug fixes were introduced in `1.3.7` and from `1.3.9` to `2.0.0+` for hashing and indexing data. Then, you **must re-build completely your filters from start** to be compatible with the new versions. | ||
- To keep the `breaking changes` rule of npm versions we will make now new `majored versions` since 1.3.9 whenever a modification is done on the hashing/indexing system or breaks the current API. | ||
# Table of contents | ||
@@ -22,2 +23,3 @@ | ||
- [Partitioned Bloom Filter](#partitioned-bloom-filter) | ||
- [Scalable Bloom Filter](#scalable-bloom-filter) | ||
- [Cuckoo Filter](#cuckoo-filter) | ||
@@ -64,4 +66,4 @@ - [Counting Bloom Filter](#counting-bloom-filter) | ||
- `add(element: string) -> void`: add an element into the filter. | ||
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `add(element: HashableInput) -> void`: add an element into the filter. | ||
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `equals(other: BloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -109,4 +111,4 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
- `add(element: string) -> void`: add an element into the filter. | ||
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `add(element: HashableInput) -> void`: add an element into the filter. | ||
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `equals(other: PartitionedBloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -141,2 +143,37 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
### Scalable Bloom Filter | ||
A Scalable Bloom Filter is a variant of Bloom Filters that can adapt dynamically to the | ||
number of elements stored, while assuring a maximum false positive probability | ||
**Reference:** ALMEIDA, Paulo Sérgio, BAQUERO, Carlos, PREGUIÇA, Nuno, et al. Scalable bloom filters. Information Processing Letters, 2007, vol. 101, no 6, p. 255-261. | ||
([Full text article](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.725.390&rep=rep1&type=pdf)) | ||
This filter use internally [Paritionned Bloom Filters](#partitioned-bloom-filter). | ||
#### Methods | ||
- `add(element: HashableInput) -> void`: add an element into the filter. | ||
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `equals(other: ScalableBloomFilter) -> boolean`: Test if two filters are equals. | ||
- `capacity():number` -> return the total capacity of this filter | ||
- `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
```javascript | ||
const {ScalableBloomFilter} = require('bloom-filters') | ||
// by default it creates an ideally scalable bloom filter for 8 elements with an error rate of 0.01 and a load factor of 0.5 | ||
const filter = new ScalableBloomFilter() | ||
filter.add('alice') | ||
filter.add('bob') | ||
filter.add('carl') | ||
for (let i = 0; i < 10000; i++) { | ||
filter.add('elem:' + i) | ||
} | ||
filter.has('somethingwrong') // false | ||
filter.capacity() // total capacity | ||
filter.rate() // current rate of the current internal filter used | ||
``` | ||
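The snippet above exercises the public surface; conceptually a scalable Bloom filter keeps a list of fixed-size slices (partitioned Bloom filters here) and, when the newest slice reaches its capacity, appends a larger one with a tightened error rate so the compounded false-positive probability stays under the configured bound. The toy below only illustrates that growth policy; the slice objects, field names and growth factors are stand-ins, not the library's internals:

```javascript
// Illustrative only: a Set stands in for one partitioned-Bloom-filter slice
// so the sketch stays self-contained and has no false positives of its own.
function makeSlice(capacity, errorRate) {
  return {capacity, errorRate, items: new Set(),
    add(x) { this.items.add(x) },
    has(x) { return this.items.has(x) }}
}

class ToyScalableFilter {
  constructor(initialCapacity = 8, errorRate = 0.01, ratio = 0.5) {
    this._initialCapacity = initialCapacity
    this._errorRate = errorRate
    this._ratio = ratio // each new slice tightens the error rate by this factor
    this._slices = []
  }
  add(element) {
    let last = this._slices[this._slices.length - 1]
    if (!last || last.items.size >= last.capacity) {
      const i = this._slices.length
      // grow: bigger slice, tighter error rate, so the compounded rate stays bounded
      last = makeSlice(this._initialCapacity * 2 ** i, this._errorRate * this._ratio ** i)
      this._slices.push(last)
    }
    last.add(element)
  }
  has(element) {
    // an element may live in any slice
    return this._slices.some(s => s.has(element))
  }
}

const toy = new ToyScalableFilter()
for (let i = 0; i < 100; i++) toy.add('elem:' + i)
console.log(toy.has('elem:42'), toy.has('somethingwrong')) // true false
```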
### Cuckoo Filter | ||
@@ -151,5 +188,5 @@ | ||
- `add(element: string) -> void`: add an element into the filter. | ||
- `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `add(element: HashableInput) -> void`: add an element into the filter. | ||
- `remove(element: HashableInput) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `equals(other: CuckooFilter) -> boolean`: Test if two filters are equals. | ||
@@ -193,5 +230,5 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
- `add(element: string) -> void`: add an element into the filter. | ||
- `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `add(element: HashableInput) -> void`: add an element into the filter. | ||
- `remove(element: HashableInput) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
- `equals(other: CountingBloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -241,4 +278,4 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
- `update(element: string, count = 1) -> void`: add `count` occurences of an element into the sketch. | ||
- `count(element: string) -> number`: estimate the number of occurences of an element. | ||
- `update(element: HashableInput, count = 1) -> void`: add `count` occurences of an element into the sketch. | ||
- `count(element: HashableInput) -> number`: estimate the number of occurences of an element. | ||
- `merge(other: CountMinSketch) -> CountMinSketch`: merge occurences of two sketches. | ||
@@ -285,3 +322,3 @@ - `equals(other: CountMinSketch) -> boolean`: Test if two sketchs are equals. | ||
- `update(element: string) -> void`: add a new occurence of an element to the sketch. | ||
- `update(element: HashableInput) -> void`: add a new occurence of an element to the sketch. | ||
- `count() -> number`: estimate the number of distinct elements in the sketch. | ||
@@ -480,3 +517,3 @@ - `merge(other: HyperLogLog) -> HyperLogLog`: merge occurences of two sketches. | ||
A XOR Filter is a better space-efficient probabilistic data structure than Bloom Filters. | ||
A XOR Filter is a better space-efficient probabilistic data structure than Bloom Filters. | ||
Very usefull for space efficiency of readonly sets. | ||
@@ -487,4 +524,2 @@ | ||
#### Methods | ||
@@ -496,5 +531,6 @@ | ||
--- | ||
* Extended input types: `type XorHashableInput = HashableInput | Long` | ||
* We use Buffers internally which are exported/imported to/from `base64` strings. | ||
- Extended input types: `type XorHashableInput = HashableInput | Long` | ||
- We use Buffers internally which are exported/imported to/from `base64` strings. | ||
```javascript | ||
@@ -516,3 +552,2 @@ const {XorFilter} = require('bloom-filters') | ||
## Export and import | ||
@@ -557,2 +592,3 @@ | ||
In the case you want to use your own hash functions, you can use your own Hashing class by extending the default one. Example: | ||
```js | ||
@@ -572,2 +608,3 @@ const {BloomFilter, Hashing} = require('bloom-filters') | ||
``` | ||
See `test/utils-test.js` "_Use different hash functions_" describe close. | ||
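The example elided by the hunk above follows the pattern the changelog describes: subclass the exported `Hashing` class and override its hashing entry point. The sketch below assumes that entry point is `serialize(element, seed)` and that a filter picks up the replacement through its `_hashing` property; both details are assumptions to verify against `test/utils-test.js` before relying on them:

```javascript
const {BloomFilter, Hashing} = require('bloom-filters')

// Assumption: serialize(element, seed) -> number is the hook the filters call.
class ConstantHashing extends Hashing {
  serialize(_element, _seed) {
    return 1 // degenerate hash, only useful to demonstrate the override
  }
}

const filter = BloomFilter.create(16, 0.01)
filter._hashing = new ConstantHashing() // assumption: filters resolve hashes through _hashing
filter.add('alice')
console.log(filter.has('bob')) // true, since every element now hashes identically
```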
@@ -581,13 +618,12 @@ | ||
* Tests are performed using [mocha](https://github.com/mochajs/mocha) and [nyc](https://github.com/istanbuljs/nyc) (code coverage) on node 12.x, 14.x, 15.x and 16.x for the moment. | ||
* Linting and formatting are made using `prettier` and `eslint` | ||
- Tests are performed using [mocha](https://github.com/mochajs/mocha) and [nyc](https://github.com/istanbuljs/nyc) (code coverage) on node 12.x, 14.x, 15.x and 16.x for the moment. | ||
- Linting and formatting are made using `prettier` and `eslint` | ||
When submitting pull requests please follow the following guidance: | ||
* please, PRs on the develop branch. PRs on the master will be refused without comment. | ||
* add tests when possible in `./test/*-test.js` or by creating `*-test.js` files. | ||
* functions, methods, variables and types must be documented using typedoc annotations | ||
* run `yarn lint` to see linting errors | ||
* run `yarn fix` to fix errors that can be auto-fixed by gts. | ||
* run `yarn test` (build, lint and run the mocha tests suite with code coverage) | ||
- Please open pull requests on the develop branch. **Direct contributions to the master branch will be refused without comments** | ||
- Add tests when possible in the `test` folder. | ||
- Functions, methods, variables and types must be documented using typedoc annotations | ||
- Run `yarn test` (build, lint and run the mocha tests suite) | ||
## References | ||
@@ -603,17 +639,19 @@ | ||
- [Invertible Bloom Filters](http://www.sysnet.ucsd.edu/sysnet/miscpapers/EppGooUye-SIGCOMM-11.pdf): Eppstein, D., Goodrich, M. T., Uyeda, F., & Varghese, G. (2011). _What's the difference?: efficient set reconciliation without prior context._ ACM SIGCOMM Computer Communication Review, 41(4), 218-229. | ||
- [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258) Thomas Mueller Graf, Daniel Lemire, Journal of Experimental Algorithmics 25 (1), 2020. DOI: 10.1145/3376122 | ||
- [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258) Thomas Mueller Graf, Daniel Lemire, Journal of Experimental Algorithmics 25 (1), 2020. DOI: 10.1145/3376122 | ||
- [Scalable Bloom Filters](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.725.390&rep=rep1&type=pdf) ALMEIDA, Paulo Sérgio, BAQUERO, Carlos, PREGUIÇA, Nuno, et al. _Scalable bloom filters_. Information Processing Letters, 2007, vol. 101, no 6, p. 255-261. | ||
## Changelog | ||
| **Version** | **Release date** | **Major changes** | | ||
| ----------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||
| `v2.0.0` | 02/2022 | - Use correctly double hashing [#issue43](https://github.com/Callidon/bloom-filters/issues/43). <br/> - Move all hashing related functions to its specific Hash class in a component of the BaseFilter class. It also allows for overriding the serizalize function for using custom hash functions <br/> - Add [#PR44](https://github.com/Callidon/bloom-filters/pull/44) optimizing the BloomFilter internal storage with Uint arrays. <br/> - Disable 10.x, 15.x node tests. <br/> - Add XorFilter [#29](https://github.com/Callidon/bloom-filters/issues/29) <br/> - Add `.nextInt32()` function to get a new random seeded int 32-bits from the current seed. <br/> - Make all properties public for allowing developpers to override everything. | | ||
| `v1.3.0` | 10/04/2020 | Added the MinHash set | | ||
| `v1.2.0` | 08/04/2020 | Add the TopK class | | ||
| `v1.1.0` | 03/04/2020 | Add the HyperLogLog sketch | | ||
| `v1.0.0` | 23/03/2020 | Rework the whole library using TypeScript, unify the API and fix the documentation | | ||
| `v0.8.0` | 11/11/2019 | Fix some issues with the cuckoo filter (performances). Fix the global API. It allows now to customize each Filter. If you want to use the old API, use the `.create()` or `.from()` functions to match the old api. | | ||
| `v0.7.1` | 11/09/2019 | Add the Counting Bloom Filter | | ||
| `v0.7.0` | 01/07/2019 | Move to [XXHASH](https://cyan4973.github.io/xxHash/) for hashing elements in the library. One property has been added into the exported json `_seed` which is used to seed every hash of every elements. Update Invertible Bloom Filters with #add, #has, #delete, #listEntries, #substract, #Static.decode methods. Updated the way to get distinct indices which could have collisions in many cases. | | ||
| `v0.6.1` | 18/06/2019 | Add Invertible Bloom Filters (only #encode/#substract/#Static.decode methods) | | ||
| **Version** | **Release date** | **Major changes** | | ||
| ----------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||
| `v2.1.0` | 03/2022 | - Add Scalable Bloom filters <br/> - Use array of BitSet for Partitionned Bloom Filter <br/> - Fix wrong MinHash comparison | | ||
| `v2.0.0` | 02/2022 | - Use correctly double hashing [#issue43](https://github.com/Callidon/bloom-filters/issues/43). <br/> - Move all hashing related functions to its specific Hash class in a component of the BaseFilter class. It also allows for overriding the serizalize function for using custom hash functions <br/> - Add [#PR44](https://github.com/Callidon/bloom-filters/pull/44) optimizing the BloomFilter internal storage with Uint arrays. <br/> - Disable 10.x, 15.x node tests. <br/> - Add XorFilter [#29](https://github.com/Callidon/bloom-filters/issues/29) <br/> - Add `.nextInt32()` function to get a new random seeded int 32-bits from the current seed. <br/> - Make all properties public for allowing developpers to override everything. | | ||
| `v1.3.0` | 10/04/2020 | Added the MinHash set | | ||
| `v1.2.0` | 08/04/2020 | Add the TopK class | | ||
| `v1.1.0` | 03/04/2020 | Add the HyperLogLog sketch | | ||
| `v1.0.0` | 23/03/2020 | Rework the whole library using TypeScript, unify the API and fix the documentation | | ||
| `v0.8.0` | 11/11/2019 | Fix some issues with the cuckoo filter (performances). Fix the global API. It allows now to customize each Filter. If you want to use the old API, use the `.create()` or `.from()` functions to match the old api. | | ||
| `v0.7.1` | 11/09/2019 | Add the Counting Bloom Filter | | ||
| `v0.7.0` | 01/07/2019 | Move to [XXHASH](https://cyan4973.github.io/xxHash/) for hashing elements in the library. One property has been added into the exported json `_seed` which is used to seed every hash of every elements. Update Invertible Bloom Filters with #add, #has, #delete, #listEntries, #substract, #Static.decode methods. Updated the way to get distinct indices which could have collisions in many cases. | | ||
| `v0.6.1` | 18/06/2019 | Add Invertible Bloom Filters (only #encode/#substract/#Static.decode methods) | | ||
@@ -620,0 +658,0 @@ ## License |
@@ -25,4 +25,2 @@ /* file : api.ts | ||
'use strict' | ||
export {default as BaseFilter} from './base-filter' | ||
@@ -44,1 +42,2 @@ export {default as BloomFilter} from './bloom/bloom-filter' | ||
export {default as DeprecatedHashing} from './hashing/deprecated_hashing' | ||
export {default as ScalableBloomFilter} from './bloom/scalable-bloom-filter' |
@@ -25,4 +25,2 @@ /* file : base-filter.ts | ||
'use strict' | ||
import seedrandom from 'seedrandom' | ||
@@ -29,0 +27,0 @@ import Hashing from './hashing/hashing' |
@@ -25,4 +25,2 @@ /* file : bloom-filter.ts | ||
'use strict' | ||
import ClassicFilter from '../interfaces/classic-filter' | ||
@@ -58,5 +56,4 @@ import BaseFilter from '../base-filter' | ||
data => { | ||
// create the bitset from new and old array-based exported structure | ||
if (Array.isArray(data)) { | ||
// Create the bitset from new and old exported structure | ||
// create a new BitSet from the specified array | ||
const bs = new BitSet(data.length) | ||
@@ -63,0 +60,0 @@ data.forEach((val: number, index: number) => { |
@@ -25,4 +25,2 @@ /* file : counting-bloom-filter.ts | ||
'use strict' | ||
import BaseFilter from '../base-filter' | ||
@@ -29,0 +27,0 @@ import WritableFilter from '../interfaces/writable-filter' |
@@ -25,4 +25,2 @@ /* file : partitioned-bloom-filter.ts | ||
'use strict' | ||
import BaseFilter from '../base-filter' | ||
@@ -32,2 +30,3 @@ import ClassicFilter from '../interfaces/classic-filter' | ||
import {HashableInput, allocateArray} from '../utils' | ||
import BitSet from './bit-set' | ||
@@ -112,8 +111,23 @@ /** | ||
public _m: number | ||
@Field( | ||
(sets: BitSet[]) => sets.map(s => s.export()), | ||
(array: Array<{size: number; content: string} | number[]>) => | ||
array.map((data: {size: number; content: string} | number[]) => { | ||
// create the bitset from new and old array-based exported structure | ||
if (Array.isArray(data)) { | ||
const bs = new BitSet(data.length) | ||
data.forEach((val: number, index: number) => { | ||
if (val !== 0) { | ||
bs.add(index) | ||
} | ||
}) | ||
return bs | ||
} else { | ||
return BitSet.import(data as {size: number; content: string}) | ||
} | ||
}) | ||
) | ||
public _filter: Array<BitSet> | ||
@Field() | ||
public _filter: Array<Array<number>> | ||
@Field() | ||
public _capacity: number | ||
@Field() | ||
public _length: number | ||
/** | ||
@@ -137,5 +151,3 @@ * Constructor | ||
this._m = Math.ceil(this._size / this._nbHashes) | ||
this._filter = allocateArray(this._nbHashes, () => | ||
allocateArray(this._m, 0) | ||
) | ||
this._filter = allocateArray(this._nbHashes, () => new BitSet(this._m)) | ||
this._capacity = | ||
@@ -145,3 +157,2 @@ capacity !== undefined | ||
: computeNumberOfItems(this._size, loadFactor, nbHashes) | ||
this._length = 0 | ||
} | ||
@@ -207,9 +218,2 @@ | ||
/** | ||
* Get the number of elements currently in the filter | ||
*/ | ||
public get length(): number { | ||
return this._length | ||
} | ||
/** | ||
* Get the filter's load factor | ||
@@ -238,5 +242,4 @@ */ | ||
for (let i = 0; i < this._nbHashes; i++) { | ||
this._filter[i][indexes[i]] = 1 | ||
this._filter[i].add(indexes[i]) | ||
} | ||
this._length++ | ||
} | ||
@@ -264,3 +267,3 @@ | ||
for (let i = 0; i < this._nbHashes; i++) { | ||
if (!this._filter[i][indexes[i]]) { | ||
if (!this._filter[i].has(indexes[i])) { | ||
return false | ||
@@ -297,3 +300,2 @@ } | ||
this._nbHashes !== other._nbHashes || | ||
this._length !== other._length || | ||
this._loadFactor !== other._loadFactor | ||
@@ -304,5 +306,3 @@ ) { | ||
return this._filter.every((array, outerIndex) => | ||
other._filter[outerIndex].every( | ||
(item, innerIndex) => array[innerIndex] === item | ||
) | ||
other._filter[outerIndex].equals(array) | ||
) | ||
@@ -317,3 +317,3 @@ } | ||
const values = this._filter.map(bucket => { | ||
return bucket.reduce((a, b) => a + b, 0) | ||
return bucket.bitCount() | ||
}) | ||
@@ -320,0 +320,0 @@ const used = values.reduce((a, b) => a + b, 0) |
@@ -25,4 +25,2 @@ /* file : bucket.ts | ||
'use strict' | ||
import {eq, indexOf} from 'lodash' | ||
@@ -29,0 +27,0 @@ import * as utils from '../utils' |
@@ -25,4 +25,2 @@ /* file : cuckoo-filter.ts | ||
'use strict' | ||
import WritableFilter from '../interfaces/writable-filter' | ||
@@ -29,0 +27,0 @@ import BaseFilter from '../base-filter' |
@@ -27,4 +27,2 @@ /* file : exportable.ts | ||
'use strict' | ||
import 'reflect-metadata' | ||
@@ -31,0 +29,0 @@ |
@@ -25,4 +25,2 @@ /* file : formulas.ts | ||
'use strict' | ||
/** | ||
@@ -29,0 +27,0 @@ * Various formulas used with Bloom Filters |
@@ -25,4 +25,2 @@ /* file: cell.ts | ||
'use strict' | ||
import {xorBuffer} from '../utils' | ||
@@ -29,0 +27,0 @@ import {AutoExportable, Field, Parameter} from '../exportable' |
@@ -25,4 +25,2 @@ /* file : invertible-bloom-lookup-tables.ts | ||
'use strict' | ||
import BaseFilter from '../base-filter' | ||
@@ -29,0 +27,0 @@ import WritableFilter from '../interfaces/writable-filter' |
@@ -25,4 +25,2 @@ /* file: classic-filter.ts | ||
'use strict' | ||
/** | ||
@@ -29,0 +27,0 @@ * A classic probabilistic data-structure, which supports insertions of elements and membership queries. |
@@ -25,4 +25,2 @@ /* file: counting-filter.ts | ||
'use strict' | ||
/** | ||
@@ -29,0 +27,0 @@ * A filter that can count occurences of items and estimate their frequencies. |
@@ -25,4 +25,2 @@ /* file: writable-filter.ts | ||
'use strict' | ||
import ClassicFilter from './classic-filter' | ||
@@ -29,0 +27,0 @@ |
@@ -25,4 +25,2 @@ /* file : count-min-sketch.ts | ||
'use strict' | ||
import BaseFilter from '../base-filter' | ||
@@ -29,0 +27,0 @@ import CountingFilter from '../interfaces/counting-filter' |
@@ -28,3 +28,2 @@ /* file: min-hash.ts | ||
import {allocateArray} from '../utils' | ||
import {intersection} from 'lodash' | ||
@@ -113,6 +112,4 @@ /** | ||
for (let i = 0; i < this._nbHashes; i++) { | ||
this._signature[i] = Math.min( | ||
this._signature[i], | ||
applyHashFunction(value, this._hashFunctions[i]) | ||
) | ||
const hash = applyHashFunction(value, this._hashFunctions[i]) | ||
this._signature[i] = Math.min(this._signature[i], hash) | ||
} | ||
@@ -130,3 +127,12 @@ } | ||
) | ||
this._signature[i] = Math.min(this._signature[i], ...candidateSignatures) | ||
// get the minimum of the candidate Signatures | ||
// dont supply too much parameters to Math.min or Math.max with risk of getting stack error | ||
// so we compute an iterative minimum | ||
let min = candidateSignatures[0] | ||
for (let i = 1; i < candidateSignatures.length; i++) { | ||
if (min > candidateSignatures[i]) { | ||
min = candidateSignatures[i] | ||
} | ||
} | ||
this._signature[i] = Math.min(this._signature[i], min) | ||
} | ||
@@ -146,6 +152,12 @@ } | ||
} | ||
return ( | ||
intersection(this._signature, other._signature).length / this._nbHashes | ||
) | ||
// fix: we need to check for the number of equal signatures, not uniq equal signatures | ||
// lodash intersection ends with a uniq set of values | ||
let count = 0 | ||
for (let i = 0; i < this._nbHashes; i++) { | ||
if (this._signature[i] === other._signature[i]) { | ||
count++ | ||
} | ||
} | ||
return count / this._nbHashes | ||
} | ||
} |
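Both MinHash changes replace a compact one-liner with a defensive loop. `Math.min(this._signature[i], ...candidateSignatures)` passes every candidate as a call argument and can throw `RangeError: Maximum call stack size exceeded` for large buckets, so the update now scans for the minimum iteratively. Likewise, lodash's `intersection` deduplicates values and ignores positions, whereas the Jaccard estimate needs the fraction of signature slots whose minima agree, so the similarity is now a position-wise count. A short demonstration of both points, using made-up four-slot signatures (lodash must be installed, as it is for this package):

```javascript
const {intersection} = require('lodash')

// 1) spreading a very large array into Math.min risks blowing the argument stack,
//    so compute the running minimum with a plain loop instead
const huge = Array.from({length: 1_000_000}, (_, i) => i + 1)
let min = huge[0]
for (let i = 1; i < huge.length; i++) if (huge[i] < min) min = huge[i]
console.log(min) // 1, without Math.min(...huge)

// 2) position-wise agreement vs lodash intersection (illustrative signatures)
const sigA = [3, 7, 7, 2]
const sigB = [3, 7, 7, 9]
console.log(intersection(sigA, sigB).length / sigA.length) // 0.5: the repeated 7 is deduplicated
let count = 0
for (let i = 0; i < sigA.length; i++) if (sigA[i] === sigB[i]) count++
console.log(count / sigA.length) // 0.75, the intended estimate
```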
@@ -25,4 +25,2 @@ /* file : utils.ts | ||
'use strict' | ||
/** | ||
@@ -29,0 +27,0 @@ * Utilitaries functions |
@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {BloomFilter} = require('../dist/api.js') |
@@ -25,4 +25,2 @@ /* file : bucket-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const Bucket = require('../dist/cuckoo/bucket.js').default |
@@ -25,4 +25,2 @@ /* | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {expect} = require('chai') |
@@ -25,4 +25,2 @@ /* file : count-min-sketch-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {CountMinSketch} = require('../dist/api.js') |
@@ -25,4 +25,2 @@ /* file : counting-bloom-filter-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {CountingBloomFilter} = require('../dist/api.js') |
@@ -25,4 +25,2 @@ /* file : cuckoo-filter-test.js | ||
'use strict' | ||
const chai = require('chai') | ||
@@ -29,0 +27,0 @@ chai.should() |
@@ -25,4 +25,2 @@ /* file : hyperloglog-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {HyperLogLog} = require('../dist/api.js') |
@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ require('chai').expect() |
@@ -25,4 +25,2 @@ /* file : min-hash-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -37,9 +35,27 @@ const {MinHashFactory, MinHash} = require('../dist/api.js') | ||
let factory, | ||
setA, | ||
setB, | ||
maxValue = 0, | ||
nbHashes | ||
try { | ||
const max = 10000 | ||
setA = range(1, max) | ||
setB = range(1, max).map(x => (x % 2 === 0 ? x : x * 2)) | ||
const allInOne = [...setA, ...setB] | ||
for (let i of allInOne) { | ||
if (maxValue < i) { | ||
maxValue = i | ||
} | ||
} | ||
nbHashes = 50 | ||
factory = new MinHashFactory(nbHashes, maxValue) | ||
} catch (error) { | ||
console.error(error) | ||
throw new Error( | ||
'An error occured when creating the min hash factory: ' + error | ||
) | ||
} | ||
describe('MinHash', () => { | ||
const setA = range(1, 500) | ||
const setB = range(1, 500).map(x => (x % 2 === 0 ? x : x * 2)) | ||
const maxValue = Math.max(...setA, ...setB) | ||
const nbHashes = 10 | ||
const factory = new MinHashFactory(nbHashes, maxValue) | ||
describe('#isEmpty', () => { | ||
@@ -46,0 +62,0 @@ it('should return True when the MinHash signeture is empty', () => { |
@@ -25,4 +25,2 @@ /* file : partitioned-bloom-filter-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -39,3 +37,2 @@ const {PartitionedBloomFilter} = require('../dist/api.js') | ||
filter.add('bob') | ||
filter.length.should.equal(2) | ||
}) | ||
@@ -49,3 +46,2 @@ | ||
filter.has('carl').should.equal(true) | ||
filter.length.should.equal(data.length) | ||
filter.rate().should.be.closeTo(targetRate, 0.1) | ||
@@ -142,5 +138,4 @@ }) | ||
exported._loadFactor.should.equal(filter._loadFactor) | ||
exported._m.should.equal(filter._m) | ||
exported._nbHashes.should.equal(filter._nbHashes) | ||
exported._filter.should.deep.equal(filter._filter) | ||
exported._filter.should.deep.equal(filter._filter.map(f => f.export())) | ||
}) | ||
@@ -147,0 +142,0 @@ |
@@ -25,4 +25,2 @@ /* file : topk-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {TopK} = require('../dist/api.js') |
@@ -25,4 +25,2 @@ /* file : utils-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const { |
@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js | ||
'use strict' | ||
require('chai').should() | ||
@@ -29,0 +27,0 @@ const {XorFilter} = require('../dist/api.js') |