bloom-filters - npm Package Compare versions

Comparing version 2.0.0 to 3.0.0

dist/bloom/scalable-bloom-filter.d.ts


dist/api.d.ts

@@ -17,1 +17,2 @@ export { default as BaseFilter } from './base-filter';

export { default as DeprecatedHashing } from './hashing/deprecated_hashing';
export { default as ScalableBloomFilter } from './bloom/scalable-bloom-filter';


dist/api.js

@@ -0,1 +1,2 @@

"use strict";
/* file : api.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -30,3 +30,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

Object.defineProperty(exports, "__esModule", { value: true });
exports.DeprecatedHashing = exports.Hashing = exports.Cell = exports.InvertibleBloomFilter = exports.CuckooFilter = exports.MinHashFactory = exports.MinHash = exports.TopK = exports.HyperLogLog = exports.CountMinSketch = exports.PartitionedBloomFilter = exports.CountingBloomFilter = exports.XorFilter = exports.BitSet = exports.BloomFilter = exports.BaseFilter = void 0;
exports.ScalableBloomFilter = exports.DeprecatedHashing = exports.Hashing = exports.Cell = exports.InvertibleBloomFilter = exports.CuckooFilter = exports.MinHashFactory = exports.MinHash = exports.TopK = exports.HyperLogLog = exports.CountMinSketch = exports.PartitionedBloomFilter = exports.CountingBloomFilter = exports.XorFilter = exports.BitSet = exports.BloomFilter = exports.BaseFilter = void 0;
var base_filter_1 = require("./base-filter");

@@ -64,1 +64,3 @@ Object.defineProperty(exports, "BaseFilter", { enumerable: true, get: function () { return __importDefault(base_filter_1).default; } });

Object.defineProperty(exports, "DeprecatedHashing", { enumerable: true, get: function () { return __importDefault(deprecated_hashing_1).default; } });
var scalable_bloom_filter_1 = require("./bloom/scalable-bloom-filter");
Object.defineProperty(exports, "ScalableBloomFilter", { enumerable: true, get: function () { return __importDefault(scalable_bloom_filter_1).default; } });

@@ -0,1 +1,2 @@

"use strict";
/* file : base-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -27,0 +27,0 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

@@ -0,1 +1,2 @@

"use strict";
/* file : bloom-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -212,5 +212,4 @@ var extendStatics = function (d, b) {

(0, exportable_1.Field)(function (f) { return f.export(); }, function (data) {
// create the bitset from new and old array-based exported structure
if (Array.isArray(data)) {
// Create the bitset from new and old exported structure
// create a new BitSet from the specified array
var bs_1 = new bit_set_1.default(data.length);

@@ -217,0 +216,0 @@ data.forEach(function (val, index) {

@@ -0,1 +1,2 @@

"use strict";
/* file : counting-bloom-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -27,0 +27,0 @@ var extendStatics = function (d, b) {

import BaseFilter from '../base-filter';
import ClassicFilter from '../interfaces/classic-filter';
import { HashableInput } from '../utils';
import BitSet from './bit-set';
/**

@@ -22,5 +23,4 @@ * A Partitioned Bloom Filter is a variation of a classic Bloom filter.

_m: number;
_filter: Array<Array<number>>;
_filter: Array<BitSet>;
_capacity: number;
_length: number;
/**

@@ -63,6 +63,2 @@ * Constructor

/**
* Get the number of elements currently in the filter
*/
get length(): number;
/**
* Get the filter's load factor

@@ -69,0 +65,0 @@ */

@@ -0,1 +1,2 @@

"use strict";
/* file : partitioned-bloom-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -60,2 +60,3 @@ var extendStatics = function (d, b) {

var utils_1 = require("../utils");
var bit_set_1 = __importDefault(require("./bit-set"));
/**

@@ -123,5 +124,3 @@ * Return the optimal number of hashes needed for a given error rate and load factor

_this._m = Math.ceil(_this._size / _this._nbHashes);
_this._filter = (0, utils_1.allocateArray)(_this._nbHashes, function () {
return (0, utils_1.allocateArray)(_this._m, 0);
});
_this._filter = (0, utils_1.allocateArray)(_this._nbHashes, function () { return new bit_set_1.default(_this._m); });
_this._capacity =

@@ -131,3 +130,2 @@ capacity !== undefined

: computeNumberOfItems(_this._size, loadFactor, nbHashes);
_this._length = 0;
return _this;

@@ -187,12 +185,2 @@ }

});
Object.defineProperty(PartitionedBloomFilter.prototype, "length", {
/**
* Get the number of elements currently in the filter
*/
get: function () {
return this._length;
},
enumerable: false,
configurable: true
});
Object.defineProperty(PartitionedBloomFilter.prototype, "loadFactor", {

@@ -220,5 +208,4 @@ /**

for (var i = 0; i < this._nbHashes; i++) {
this._filter[i][indexes[i]] = 1;
this._filter[i].add(indexes[i]);
}
this._length++;
};

@@ -240,3 +227,3 @@ /**

for (var i = 0; i < this._nbHashes; i++) {
if (!this._filter[i][indexes[i]]) {
if (!this._filter[i].has(indexes[i])) {
return false;

@@ -270,3 +257,2 @@ }

this._nbHashes !== other._nbHashes ||
this._length !== other._length ||
this._loadFactor !== other._loadFactor) {

@@ -276,3 +262,3 @@ return false;

return this._filter.every(function (array, outerIndex) {
return other._filter[outerIndex].every(function (item, innerIndex) { return array[innerIndex] === item; });
return other._filter[outerIndex].equals(array);
});

@@ -286,3 +272,3 @@ };

var values = this._filter.map(function (bucket) {
return bucket.reduce(function (a, b) { return a + b; }, 0);
return bucket.bitCount();
});

@@ -310,3 +296,19 @@ var used = values.reduce(function (a, b) { return a + b; }, 0);

__decorate([
(0, exportable_1.Field)(),
(0, exportable_1.Field)(function (sets) { return sets.map(function (s) { return s.export(); }); }, function (array) {
return array.map(function (data) {
// create the bitset from new and old array-based exported structure
if (Array.isArray(data)) {
var bs_1 = new bit_set_1.default(data.length);
data.forEach(function (val, index) {
if (val !== 0) {
bs_1.add(index);
}
});
return bs_1;
}
else {
return bit_set_1.default.import(data);
}
});
}),
__metadata("design:type", Array)

@@ -318,6 +320,2 @@ ], PartitionedBloomFilter.prototype, "_filter", void 0);

], PartitionedBloomFilter.prototype, "_capacity", void 0);
__decorate([
(0, exportable_1.Field)(),
__metadata("design:type", Number)
], PartitionedBloomFilter.prototype, "_length", void 0);
PartitionedBloomFilter = PartitionedBloomFilter_1 = __decorate([

@@ -324,0 +322,0 @@ (0, exportable_1.AutoExportable)('PartitionedBloomFilter', ['_seed']),

@@ -0,1 +1,2 @@

"use strict";
/* file : bucket.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {

@@ -27,0 +27,0 @@ if (k2 === undefined) k2 = k;

@@ -0,1 +1,2 @@

"use strict";
/* file : cuckoo-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -27,0 +27,0 @@ var extendStatics = function (d, b) {

@@ -0,1 +1,2 @@

"use strict";
/* file : exportable.ts

@@ -26,3 +27,2 @@ MIT License

/* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-return, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-argument */
'use strict';
var __read = (this && this.__read) || function (o, n) {

@@ -29,0 +29,0 @@ var m = typeof Symbol === "function" && o[Symbol.iterator];

@@ -0,1 +1,2 @@

"use strict";
/* file : formulas.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });

@@ -27,0 +27,0 @@ exports.optimalHashes = exports.optimalFilterSize = void 0;

@@ -0,1 +1,2 @@

"use strict";
/* file: cell.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -27,0 +27,0 @@ var extendStatics = function (d, b) {

@@ -0,1 +1,2 @@

"use strict";
/* file : invertible-bloom-lookup-tables.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -27,0 +27,0 @@ var extendStatics = function (d, b) {

@@ -0,1 +1,2 @@

"use strict";
/* file: classic-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });

@@ -0,1 +1,2 @@

"use strict";
/* file: counting-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });

@@ -0,1 +1,2 @@

"use strict";
/* file: writable-filter.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });

@@ -0,1 +1,2 @@

"use strict";
/* file : count-min-sketch.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __extends = (this && this.__extends) || (function () {

@@ -27,0 +27,0 @@ var extendStatics = function (d, b) {

@@ -52,27 +52,2 @@ "use strict";

};
var __read = (this && this.__read) || function (o, n) {
var m = typeof Symbol === "function" && o[Symbol.iterator];
if (!m) return o;
var i = m.call(o), r, ar = [], e;
try {
while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
}
catch (error) { e = { error: error }; }
finally {
try {
if (r && !r.done && (m = i["return"])) m.call(i);
}
finally { if (e) throw e.error; }
}
return ar;
};
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
if (ar || !(i in from)) {
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
ar[i] = from[i];
}
}
return to.concat(ar || Array.prototype.slice.call(from));
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -86,3 +61,2 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

var utils_1 = require("../utils");
var lodash_1 = require("lodash");
/**

@@ -154,3 +128,4 @@ * An error thrown when we try to compute the Jaccard Similarity with an empty MinHash

for (var i = 0; i < this._nbHashes; i++) {
this._signature[i] = Math.min(this._signature[i], applyHashFunction(value, this._hashFunctions[i]));
var hash = applyHashFunction(value, this._hashFunctions[i]);
this._signature[i] = Math.min(this._signature[i], hash);
}

@@ -168,3 +143,12 @@ };

});
this_1._signature[i] = Math.min.apply(Math, __spreadArray([this_1._signature[i]], __read(candidateSignatures), false));
// get the minimum of the candidate Signatures
// dont supply too much parameters to Math.min or Math.max with risk of getting stack error
// so we compute an iterative minimum
var min = candidateSignatures[0];
for (var i_1 = 1; i_1 < candidateSignatures.length; i_1++) {
if (min > candidateSignatures[i_1]) {
min = candidateSignatures[i_1];
}
}
this_1._signature[i] = Math.min(this_1._signature[i], min);
};
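
The comment above alludes to the fact that `Math.min(...array)` passes every element as a separate call argument, which can exceed the engine's argument/stack limit for large signatures. A standalone sketch of the failure mode and the iterative fallback (the array size is arbitrary):

```javascript
const big = Array.from({length: 1000000}, (_, i) => i)

// Math.min(...big) // may throw a RangeError on large inputs, because every
//                  // element is passed to Math.min as its own argument

// Iterative minimum: constant argument count, works for any array length.
let min = big[0]
for (let i = 1; i < big.length; i++) {
  if (big[i] < min) {
    min = big[i]
  }
}
console.log(min) // 0
```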

@@ -185,3 +169,11 @@ var this_1 = this;

}
return ((0, lodash_1.intersection)(this._signature, other._signature).length / this._nbHashes);
// fix: we need to check for the number of equal signatures, not uniq equal signatures
// lodash intersection ends with a uniq set of values
var count = 0;
for (var i = 0; i < this._nbHashes; i++) {
if (this._signature[i] === other._signature[i]) {
count++;
}
}
return count / this._nbHashes;
};
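
A small sketch of why the `lodash` `intersection`-based estimate undercounted: `intersection` deduplicates values, whereas the Jaccard estimate needs the number of positions where the two signatures agree (the signatures below are made up):

```javascript
const {intersection} = require('lodash')

const sigA = [7, 7, 3, 9]
const sigB = [7, 7, 3, 2]

// Value-based and deduplicated: [7, 3] -> length 2 -> estimate 2/4 = 0.5
console.log(intersection(sigA, sigB).length) // 2

// Position-based count, as in the fix above: positions 0, 1 and 2 match -> 3/4 = 0.75
let count = 0
for (let i = 0; i < sigA.length; i++) {
  if (sigA[i] === sigB[i]) {
    count++
  }
}
console.log(count / sigA.length) // 0.75
```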

@@ -188,0 +180,0 @@ __decorate([

@@ -0,1 +1,2 @@

"use strict";
/* file : utils.ts

@@ -24,3 +25,2 @@ MIT License

*/
'use strict';
var __values = (this && this.__values) || function(o) {

@@ -27,0 +27,0 @@ var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;

{
"name": "bloom-filters",
"version": "2.0.0",
"version": "3.0.0",
"description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash",

@@ -26,8 +26,14 @@ "main": "dist/api.js",

"bloom filter",
"invertible bloom filter",
"probabilistic",
"datastructure",
"partitionned bloom filter",
"scalable bloom filter",
"counting bloom filter",
"invertible bloom filter",
"count min sketch",
"cuckoo",
"xor-filter"
"xor",
"minhash",
"top-k",
"hyperloglog"
],

@@ -34,0 +40,0 @@ "author": "Thomas Minier <thomas.minier@protonmail.com>",

@@ -10,7 +10,8 @@ # Bloom-Filters [![Master](https://github.com/callidon/bloom-filters/actions/workflows/npm_test_doc.yml/badge.svg)](https://github.com/Callidon/bloom-filters/actions)

❗️**Compatibility**❗️
* Be careful when migrating from one version to another.
* Bug fixes were introduced in `1.3.7` and from `1.3.9` to `2.0.0` for hashing and indexing data. As a result, you **must completely rebuild your filters from scratch** to be compatible with the new versions.
* To respect the `breaking changes` rule of npm versions, from 1.3.9 onward we will publish a new major version whenever a modification touches the hashing/indexing system or breaks the current API.
❗️**Compatibility**❗️
- Be careful when migrating from one version to another.
- Bug fixes were introduced in `1.3.7` and from `1.3.9` to `2.0.0+` for hashing and indexing data. As a result, you **must completely rebuild your filters from scratch** to be compatible with the new versions.
- To respect the `breaking changes` rule of npm versions, from 1.3.9 onward we will publish a new major version whenever a modification touches the hashing/indexing system or breaks the current API.
# Table of contents

@@ -22,2 +23,3 @@

- [Partitioned Bloom Filter](#partitioned-bloom-filter)
- [Scalable Bloom Filter](#scalable-bloom-filter)
- [Cuckoo Filter](#cuckoo-filter)

@@ -64,4 +66,4 @@ - [Counting Bloom Filter](#counting-bloom-filter)

- `add(element: string) -> void`: add an element into the filter.
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `add(element: HashableInput) -> void`: add an element into the filter.
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `equals(other: BloomFilter) -> boolean`: Test if two filters are equal.
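
A short sketch of what the `string` to `HashableInput` change means in practice, assuming `HashableInput` covers strings as well as binary inputs such as `Buffer` (an assumption about the type alias, which is not spelled out in this diff):

```javascript
const {BloomFilter} = require('bloom-filters')

const filter = BloomFilter.create(15, 0.01)
// strings work as before
filter.add('alice')
// assuming HashableInput also accepts raw bytes, a Buffer can be hashed directly
filter.add(Buffer.from('bob'))
filter.has('alice') // true
```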

@@ -109,4 +111,4 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate).

- `add(element: string) -> void`: add an element into the filter.
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `add(element: HashableInput) -> void`: add an element into the filter.
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `equals(other: PartitionedBloomFilter) -> boolean`: Test if two filters are equal.

@@ -141,2 +143,37 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate).

### Scalable Bloom Filter
A Scalable Bloom Filter is a variant of Bloom Filters that can adapt dynamically to the
number of elements stored, while assuring a maximum false positive probability.
**Reference:** ALMEIDA, Paulo Sérgio, BAQUERO, Carlos, PREGUIÇA, Nuno, et al. Scalable bloom filters. Information Processing Letters, 2007, vol. 101, no 6, p. 255-261.
([Full text article](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.725.390&rep=rep1&type=pdf))
This filter internally uses [Partitioned Bloom Filters](#partitioned-bloom-filter).
#### Methods
- `add(element: HashableInput) -> void`: add an element into the filter.
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `equals(other: ScalableBloomFilter) -> boolean`: Test if two filters are equal.
- `capacity() -> number`: return the total capacity of this filter.
- `rate() -> number`: compute the filter's false positive rate (or error rate).
```javascript
const {ScalableBloomFilter} = require('bloom-filters')
// by default it creates an ideally scalable bloom filter for 8 elements with an error rate of 0.01 and a load factor of 0.5
const filter = new ScalableBloomFilter()
filter.add('alice')
filter.add('bob')
filter.add('carl')
for (let i = 0; i < 10000; i++) {
filter.add('elem:' + i)
}
filter.has('somethingwrong') // false
filter.capacity() // total capacity
filter.rate() // false positive rate of the internal filter currently in use
```
### Cuckoo Filter

@@ -151,5 +188,5 @@

- `add(element: string) -> void`: add an element into the filter.
- `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise.
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `add(element: HashableInput) -> void`: add an element into the filter.
- `remove(element: HashableInput) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise.
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `equals(other: CuckooFilter) -> boolean`: Test if two filters are equal.

@@ -193,5 +230,5 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate).

- `add(element: string) -> void`: add an element into the filter.
- `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise.
- `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `add(element: HashableInput) -> void`: add an element into the filter.
- `remove(element: HashableInput) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise.
- `has(element: HashableInput) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True if the element might be in the filter.
- `equals(other: CountingBloomFilter) -> boolean`: Test if two filters are equal.

@@ -241,4 +278,4 @@ - `rate() -> number`: compute the filter's false positive rate (or error rate).

- `update(element: string, count = 1) -> void`: add `count` occurrences of an element into the sketch.
- `count(element: string) -> number`: estimate the number of occurrences of an element.
- `update(element: HashableInput, count = 1) -> void`: add `count` occurrences of an element into the sketch.
- `count(element: HashableInput) -> number`: estimate the number of occurrences of an element.
- `merge(other: CountMinSketch) -> CountMinSketch`: merge occurrences of two sketches.

@@ -285,3 +322,3 @@ - `equals(other: CountMinSketch) -> boolean`: Test if two sketchs are equals.

- `update(element: string) -> void`: add a new occurrence of an element to the sketch.
- `update(element: HashableInput) -> void`: add a new occurrence of an element to the sketch.
- `count() -> number`: estimate the number of distinct elements in the sketch.

@@ -480,3 +517,3 @@ - `merge(other: HyperLogLog) -> HyperLogLog`: merge occurences of two sketches.

An XOR Filter is a more space-efficient probabilistic data structure than a Bloom Filter.
An XOR Filter is a more space-efficient probabilistic data structure than a Bloom Filter.
Very useful for space-efficient storage of read-only sets.

@@ -487,4 +524,2 @@

#### Methods

@@ -496,5 +531,6 @@

---
* Extended input types: `type XorHashableInput = HashableInput | Long`
* We use Buffers internally which are exported/imported to/from `base64` strings.
- Extended input types: `type XorHashableInput = HashableInput | Long`
- We use Buffers internally which are exported/imported to/from `base64` strings.
```javascript

@@ -516,3 +552,2 @@ const {XorFilter} = require('bloom-filters')

## Export and import

@@ -557,2 +592,3 @@

If you want to use your own hash functions, you can provide your own Hashing class by extending the default one. Example:
```js

@@ -572,2 +608,3 @@ const {BloomFilter, Hashing} = require('bloom-filters')

```
See the "_Use different hash functions_" describe block in `test/utils-test.js`.
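
A minimal sketch of such a subclass (the `serialize(element, seed)` override and the `_hashing` property are assumptions about the Hashing API, not taken from this diff; the constant return value is only there to illustrate where the override goes):

```js
const {BloomFilter, Hashing} = require('bloom-filters')

// Hypothetical custom hashing: every element hashes to the same value,
// which is useless in practice but shows the shape of the override.
class CustomHashing extends Hashing {
  serialize(_element, _seed) {
    return Number(1)
  }
}

const filter = BloomFilter.create(2, 0.01)
// swap the default hashing component for the custom one
filter._hashing = new CustomHashing()
filter.add('alice')
```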

@@ -581,13 +618,12 @@

* Tests are performed using [mocha](https://github.com/mochajs/mocha) and [nyc](https://github.com/istanbuljs/nyc) (code coverage) on node 12.x, 14.x, 15.x and 16.x for the moment.
* Linting and formatting are done using `prettier` and `eslint`
- Tests are performed using [mocha](https://github.com/mochajs/mocha) and [nyc](https://github.com/istanbuljs/nyc) (code coverage) on node 12.x, 14.x, 15.x and 16.x for the moment.
- Linting and formatting are done using `prettier` and `eslint`
When submitting pull requests, please follow these guidelines:
* please open PRs against the develop branch. PRs on master will be refused without comment.
* add tests when possible in `./test/*-test.js` or by creating `*-test.js` files.
* functions, methods, variables and types must be documented using typedoc annotations
* run `yarn lint` to see linting errors
* run `yarn fix` to fix errors that can be auto-fixed by gts.
* run `yarn test` (build, lint and run the mocha test suite with code coverage)
- Please open pull requests on the develop branch. **Direct contributions to the master branch will be refused without comments**
- Add tests when possible in the `test` folder.
- Functions, methods, variables and types must be documented using typedoc annotations
- Run `yarn test` (build, lint and run the mocha tests suite)
## References

@@ -603,17 +639,19 @@

- [Invertible Bloom Filters](http://www.sysnet.ucsd.edu/sysnet/miscpapers/EppGooUye-SIGCOMM-11.pdf): Eppstein, D., Goodrich, M. T., Uyeda, F., & Varghese, G. (2011). _What's the difference?: efficient set reconciliation without prior context._ ACM SIGCOMM Computer Communication Review, 41(4), 218-229.
- [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258) Thomas Mueller Graf, Daniel Lemire, Journal of Experimental Algorithmics 25 (1), 2020. DOI: 10.1145/3376122
- [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258) Thomas Mueller Graf, Daniel Lemire, Journal of Experimental Algorithmics 25 (1), 2020. DOI: 10.1145/3376122
- [Scalable Bloom Filters](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.725.390&rep=rep1&type=pdf) ALMEIDA, Paulo Sérgio, BAQUERO, Carlos, PREGUIÇA, Nuno, et al. _Scalable bloom filters_. Information Processing Letters, 2007, vol. 101, no 6, p. 255-261.
## Changelog
| **Version** | **Release date** | **Major changes** |
| ----------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `v2.0.0` | 02/2022 | - Use correctly double hashing [#issue43](https://github.com/Callidon/bloom-filters/issues/43). <br/> - Move all hashing related functions to its specific Hash class in a component of the BaseFilter class. It also allows for overriding the serizalize function for using custom hash functions <br/> - Add [#PR44](https://github.com/Callidon/bloom-filters/pull/44) optimizing the BloomFilter internal storage with Uint arrays. <br/> - Disable 10.x, 15.x node tests. <br/> - Add XorFilter [#29](https://github.com/Callidon/bloom-filters/issues/29) <br/> - Add `.nextInt32()` function to get a new random seeded int 32-bits from the current seed. <br/> - Make all properties public for allowing developpers to override everything. |
| `v1.3.0` | 10/04/2020 | Added the MinHash set |
| `v1.2.0` | 08/04/2020 | Add the TopK class |
| `v1.1.0` | 03/04/2020 | Add the HyperLogLog sketch |
| `v1.0.0` | 23/03/2020 | Rework the whole library using TypeScript, unify the API and fix the documentation |
| `v0.8.0` | 11/11/2019 | Fix some issues with the cuckoo filter (performances). Fix the global API. It allows now to customize each Filter. If you want to use the old API, use the `.create()` or `.from()` functions to match the old api. |
| `v0.7.1` | 11/09/2019 | Add the Counting Bloom Filter |
| `v0.7.0` | 01/07/2019 | Move to [XXHASH](https://cyan4973.github.io/xxHash/) for hashing elements in the library. One property has been added into the exported json `_seed` which is used to seed every hash of every elements. Update Invertible Bloom Filters with #add, #has, #delete, #listEntries, #substract, #Static.decode methods. Updated the way to get distinct indices which could have collisions in many cases. |
| `v0.6.1` | 18/06/2019 | Add Invertible Bloom Filters (only #encode/#substract/#Static.decode methods) |
| **Version** | **Release date** | **Major changes** |
| ----------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `v2.1.0` | 03/2022 | - Add Scalable Bloom Filters <br/> - Use an array of BitSets for the Partitioned Bloom Filter <br/> - Fix wrong MinHash comparison |
| `v2.0.0` | 02/2022 | - Use double hashing correctly [#issue43](https://github.com/Callidon/bloom-filters/issues/43). <br/> - Move all hashing-related functions to a dedicated Hash class, a component of the BaseFilter class. This also allows overriding the serialize function to use custom hash functions. <br/> - Add [#PR44](https://github.com/Callidon/bloom-filters/pull/44) optimizing the BloomFilter internal storage with Uint arrays. <br/> - Disable 10.x, 15.x node tests. <br/> - Add XorFilter [#29](https://github.com/Callidon/bloom-filters/issues/29) <br/> - Add `.nextInt32()` function to get a new seeded random 32-bit int from the current seed. <br/> - Make all properties public to allow developers to override everything. |
| `v1.3.0` | 10/04/2020 | Added the MinHash set |
| `v1.2.0` | 08/04/2020 | Add the TopK class |
| `v1.1.0` | 03/04/2020 | Add the HyperLogLog sketch |
| `v1.0.0` | 23/03/2020 | Rework the whole library using TypeScript, unify the API and fix the documentation |
| `v0.8.0` | 11/11/2019 | Fix some performance issues with the cuckoo filter. Fix the global API. It now allows customizing each Filter. If you want to use the old API, use the `.create()` or `.from()` functions to match the old API. |
| `v0.7.1` | 11/09/2019 | Add the Counting Bloom Filter |
| `v0.7.0` | 01/07/2019 | Move to [XXHASH](https://cyan4973.github.io/xxHash/) for hashing elements in the library. One property has been added to the exported JSON, `_seed`, which is used to seed every hash of every element. Update Invertible Bloom Filters with #add, #has, #delete, #listEntries, #substract, #Static.decode methods. Updated the way to get distinct indices, which could collide in many cases. |
| `v0.6.1` | 18/06/2019 | Add Invertible Bloom Filters (only #encode/#substract/#Static.decode methods) |

@@ -620,0 +658,0 @@ ## License

@@ -25,4 +25,2 @@ /* file : api.ts

'use strict'
export {default as BaseFilter} from './base-filter'

@@ -44,1 +42,2 @@ export {default as BloomFilter} from './bloom/bloom-filter'

export {default as DeprecatedHashing} from './hashing/deprecated_hashing'
export {default as ScalableBloomFilter} from './bloom/scalable-bloom-filter'

@@ -25,4 +25,2 @@ /* file : base-filter.ts

'use strict'
import seedrandom from 'seedrandom'

@@ -29,0 +27,0 @@ import Hashing from './hashing/hashing'

@@ -25,4 +25,2 @@ /* file : bloom-filter.ts

'use strict'
import ClassicFilter from '../interfaces/classic-filter'

@@ -58,5 +56,4 @@ import BaseFilter from '../base-filter'

data => {
// create the bitset from new and old array-based exported structure
if (Array.isArray(data)) {
// Create the bitset from new and old exported structure
// create a new BitSet from the specified array
const bs = new BitSet(data.length)

@@ -63,0 +60,0 @@ data.forEach((val: number, index: number) => {

@@ -25,4 +25,2 @@ /* file : counting-bloom-filter.ts

'use strict'
import BaseFilter from '../base-filter'

@@ -29,0 +27,0 @@ import WritableFilter from '../interfaces/writable-filter'

@@ -25,4 +25,2 @@ /* file : partitioned-bloom-filter.ts

'use strict'
import BaseFilter from '../base-filter'

@@ -32,2 +30,3 @@ import ClassicFilter from '../interfaces/classic-filter'

import {HashableInput, allocateArray} from '../utils'
import BitSet from './bit-set'

@@ -112,8 +111,23 @@ /**

public _m: number
@Field(
(sets: BitSet[]) => sets.map(s => s.export()),
(array: Array<{size: number; content: string} | number[]>) =>
array.map((data: {size: number; content: string} | number[]) => {
// create the bitset from new and old array-based exported structure
if (Array.isArray(data)) {
const bs = new BitSet(data.length)
data.forEach((val: number, index: number) => {
if (val !== 0) {
bs.add(index)
}
})
return bs
} else {
return BitSet.import(data as {size: number; content: string})
}
})
)
public _filter: Array<BitSet>
@Field()
public _filter: Array<Array<number>>
@Field()
public _capacity: number
@Field()
public _length: number
/**

@@ -137,5 +151,3 @@ * Constructor

this._m = Math.ceil(this._size / this._nbHashes)
this._filter = allocateArray(this._nbHashes, () =>
allocateArray(this._m, 0)
)
this._filter = allocateArray(this._nbHashes, () => new BitSet(this._m))
this._capacity =

@@ -145,3 +157,2 @@ capacity !== undefined

: computeNumberOfItems(this._size, loadFactor, nbHashes)
this._length = 0
}

@@ -207,9 +218,2 @@

/**
* Get the number of elements currently in the filter
*/
public get length(): number {
return this._length
}
/**
* Get the filter's load factor

@@ -238,5 +242,4 @@ */

for (let i = 0; i < this._nbHashes; i++) {
this._filter[i][indexes[i]] = 1
this._filter[i].add(indexes[i])
}
this._length++
}

@@ -264,3 +267,3 @@

for (let i = 0; i < this._nbHashes; i++) {
if (!this._filter[i][indexes[i]]) {
if (!this._filter[i].has(indexes[i])) {
return false

@@ -297,3 +300,2 @@ }

this._nbHashes !== other._nbHashes ||
this._length !== other._length ||
this._loadFactor !== other._loadFactor

@@ -304,5 +306,3 @@ ) {

return this._filter.every((array, outerIndex) =>
other._filter[outerIndex].every(
(item, innerIndex) => array[innerIndex] === item
)
other._filter[outerIndex].equals(array)
)

@@ -317,3 +317,3 @@ }

const values = this._filter.map(bucket => {
return bucket.reduce((a, b) => a + b, 0)
return bucket.bitCount()
})

@@ -320,0 +320,0 @@ const used = values.reduce((a, b) => a + b, 0)

@@ -25,4 +25,2 @@ /* file : bucket.ts

'use strict'
import {eq, indexOf} from 'lodash'

@@ -29,0 +27,0 @@ import * as utils from '../utils'

@@ -25,4 +25,2 @@ /* file : cuckoo-filter.ts

'use strict'
import WritableFilter from '../interfaces/writable-filter'

@@ -29,0 +27,0 @@ import BaseFilter from '../base-filter'

@@ -27,4 +27,2 @@ /* file : exportable.ts

'use strict'
import 'reflect-metadata'

@@ -31,0 +29,0 @@

@@ -25,4 +25,2 @@ /* file : formulas.ts

'use strict'
/**

@@ -29,0 +27,0 @@ * Various formulas used with Bloom Filters

@@ -25,4 +25,2 @@ /* file: cell.ts

'use strict'
import {xorBuffer} from '../utils'

@@ -29,0 +27,0 @@ import {AutoExportable, Field, Parameter} from '../exportable'

@@ -25,4 +25,2 @@ /* file : invertible-bloom-lookup-tables.ts

'use strict'
import BaseFilter from '../base-filter'

@@ -29,0 +27,0 @@ import WritableFilter from '../interfaces/writable-filter'

@@ -25,4 +25,2 @@ /* file: classic-filter.ts

'use strict'
/**

@@ -29,0 +27,0 @@ * A classic probabilistic data-structure, which supports insertions of elements and membership queries.

@@ -25,4 +25,2 @@ /* file: counting-filter.ts

'use strict'
/**

@@ -29,0 +27,0 @@ * A filter that can count occurences of items and estimate their frequencies.

@@ -25,4 +25,2 @@ /* file: writable-filter.ts

'use strict'
import ClassicFilter from './classic-filter'

@@ -29,0 +27,0 @@

@@ -25,4 +25,2 @@ /* file : count-min-sketch.ts

'use strict'
import BaseFilter from '../base-filter'

@@ -29,0 +27,0 @@ import CountingFilter from '../interfaces/counting-filter'

@@ -28,3 +28,2 @@ /* file: min-hash.ts

import {allocateArray} from '../utils'
import {intersection} from 'lodash'

@@ -113,6 +112,4 @@ /**

for (let i = 0; i < this._nbHashes; i++) {
this._signature[i] = Math.min(
this._signature[i],
applyHashFunction(value, this._hashFunctions[i])
)
const hash = applyHashFunction(value, this._hashFunctions[i])
this._signature[i] = Math.min(this._signature[i], hash)
}

@@ -130,3 +127,12 @@ }

)
this._signature[i] = Math.min(this._signature[i], ...candidateSignatures)
// get the minimum of the candidate Signatures
// dont supply too much parameters to Math.min or Math.max with risk of getting stack error
// so we compute an iterative minimum
let min = candidateSignatures[0]
for (let i = 1; i < candidateSignatures.length; i++) {
if (min > candidateSignatures[i]) {
min = candidateSignatures[i]
}
}
this._signature[i] = Math.min(this._signature[i], min)
}

@@ -146,6 +152,12 @@ }

}
return (
intersection(this._signature, other._signature).length / this._nbHashes
)
// fix: we need to check for the number of equal signatures, not uniq equal signatures
// lodash intersection ends with a uniq set of values
let count = 0
for (let i = 0; i < this._nbHashes; i++) {
if (this._signature[i] === other._signature[i]) {
count++
}
}
return count / this._nbHashes
}
}

@@ -25,4 +25,2 @@ /* file : utils.ts

'use strict'
/**

@@ -29,0 +27,0 @@ * Utilitaries functions

@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {BloomFilter} = require('../dist/api.js')

@@ -25,4 +25,2 @@ /* file : bucket-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const Bucket = require('../dist/cuckoo/bucket.js').default

@@ -25,4 +25,2 @@ /*

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {expect} = require('chai')

@@ -25,4 +25,2 @@ /* file : count-min-sketch-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {CountMinSketch} = require('../dist/api.js')

@@ -25,4 +25,2 @@ /* file : counting-bloom-filter-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {CountingBloomFilter} = require('../dist/api.js')

@@ -25,4 +25,2 @@ /* file : cuckoo-filter-test.js

'use strict'
const chai = require('chai')

@@ -29,0 +27,0 @@ chai.should()

@@ -25,4 +25,2 @@ /* file : hyperloglog-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {HyperLogLog} = require('../dist/api.js')

@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ require('chai').expect()

@@ -25,4 +25,2 @@ /* file : min-hash-test.js

'use strict'
require('chai').should()

@@ -37,9 +35,27 @@ const {MinHashFactory, MinHash} = require('../dist/api.js')

let factory,
setA,
setB,
maxValue = 0,
nbHashes
try {
const max = 10000
setA = range(1, max)
setB = range(1, max).map(x => (x % 2 === 0 ? x : x * 2))
const allInOne = [...setA, ...setB]
for (let i of allInOne) {
if (maxValue < i) {
maxValue = i
}
}
nbHashes = 50
factory = new MinHashFactory(nbHashes, maxValue)
} catch (error) {
console.error(error)
throw new Error(
'An error occured when creating the min hash factory: ' + error
)
}
describe('MinHash', () => {
const setA = range(1, 500)
const setB = range(1, 500).map(x => (x % 2 === 0 ? x : x * 2))
const maxValue = Math.max(...setA, ...setB)
const nbHashes = 10
const factory = new MinHashFactory(nbHashes, maxValue)
describe('#isEmpty', () => {

@@ -46,0 +62,0 @@ it('should return True when the MinHash signeture is empty', () => {

@@ -25,4 +25,2 @@ /* file : partitioned-bloom-filter-test.js

'use strict'
require('chai').should()

@@ -39,3 +37,2 @@ const {PartitionedBloomFilter} = require('../dist/api.js')

filter.add('bob')
filter.length.should.equal(2)
})

@@ -49,3 +46,2 @@

filter.has('carl').should.equal(true)
filter.length.should.equal(data.length)
filter.rate().should.be.closeTo(targetRate, 0.1)

@@ -142,5 +138,4 @@ })

exported._loadFactor.should.equal(filter._loadFactor)
exported._m.should.equal(filter._m)
exported._nbHashes.should.equal(filter._nbHashes)
exported._filter.should.deep.equal(filter._filter)
exported._filter.should.deep.equal(filter._filter.map(f => f.export()))
})

@@ -147,0 +142,0 @@

@@ -25,4 +25,2 @@ /* file : topk-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {TopK} = require('../dist/api.js')

@@ -25,4 +25,2 @@ /* file : utils-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {

@@ -25,4 +25,2 @@ /* file : bloom-filter-test.js

'use strict'
require('chai').should()

@@ -29,0 +27,0 @@ const {XorFilter} = require('../dist/api.js')

