Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

@ably/bloomit

Package Overview
Dependencies
Maintainers
5
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@ably/bloomit - npm Package Compare versions

Comparing version 1.3.1 to 1.4.0

test/utils-test.js

13

dist/bloom-filter.d.ts

@@ -16,3 +16,2 @@ import { HashableInput } from './utils';

private _length;
private _seed;
/**

@@ -22,5 +21,4 @@ * Constructor

* @param nbHashes - The number of hash functions used
* @param seed - The filter seed.
*/
constructor(size: number, nbHashes: number, seed?: number);
constructor(size: number, nbHashes: number);
/**

@@ -32,3 +30,3 @@ * Create an optimal bloom filter providing the maximum of elements stored and the error rate desired

*/
static create(nbItems: number, errorRate: number, seed?: number): BloomFilter;
static create(nbItems: number, errorRate: number): BloomFilter;
/**

@@ -43,3 +41,3 @@ * Build a new Bloom Filter from an existing iterable with a fixed error rate

*/
static from(items: Iterable<HashableInput>, errorRate: number, seed?: number): BloomFilter;
static from(items: Iterable<HashableInput>, errorRate: number): BloomFilter;
/**

@@ -63,7 +61,2 @@ * Generate a bloom filter from a binary export.

/**
* Get the seed of the filter
* @return The filter seed
*/
get seed(): number;
/**
* Get the hash count of the filter

@@ -70,0 +63,0 @@ * @return The filter hash count

@@ -43,7 +43,4 @@ /* file : bloom-filter.ts

* @param nbHashes - The number of hash functions used
* @param seed - The filter seed.
*/
constructor(size, nbHashes, seed = 0x1234567890) {
const positiveIntSeed = Math.abs(Math.ceil(seed));
this._seed = positiveIntSeed;
constructor(size, nbHashes) {
this._size = size;

@@ -60,6 +57,6 @@ this._nbHashes = nbHashes;

*/
static create(nbItems, errorRate, seed = 0x1234567890) {
static create(nbItems, errorRate) {
const size = formulas_1.optimalFilterSize(nbItems, errorRate);
const hashes = formulas_1.optimalHashes(size, nbItems);
return new BloomFilter(size, hashes, seed);
return new BloomFilter(size, hashes);
}

@@ -75,5 +72,5 @@ /**

*/
static from(items, errorRate, seed = 0x1234567890) {
static from(items, errorRate) {
const array = Array.from(items);
const filter = BloomFilter.create(array.length, errorRate, seed);
const filter = BloomFilter.create(array.length, errorRate);
array.forEach(element => filter.add(element));

@@ -89,8 +86,7 @@ return filter;

static import(binaryBloomFilter) {
const seedArray = binaryBloomFilter.slice(0, 8);
const nbHashesArray = binaryBloomFilter.slice(8, 16);
const lengthArray = binaryBloomFilter.slice(16, 24);
const sizeArray = binaryBloomFilter.slice(24, 32);
const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length);
const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray), encoding_1.uint8ArrayToInt64(seedArray));
const nbHashesArray = binaryBloomFilter.slice(0, 8);
const lengthArray = binaryBloomFilter.slice(8, 16);
const sizeArray = binaryBloomFilter.slice(16, 24);
const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length);
const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray));
bloomFilter._length = encoding_1.uint8ArrayToInt64(lengthArray);

@@ -115,9 +111,2 @@ bloomFilter._filter = filterArray;

/**
* Get the seed of the filter
* @return The filter seed
*/
get seed() {
return this._seed;
}
/**
* Get the hash count of the filter

@@ -137,3 +126,3 @@ * @return The filter hash count

add(element) {
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed);
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes);
for (let i = 0; i < indexes.length; i++) {

@@ -158,3 +147,3 @@ const indexToEdit = utils_1.getByteIndexInArray(indexes[i]);

has(element) {
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed);
const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes);
for (let i = 0; i < indexes.length; i++) {

@@ -196,8 +185,7 @@ if (!utils_1.getBitAtIndex(this._filter, indexes[i])) {

export() {
const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters
exportArray.set(encoding_1.int64ToUint8Array(this.seed), 0);
exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 8);
exportArray.set(encoding_1.int64ToUint8Array(this.length), 16);
exportArray.set(encoding_1.int64ToUint8Array(this.size), 24);
const exportArrayStartIndex = 32;
const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters
exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 0);
exportArray.set(encoding_1.int64ToUint8Array(this.length), 8);
exportArray.set(encoding_1.int64ToUint8Array(this.size), 16);
const exportArrayStartIndex = 24;
for (let index = 0; index < this._filter.length; index += 1) {

@@ -204,0 +192,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index];

@@ -23,3 +23,2 @@ /// <reference types="node" />

* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
* @param seed the seed used for hashing
* @return The results of the hash functions applied to the value (in hex or integer)

@@ -31,16 +30,2 @@ * @memberof Utils

/**
* Apply Double Hashing to produce a n-hash
*
* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
* @param n - The indice of the hash function we want to produce
* @param hashA - The result of the first hash function applied to a value.
* @param hashB - The result of the second hash function applied to a value.
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
* @return The result of hash_n applied to a value.
* @memberof Utils
* @author Thomas Minier
*/
export declare function doubleHashing(n: number, hashA: number, hashB: number, size: number): number;
/**
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique

@@ -50,7 +35,9 @@ * @param element - The element to hash

* @param number - The number of indexes desired
* @param seed - The seed used
* @param maxIterations - throw if we exceed this without finding a result of the
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
* to size*100
* @return A array of indexes
* @author Arnaud Grall
* @author Arnaud Grall, Simon Woolf
*/
export declare function getDistinctIndices(element: HashableInput, size: number, number: number, seed?: number): Array<number>;
export declare function getDistinctIndices(element: HashableInput, size: number, number: number, maxIterations?: number): Array<number>;
/**

@@ -57,0 +44,0 @@ * Return the amount of bytes needed to fit the input bits

/* file : utils.ts
MIT License
Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
Bloomit changes: Copyright 2021 Kolja Blauhut
Ably-forks/bloomit changes: Copyright 2021 Simon Woolf
Permission is hereby granted, free of charge, to any person obtaining a copy

@@ -45,4 +49,6 @@ of this software and associated documentation files (the "Software"), to deal

Object.defineProperty(exports, "__esModule", { value: true });
exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.doubleHashing = exports.hashTwice = void 0;
exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.hashTwice = void 0;
const farmhash = __importStar(require("farmhash"));
const PRIME_STR = "'";
const PRIME_BUF = Buffer.from(PRIME_STR);
/**

@@ -52,3 +58,2 @@ * (64-bits only) Hash a value into two values (in hex or integer format)

* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
* @param seed the seed used for hashing
* @return The results of the hash functions applied to the value (in hex or integer)

@@ -69,21 +74,2 @@ * @memberof Utils

/**
* Apply Double Hashing to produce a n-hash
*
* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
* @param n - The indice of the hash function we want to produce
* @param hashA - The result of the first hash function applied to a value.
* @param hashB - The result of the second hash function applied to a value.
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
* @return The result of hash_n applied to a value.
* @memberof Utils
* @author Thomas Minier
*/
function doubleHashing(n, hashA, hashB, size) {
// Cubic term avoids increased-probability-of-collision issue, see
// http://peterd.org/pcd-diss.pdf s.6.5.4
return Math.abs((hashA + n * hashB + Math.floor((n ** 3 - n) / 6)) % size);
}
exports.doubleHashing = doubleHashing;
/**
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique

@@ -93,16 +79,35 @@ * @param element - The element to hash

* @param number - The number of indexes desired
* @param seed - The seed used
* @param maxIterations - throw if we exceed this without finding a result of the
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
* to size*100
* @return A array of indexes
* @author Arnaud Grall
* @author Arnaud Grall, Simon Woolf
*/
function getDistinctIndices(element, size, number, seed) {
const { first, second } = hashTwice(element);
function getDistinctIndices(element, size, number, maxIterations = size * 100) {
let { first, second } = hashTwice(element);
let index, i = 1;
const indices = [];
let n = 0;
// Implements enhanced double hashing algorithm from
// http://peterd.org/pcd-diss.pdf s.6.5.4
while (indices.length < number) {
const index = doubleHashing(n, first, second, size);
index = first % size;
if (!indices.includes(index)) {
indices.push(index);
}
n++;
first = (first + second) % size;
second = (second + i) % size;
i++;
if (i > size) {
// Enhanced double hashing stops cycles of length less than `size` in the case where
// size is coprime with the second hash. But you still get cycles of length `size`.
// So if we reach there and haven't finished, append a prime to the input and
// rehash.
element = Buffer.isBuffer(element)
? Buffer.concat([element, PRIME_BUF])
: element + PRIME_STR;
({ first, second } = hashTwice(element));
}
if (maxIterations && i > maxIterations) {
throw new Error('max iterations exceeded');
}
}

@@ -109,0 +114,0 @@ return indices;

{
"name": "@ably/bloomit",
"version": "1.3.1",
"version": "1.4.0",
"description": "Space efficient bloom filter based on the bloom-filters npm package.",

@@ -5,0 +5,0 @@ "main": "dist/index.js",

@@ -54,3 +54,2 @@ /* file : bloom-filter.ts

private _length: number;
private _seed: number;

@@ -61,7 +60,4 @@ /**

* @param nbHashes - The number of hash functions used
* @param seed - The filter seed.
*/
constructor(size: number, nbHashes: number, seed: number = 0x1234567890) {
const positiveIntSeed = Math.abs(Math.ceil(seed));
this._seed = positiveIntSeed;
constructor(size: number, nbHashes: number) {
this._size = size;

@@ -82,7 +78,6 @@ this._nbHashes = nbHashes;

errorRate: number,
seed: number = 0x1234567890
): BloomFilter {
const size = optimalFilterSize(nbItems, errorRate);
const hashes = optimalHashes(size, nbItems);
return new BloomFilter(size, hashes, seed);
return new BloomFilter(size, hashes);
}

@@ -102,6 +97,5 @@

errorRate: number,
seed: number = 0x1234567890
): BloomFilter {
const array = Array.from(items);
const filter = BloomFilter.create(array.length, errorRate, seed);
const filter = BloomFilter.create(array.length, errorRate);
array.forEach(element => filter.add(element));

@@ -118,12 +112,10 @@ return filter;

static import(binaryBloomFilter: Uint8Array): BloomFilter {
const seedArray = binaryBloomFilter.slice(0, 8);
const nbHashesArray = binaryBloomFilter.slice(8, 16);
const lengthArray = binaryBloomFilter.slice(16, 24);
const sizeArray = binaryBloomFilter.slice(24, 32);
const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length);
const nbHashesArray = binaryBloomFilter.slice(0, 8);
const lengthArray = binaryBloomFilter.slice(8, 16);
const sizeArray = binaryBloomFilter.slice(16, 24);
const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length);
const bloomFilter = new BloomFilter(
uint8ArrayToInt64(sizeArray),
uint8ArrayToInt64(nbHashesArray),
uint8ArrayToInt64(seedArray)
uint8ArrayToInt64(nbHashesArray)
);

@@ -153,10 +145,2 @@

/**
* Get the seed of the filter
* @return The filter seed
*/
get seed(): number {
return this._seed;
}
/**
* Get the hash count of the filter

@@ -180,4 +164,3 @@ * @return The filter hash count

this._size,
this._nbHashes,
this._seed
this._nbHashes
);

@@ -208,4 +191,3 @@

this._size,
this._nbHashes,
this._seed
this._nbHashes
);

@@ -256,9 +238,8 @@ for (let i = 0; i < indexes.length; i++) {

export(): Uint8Array {
const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters
exportArray.set(int64ToUint8Array(this.seed), 0);
exportArray.set(int64ToUint8Array(this.nbHashes), 8);
exportArray.set(int64ToUint8Array(this.length), 16);
exportArray.set(int64ToUint8Array(this.size), 24);
const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters
exportArray.set(int64ToUint8Array(this.nbHashes), 0);
exportArray.set(int64ToUint8Array(this.length), 8);
exportArray.set(int64ToUint8Array(this.size), 16);
const exportArrayStartIndex = 32;
const exportArrayStartIndex = 24;
for (let index = 0; index < this._filter.length; index += 1) {

@@ -265,0 +246,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index];

/* file : utils.ts
MIT License
Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
Bloomit changes: Copyright 2021 Kolja Blauhut
Ably-forks/bloomit changes: Copyright 2021 Simon Woolf
Permission is hereby granted, free of charge, to any person obtaining a copy

@@ -29,2 +33,5 @@ of this software and associated documentation files (the "Software"), to deal

const PRIME_STR = "'";
const PRIME_BUF = Buffer.from(PRIME_STR);
/**

@@ -55,3 +62,2 @@ * Utilitaries functions

* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
* @param seed the seed used for hashing
* @return The results of the hash functions applied to the value (in hex or integer)

@@ -74,26 +80,2 @@ * @memberof Utils

/**
* Apply Double Hashing to produce a n-hash
*
* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
* @param n - The indice of the hash function we want to produce
* @param hashA - The result of the first hash function applied to a value.
* @param hashB - The result of the second hash function applied to a value.
* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
* @return The result of hash_n applied to a value.
* @memberof Utils
* @author Thomas Minier
*/
export function doubleHashing(
n: number,
hashA: number,
hashB: number,
size: number
): number {
// Cubic term avoids increased-probability-of-collision issue, see
// http://peterd.org/pcd-diss.pdf s.6.5.4
return Math.abs((hashA + n*hashB + Math.floor((n**3 - n)/6)) % size);
}
/**
* Generate a set of distinct indexes on interval [0, size) using the double hashing technique

@@ -103,5 +85,7 @@ * @param element - The element to hash

* @param number - The number of indexes desired
* @param seed - The seed used
* @param maxIterations - throw if we exceed this without finding a result of the
* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
* to size*100
* @return A array of indexes
* @author Arnaud Grall
* @author Arnaud Grall, Simon Woolf
*/

@@ -112,15 +96,34 @@ export function getDistinctIndices(

number: number,
seed?: number
maxIterations: number = size * 100
): Array<number> {
const {first, second} = hashTwice(element);
const indices: Array<number> = [];
let n = 0;
while(indices.length < number) {
const index = doubleHashing(n, first, second, size);
if(!indices.includes(index)) {
indices.push(index);
}
n++;
}
return indices;
let {first, second} = hashTwice(element);
let index, i = 1;
const indices: Array<number> = [];
// Implements enhanced double hashing algorithm from
// http://peterd.org/pcd-diss.pdf s.6.5.4
while(indices.length < number) {
index = first % size;
if(!indices.includes(index)) {
indices.push(index);
}
first = (first + second) % size;
second = (second + i) % size;
i++;
if(i > size) {
// Enhanced double hashing stops cycles of length less than `size` in the case where
// size is coprime with the second hash. But you still get cycles of length `size`.
// So if we reach there and haven't finished, append a prime to the input and
// rehash.
element = Buffer.isBuffer(element)
? Buffer.concat([element, PRIME_BUF])
: element + PRIME_STR;
({first, second} = hashTwice(element));
}
if(maxIterations && i > maxIterations) {
throw new Error('max iterations exceeded');
}
}
return indices;
}

@@ -127,0 +130,0 @@

@@ -32,7 +32,6 @@ /* file : bloom-filter-test.js

const targetRate = 0.1;
const seed = Math.random();
describe('construction', () => {
it('should add element to the filter with #add', () => {
const filter = BloomFilter.create(15, targetRate, seed);
const filter = BloomFilter.create(15, targetRate);
filter.add('alice');

@@ -100,6 +99,6 @@ filter.add('bob');

describe('#export', () => {
const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate, seed);
const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate);
it('should export a bloom filter to a Uint8Array', () => {
const exported = filter.export();
exported.length.should.equal(filter._filter.length + 4 * 8);
exported.length.should.equal(filter._filter.length + 3 * 8);
});

@@ -110,3 +109,2 @@

const newFilter = BloomFilter.import(exported);
newFilter._seed.should.equal(filter._seed);
newFilter.size.should.equal(filter.size);

@@ -113,0 +111,0 @@ newFilter.length.should.equal(filter.length);

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc