@ably/bloomit - npm Package Compare versions

Comparing version 1.3.1 to 1.4.0

test/utils-test.js

dist/bloom-filter.d.ts

		@@ -16,3 +16,2 @@ import { HashableInput } from './utils';
		private _length;
		private _seed;
		/**
		@@ -22,5 +21,4 @@ * Constructor
		* @param nbHashes - The number of hash functions used
		* @param seed - The filter seed.
		*/
		constructor(size: number, nbHashes: number, seed?: number);
		constructor(size: number, nbHashes: number);
		/**
		@@ -32,3 +30,3 @@ * Create an optimal bloom filter providing the maximum of elements stored and the error rate desired
		*/
		static create(nbItems: number, errorRate: number, seed?: number): BloomFilter;
		static create(nbItems: number, errorRate: number): BloomFilter;
		/**
		@@ -43,3 +41,3 @@ * Build a new Bloom Filter from an existing iterable with a fixed error rate
		*/
		static from(items: Iterable<HashableInput>, errorRate: number, seed?: number): BloomFilter;
		static from(items: Iterable<HashableInput>, errorRate: number): BloomFilter;
		/**
		@@ -63,7 +61,2 @@ * Generate a bloom filter from a binary export.
		/**
		* Get the seed of the filter
		* @return The filter seed
		*/
		get seed(): number;
		/**
		* Get the hash count of the filter
		@@ -70,0 +63,0 @@ * @return The filter hash count

dist/bloom-filter.js

		@@ -43,7 +43,4 @@ /* file : bloom-filter.ts
		* @param nbHashes - The number of hash functions used
		* @param seed - The filter seed.
		*/
		constructor(size, nbHashes, seed = 0x1234567890) {
		const positiveIntSeed = Math.abs(Math.ceil(seed));
		this._seed = positiveIntSeed;
		constructor(size, nbHashes) {
		this._size = size;
		@@ -60,6 +57,6 @@ this._nbHashes = nbHashes;
		*/
		static create(nbItems, errorRate, seed = 0x1234567890) {
		static create(nbItems, errorRate) {
		const size = formulas_1.optimalFilterSize(nbItems, errorRate);
		const hashes = formulas_1.optimalHashes(size, nbItems);
		return new BloomFilter(size, hashes, seed);
		return new BloomFilter(size, hashes);
		}
		@@ -75,5 +72,5 @@ /**
		*/
		static from(items, errorRate, seed = 0x1234567890) {
		static from(items, errorRate) {
		const array = Array.from(items);
		const filter = BloomFilter.create(array.length, errorRate, seed);
		const filter = BloomFilter.create(array.length, errorRate);
		array.forEach(element => filter.add(element));
		@@ -89,8 +86,7 @@ return filter;
		static import(binaryBloomFilter) {
		const seedArray = binaryBloomFilter.slice(0, 8);
		const nbHashesArray = binaryBloomFilter.slice(8, 16);
		const lengthArray = binaryBloomFilter.slice(16, 24);
		const sizeArray = binaryBloomFilter.slice(24, 32);
		const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length);
		const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray), encoding_1.uint8ArrayToInt64(seedArray));
		const nbHashesArray = binaryBloomFilter.slice(0, 8);
		const lengthArray = binaryBloomFilter.slice(8, 16);
		const sizeArray = binaryBloomFilter.slice(16, 24);
		const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length);
		const bloomFilter = new BloomFilter(encoding_1.uint8ArrayToInt64(sizeArray), encoding_1.uint8ArrayToInt64(nbHashesArray));
		bloomFilter._length = encoding_1.uint8ArrayToInt64(lengthArray);
		@@ -115,9 +111,2 @@ bloomFilter._filter = filterArray;
		/**
		* Get the seed of the filter
		* @return The filter seed
		*/
		get seed() {
		return this._seed;
		}
		/**
		* Get the hash count of the filter
		@@ -137,3 +126,3 @@ * @return The filter hash count
		add(element) {
		const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed);
		const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes);
		for (let i = 0; i < indexes.length; i++) {
		@@ -158,3 +147,3 @@ const indexToEdit = utils_1.getByteIndexInArray(indexes[i]);
		has(element) {
		const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes, this._seed);
		const indexes = utils_1.getDistinctIndices(element, this._size, this._nbHashes);
		for (let i = 0; i < indexes.length; i++) {
		@@ -196,8 +185,7 @@ if (!utils_1.getBitAtIndex(this._filter, indexes[i])) {
		export() {
		const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters
		exportArray.set(encoding_1.int64ToUint8Array(this.seed), 0);
		exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 8);
		exportArray.set(encoding_1.int64ToUint8Array(this.length), 16);
		exportArray.set(encoding_1.int64ToUint8Array(this.size), 24);
		const exportArrayStartIndex = 32;
		const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters
		exportArray.set(encoding_1.int64ToUint8Array(this.nbHashes), 0);
		exportArray.set(encoding_1.int64ToUint8Array(this.length), 8);
		exportArray.set(encoding_1.int64ToUint8Array(this.size), 16);
		const exportArrayStartIndex = 24;
		for (let index = 0; index < this._filter.length; index += 1) {
		@@ -204,0 +192,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index];

dist/utils.d.ts

		@@ -23,3 +23,2 @@ /// <reference types="node" />
		* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
		* @param seed the seed used for hashing
		* @return The results of the hash functions applied to the value (in hex or integer)
		@@ -31,16 +30,2 @@ * @memberof Utils
		/**
		* Apply Double Hashing to produce a n-hash
		*
		* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
		* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
		* @param n - The indice of the hash function we want to produce
		* @param hashA - The result of the first hash function applied to a value.
		* @param hashB - The result of the second hash function applied to a value.
		* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
		* @return The result of hash_n applied to a value.
		* @memberof Utils
		* @author Thomas Minier
		*/
		export declare function doubleHashing(n: number, hashA: number, hashB: number, size: number): number;
		/**
		* Generate a set of distinct indexes on interval [0, size) using the double hashing technique
		@@ -50,7 +35,9 @@ * @param element - The element to hash
		* @param number - The number of indexes desired
		* @param seed - The seed used
		* @param maxIterations - throw if we exceed this without finding a result of the
		* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
		* to size*100
		* @return A array of indexes
		* @author Arnaud Grall
		* @author Arnaud Grall, Simon Woolf
		*/
		export declare function getDistinctIndices(element: HashableInput, size: number, number: number, seed?: number): Array<number>;
		export declare function getDistinctIndices(element: HashableInput, size: number, number: number, maxIterations?: number): Array<number>;
		/**
		@@ -57,0 +44,0 @@ * Return the amount of bytes needed to fit the input bits

dist/utils.js

		/* file : utils.ts
		MIT License

		Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
		Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall

		Bloomit changes: Copyright 2021 Kolja Blauhut

		Ably-forks/bloomit changes: Copyright 2021 Simon Woolf

		Permission is hereby granted, free of charge, to any person obtaining a copy
		@@ -45,4 +49,6 @@ of this software and associated documentation files (the "Software"), to deal
		Object.defineProperty(exports, "__esModule", { value: true });
		exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.doubleHashing = exports.hashTwice = void 0;
		exports.getBitAtIndex = exports.setBitInByte = exports.getBitIndex = exports.getByteIndexInArray = exports.getUint8ArrayLength = exports.getDistinctIndices = exports.hashTwice = void 0;
		const farmhash = __importStar(require("farmhash"));
		const PRIME_STR = "'";
		const PRIME_BUF = Buffer.from(PRIME_STR);
		/**
		@@ -52,3 +58,2 @@ * (64-bits only) Hash a value into two values (in hex or integer format)
		* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
		* @param seed the seed used for hashing
		* @return The results of the hash functions applied to the value (in hex or integer)
		@@ -69,21 +74,2 @@ * @memberof Utils
		/**
		* Apply Double Hashing to produce a n-hash
		*
		* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
		* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
		* @param n - The indice of the hash function we want to produce
		* @param hashA - The result of the first hash function applied to a value.
		* @param hashB - The result of the second hash function applied to a value.
		* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
		* @return The result of hash_n applied to a value.
		* @memberof Utils
		* @author Thomas Minier
		*/
		function doubleHashing(n, hashA, hashB, size) {
		// Cubic term avoids increased-probability-of-collision issue, see
		// http://peterd.org/pcd-diss.pdf s.6.5.4
		return Math.abs((hashA + n * hashB + Math.floor((n ** 3 - n) / 6)) % size);
		}
		exports.doubleHashing = doubleHashing;
		/**
		* Generate a set of distinct indexes on interval [0, size) using the double hashing technique
		@@ -93,16 +79,35 @@ * @param element - The element to hash
		* @param number - The number of indexes desired
		* @param seed - The seed used
		* @param maxIterations - throw if we exceed this without finding a result of the
		* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
		* to size*100
		* @return A array of indexes
		* @author Arnaud Grall
		* @author Arnaud Grall, Simon Woolf
		*/
		function getDistinctIndices(element, size, number, seed) {
		const { first, second } = hashTwice(element);
		function getDistinctIndices(element, size, number, maxIterations = size * 100) {
		let { first, second } = hashTwice(element);
		let index, i = 1;
		const indices = [];
		let n = 0;
		// Implements enhanced double hashing algorithm from
		// http://peterd.org/pcd-diss.pdf s.6.5.4
		while (indices.length < number) {
		const index = doubleHashing(n, first, second, size);
		index = first % size;
		if (!indices.includes(index)) {
		indices.push(index);
		}
		n++;
		first = (first + second) % size;
		second = (second + i) % size;
		i++;
		if (i > size) {
		// Enhanced double hashing stops cycles of length less than `size` in the case where
		// size is coprime with the second hash. But you still get cycles of length `size`.
		// So if we reach there and haven't finished, append a prime to the input and
		// rehash.
		element = Buffer.isBuffer(element)
		? Buffer.concat([element, PRIME_BUF])
		: element + PRIME_STR;
		({ first, second } = hashTwice(element));
		}
		if (maxIterations && i > maxIterations) {
		throw new Error('max iterations exceeded');
		}
		}
		@@ -109,0 +114,0 @@ return indices;

package.json

		{
		"name": "@ably/bloomit",
		"version": "1.3.1",
		"version": "1.4.0",
		"description": "Space efficient bloom filter based on the bloom-filters npm package.",
		@@ -5,0 +5,0 @@ "main": "dist/index.js",

src/bloom-filter.ts

		@@ -54,3 +54,2 @@ /* file : bloom-filter.ts
		private _length: number;
		private _seed: number;

		@@ -61,7 +60,4 @@ /**
		* @param nbHashes - The number of hash functions used
		* @param seed - The filter seed.
		*/
		constructor(size: number, nbHashes: number, seed: number = 0x1234567890) {
		const positiveIntSeed = Math.abs(Math.ceil(seed));
		this._seed = positiveIntSeed;
		constructor(size: number, nbHashes: number) {
		this._size = size;
		@@ -82,7 +78,6 @@ this._nbHashes = nbHashes;
		errorRate: number,
		seed: number = 0x1234567890
		): BloomFilter {
		const size = optimalFilterSize(nbItems, errorRate);
		const hashes = optimalHashes(size, nbItems);
		return new BloomFilter(size, hashes, seed);
		return new BloomFilter(size, hashes);
		}
		@@ -102,6 +97,5 @@
		errorRate: number,
		seed: number = 0x1234567890
		): BloomFilter {
		const array = Array.from(items);
		const filter = BloomFilter.create(array.length, errorRate, seed);
		const filter = BloomFilter.create(array.length, errorRate);
		array.forEach(element => filter.add(element));
		@@ -118,12 +112,10 @@ return filter;
		static import(binaryBloomFilter: Uint8Array): BloomFilter {
		const seedArray = binaryBloomFilter.slice(0, 8);
		const nbHashesArray = binaryBloomFilter.slice(8, 16);
		const lengthArray = binaryBloomFilter.slice(16, 24);
		const sizeArray = binaryBloomFilter.slice(24, 32);
		const filterArray = binaryBloomFilter.slice(32, binaryBloomFilter.length);
		const nbHashesArray = binaryBloomFilter.slice(0, 8);
		const lengthArray = binaryBloomFilter.slice(8, 16);
		const sizeArray = binaryBloomFilter.slice(16, 24);
		const filterArray = binaryBloomFilter.slice(24, binaryBloomFilter.length);

		const bloomFilter = new BloomFilter(
		uint8ArrayToInt64(sizeArray),
		uint8ArrayToInt64(nbHashesArray),
		uint8ArrayToInt64(seedArray)
		uint8ArrayToInt64(nbHashesArray)
		);
		@@ -153,10 +145,2 @@
		/**
		* Get the seed of the filter
		* @return The filter seed
		*/
		get seed(): number {
		return this._seed;
		}

		/**
		* Get the hash count of the filter
		@@ -180,4 +164,3 @@ * @return The filter hash count
		this._size,
		this._nbHashes,
		this._seed
		this._nbHashes
		);
		@@ -208,4 +191,3 @@
		this._size,
		this._nbHashes,
		this._seed
		this._nbHashes
		);
		@@ -256,9 +238,8 @@ for (let i = 0; i < indexes.length; i++) {
		export(): Uint8Array {
		const exportArray = new Uint8Array(this._filter.length + 4 * 8); // Filter length + 4 number parameters
		exportArray.set(int64ToUint8Array(this.seed), 0);
		exportArray.set(int64ToUint8Array(this.nbHashes), 8);
		exportArray.set(int64ToUint8Array(this.length), 16);
		exportArray.set(int64ToUint8Array(this.size), 24);
		const exportArray = new Uint8Array(this._filter.length + 3 * 8); // Filter length + 3 number parameters
		exportArray.set(int64ToUint8Array(this.nbHashes), 0);
		exportArray.set(int64ToUint8Array(this.length), 8);
		exportArray.set(int64ToUint8Array(this.size), 16);

		const exportArrayStartIndex = 32;
		const exportArrayStartIndex = 24;
		for (let index = 0; index < this._filter.length; index += 1) {
		@@ -265,0 +246,0 @@ exportArray[index + exportArrayStartIndex] = this._filter[index];

src/utils.ts

		/* file : utils.ts
		MIT License

		Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall
		Original bloom-filter implementation: Copyright (c) 2017-2020 Thomas Minier & Arnaud Grall

		Bloomit changes: Copyright 2021 Kolja Blauhut

		Ably-forks/bloomit changes: Copyright 2021 Simon Woolf

		Permission is hereby granted, free of charge, to any person obtaining a copy
		@@ -29,2 +33,5 @@ of this software and associated documentation files (the "Software"), to deal

		const PRIME_STR = "'";
		const PRIME_BUF = Buffer.from(PRIME_STR);

		/**
		@@ -55,3 +62,2 @@ * Utilitaries functions
		* @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values.
		* @param seed the seed used for hashing
		* @return The results of the hash functions applied to the value (in hex or integer)
		@@ -74,26 +80,2 @@ * @memberof Utils
		/**
		* Apply Double Hashing to produce a n-hash
		*
		* This implementation used directly the value produced by the two hash functions instead of the functions themselves.
		* @see {@link http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=4060353E67A356EF9528D2C57C064F5A?doi=10.1.1.152.579&rep=rep1&type=pdf} for more details about double hashing.
		* @param n - The indice of the hash function we want to produce
		* @param hashA - The result of the first hash function applied to a value.
		* @param hashB - The result of the second hash function applied to a value.
		* @param size - The size of the datastructures associated to the hash context (ex: the size of a Bloom Filter)
		* @return The result of hash_n applied to a value.
		* @memberof Utils
		* @author Thomas Minier
		*/
		export function doubleHashing(
		n: number,
		hashA: number,
		hashB: number,
		size: number
		): number {
		// Cubic term avoids increased-probability-of-collision issue, see
		// http://peterd.org/pcd-diss.pdf s.6.5.4
		return Math.abs((hashA + nhashB + Math.floor((n*3 - n)/6)) % size);
		}

		/**
		* Generate a set of distinct indexes on interval [0, size) using the double hashing technique
		@@ -103,5 +85,7 @@ * @param element - The element to hash
		* @param number - The number of indexes desired
		* @param seed - The seed used
		* @param maxIterations - throw if we exceed this without finding a result of the
		* requested size; avoids a hard busy loop in the event of an algorithm failure. Defaults
		* to size*100
		* @return A array of indexes
		* @author Arnaud Grall
		* @author Arnaud Grall, Simon Woolf
		*/
		@@ -112,15 +96,34 @@ export function getDistinctIndices(
		number: number,
		seed?: number
		maxIterations: number = size * 100
		): Array<number> {
		const {first, second} = hashTwice(element);
		const indices: Array<number> = [];
		let n = 0;
		while(indices.length < number) {
		const index = doubleHashing(n, first, second, size);
		if(!indices.includes(index)) {
		indices.push(index);
		}
		n++;
		}
		return indices;
		let {first, second} = hashTwice(element);
		let index, i = 1;
		const indices: Array<number> = [];
		// Implements enhanced double hashing algorithm from
		// http://peterd.org/pcd-diss.pdf s.6.5.4
		while(indices.length < number) {
		index = first % size;
		if(!indices.includes(index)) {
		indices.push(index);
		}
		first = (first + second) % size;
		second = (second + i) % size;
		i++;

		if(i > size) {
		// Enhanced double hashing stops cycles of length less than `size` in the case where
		// size is coprime with the second hash. But you still get cycles of length `size`.
		// So if we reach there and haven't finished, append a prime to the input and
		// rehash.
		element = Buffer.isBuffer(element)
		? Buffer.concat([element, PRIME_BUF])
		: element + PRIME_STR;
		({first, second} = hashTwice(element));
		}

		if(maxIterations && i > maxIterations) {
		throw new Error('max iterations exceeded');
		}
		}
		return indices;
		}
		@@ -127,0 +130,0 @@

test/bloom-filter-test.js

		@@ -32,7 +32,6 @@ /* file : bloom-filter-test.js
		const targetRate = 0.1;
		const seed = Math.random();

		describe('construction', () => {
		it('should add element to the filter with #add', () => {
		const filter = BloomFilter.create(15, targetRate, seed);
		const filter = BloomFilter.create(15, targetRate);
		filter.add('alice');
		@@ -100,6 +99,6 @@ filter.add('bob');
		describe('#export', () => {
		const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate, seed);
		const filter = BloomFilter.from(['alice', 'bob', 'carl'], targetRate);
		it('should export a bloom filter to a Uint8Array', () => {
		const exported = filter.export();
		exported.length.should.equal(filter._filter.length + 4 * 8);
		exported.length.should.equal(filter._filter.length + 3 * 8);
		});
		@@ -110,3 +109,2 @@
		const newFilter = BloomFilter.import(exported);
		newFilter._seed.should.equal(filter._seed);
		newFilter.size.should.equal(filter.size);
		@@ -113,0 +111,0 @@ newFilter.length.should.equal(filter.length);

@ably/bloomit - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics