bloom-filters
Advanced tools
Comparing version 1.3.0 to 1.3.1
@@ -27,22 +27,22 @@ /* file : api.ts | ||
var bloom_filter_1 = require("./bloom/bloom-filter"); | ||
exports.BloomFilter = bloom_filter_1.default; | ||
Object.defineProperty(exports, "BloomFilter", { enumerable: true, get: function () { return bloom_filter_1.default; } }); | ||
var counting_bloom_filter_1 = require("./bloom/counting-bloom-filter"); | ||
exports.CountingBloomFilter = counting_bloom_filter_1.default; | ||
Object.defineProperty(exports, "CountingBloomFilter", { enumerable: true, get: function () { return counting_bloom_filter_1.default; } }); | ||
var partitioned_bloom_filter_1 = require("./bloom/partitioned-bloom-filter"); | ||
exports.PartitionedBloomFilter = partitioned_bloom_filter_1.default; | ||
Object.defineProperty(exports, "PartitionedBloomFilter", { enumerable: true, get: function () { return partitioned_bloom_filter_1.default; } }); | ||
var count_min_sketch_1 = require("./sketch/count-min-sketch"); | ||
exports.CountMinSketch = count_min_sketch_1.default; | ||
Object.defineProperty(exports, "CountMinSketch", { enumerable: true, get: function () { return count_min_sketch_1.default; } }); | ||
var hyperloglog_1 = require("./sketch/hyperloglog"); | ||
exports.HyperLogLog = hyperloglog_1.default; | ||
Object.defineProperty(exports, "HyperLogLog", { enumerable: true, get: function () { return hyperloglog_1.default; } }); | ||
var topk_1 = require("./sketch/topk"); | ||
exports.TopK = topk_1.default; | ||
Object.defineProperty(exports, "TopK", { enumerable: true, get: function () { return topk_1.default; } }); | ||
var min_hash_1 = require("./sketch/min-hash"); | ||
exports.MinHash = min_hash_1.MinHash; | ||
Object.defineProperty(exports, "MinHash", { enumerable: true, get: function () { return min_hash_1.MinHash; } }); | ||
var min_hash_factory_1 = require("./sketch/min-hash-factory"); | ||
exports.MinHashFactory = min_hash_factory_1.default; | ||
Object.defineProperty(exports, "MinHashFactory", { enumerable: true, get: function () { return min_hash_factory_1.default; } }); | ||
var cuckoo_filter_1 = require("./cuckoo/cuckoo-filter"); | ||
exports.CuckooFilter = cuckoo_filter_1.default; | ||
Object.defineProperty(exports, "CuckooFilter", { enumerable: true, get: function () { return cuckoo_filter_1.default; } }); | ||
var invertible_bloom_lookup_tables_1 = require("./iblt/invertible-bloom-lookup-tables"); | ||
exports.InvertibleBloomFilter = invertible_bloom_lookup_tables_1.default; | ||
Object.defineProperty(exports, "InvertibleBloomFilter", { enumerable: true, get: function () { return invertible_bloom_lookup_tables_1.default; } }); | ||
var cell_1 = require("./iblt/cell"); | ||
exports.Cell = cell_1.default; | ||
Object.defineProperty(exports, "Cell", { enumerable: true, get: function () { return cell_1.default; } }); |
@@ -25,7 +25,19 @@ /* file : base-filter.ts | ||
'use strict'; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
}) : (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
o[k2] = m[k]; | ||
})); | ||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { | ||
Object.defineProperty(o, "default", { enumerable: true, value: v }); | ||
}) : function(o, v) { | ||
o["default"] = v; | ||
}); | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; | ||
result["default"] = mod; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); | ||
__setModuleDefault(result, mod); | ||
return result; | ||
@@ -64,3 +76,3 @@ }; | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -76,3 +88,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -79,0 +91,0 @@ }); |
@@ -120,3 +120,3 @@ /* file : bloom-filter.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -132,3 +132,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -135,0 +135,0 @@ }); |
@@ -117,3 +117,3 @@ /* file : counting-bloom-filter.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -128,3 +128,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -131,0 +131,0 @@ }); |
@@ -160,3 +160,3 @@ /* file : partitioned-bloom-filter.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -171,3 +171,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -182,3 +182,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -193,3 +193,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -196,0 +196,0 @@ }); |
@@ -25,2 +25,14 @@ /* file : bucket.ts | ||
'use strict'; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
}) : (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
o[k2] = m[k]; | ||
})); | ||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { | ||
Object.defineProperty(o, "default", { enumerable: true, value: v }); | ||
}) : function(o, v) { | ||
o["default"] = v; | ||
}); | ||
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { | ||
@@ -32,2 +44,9 @@ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; | ||
}; | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); | ||
__setModuleDefault(result, mod); | ||
return result; | ||
}; | ||
var __metadata = (this && this.__metadata) || function (k, v) { | ||
@@ -39,9 +58,2 @@ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); | ||
}; | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; | ||
result["default"] = mod; | ||
return result; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
@@ -77,3 +89,3 @@ var lodash_eq_1 = __importDefault(require("lodash.eq")); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -88,3 +100,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -91,0 +103,0 @@ }); |
@@ -136,3 +136,3 @@ /* file : cuckoo-filter.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -147,3 +147,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -158,3 +158,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -169,3 +169,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -180,3 +180,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -191,3 +191,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -194,0 +194,0 @@ }); |
@@ -46,2 +46,3 @@ /* file : exportable.ts | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.AutoExportable = exports.Parameter = exports.Field = exports.Exportable = exports.cloneObject = exports.cloneField = void 0; | ||
require("reflect-metadata"); | ||
@@ -48,0 +49,0 @@ /** |
@@ -26,2 +26,3 @@ /* file : formulas.ts | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.optimalHashes = exports.optimalFilterSize = void 0; | ||
/** | ||
@@ -28,0 +29,0 @@ * Various formulas used with Bloom Filters |
@@ -98,3 +98,3 @@ /* file: cell.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -109,3 +109,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -120,3 +120,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -123,0 +123,0 @@ }); |
@@ -148,3 +148,3 @@ /* file : invertible-bloom-lookup-tables.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -159,3 +159,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -171,3 +171,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -182,3 +182,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -185,0 +185,0 @@ }); |
@@ -139,3 +139,3 @@ /* file : count-min-sketch.ts | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -150,3 +150,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -161,3 +161,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -164,0 +164,0 @@ }); |
@@ -104,3 +104,3 @@ "use strict"; | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -107,0 +107,0 @@ }); |
@@ -74,2 +74,3 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.MinHash = void 0; | ||
var base_filter_1 = __importDefault(require("../base-filter")); | ||
@@ -129,3 +130,3 @@ var exportable_1 = require("../exportable"); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -132,0 +133,0 @@ }); |
@@ -11,5 +11,15 @@ import BaseFilter from '../base-filter'; | ||
/** | ||
* A TopK uses a Count-Min Sketch to calculate the top-K frequent elements in a stream. | ||
* An element in a TopK | ||
* @author Thomas Minier | ||
*/ | ||
interface TopkElement extends HeapElement { | ||
rank: number; | ||
} | ||
/** | ||
* A TopK computes the ranking of elements in a multiset (by an arbitrary score) and returns the `k` results with the highest scores. | ||
* This implementation of the TopK problem sorts items based on their estimated cardinality in the multiset. | ||
* It is based on a Count Min Sketch, for estimating the cardinality of items, and a MinHeap, for implementing a sliding window over the `k` results with the highest scores. | ||
* @author Thomas Minier | ||
* @author Arnaud Grall | ||
*/ | ||
export default class TopK extends BaseFilter { | ||
@@ -38,14 +48,14 @@ private _k; | ||
/** | ||
* Get the top-k values as an array of objects {value: string, frequency: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number} | ||
* Get the top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
*/ | ||
values(): HeapElement[]; | ||
values(): TopkElement[]; | ||
/** | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number}. | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number, rank: number}. | ||
* WARNING: With this method, values are produced on-the-fly, hence you should not modify the TopK | ||
* while the iteration is not done, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number} | ||
* while the iteration is not completed, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number, rank: number} | ||
*/ | ||
iterator(): Iterator<HeapElement>; | ||
iterator(): Iterator<TopkElement>; | ||
} | ||
export {}; |
@@ -86,3 +86,3 @@ "use strict"; | ||
/** | ||
* A Minheap stores items sorted by ascending frequency | ||
* A MinHeap stores items sorted by ascending frequency | ||
* @author Thomas Minier | ||
@@ -101,3 +101,3 @@ */ | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -112,3 +112,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -170,4 +170,7 @@ }); | ||
/** | ||
* A TopK uses a Count-Min Sketch to calculate the top-K frequent elements in a stream. | ||
* A TopK computes the ranking of elements in a multiset (by an arbitrary score) and returns the `k` results with the highest scores. | ||
* This implementation of the TopK problem sorts items based on their estimated cardinality in the multiset. | ||
* It is based on a Count Min Sketch, for estimating the cardinality of items, and a MinHeap, for implementing a sliding window over the `k` results with the highest scores. | ||
* @author Thomas Minier | ||
* @author Arnaud Grall | ||
*/ | ||
@@ -223,4 +226,4 @@ var TopK = /** @class */ (function (_super) { | ||
/** | ||
* Get the top-k values as an array of objects {value: string, frequency: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number} | ||
* Get the top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
*/ | ||
@@ -230,3 +233,8 @@ TopK.prototype.values = function () { | ||
for (var i = this._heap.length - 1; i > 0; i--) { | ||
res.push(this._heap.get(i)); | ||
var elt = this._heap.get(i); | ||
res.push({ | ||
value: elt.value, | ||
frequency: elt.frequency, | ||
rank: this._heap.length - i | ||
}); | ||
} | ||
@@ -236,6 +244,6 @@ return res; | ||
/** | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number}. | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number, rank: number}. | ||
* WARNING: With this method, values are produced on-the-fly, hence you should not modify the TopK | ||
* while the iteration is not done, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number} | ||
* while the iteration is not completed, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number, rank: number} | ||
*/ | ||
@@ -245,3 +253,3 @@ TopK.prototype.iterator = function () { | ||
return function () { | ||
var i; | ||
var i, elt; | ||
return __generator(this, function (_a) { | ||
@@ -254,3 +262,8 @@ switch (_a.label) { | ||
if (!(i > 0)) return [3 /*break*/, 4]; | ||
return [4 /*yield*/, heap.get(i)]; | ||
elt = heap.get(i); | ||
return [4 /*yield*/, { | ||
value: elt.value, | ||
frequency: elt.frequency, | ||
rank: heap.length - i | ||
}]; | ||
case 2: | ||
@@ -257,0 +270,0 @@ _a.sent(); |
@@ -40,2 +40,3 @@ /* file : utils.ts | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.hex2bin = exports.power2 = exports.getDefaultSeed = exports.hashIntAndString = exports.hashAsString = exports.hashAsInt = exports.isEmptyBuffer = exports.xorBuffer = exports.randomInt = exports.getIndices = exports.getDistinctIndices = exports.doubleHashing = exports.allInOneHashTwice = exports.hashTwiceAsString = exports.hashTwice = exports.allocateArray = void 0; | ||
var xxhashjs_1 = __importDefault(require("xxhashjs")); | ||
@@ -42,0 +43,0 @@ /** |
{ | ||
"name": "bloom-filters", | ||
"version": "1.3.0", | ||
"version": "1.3.1", | ||
"description": "JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash", | ||
@@ -5,0 +5,0 @@ "main": "dist/api.js", |
# Bloom-Filters | ||
[![Build Status](https://travis-ci.com/Callidon/bloom-filters.svg?branch=master)](https://travis-ci.com/Callidon/bloom-filters) | ||
**Keywords:** bloom, filter, bloom filter, probabilistic, datastructure | ||
JavaScript/TypeScript implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash. | ||
**This package rely on [non-cryptographic hash functions](https://cyan4973.github.io/xxHash/)**. | ||
JS implementation of probabilistic data structures: Bloom Filter (and its derived), HyperLogLog, Count-Min Sketch, Top-K and MinHash | ||
📕[Online documentation](https://callidon.github.io/bloom-filters/) | ||
**Use non-cryptographic hash internally since (v0.7.0)** [XXHASH](https://cyan4973.github.io/xxHash/) | ||
**Keywords:** *bloom filter, cuckoo filter, KyperLogLog, MinHash, Top-K, probabilistic data-structures.* | ||
**Breaking API changes from the 0.7.1 to the 0.8.0 version.** | ||
[Online documentation](https://callidon.github.io/bloom-filters/) | ||
# Table of contents | ||
@@ -40,2 +37,8 @@ | ||
**Supported platforms** | ||
* [Node.js](https://nodejs.org): *v4.0.0* or higher | ||
* [Google Chrome](https://www.google.com/intl/en/chrome/): *v41* or higher | ||
* [Mozilla Firefox](https://www.mozilla.org/en-US/firefox/new/): *v34* or higher | ||
* [Microsoft Edge](https://www.microsoft.com/en-US/edge): *v12* or higher | ||
## Data structures | ||
@@ -51,6 +54,6 @@ | ||
**Methods**: | ||
#### Methods | ||
* `add(element: string) -> void`: add an element into the filter. | ||
* `has(element: string) -> boolean'`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `equals(other: BloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -97,6 +100,6 @@ * `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
**Methods**: | ||
#### Methods | ||
* `add(element: string) -> void`: add an element into the filter. | ||
* `has(element: string) -> boolean'`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `equals(other: PartitionedBloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -138,7 +141,7 @@ * `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
**Methods**: | ||
#### Methods | ||
* `add(element: string) -> void`: add an element into the filter. | ||
* `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
* `has(element: string) -> boolean'`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `equals(other: CuckooFilter) -> boolean`: Test if two filters are equals. | ||
@@ -172,3 +175,3 @@ * `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
**WARNING**: The error rate cannot be higher than `1 * 10^-18`. After this, You will get an error saying that the fingerprint length is higher than the hash length. | ||
**WARNING**: The error rate cannot be higher than `1 * 10^-18`. Above this value, you will get an exception stating that the fingerprint length is higher than the hash length. | ||
@@ -181,7 +184,7 @@ ### Counting Bloom Filter | ||
**Methods**: | ||
#### Methods | ||
* `add(element: string) -> void`: add an element into the filter. | ||
* `remove(element: string) -> boolean`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
* `has(element: string) -> boolean'`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `has(element: string) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `equals(other: CountingBloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -229,3 +232,3 @@ * `rate() -> number`: compute the filter's false positive rate (or error rate). | ||
**Methods**: | ||
#### Methods | ||
@@ -273,3 +276,3 @@ * `update(element: string, count = 1) -> void`: add `count` occurences of an element into the sketch. | ||
**Methods**: | ||
#### Methods | ||
@@ -312,6 +315,9 @@ * `update(element: string) -> void`: add a new occurence of an element to the sketch. | ||
**Methods**: | ||
#### `MinHashFactory` methods | ||
* `create() -> MinHash`: create a new empty MinHash structure, using the parameters of the factory. | ||
#### `MinHash` methods | ||
* `add(element: number) -> void`: add a new element to the set. | ||
* `bulkLoad(elements: number[]) -> void`: add several new elements to the set in an efficient manner. | ||
* `bulkLoad(elements: number[]) -> void`: efficently add several new elements to the set. | ||
* `isEmpty() -> boolean`: test if the signature of the MinHash is empty. | ||
@@ -325,3 +331,3 @@ * `compareWith(other: MinHash) -> number`: estimate the Jaccard similarity coefficient with another MinHash set. | ||
// it uses 10 random hash functions and expect to see a maximum value of 999 | ||
const sketch = new MinHasFactory(10, 999) | ||
const factory = new MinHashFactory(10, 999) | ||
@@ -336,3 +342,3 @@ // create two empty MinHash | ||
// the MinHash also supports bulk loading | ||
// the MinHash class also supports bulk loading | ||
secondSet.bulkLoad([1, 3, 4]) | ||
@@ -342,3 +348,3 @@ | ||
const jaccardSim = fistSet.compareWith(secondSet) | ||
console.Log(`The estimated Jaccard similarity is ${jaccardSim}`) | ||
console.log(`The estimated Jaccard similarity is ${jaccardSim}`) | ||
``` | ||
@@ -348,10 +354,22 @@ | ||
Given a multiset of elements, the Top-K problem is to compute the ranking of these elements (by an arbitrary score) and returns the `k` results with the highest scores. | ||
This package provides an implementation of the TopK problem that sort items based on their estimated cardinality in the multiset. It is based on a Count Min Sketch, for estimating the cardinality of items, and a MinHeap, for implementing a sliding window over the `k` results with the highest scores. | ||
Given a multiset of elements, the **Top-K problem** is to compute the ranking of these elements (by an arbitrary score) and returns the `k` results with the highest scores. | ||
This package provides an implementation of the Top-K problem that sort items based on their estimated cardinality in the multiset. It is based on a Count Min Sketch, for estimating the cardinality of items, and a MinHeap, for implementing a sliding window over the `k` results with the highest scores. | ||
**Methods**: | ||
Items produced by the `TopK` class are JavaScript objects with the following content (shown in Typescript notation). | ||
```typescript | ||
interface TopkElement { | ||
// The element's value | ||
value: string, | ||
// The element's frequency | ||
frequency: number, | ||
// The element's rank in the TopK, ranging from 1 to k | ||
rank: number | ||
} | ||
``` | ||
#### Methods | ||
* `add(element: string) -> void`: add a new occurence of an element to the sketch. | ||
* `values() -> Array<{value: string, frequency: number}>`: get the top-k values as an array of objects `{value: string, frequency: number}`. | ||
* `iterator() -> Iterator<{value: string, frequency: number}>`: get the top-k values as an iterator that yields objects `{value: string, frequency: number}`. | ||
* `values() -> Array<TopkElement>`: get the top-k values as an array of objects. | ||
* `iterator() -> Iterator<TopkElement>`: get the top-k values as an iterator that yields objects. | ||
@@ -369,7 +387,5 @@ ```javascript | ||
// print the topk | ||
let pos = 1 | ||
// print the top k values | ||
for(let item of topk.values()) { | ||
console.log(`Item "${item.value}" is in position ${pos} with an estimated frequency of ${item.frequency}`) | ||
pos++ | ||
console.log(`Item "${item.value}" is in position ${item.rank} with an estimated frequency of ${item.frequency}`) | ||
} | ||
@@ -391,6 +407,7 @@ // Output: | ||
**Methods** | ||
#### Methods | ||
* `add(element: Buffer) -> void`: add an element into the filter. | ||
* `remove(element: Buffer) -> void`: delete an element from the filter, returning True if the deletion was a success and False otherwise. | ||
* `has(element: Buffer) -> boolean'`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `has(element: Buffer) -> boolean`: Test an element for membership, returning False if the element is definitively not in the filter and True is the element might be in the filter. | ||
* `equals(other: InvertibleBloomFilter) -> boolean`: Test if two filters are equals. | ||
@@ -397,0 +414,0 @@ * `substract(remote: InvertibleBloomFilter)`: peform the XOR substraction of two IBLTs. |
@@ -40,5 +40,13 @@ /* file: topk.ts | ||
/** | ||
* A Minheap stores items sorted by ascending frequency | ||
* An element in a TopK | ||
* @author Thomas Minier | ||
*/ | ||
interface TopkElement extends HeapElement { | ||
rank: number | ||
} | ||
/** | ||
* A MinHeap stores items sorted by ascending frequency | ||
* @author Thomas Minier | ||
*/ | ||
class MinHeap { | ||
@@ -125,4 +133,7 @@ private _content: HeapElement[] | ||
/** | ||
* A TopK uses a Count-Min Sketch to calculate the top-K frequent elements in a stream. | ||
* A TopK computes the ranking of elements in a multiset (by an arbitrary score) and returns the `k` results with the highest scores. | ||
* This implementation of the TopK problem sorts items based on their estimated cardinality in the multiset. | ||
* It is based on a Count Min Sketch, for estimating the cardinality of items, and a MinHeap, for implementing a sliding window over the `k` results with the highest scores. | ||
* @author Thomas Minier | ||
* @author Arnaud Grall | ||
*/ | ||
@@ -200,9 +211,14 @@ @AutoExportable('TopK', ['_seed']) | ||
/** | ||
* Get the top-k values as an array of objects {value: string, frequency: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number} | ||
* Get the top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
* @return The top-k values as an array of objects {value: string, frequency: number, rank: number} | ||
*/ | ||
values (): HeapElement[] { | ||
values (): TopkElement[] { | ||
const res = [] | ||
for (let i = this._heap.length - 1; i > 0; i--) { | ||
res.push(this._heap.get(i)!) | ||
const elt = this._heap.get(i)! | ||
res.push({ | ||
value: elt.value, | ||
frequency: elt.frequency, | ||
rank: this._heap.length - i | ||
}) | ||
} | ||
@@ -213,12 +229,17 @@ return res | ||
/** | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number}. | ||
* Get the top-k values as an iterator that yields objects {value: string, frequency: number, rank: number}. | ||
* WARNING: With this method, values are produced on-the-fly, hence you should not modify the TopK | ||
* while the iteration is not done, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number} | ||
* while the iteration is not completed, otherwise the generated values may not respect the TopK properties. | ||
* @return The top-k values as an iterator of object {value: string, frequency: number, rank: number} | ||
*/ | ||
iterator (): Iterator<HeapElement> { | ||
iterator (): Iterator<TopkElement> { | ||
const heap = this._heap | ||
return function *() { | ||
for (let i = heap.length - 1; i > 0; i--) { | ||
yield heap.get(i)! | ||
const elt = heap.get(i)! | ||
yield { | ||
value: elt.value, | ||
frequency: elt.frequency, | ||
rank: heap.length - i | ||
} | ||
} | ||
@@ -225,0 +246,0 @@ }() |
@@ -48,2 +48,3 @@ /* file : topk-test.js | ||
current.frequency.should.be.below(prev.frequency) | ||
current.rank.should.equal(i + 1) | ||
prev = current | ||
@@ -70,4 +71,6 @@ i++ | ||
for (let current of topk.values()) { | ||
current.should.have.all.keys('value', 'rank', 'frequency') | ||
current.value.should.equal(expectedTop[i]) | ||
current.frequency.should.be.below(prev.frequency) | ||
current.rank.should.equal(i + 1) | ||
prev = current | ||
@@ -92,4 +95,6 @@ i++ | ||
for (let current of topk.iterator()) { | ||
current.should.have.all.keys('value', 'rank', 'frequency') | ||
current.value.should.equal(expectedTop[i]) | ||
current.frequency.should.be.below(prev.frequency) | ||
current.rank.should.equal(i + 1) | ||
prev = current | ||
@@ -116,4 +121,6 @@ i++ | ||
for (let current of topk.iterator()) { | ||
current.should.have.all.keys('value', 'rank', 'frequency') | ||
current.value.should.equal(expectedTop[i]) | ||
current.frequency.should.be.below(prev.frequency) | ||
current.rank.should.equal(i + 1) | ||
prev = current | ||
@@ -120,0 +127,0 @@ i++ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
440862
9945
523