Comparing version 0.38.2 to 0.38.3
# Changelog | ||
## 0.38.3 | ||
* Refactoring `VPTree` memory layout. | ||
* Fixing `VPTree.nearestNeighbors` edge case. | ||
* Various `VPTree` optimizations. | ||
## 0.38.2 | ||
@@ -4,0 +10,0 @@ |
{ | ||
"name": "mnemonist", | ||
"version": "0.38.2", | ||
"version": "0.38.3", | ||
"description": "Curated collection of data structures for the JavaScript language.", | ||
@@ -81,3 +81,3 @@ "scripts": { | ||
"damerau-levenshtein": "^1.0.6", | ||
"eslint": "^7.20.0", | ||
"eslint": "^7.21.0", | ||
"leven": "^3.1.0", | ||
@@ -84,0 +84,0 @@ "lodash": "^4.17.21", |
@@ -135,2 +135,30 @@ /** | ||
/** | ||
* Same as above, but can work on sorted indices. | ||
* | ||
* @param {array} array - Haystack. | ||
* @param {array} array - Indices. | ||
* @param {any} value - Needle. | ||
* @return {number} | ||
*/ | ||
exports.lowerBoundIndices = function(array, indices, value, lo, hi) { | ||
var mid = 0; | ||
lo = typeof lo !== 'undefined' ? lo : 0; | ||
hi = typeof hi !== 'undefined' ? hi : array.length; | ||
while (lo < hi) { | ||
mid = (lo + hi) >>> 1; | ||
if (value <= array[indices[mid]]) { | ||
hi = mid; | ||
} | ||
else { | ||
lo = -~mid; | ||
} | ||
} | ||
return lo; | ||
}; | ||
/** | ||
* Function returning the upper bound of the given value in the array. | ||
@@ -137,0 +165,0 @@ * |
@@ -9,3 +9,3 @@ /** | ||
var isTypedArray = require('./typed-arrays.js').isTypedArray; | ||
var typed = require('./typed-arrays.js'); | ||
@@ -20,3 +20,3 @@ /** | ||
function isArrayLike(target) { | ||
return Array.isArray(target) || isTypedArray(target); | ||
return Array.isArray(target) || typed.isTypedArray(target); | ||
} | ||
@@ -54,2 +54,3 @@ | ||
// TODO: we could optimize when given target is array like | ||
forEach(target, function(value) { | ||
@@ -63,2 +64,29 @@ array[i++] = value; | ||
/** | ||
* Same as above but returns a supplementary indices array. | ||
* | ||
* @param {any} target - Iteration target. | ||
* @return {array} | ||
*/ | ||
function toArrayWithIndices(target) { | ||
var l = guessLength(target); | ||
var IndexArray = typeof l === 'number' ? | ||
typed.getPointerArray(l) : | ||
Array; | ||
var array = typeof l === 'number' ? new Array(l) : []; | ||
var indices = typeof l === 'number' ? new IndexArray(l) : []; | ||
var i = 0; | ||
// TODO: we could optimize when given target is array like | ||
forEach(target, function(value) { | ||
array[i] = value; | ||
indices[i] = i++; | ||
}); | ||
return [array, indices]; | ||
} | ||
/** | ||
* Exporting. | ||
@@ -69,1 +97,2 @@ */ | ||
exports.toArray = toArray; | ||
exports.toArrayWithIndices = toArrayWithIndices; |
@@ -13,2 +13,3 @@ /** | ||
size: number; | ||
D: number; | ||
@@ -15,0 +16,0 @@ // Constructor |
210
vp-tree.js
@@ -15,10 +15,12 @@ /** | ||
*/ | ||
var forEach = require('obliterator/foreach'), | ||
var iterables = require('./utils/iterables.js'), | ||
typed = require('./utils/typed-arrays.js'), | ||
inplaceQuickSortIndices = require('./sort/quick.js').inplaceQuickSortIndices, | ||
lowerBoundIndices = require('./utils/binary-search.js').lowerBoundIndices, | ||
Heap = require('./heap.js'); | ||
// TODO: implement better selection technique for the vantage point | ||
// The one minimizing spread of sample using stdev is usually the accepted one | ||
// TODO: rationalize registers. use getArrayPointers to optimize memory | ||
var getPointerArray = typed.getPointerArray; | ||
// TODO: if sorting to get median, can split | ||
// TODO: implement vantage point selection techniques (by swapping with last) | ||
// TODO: is this required to implement early termination for k <= size? | ||
@@ -31,4 +33,6 @@ /** | ||
return 1; | ||
if (a.distance > b.distance) | ||
return -1; | ||
return 0; | ||
@@ -42,49 +46,57 @@ } | ||
* @param {array} items - Items to index (will be mutated). | ||
* @param {array} indexes - Indexes of the items. | ||
* @param {array} indices - Indexes of the items. | ||
* @return {Float64Array} - The flat binary tree. | ||
*/ | ||
function createBinaryTree(distance, items, indexes) { | ||
var N = indexes.length, | ||
C = 0, | ||
data = new Float64Array(N * 4), | ||
stack = [0, indexes], | ||
distances = [], | ||
sortedDistances = [], | ||
function createBinaryTree(distance, items, indices) { | ||
var N = indices.length; | ||
var PointerArray = getPointerArray(N); | ||
var C = 0, | ||
nodes = new PointerArray(N), | ||
lefts = new PointerArray(N), | ||
rights = new PointerArray(N), | ||
mus = new Float64Array(N), | ||
stack = [0, 0, N], | ||
distances = new Float64Array(N), | ||
nodeIndex, | ||
currentIndexes, | ||
vantagePoint, | ||
medianIndex, | ||
lo, | ||
hi, | ||
mid, | ||
mu, | ||
left, | ||
right, | ||
i, | ||
l, | ||
h; | ||
l; | ||
while (stack.length) { | ||
currentIndexes = stack.pop(); | ||
hi = stack.pop(); | ||
lo = stack.pop(); | ||
nodeIndex = stack.pop(); | ||
// Getting our vantage point | ||
vantagePoint = currentIndexes.pop(); | ||
l = currentIndexes.length; | ||
vantagePoint = indices[hi - 1]; | ||
hi--; | ||
l = hi - lo; | ||
// Storing vantage point | ||
data[nodeIndex] = vantagePoint; | ||
nodes[nodeIndex] = vantagePoint; | ||
// If we only have few items left | ||
if (!l) | ||
// We are in a leaf | ||
if (l === 0) | ||
continue; | ||
// We only have two elements, the second one has to go right | ||
if (l === 1) { | ||
// We put remaining item to the right | ||
mu = distance(items[vantagePoint], items[currentIndexes[0]]); | ||
mu = distance(items[vantagePoint], items[indices[lo]]); | ||
data[nodeIndex + 1] = mu; | ||
mus[nodeIndex] = mu; | ||
// Right | ||
C += 4; | ||
data[nodeIndex + 3] = C; | ||
data[C] = currentIndexes[0]; | ||
C++; | ||
rights[nodeIndex] = C; | ||
nodes[C] = indices[lo]; | ||
@@ -95,48 +107,65 @@ continue; | ||
// Computing distance from vantage point to other points | ||
distances.length = l; | ||
sortedDistances.length = l; | ||
for (i = lo; i < hi; i++) | ||
distances[indices[i]] = distance(items[vantagePoint], items[indices[i]]); | ||
for (i = 0; i < l; i++) | ||
distances[i] = distance(items[vantagePoint], items[currentIndexes[i]]); | ||
inplaceQuickSortIndices(distances, indices, lo, hi); | ||
// Finding median of distances | ||
h = l / 2; | ||
sortedDistances = distances.slice().sort(); | ||
medianIndex = h - 1; | ||
mu = (medianIndex === (medianIndex | 0)) ? | ||
(sortedDistances[medianIndex] + sortedDistances[medianIndex + 1]) / 2 : | ||
sortedDistances[Math.ceil(medianIndex)]; | ||
medianIndex = lo + (l / 2) - 1; | ||
// Need to interpolate? | ||
if (medianIndex === (medianIndex | 0)) { | ||
mu = ( | ||
distances[indices[medianIndex]] + | ||
distances[indices[medianIndex + 1]] | ||
) / 2; | ||
} | ||
else { | ||
mu = distances[indices[Math.ceil(medianIndex)]]; | ||
} | ||
// Storing mu | ||
data[nodeIndex + 1] = mu; | ||
mus[nodeIndex] = mu; | ||
// Dispatching the indexes left & right | ||
left = []; | ||
right = []; | ||
mid = lowerBoundIndices(distances, indices, mu, lo, hi); | ||
for (i = 0; i < l; i++) { | ||
if (distances[i] >= mu) | ||
right.push(currentIndexes[i]); | ||
else | ||
left.push(currentIndexes[i]); | ||
} | ||
// console.log('Vantage point', items[vantagePoint], vantagePoint); | ||
// console.log('mu =', mu); | ||
// console.log('lo =', lo); | ||
// console.log('hi =', hi); | ||
// console.log('mid =', mid); | ||
// console.log('need to split', Array.from(indices).slice(lo, hi).map(i => { | ||
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]]; | ||
// })); | ||
// Right | ||
if (right.length) { | ||
C += 4; | ||
data[nodeIndex + 3] = C; | ||
stack.push(C); | ||
stack.push(right); | ||
if (hi - mid > 0) { | ||
C++; | ||
rights[nodeIndex] = C; | ||
stack.push(C, mid, hi); | ||
// console.log('Went right with ', Array.from(indices).slice(mid, hi).map(i => { | ||
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]]; | ||
// })); | ||
} | ||
// Left | ||
if (left.length) { | ||
C += 4; | ||
data[nodeIndex + 2] = C; | ||
stack.push(C); | ||
stack.push(left); | ||
if (mid - lo > 0) { | ||
C++; | ||
lefts[nodeIndex] = C; | ||
stack.push(C, lo, mid); | ||
// console.log('Went left with', Array.from(indices).slice(lo, mid).map(i => { | ||
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]]; | ||
// })); | ||
} | ||
// console.log(); | ||
} | ||
return data; | ||
return { | ||
nodes: nodes, | ||
lefts: lefts, | ||
rights: rights, | ||
mus: mus | ||
}; | ||
} | ||
@@ -160,16 +189,18 @@ | ||
this.distance = distance; | ||
this.items = []; | ||
this.heap = new Heap(comparator); | ||
this.D = 0; | ||
var indexes = [], | ||
self = this, | ||
i = 0; | ||
var arrays = iterables.toArrayWithIndices(items); | ||
this.items = arrays[0]; | ||
var indices = arrays[1]; | ||
forEach(items, function(value) { | ||
self.items.push(value); | ||
indexes.push(i++); | ||
}); | ||
// Creating the binary tree | ||
this.size = indices.length; | ||
// Creating the binary tree | ||
this.size = indexes.length; | ||
this.data = createBinaryTree(distance, this.items, indexes); | ||
var result = createBinaryTree(distance, this.items, indices); | ||
this.nodes = result.nodes; | ||
this.lefts = result.lefts; | ||
this.rights = result.rights; | ||
this.mus = result.mus; | ||
} | ||
@@ -185,3 +216,3 @@ | ||
VPTree.prototype.nearestNeighbors = function(k, query) { | ||
var neighbors = new Heap(comparator), | ||
var neighbors = this.heap, | ||
stack = [0], | ||
@@ -197,5 +228,7 @@ tau = Infinity, | ||
this.D = 0; | ||
while (stack.length) { | ||
nodeIndex = stack.pop(); | ||
itemIndex = this.data[nodeIndex]; | ||
itemIndex = this.nodes[nodeIndex]; | ||
vantagePoint = this.items[itemIndex]; | ||
@@ -205,2 +238,3 @@ | ||
d = this.distance(vantagePoint, query); | ||
this.D++; | ||
@@ -214,8 +248,9 @@ if (d < tau) { | ||
// Adjusting tau | ||
tau = neighbors.peek().distance; | ||
// Adjusting tau (only if we already have k items, else it stays Infinity) | ||
if (neighbors.size >= k) | ||
tau = neighbors.peek().distance; | ||
} | ||
leftIndex = this.data[nodeIndex + 2]; | ||
rightIndex = this.data[nodeIndex + 3]; | ||
leftIndex = this.lefts[nodeIndex]; | ||
rightIndex = this.rights[nodeIndex]; | ||
@@ -226,3 +261,3 @@ // We are a leaf | ||
mu = this.data[nodeIndex + 1]; | ||
mu = this.mus[nodeIndex]; | ||
@@ -232,3 +267,3 @@ if (d < mu) { | ||
stack.push(leftIndex); | ||
if (rightIndex && d >= mu - tau) // ALT | ||
if (rightIndex && d >= mu - tau) // Might not be necessary to test d | ||
stack.push(rightIndex); | ||
@@ -239,3 +274,3 @@ } | ||
stack.push(rightIndex); | ||
if (leftIndex && d < mu + tau) // ALT | ||
if (leftIndex && d < mu + tau) // Might not be necessary to test d | ||
stack.push(leftIndex); | ||
@@ -271,5 +306,7 @@ } | ||
this.D = 0; | ||
while (stack.length) { | ||
nodeIndex = stack.pop(); | ||
itemIndex = this.data[nodeIndex]; | ||
itemIndex = this.nodes[nodeIndex]; | ||
vantagePoint = this.items[itemIndex]; | ||
@@ -279,2 +316,3 @@ | ||
d = this.distance(vantagePoint, query); | ||
this.D++; | ||
@@ -284,4 +322,4 @@ if (d <= radius) | ||
leftIndex = this.data[nodeIndex + 2]; | ||
rightIndex = this.data[nodeIndex + 3]; | ||
leftIndex = this.lefts[nodeIndex]; | ||
rightIndex = this.rights[nodeIndex]; | ||
@@ -292,3 +330,3 @@ // We are a leaf | ||
mu = this.data[nodeIndex + 1]; | ||
mu = this.mus[nodeIndex]; | ||
@@ -298,3 +336,3 @@ if (d < mu) { | ||
stack.push(leftIndex); | ||
if (rightIndex && d >= mu - radius) // Might not be necessary to test | ||
if (rightIndex && d >= mu - radius) // Might not be necessary to test d | ||
stack.push(rightIndex); | ||
@@ -305,3 +343,3 @@ } | ||
stack.push(rightIndex); | ||
if (leftIndex && d < mu + radius) // Might not be necessary to test | ||
if (leftIndex && d < mu + radius) // Might not be necessary to test d | ||
stack.push(leftIndex); | ||
@@ -308,0 +346,0 @@ } |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
367862
13182