fast-fuzzy
Advanced tools
Comparing version 1.6.2 to 1.8.0
375
lib/fuzzy.js
"use strict"; | ||
function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _nonIterableSpread(); } | ||
function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance"); } | ||
function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.prototype.toString.call(iter) === "[object Arguments]") return Array.from(iter); } | ||
function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = new Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } } | ||
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } | ||
@@ -159,3 +151,14 @@ | ||
function walkBack(rows) { | ||
function walkBack(rows, length) { | ||
// search term was empty string, return perfect score | ||
if (rows.length === 1) { | ||
return { | ||
score: 1, | ||
match: { | ||
index: 0, | ||
length: 0 | ||
} | ||
}; | ||
} | ||
var lastRow = rows[rows.length - 1]; | ||
@@ -165,3 +168,3 @@ var minValue = lastRow[0]; | ||
for (var i = 1; i < lastRow.length; i++) { | ||
for (var i = 1; i < length; i++) { | ||
var val = lastRow[i]; | ||
@@ -188,5 +191,7 @@ | ||
return { | ||
start: start, | ||
end: minIndex, | ||
value: minValue | ||
score: 1 - minValue / (rows.length - 1), | ||
match: { | ||
index: start - 1, | ||
length: minIndex - start + 1 | ||
} | ||
}; | ||
@@ -198,22 +203,12 @@ } // the fuzzy scoring algorithm: a modification of levenshtein proposed by Peter H. Sellers | ||
function levenshteinSellers(term, candidate) { | ||
if (term.length === 0) { | ||
return { | ||
score: 1, | ||
match: { | ||
index: 0, | ||
length: 0 | ||
} | ||
}; | ||
} | ||
function levenshteinSellers(term, candidate, rows) { | ||
var prefixLength = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0; | ||
var rows = new Array(term.length + 1); | ||
rows[0] = new Array(candidate.length + 1).fill(0); | ||
for (var i = 0; i < term.length; i++) { | ||
rows[i + 1] = rows[i + 1] || []; | ||
var rowA = rows[i]; | ||
var rowB = rows[i + 1] = []; | ||
var rowB = rows[i + 1]; | ||
rowB[0] = i + 1; | ||
for (var j = 0; j < candidate.length; j++) { | ||
for (var j = prefixLength; j < candidate.length; j++) { | ||
var cost = term[i] === candidate[j] ? 0 : 1; | ||
@@ -230,11 +225,2 @@ var m = void 0; | ||
} | ||
var results = walkBack(rows); | ||
return { | ||
score: 1 - results.value / term.length, | ||
match: { | ||
index: results.start - 1, | ||
length: results.end - results.start + 1 | ||
} | ||
}; | ||
} // an implementation of the sellers algorithm using damerau-levenshtein as a base | ||
@@ -245,23 +231,13 @@ // has all the runtime characteristics of the above, but punishes transpositions less, | ||
function damerauLevenshteinSellers(term, candidate) { | ||
if (term.length === 0) { | ||
return { | ||
score: 1, | ||
match: { | ||
index: 0, | ||
length: 0 | ||
} | ||
}; | ||
} | ||
function damerauLevenshteinSellers(term, candidate, rows) { | ||
var prefixLength = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : 0; | ||
var rows = new Array(term.length + 1); | ||
rows[0] = new Array(candidate.length + 1).fill(0); | ||
for (var i = 0; i < term.length; i++) { | ||
rows[i + 1] = rows[i + 1] || []; | ||
var rowA = rows[i - 1]; | ||
var rowB = rows[i]; | ||
var rowC = rows[i + 1] = []; | ||
var rowC = rows[i + 1]; | ||
rowC[0] = i + 1; | ||
for (var j = 0; j < candidate.length; j++) { | ||
for (var j = prefixLength; j < candidate.length; j++) { | ||
var cost = term[i] === candidate[j] ? 0 : 1; | ||
@@ -279,66 +255,226 @@ var m = void 0; | ||
} | ||
} // method for creating a trie from search candidates | ||
// using a trie can significantly improve search time | ||
var results = walkBack(rows); | ||
return { | ||
score: 1 - results.value / term.length, | ||
match: { | ||
index: results.start - 1, | ||
length: results.end - results.start + 1 | ||
function trieInsert(trie, string, item) { | ||
var walker = trie; | ||
var _iteratorNormalCompletion = true; | ||
var _didIteratorError = false; | ||
var _iteratorError = undefined; | ||
try { | ||
for (var _iterator = string[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { | ||
var char = _step.value; | ||
// add child node if not already present | ||
if (walker.children[char] == null) { | ||
walker.children[char] = { | ||
children: {}, | ||
candidates: [], | ||
depth: 0 | ||
}; | ||
} // log max depth of this subtree | ||
walker.depth = Math.max(walker.depth, string.length); // step into child node | ||
walker = walker.children[char]; | ||
} // log max depth of this subtree | ||
} catch (err) { | ||
_didIteratorError = true; | ||
_iteratorError = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion && _iterator.return != null) { | ||
_iterator.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError) { | ||
throw _iteratorError; | ||
} | ||
} | ||
}; | ||
} | ||
walker.depth = Math.max(walker.depth, string.length); | ||
walker.candidates.push(item); | ||
} // transforms a list of candidates into objects with normalized search keys, | ||
// and inserts them into a trie | ||
// the keySelector is used to pick strings from an object to search by | ||
function createSearchTrie(trie, index, items, options) { | ||
var _iteratorNormalCompletion2 = true; | ||
var _didIteratorError2 = false; | ||
var _iteratorError2 = undefined; | ||
try { | ||
var _loop = function _loop() { | ||
var item = _step2.value; | ||
var candidates = arrayWrap(options.keySelector(item)).map(function (key) { | ||
return { | ||
index: index, | ||
item: item, | ||
normalized: normalizeWithMap(key, options) | ||
}; | ||
}); | ||
index++; | ||
var _iteratorNormalCompletion3 = true; | ||
var _didIteratorError3 = false; | ||
var _iteratorError3 = undefined; | ||
try { | ||
for (var _iterator3 = candidates[Symbol.iterator](), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { | ||
var candidate = _step3.value; | ||
trieInsert(trie, candidate.normalized.normal, candidate); | ||
} | ||
} catch (err) { | ||
_didIteratorError3 = true; | ||
_iteratorError3 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion3 && _iterator3.return != null) { | ||
_iterator3.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError3) { | ||
throw _iteratorError3; | ||
} | ||
} | ||
} | ||
}; | ||
for (var _iterator2 = items[Symbol.iterator](), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { | ||
_loop(); | ||
} | ||
} catch (err) { | ||
_didIteratorError2 = true; | ||
_iteratorError2 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion2 && _iterator2.return != null) { | ||
_iterator2.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError2) { | ||
throw _iteratorError2; | ||
} | ||
} | ||
} | ||
} // scored item comparator | ||
function compareItems(a, b) { | ||
var scoreDiff = b.score - a.score; | ||
if (scoreDiff !== 0) { | ||
return scoreDiff; | ||
} | ||
var lengthDiff = a.lengthDiff - b.lengthDiff; | ||
if (lengthDiff !== 0) { | ||
return lengthDiff; | ||
} | ||
return a.index - b.index; | ||
} // recursively walk the trie | ||
function searchRecurse(node, string, term, scoreMethod, rows, results, resultMap, options) { | ||
// build rows | ||
scoreMethod(term, string, rows, string.length - 1); // insert results | ||
if (node.candidates.length > 0) { | ||
var lengthDiff = Math.abs(string.length - term.length); | ||
var match = walkBack(rows, string.length + 1); | ||
if (match.score >= options.threshold) { | ||
var _iteratorNormalCompletion4 = true; | ||
var _didIteratorError4 = false; | ||
var _iteratorError4 = undefined; | ||
try { | ||
for (var _iterator4 = node.candidates[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { | ||
var candidate = _step4.value; | ||
var scoredItem = { | ||
item: candidate.item, | ||
original: candidate.normalized.original, | ||
key: candidate.normalized.normal, | ||
score: match.score, | ||
match: options.returnMatchData && denormalizeMatchPosition(match.match, candidate.normalized.map), | ||
index: candidate.index, | ||
lengthDiff: lengthDiff | ||
}; | ||
if (resultMap[candidate.index] == null) { | ||
resultMap[candidate.index] = results.length; | ||
results.push(scoredItem); | ||
} else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) { | ||
results[candidate.index] = scoredItem; | ||
} | ||
} | ||
} catch (err) { | ||
_didIteratorError4 = true; | ||
_iteratorError4 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion4 && _iterator4.return != null) { | ||
_iterator4.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError4) { | ||
throw _iteratorError4; | ||
} | ||
} | ||
} | ||
} | ||
} // recurse for children | ||
for (var key in node.children) { | ||
// if the search term is sufficiently longer than a candidate, | ||
// it's impossible to score > threshold. | ||
// skip any subtrees for which this is true. | ||
var value = node.children[key]; | ||
if (value.depth / term.length >= options.threshold) { | ||
searchRecurse(value, string + key, term, scoreMethod, rows, results, resultMap, options); | ||
} | ||
} | ||
} // the core match finder: returns a sorted, filtered list of matches | ||
// this does not normalize input, requiring users to normalize themselves | ||
// it also expects candidates in the form {item: any, key: string} | ||
function searchCore(term, candidates, options) { | ||
var scoreMethod = options.useDamerau ? damerauLevenshteinSellers : levenshteinSellers; | ||
var results = candidates.map(function (candidate) { | ||
var matches = candidate.normalized.map(function (item) { | ||
return _objectSpread({}, item, item.normal.length / term.length < options.threshold ? { | ||
score: 0, | ||
match: {} | ||
} : scoreMethod(term, item.normal)); | ||
}); | ||
var bestMatch = matches.reduce(function (best, cur) { | ||
if (best.score === cur.score) { | ||
var curDiff = Math.abs(cur.normal.length - term.length); | ||
var bestDiff = Math.abs(best.normal.length - term.length); | ||
return curDiff < bestDiff ? cur : best; | ||
} | ||
function searchCore(term, trie, options) { | ||
var scoreMethod = options.useDamerau ? damerauLevenshteinSellers : levenshteinSellers; // walk the trie, scoring and storing the candidates | ||
return cur.score > best.score ? cur : best; | ||
var resultMap = {}; | ||
var results = []; | ||
var rows = new Array(term.length + 1); | ||
rows[0] = new Array(trie.depth + 1).fill(0); | ||
for (var key in trie.children) { | ||
var value = trie.children[key]; | ||
searchRecurse(value, key, term, scoreMethod, rows, results, resultMap, options); | ||
} | ||
var sorted = results.sort(compareItems); | ||
if (options.returnMatchData || options.returnScores) { | ||
return sorted.map(function (candidate) { | ||
return { | ||
item: candidate.item, | ||
original: candidate.original, | ||
key: candidate.key, | ||
score: candidate.score, | ||
match: candidate.match | ||
}; | ||
}); | ||
return { | ||
item: candidate.item, | ||
original: bestMatch.original, | ||
key: bestMatch.normal, | ||
score: bestMatch.score, | ||
match: options.returnMatchData && denormalizeMatchPosition(bestMatch.match, bestMatch.map) | ||
}; | ||
}).filter(function (candidate) { | ||
return candidate.score >= options.threshold; | ||
}).sort(function (a, b) { | ||
if (a.score === b.score) { | ||
return Math.abs(a.key.length - term.length) - Math.abs(b.key.length - term.length); | ||
} | ||
} | ||
return b.score - a.score; | ||
}); | ||
return options.returnMatchData || options.returnScores ? results : results.map(function (candidate) { | ||
return sorted.map(function (candidate) { | ||
return candidate.item; | ||
}); | ||
} // transforms a list of candidates into objects with normalized search keys | ||
// the keySelector is used to pick a string from an object to search by | ||
function createSearchItems(items, options) { | ||
return items.map(function (item) { | ||
return { | ||
item: item, | ||
normalized: arrayWrap(options.keySelector(item)).map(function (key) { | ||
return normalizeWithMap(key, options); | ||
}) | ||
}; | ||
}); | ||
} // wrapper for exporting sellers while allowing options to be passed in | ||
@@ -348,7 +484,10 @@ | ||
function fuzzy(term, candidate, options) { | ||
options = Object.assign({}, defaultOptions, options); | ||
options = _objectSpread({}, defaultOptions, options); | ||
var scoreMethod = options.useDamerau ? damerauLevenshteinSellers : levenshteinSellers; | ||
term = normalize(term, options); | ||
var normalized = normalizeWithMap(candidate, options); | ||
var result = scoreMethod(term, normalized.normal); | ||
var rows = new Array(term.length + 1); | ||
rows[0] = new Array(candidate.length + 1).fill(0); | ||
scoreMethod(term, normalized.normal, rows); | ||
var result = walkBack(rows, candidate.length + 1); | ||
return options.returnMatchData ? { | ||
@@ -366,3 +505,8 @@ item: candidate, | ||
options = Object.assign({}, defaultOptions, options); | ||
return searchCore(normalize(term, options), createSearchItems(candidates, options), options); | ||
var trie = { | ||
children: {}, | ||
depth: 0 | ||
}; | ||
createSearchTrie(trie, 0, candidates, options); | ||
return searchCore(normalize(term, options), trie, options); | ||
} // class that improves performance of searching the same set multiple times | ||
@@ -379,4 +523,8 @@ // normalizes the strings and caches the result for future calls | ||
this.options = Object.assign({}, defaultOptions, options); | ||
this.candidates = []; | ||
this.add.apply(this, _toConsumableArray(candidates)); | ||
this.trie = { | ||
children: {}, | ||
depth: 0 | ||
}; | ||
createSearchTrie(this.trie, 0, candidates, this.options); | ||
this.count = candidates.length; | ||
} | ||
@@ -387,4 +535,2 @@ | ||
value: function add() { | ||
var _this$candidates; | ||
for (var _len = arguments.length, candidates = new Array(_len), _key = 0; _key < _len; _key++) { | ||
@@ -394,3 +540,4 @@ candidates[_key] = arguments[_key]; | ||
(_this$candidates = this.candidates).push.apply(_this$candidates, _toConsumableArray(createSearchItems(candidates, this.options))); | ||
createSearchTrie(this.trie, this.count, candidates, this.options); | ||
this.count += candidates.length; | ||
} | ||
@@ -401,3 +548,3 @@ }, { | ||
options = Object.assign({}, this.options, options); | ||
return searchCore(normalize(term, this.options), this.candidates, options); | ||
return searchCore(normalize(term, this.options), this.trie, options); | ||
} | ||
@@ -404,0 +551,0 @@ }]); |
{ | ||
"name": "fast-fuzzy", | ||
"version": "1.6.2", | ||
"version": "1.8.0", | ||
"description": "Fast and tiny fuzzy-search utility", | ||
@@ -5,0 +5,0 @@ "main": "lib/fuzzy.js", |
@@ -20,3 +20,10 @@ # fast-fuzzy [![Build Status](https://travis-ci.org/EthanRutherford/fast-fuzzy.svg?branch=master)](https://travis-ci.org/EthanRutherford/fast-fuzzy) [![npm](https://img.shields.io/npm/v/fast-fuzzy.svg)](https://www.npmjs.com/package/fast-fuzzy) | ||
This causes matches which are closer to exact full string matches to be effectively ranked higher in the case of a tie. | ||
Ties in length difference are broken by insertion order. | ||
Lists of candidates are stored in a [trie](https://en.wikipedia.org/wiki/Trie) internally, which | ||
avoids doing redundant work on candidates with common prefixes. | ||
Additionally, when a subtree of the trie can be determined to have no string long enough | ||
to score > threshold, the entire subtree is skipped entirely. | ||
This can significantly improve search times compared with a bruteforce search. | ||
## exports | ||
@@ -23,0 +30,0 @@ | name | description | signature | |
@@ -149,3 +149,3 @@ /* global describe, it */ | ||
[ | ||
{name: "hello", value: "world"}, | ||
{name: "hello", value: "jell"}, | ||
{name: "world", value: "hello"}, | ||
@@ -155,3 +155,3 @@ ], | ||
), | ||
[{name: "hello", value: "world"}, {name: "world", value: "hello"}], | ||
[{name: "hello", value: "jell"}, {name: "world", value: "hello"}], | ||
); | ||
@@ -158,0 +158,0 @@ }); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
30004
649
129