fast-fuzzy
Advanced tools
Comparing version 1.8.3 to 1.8.4
276
lib/fuzzy.js
@@ -13,6 +13,7 @@ "use strict"; | ||
var whitespaceRegex = /(\s+)()/g; | ||
var nonWordRegex = /()([`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+)/g; | ||
var bothRegex = /(\s+)|([`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+)/g; // the default options, which will be used for any unset option | ||
var split = require("graphemesplit"); | ||
var whitespaceRegex = /^\s+$/; | ||
var nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/; // the default options, which will be used for any unset option | ||
var defaultOptions = { | ||
@@ -32,96 +33,62 @@ keySelector: function keySelector(s) { | ||
return item instanceof Array ? item : [item]; | ||
}; // normalize a string according to the options passed in | ||
}; // return normalized string, with map included | ||
function normalize(string, options) { | ||
string = string.normalize(); | ||
var lower = options.ignoreCase ? string.toLocaleLowerCase() : string; // track transformations | ||
if (options.ignoreCase) { | ||
string = string.toLocaleLowerCase(); | ||
} | ||
if (options.ignoreSymbols) { | ||
string = string.replace(nonWordRegex, ""); | ||
} | ||
if (options.normalizeWhitespace) { | ||
string = string.replace(whitespaceRegex, " ").trim(); | ||
} | ||
return string; | ||
} // return normalized string, with original and map included | ||
function normalizeWithMap(string, options) { | ||
var original = string.normalize(); | ||
var normal = original; | ||
if (options.ignoreCase) { | ||
normal = normal.toLocaleLowerCase(); | ||
} // track transformations | ||
var normal = []; | ||
var map = []; | ||
var regex; | ||
var lastWasWhitespace = true; | ||
var length = 0; | ||
var _iteratorNormalCompletion = true; | ||
var _didIteratorError = false; | ||
var _iteratorError = undefined; | ||
if (options.normalizeWhitespace) { | ||
var trimStart = normal.match(/^\s+/); | ||
try { | ||
for (var _iterator = split(lower)[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { | ||
var grapheme = _step.value; | ||
whitespaceRegex.lastIndex = 0; | ||
nonWordRegex.lastIndex = 0; | ||
if (trimStart) { | ||
map.push({ | ||
index: 0, | ||
offset: trimStart[0].length | ||
}); | ||
} | ||
if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) { | ||
if (!lastWasWhitespace) { | ||
normal.push(' '); | ||
map.push(length); | ||
lastWasWhitespace = true; | ||
} | ||
} else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) { | ||
normal.push(grapheme.normalize()); | ||
map.push(length); | ||
lastWasWhitespace = false; | ||
} | ||
normal = normal.trim(); | ||
length += grapheme.length; | ||
} // add the end of the string | ||
if (options.ignoreSymbols) { | ||
regex = bothRegex; | ||
} else { | ||
regex = whitespaceRegex; | ||
} catch (err) { | ||
_didIteratorError = true; | ||
_iteratorError = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion && _iterator.return != null) { | ||
_iterator.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError) { | ||
throw _iteratorError; | ||
} | ||
} | ||
} else if (options.ignoreSymbols) { | ||
regex = nonWordRegex; | ||
} else { | ||
return { | ||
original: original, | ||
normal: normal, | ||
map: [] | ||
}; | ||
} | ||
var lastInd = 0; | ||
var built = ""; | ||
var match; | ||
var prev = map[map.length - 1] || { | ||
index: -.1 | ||
}; | ||
map.push(string.length); | ||
while (match = regex.exec(normal)) { | ||
var length = match[0].length; | ||
built += normal.slice(lastInd, match.index); | ||
lastInd = match.index + length; | ||
if (match[1]) { | ||
built += " "; | ||
if (length === 1) continue; | ||
length--; | ||
} | ||
var start = built.length; | ||
if (prev.index === start) { | ||
prev.offset += length; | ||
} else { | ||
map.push(prev = { | ||
index: start, | ||
offset: length | ||
}); | ||
} | ||
while (normal[normal.length - 1] === " ") { | ||
normal.pop(); | ||
map.pop(); | ||
} | ||
return { | ||
original: original, | ||
normal: built + normal.slice(lastInd), | ||
original: string, | ||
normal: normal, | ||
map: map | ||
@@ -132,22 +99,6 @@ }; | ||
function denormalizeMatchPosition(_ref, map) { | ||
var index = _ref.index, | ||
length = _ref.length; | ||
var start = index; | ||
var end = index + length; | ||
var i = 0; | ||
while (i < map.length && map[i].index <= end) { | ||
if (map[i].index <= start) { | ||
index += map[i].offset; | ||
} else { | ||
length += map[i].offset; | ||
} | ||
i++; | ||
} | ||
function denormalizeMatchPosition(match, map) { | ||
return { | ||
index: index, | ||
length: length | ||
index: map[match.start], | ||
length: map[match.end + 1] - map[match.start] | ||
}; | ||
@@ -173,4 +124,4 @@ } // walks back up the matrix to find the match index and length | ||
return { | ||
index: start - 1, | ||
length: scoreIndex - start + 1 | ||
start: start - 1, | ||
end: scoreIndex - 1 | ||
}; | ||
@@ -266,9 +217,9 @@ } // finds the minimum value of the last row from the levenshtein-sellers matrix | ||
var walker = trie; | ||
var _iteratorNormalCompletion = true; | ||
var _didIteratorError = false; | ||
var _iteratorError = undefined; | ||
var _iteratorNormalCompletion2 = true; | ||
var _didIteratorError2 = false; | ||
var _iteratorError2 = undefined; | ||
try { | ||
for (var _iterator = string[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { | ||
var char = _step.value; | ||
for (var _iterator2 = string[Symbol.iterator](), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { | ||
var char = _step2.value; | ||
@@ -291,12 +242,12 @@ // add child node if not already present | ||
} catch (err) { | ||
_didIteratorError = true; | ||
_iteratorError = err; | ||
_didIteratorError2 = true; | ||
_iteratorError2 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion && _iterator.return != null) { | ||
_iterator.return(); | ||
if (!_iteratorNormalCompletion2 && _iterator2.return != null) { | ||
_iterator2.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError) { | ||
throw _iteratorError; | ||
if (_didIteratorError2) { | ||
throw _iteratorError2; | ||
} | ||
@@ -314,37 +265,38 @@ } | ||
function createSearchTrie(trie, index, items, options) { | ||
var _iteratorNormalCompletion2 = true; | ||
var _didIteratorError2 = false; | ||
var _iteratorError2 = undefined; | ||
var _iteratorNormalCompletion3 = true; | ||
var _didIteratorError3 = false; | ||
var _iteratorError3 = undefined; | ||
try { | ||
var _loop = function _loop() { | ||
var item = _step2.value; | ||
var candidates = arrayWrap(options.keySelector(item)).map(function (key) { | ||
var item = _step3.value; | ||
var candidates = arrayWrap(options.keySelector(item)).map(function (key, keyIndex) { | ||
return { | ||
index: index, | ||
keyIndex: keyIndex, | ||
item: item, | ||
normalized: normalizeWithMap(key, options) | ||
normalized: normalize(key, options) | ||
}; | ||
}); | ||
index++; | ||
var _iteratorNormalCompletion3 = true; | ||
var _didIteratorError3 = false; | ||
var _iteratorError3 = undefined; | ||
var _iteratorNormalCompletion4 = true; | ||
var _didIteratorError4 = false; | ||
var _iteratorError4 = undefined; | ||
try { | ||
for (var _iterator3 = candidates[Symbol.iterator](), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { | ||
var candidate = _step3.value; | ||
for (var _iterator4 = candidates[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { | ||
var candidate = _step4.value; | ||
trieInsert(trie, candidate.normalized.normal, candidate); | ||
} | ||
} catch (err) { | ||
_didIteratorError3 = true; | ||
_iteratorError3 = err; | ||
_didIteratorError4 = true; | ||
_iteratorError4 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion3 && _iterator3.return != null) { | ||
_iterator3.return(); | ||
if (!_iteratorNormalCompletion4 && _iterator4.return != null) { | ||
_iterator4.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError3) { | ||
throw _iteratorError3; | ||
if (_didIteratorError4) { | ||
throw _iteratorError4; | ||
} | ||
@@ -355,16 +307,16 @@ } | ||
for (var _iterator2 = items[Symbol.iterator](), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { | ||
for (var _iterator3 = items[Symbol.iterator](), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { | ||
_loop(); | ||
} | ||
} catch (err) { | ||
_didIteratorError2 = true; | ||
_iteratorError2 = err; | ||
_didIteratorError3 = true; | ||
_iteratorError3 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion2 && _iterator2.return != null) { | ||
_iterator2.return(); | ||
if (!_iteratorNormalCompletion3 && _iterator3.return != null) { | ||
_iterator3.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError2) { | ||
throw _iteratorError2; | ||
if (_didIteratorError3) { | ||
throw _iteratorError3; | ||
} | ||
@@ -383,2 +335,8 @@ } | ||
var keyIndexDiff = a.keyIndex - b.keyIndex; | ||
if (keyIndexDiff !== 0) { | ||
return keyIndexDiff; | ||
} | ||
var lengthDiff = a.lengthDiff - b.lengthDiff; | ||
@@ -404,16 +362,16 @@ | ||
var match = options.returnMatchData && walkBack(rows, scoreResult.scoreIndex); | ||
var _iteratorNormalCompletion4 = true; | ||
var _didIteratorError4 = false; | ||
var _iteratorError4 = undefined; | ||
var _iteratorNormalCompletion5 = true; | ||
var _didIteratorError5 = false; | ||
var _iteratorError5 = undefined; | ||
try { | ||
for (var _iterator4 = node.candidates[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { | ||
var candidate = _step4.value; | ||
for (var _iterator5 = node.candidates[Symbol.iterator](), _step5; !(_iteratorNormalCompletion5 = (_step5 = _iterator5.next()).done); _iteratorNormalCompletion5 = true) { | ||
var candidate = _step5.value; | ||
var scoredItem = { | ||
item: candidate.item, | ||
original: candidate.normalized.original, | ||
key: candidate.normalized.normal, | ||
normalized: candidate.normalized, | ||
score: scoreResult.score, | ||
match: options.returnMatchData && denormalizeMatchPosition(match, candidate.normalized.map), | ||
match: match, | ||
index: candidate.index, | ||
keyIndex: candidate.keyIndex, | ||
lengthDiff: lengthDiff | ||
@@ -430,12 +388,12 @@ }; | ||
} catch (err) { | ||
_didIteratorError4 = true; | ||
_iteratorError4 = err; | ||
_didIteratorError5 = true; | ||
_iteratorError5 = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion4 && _iterator4.return != null) { | ||
_iterator4.return(); | ||
if (!_iteratorNormalCompletion5 && _iterator5.return != null) { | ||
_iterator5.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError4) { | ||
throw _iteratorError4; | ||
if (_didIteratorError5) { | ||
throw _iteratorError5; | ||
} | ||
@@ -480,6 +438,6 @@ } | ||
item: candidate.item, | ||
original: candidate.original, | ||
key: candidate.key, | ||
original: candidate.normalized.original, | ||
key: candidate.normalized.normal.join(""), | ||
score: candidate.score, | ||
match: candidate.match | ||
match: denormalizeMatchPosition(candidate.match, candidate.normalized.map) | ||
}; | ||
@@ -498,4 +456,4 @@ }); | ||
var scoreMethod = options.useDamerau ? damerauLevenshteinSellers : levenshteinSellers; | ||
term = normalize(term, options); | ||
var normalized = normalizeWithMap(candidate, options); | ||
term = normalize(term, options).normal; | ||
var normalized = normalize(candidate, options); | ||
var rows = initSellersRows(term.length + 1, candidate.length + 1); | ||
@@ -511,3 +469,3 @@ | ||
original: normalized.original, | ||
key: normalized.normal, | ||
key: normalized.normal.join(""), | ||
score: scoreResult.score, | ||
@@ -526,3 +484,3 @@ match: denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) | ||
createSearchTrie(trie, 0, candidates, options); | ||
return searchCore(normalize(term, options), trie, options); | ||
return searchCore(normalize(term, options).normal, trie, options); | ||
} // class that improves performance of searching the same set multiple times | ||
@@ -561,3 +519,3 @@ // normalizes the strings and caches the result for future calls | ||
options = Object.assign({}, this.options, options); | ||
return searchCore(normalize(term, this.options), this.trie, options); | ||
return searchCore(normalize(term, this.options).normal, this.trie, options); | ||
} | ||
@@ -564,0 +522,0 @@ }]); |
{ | ||
"name": "fast-fuzzy", | ||
"version": "1.8.3", | ||
"version": "1.8.4", | ||
"description": "Fast and tiny fuzzy-search utility", | ||
@@ -16,3 +16,6 @@ "main": "lib/fuzzy.js", | ||
"damerau", | ||
"levenshtein" | ||
"levenshtein", | ||
"unicode", | ||
"partial", | ||
"match" | ||
], | ||
@@ -34,3 +37,6 @@ "author": "Ethan Rutherford", | ||
"mocha": "^5.2.0" | ||
}, | ||
"dependencies": { | ||
"graphemesplit": "^2.0.3" | ||
} | ||
} |
@@ -11,6 +11,8 @@ # fast-fuzzy [![Build Status](https://travis-ci.org/EthanRutherford/fast-fuzzy.svg?branch=master)](https://travis-ci.org/EthanRutherford/fast-fuzzy) [![npm](https://img.shields.io/npm/v/fast-fuzzy.svg)](https://www.npmjs.com/package/fast-fuzzy) | ||
Inputs are normalized before search. Normalization consists of standard utf8-normalization, | ||
followed by optionally taking the lowercase of a string, | ||
Inputs are normalized before search. | ||
Normalization consists of standard utf8-normalization, | ||
optionally taking the lowercase of a string, | ||
optionally removing non-word characters, | ||
and optionally flattening/trimming whitespace. | ||
Graphemes, such as conjoined emoji 👨👩👧, are treated as single characters. | ||
@@ -29,9 +31,2 @@ Inputs are scored from `0` to `1`, where a higher score indicates a closer match. | ||
## A note about normalization | ||
utf8 normalization is not optional, all strings will be normalized internally at minimum. | ||
Match positions, as a result, refer to positions *within the normalized string*. Match positions are, however, | ||
mapped back to the position in the string *before* whitespace and symbols are stripped out. | ||
Therefore, it is highly recommended that if one intends on using returned match data, you normalize the strings you intend to search by. | ||
This can be done by calling `string.normalize()`. | ||
## exports | ||
@@ -70,3 +65,4 @@ | name | description | signature | | ||
\*\*\* in the form `{item, original, key, score, match: {index, length}}` | ||
\*\*\* in the form `{item, original, key, score, match: {index, length}}`. | ||
Match index and length are in terms of the original, non-normalized string. | ||
@@ -73,0 +69,0 @@ `fuzzy` accepts a subset of these options (excluding keySelector and threshold) with the same defaults. |
24
test.js
@@ -51,2 +51,14 @@ /* global describe, it */ | ||
it("should handle unicode well", function() { | ||
// unicode characters are normalized | ||
assert.equal(fuzzy("\u212B", "\u0041\u030A"), 1); | ||
// handles high and low surrogates as single characters | ||
assert.equal(fuzzy("high", "h💩gh"), .75); | ||
// handles combining marks as single characters | ||
assert.equal(fuzzy("hi zalgo hello hello", "hi Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘ hello hello"), .75); | ||
// handles graphemes such as hangul jamo and joined emoji as single characters | ||
assert.equal(fuzzy("high", "h깍👨👩👧👦h"), .5); | ||
}); | ||
describe("options", function() { | ||
@@ -138,2 +150,14 @@ it("should have different results when ignoreCase is set", function() { | ||
it("should have good ordering when using multiple keys per object", function() { | ||
assert.deepEqual( | ||
search("grin", [["grinning", "grin"], ["grin", "grinning"]]), | ||
[["grin", "grinning"], ["grinning", "grin"]], | ||
); | ||
assert.deepEqual( | ||
search("laugh", [["smile", "laughing"], ["laughing"], ["laugh"]]), | ||
[["laugh"], ["laughing"], ["smile", "laughing"]], | ||
); | ||
}); | ||
it("should handle searching multiple keys per object", function() { | ||
@@ -140,0 +164,0 @@ assert.doesNotThrow(() => { |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
30934
1
645
132
+ Addedgraphemesplit@^2.0.3
+ Addedgraphemesplit@2.4.4(transitive)
+ Addedjs-base64@3.7.7(transitive)
+ Addedpako@0.2.9(transitive)
+ Addedtiny-inflate@1.0.3(transitive)
+ Addedunicode-trie@2.0.0(transitive)