Comparing version
119
index.js
@@ -6,3 +6,3 @@ /** | ||
* @author komed3 (Paul Köhler) | ||
* @version 1.0.0 | ||
* @version 1.0.1 | ||
* @license MIT | ||
@@ -34,3 +34,3 @@ */ | ||
*/ | ||
const bbigrams = ( str ) => { | ||
const str2bigrams = ( str ) => { | ||
@@ -52,6 +52,30 @@ let bigrams = new Set(); | ||
/**
 * compare two strings with the algorithm selected by name
 * (unknown algorithm names do not throw, they yield a similarity of 0)
 * @param {String} algo algorithm to use ('levenshtein' or 'diceCoefficient')
 * @param {String} a string 1
 * @param {String} b string 2
 * @returns {Number} similarity as computed by the selected algorithm, 0 if algo is unknown
 */
const cpmByAlgo = ( algo, a, b ) => {
    /* the algorithms are looked up at call time, as they are defined further down the file */
    switch( algo ) {
        case 'levenshtein':
            return levenshtein( a, b );
        case 'diceCoefficient':
            return diceCoefficient( a, b );
        default:
            return 0;
    }
};
/** | ||
* search for closest string | ||
* @param {String} algo algorithm to use | ||
* @param {String} test test string | ||
* @param {Array} arr targets to test | ||
* @param {Array} arr targets to test | ||
* @returns closest target | ||
@@ -69,18 +93,4 @@ */ | ||
switch( algo ) { | ||
pct = cpmByAlgo( algo, test, str ); | ||
case 'levenshtein': | ||
pct = levenshtein( test, str ); | ||
break; | ||
case 'diceCoefficient': | ||
pct = diceCoefficient( test, str ); | ||
break; | ||
default: | ||
pct = 0; | ||
break; | ||
} | ||
if( pct > best ) { | ||
@@ -104,2 +114,39 @@ | ||
/**
 * rank all targets by their similarity to the test string
 * @param {String} algo algorithm to use
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @returns {Array} one { target, match } object per target, sorted by match in descending order
 */
const bestMatch = ( algo, test, arr ) => {
    const matches = [];
    /* calculate similarity for each arr item */
    arr.forEach( ( str ) => {
        matches.push( {
            target: str,
            match: cpmByAlgo( algo, test, str )
        } );
    } );
    /* sort by highest similarity; matches is a local array, so sorting in place does not affect arr */
    return matches.sort( ( a, b ) => b.match - a.match );
};
/** | ||
* similarity calculations | ||
@@ -241,3 +288,3 @@ * @public | ||
* @param {String} test test string | ||
* @param {Array} arr targets to test | ||
* @param {Array} arr targets to test | ||
* @returns closest target | ||
@@ -252,2 +299,14 @@ */ | ||
/**
 * sort best matches to test string using the 'levenshtein' algorithm
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @returns {Array} matches as returned by bestMatch, sorted by similarity in descending order
 */
const levenshteinMatch = ( test, arr ) => {
    return bestMatch( 'levenshtein', test, arr );
};
/** | ||
* calculate dice coefficient | ||
@@ -281,4 +340,4 @@ * @param {String} a string 1 | ||
let setA = bbigrams( a ), | ||
setB = bbigrams( b ); | ||
let setA = str2bigrams( a ), | ||
setB = str2bigrams( b ); | ||
@@ -303,3 +362,3 @@ /* calculate dice coefficient */ | ||
* @param {String} test test string | ||
* @param {Array} arr targets to test | ||
* @param {Array} arr targets to test | ||
* @returns closest target | ||
@@ -314,2 +373,14 @@ */ | ||
/**
 * sort best matches to test string using the 'diceCoefficient' algorithm
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @returns {Array} matches as returned by bestMatch, sorted by similarity in descending order
 */
const diceMatch = ( test, arr ) => {
    return bestMatch( 'diceCoefficient', test, arr );
};
/** | ||
* export module functions | ||
@@ -321,4 +392,6 @@ */ | ||
levenshteinClosest, | ||
levenshteinMatch, | ||
diceCoefficient, | ||
diceClosest | ||
diceClosest, | ||
diceMatch | ||
}; |
@@ -10,3 +10,3 @@ { | ||
"homepage": "https://github.com/komed3/cmpstr#readme", | ||
"version": "1.0.0", | ||
"version": "1.0.1", | ||
"license": "MIT", | ||
@@ -13,0 +13,0 @@ "keywords": [ |
@@ -27,12 +27,34 @@ # cmpstr | ||
/** | ||
* levenshteinDistance | ||
* expected: 3 | ||
*/ | ||
let distance = cmpstr.levenshteinDistance( str1, str2 ); | ||
// expected 3 | ||
/** | ||
* diceCoefficient | ||
* expected: 0.3636363636363636 | ||
*/ | ||
let dice = cmpstr.diceCoefficient( str1, str2 ); | ||
// expected 0.3636363636363636 | ||
/** | ||
* diceClosest | ||
* expected: bestest | ||
*/ | ||
let closest = cmpstr.diceClosest( 'best', [ | ||
'better', 'bestest', 'well', 'good' | ||
] ); | ||
// expected bestest | ||
/** | ||
* levenshteinMatch | ||
* expected: [ | ||
* { target: 'bestest', match: 0.5714285714285714 }, | ||
* { target: 'better', match: 0.5 }, | ||
* { target: 'well', match: 0.25 }, | ||
* { target: 'good', match: 0 } | ||
* ] | ||
*/ | ||
let matches = cmpstr.levenshteinMatch( 'best', [ | ||
'better', 'bestest', 'well', 'good' | ||
] ); | ||
``` | ||
@@ -63,2 +85,6 @@ | ||
#### ``levenshteinMatch( str, arr )`` | ||
Calculates the Levenshtein similarity between ``str`` and every string contained in the array ``arr`` and returns an array of all samples sorted by similarity in descending order.
### Sørensen-Dice coefficient | ||
@@ -73,1 +99,5 @@ | ||
This function offers another way to find the best match between the string ``str`` and a given array ``arr`` of samples, using the Sørensen-Dice coefficient. It likewise returns the best-matching string.
#### ``diceMatch( str, arr )`` | ||
Calculates the Sørensen-Dice coefficient between ``str`` and every string contained in the array ``arr`` and returns an array of all samples sorted by similarity in descending order.
11984
23.56%258
25.24%100
40.85%