double-metaphone
Advanced tools
Comparing version
13
cli.js
#!/usr/bin/env node | ||
import {URL} from 'url' | ||
import fs from 'fs' | ||
import {URL} from 'node:url' | ||
import fs from 'node:fs' | ||
import process from 'node:process' | ||
import {doubleMetaphone} from './index.js' | ||
/** @type {Object.<string, unknown>} */ | ||
var pack = JSON.parse( | ||
String(fs.readFileSync(new URL('./package.json', import.meta.url))) | ||
/** @type {Record<string, unknown>} */ | ||
const pack = JSON.parse( | ||
String(fs.readFileSync(new URL('package.json', import.meta.url))) | ||
) | ||
var argv = process.argv.slice(2) | ||
const argv = process.argv.slice(2) | ||
@@ -13,0 +14,0 @@ if (argv.includes('--help') || argv.includes('-h')) { |
@@ -5,4 +5,6 @@ /** | ||
* @param {string} value | ||
* Value to use. | ||
* @returns {[string, string]} | ||
* Double metaphone codes for `value`. | ||
*/ | ||
export function doubleMetaphone(value: string): [string, string] |
144
index.js
// Match vowels (including `Y`). | ||
var vowels = /[AEIOUY]/ | ||
const vowels = /[AEIOUY]/ | ||
// Match few Slavo-Germanic values. | ||
var slavoGermanic = /W|K|CZ|WITZ/ | ||
const slavoGermanic = /W|K|CZ|WITZ/ | ||
// Match few Germanic values. | ||
var germanic = /^(VAN |VON |SCH)/ | ||
const germanic = /^(VAN |VON |SCH)/ | ||
// Match initial values of which the first character should be skipped. | ||
var initialExceptions = /^(GN|KN|PN|WR|PS)/ | ||
const initialExceptions = /^(GN|KN|PN|WR|PS)/ | ||
// Match initial Greek-like values of which the `CH` sounds like `K`. | ||
var initialGreekCh = /^CH(IA|EM|OR([^E])|YM|ARAC|ARIS)/ | ||
const initialGreekCh = /^CH(IA|EM|OR([^E])|YM|ARAC|ARIS)/ | ||
// Match Greek-like values of which the `CH` sounds like `K`. | ||
var greekCh = /ORCHES|ARCHIT|ORCHID/ | ||
const greekCh = /ORCHES|ARCHIT|ORCHID/ | ||
// Match values which when following `CH`, transform `CH` to sound like `K`. | ||
var chForKh = /[ BFHLMNRVW]/ | ||
const chForKh = /[ BFHLMNRVW]/ | ||
// Match values which when preceding a vowel and `UGH`, sound like `F`. | ||
var gForF = /[CGLRT]/ | ||
const gForF = /[CGLRT]/ | ||
// Match initial values which sound like either `K` or `J`. | ||
var initialGForKj = /Y[\s\S]|E[BILPRSY]|I[BELN]/ | ||
const initialGForKj = /Y[\s\S]|E[BILPRSY]|I[BELN]/ | ||
// Match initial values (`DANGER`, `MANGER`, `RANGER`) which are exceptions to the `-GER-` rule. | ||
var initialAngerException = /^[DMR]ANGER/ | ||
const initialAngerException = /^[DMR]ANGER/ | ||
// Match values which when preceding `GY`, do not sound like `K` or `J`. | ||
var gForKj = /[EGIR]/ | ||
const gForKj = /[EGIR]/ | ||
// Match values which when following `J`, do not sound `J`. | ||
var jForJException = /[LTKSNMBZ]/ | ||
const jForJException = /[LTKSNMBZ]/ | ||
// Match values which might sound like `L`. | ||
var alle = /AS|OS/ | ||
const alle = /AS|OS/ | ||
// Match Germanic values following `SH`, in which `SH` sounds like `S`. | ||
var hForS = /EIM|OEK|OLM|OLZ/ | ||
const hForS = /EIM|OEK|OLM|OLZ/ | ||
// Match Dutch values following `SCH` which sound like either `X` and `SK`, | ||
// or `SK`. | ||
var dutchSch = /E[DMNR]|UY|OO/ | ||
const dutchSch = /E[DMNR]|UY|OO/ | ||
@@ -51,32 +51,20 @@ /** | ||
* @param {string} value | ||
* Value to use. | ||
* @returns {[string, string]} | ||
* Double metaphone codes for `value`. | ||
*/ | ||
// eslint-disable-next-line complexity | ||
export function doubleMetaphone(value) { | ||
var primary = '' | ||
var secondary = '' | ||
var index = 0 | ||
var length = value.length | ||
var last = length - 1 | ||
/** @type {boolean} */ | ||
var isSlavoGermanic | ||
/** @type {boolean} */ | ||
var isGermanic | ||
/** @type {string} */ | ||
var subvalue | ||
/** @type {string} */ | ||
var next | ||
/** @type {string} */ | ||
var previous | ||
/** @type {string} */ | ||
var nextnext | ||
/** @type {Array.<string>} */ | ||
var characters | ||
let primary = '' | ||
let secondary = '' | ||
let index = 0 | ||
const length = value.length | ||
const last = length - 1 | ||
const normalized = String(value).toUpperCase() + ' ' | ||
const isSlavoGermanic = slavoGermanic.test(normalized) | ||
const isGermanic = germanic.test(normalized) | ||
const characters = normalized.split('') | ||
value = String(value).toUpperCase() + ' ' | ||
isSlavoGermanic = slavoGermanic.test(value) | ||
isGermanic = germanic.test(value) | ||
characters = value.split('') | ||
// Skip this at beginning of word. | ||
if (initialExceptions.test(value)) { | ||
if (initialExceptions.test(normalized)) { | ||
index++ | ||
@@ -93,5 +81,7 @@ } | ||
while (index < length) { | ||
previous = characters[index - 1] | ||
next = characters[index + 1] | ||
nextnext = characters[index + 2] | ||
const previous = characters[index - 1] | ||
const next = characters[index + 1] | ||
const nextnext = characters[index + 2] | ||
/** @type {string} */ | ||
let subvalue | ||
@@ -142,3 +132,3 @@ switch (characters[index]) { | ||
(nextnext !== 'E' || | ||
((subvalue = value.slice(index - 2, index + 4)) && | ||
((subvalue = normalized.slice(index - 2, index + 4)) && | ||
(subvalue === 'BACHER' || subvalue === 'MACHER'))) | ||
@@ -154,3 +144,3 @@ ) { | ||
// Special case for `Caesar`. | ||
if (index === 0 && value.slice(index + 1, index + 6) === 'AESAR') { | ||
if (index === 0 && normalized.slice(index + 1, index + 6) === 'AESAR') { | ||
primary += 'S' | ||
@@ -164,3 +154,3 @@ secondary += 'S' | ||
// Italian `Chianti`. | ||
if (value.slice(index + 1, index + 4) === 'HIA') { | ||
if (normalized.slice(index + 1, index + 4) === 'HIA') { | ||
primary += 'K' | ||
@@ -184,3 +174,3 @@ secondary += 'K' | ||
// Greek roots such as `chemistry`, `chorus`. | ||
if (index === 0 && initialGreekCh.test(value)) { | ||
if (index === 0 && initialGreekCh.test(normalized)) { | ||
primary += 'K' | ||
@@ -197,3 +187,3 @@ secondary += 'K' | ||
// Such as 'architect', 'orchestra', 'orchid', but not 'arch'. | ||
greekCh.test(value.slice(index - 2, index + 4)) || | ||
greekCh.test(normalized.slice(index - 2, index + 4)) || | ||
nextnext === 'T' || | ||
@@ -215,3 +205,3 @@ nextnext === 'S' || | ||
// Such as 'McHugh'. | ||
} else if (value.slice(0, 2) === 'MC') { | ||
} else if (normalized.slice(0, 2) === 'MC') { | ||
// Bug? Why matching absolute? what about McHiccup? | ||
@@ -231,3 +221,3 @@ primary += 'K' | ||
// Such as `Czerny`. | ||
if (next === 'Z' && value.slice(index - 2, index) !== 'WI') { | ||
if (next === 'Z' && normalized.slice(index - 2, index) !== 'WI') { | ||
primary += 'S' | ||
@@ -241,3 +231,3 @@ secondary += 'X' | ||
// Such as `Focaccia`. | ||
if (value.slice(index + 1, index + 4) === 'CIA') { | ||
if (normalized.slice(index + 1, index + 4) === 'CIA') { | ||
primary += 'X' | ||
@@ -255,5 +245,5 @@ secondary += 'X' | ||
(nextnext === 'I' || nextnext === 'E' || nextnext === 'H') && | ||
value.slice(index + 2, index + 4) !== 'HU' | ||
normalized.slice(index + 2, index + 4) !== 'HU' | ||
) { | ||
subvalue = value.slice(index - 1, index + 4) | ||
subvalue = normalized.slice(index - 1, index + 4) | ||
@@ -447,4 +437,4 @@ // Such as `Accident`, `Accede`, `Succeed`. | ||
} else if ( | ||
value.slice(index + 2, index + 4) !== 'EY' && | ||
value.slice(index + 1) !== 'Y' && | ||
normalized.slice(index + 2, index + 4) !== 'EY' && | ||
normalized.slice(index + 1) !== 'Y' && | ||
!isSlavoGermanic | ||
@@ -465,3 +455,6 @@ ) { | ||
// Such as `Tagliaro`. | ||
if (value.slice(index + 1, index + 3) === 'LI' && !isSlavoGermanic) { | ||
if ( | ||
normalized.slice(index + 1, index + 3) === 'LI' && | ||
!isSlavoGermanic | ||
) { | ||
primary += 'KL' | ||
@@ -475,3 +468,3 @@ secondary += 'L' | ||
// -ges-, -gep-, -gel- at beginning. | ||
if (index === 0 && initialGForKj.test(value.slice(1, 3))) { | ||
if (index === 0 && initialGForKj.test(normalized.slice(1, 3))) { | ||
primary += 'K' | ||
@@ -486,6 +479,6 @@ secondary += 'J' | ||
if ( | ||
(value.slice(index + 1, index + 3) === 'ER' && | ||
(normalized.slice(index + 1, index + 3) === 'ER' && | ||
previous !== 'I' && | ||
previous !== 'E' && | ||
!initialAngerException.test(value.slice(0, 6))) || | ||
!initialAngerException.test(normalized.slice(0, 6))) || | ||
(next === 'Y' && !gForKj.test(previous)) | ||
@@ -510,3 +503,3 @@ ) { | ||
// Obvious Germanic. | ||
if (value.slice(index + 1, index + 3) === 'ET' || isGermanic) { | ||
if (normalized.slice(index + 1, index + 3) === 'ET' || isGermanic) { | ||
primary += 'K' | ||
@@ -519,3 +512,3 @@ secondary += 'K' | ||
secondary += | ||
value.slice(index + 1, index + 5) === 'IER ' ? 'J' : 'K' | ||
normalized.slice(index + 1, index + 5) === 'IER ' ? 'J' : 'K' | ||
} | ||
@@ -553,7 +546,7 @@ | ||
if ( | ||
value.slice(index, index + 4) === 'JOSE' || | ||
value.slice(0, 4) === 'SAN ' | ||
normalized.slice(index, index + 4) === 'JOSE' || | ||
normalized.slice(0, 4) === 'SAN ' | ||
) { | ||
if ( | ||
value.slice(0, 4) === 'SAN ' || | ||
normalized.slice(0, 4) === 'SAN ' || | ||
(index === 0 && characters[index + 4] === ' ') | ||
@@ -576,3 +569,3 @@ ) { | ||
// Bug: unreachable (see previous statement). | ||
// && value.slice(index, index + 4) !== 'JOSE'. | ||
// && normalized.slice(index, index + 4) !== 'JOSE'. | ||
) { | ||
@@ -631,3 +624,3 @@ primary += 'J' | ||
characters[last] === 'O' || | ||
alle.test(value.slice(last - 1, length)))) | ||
alle.test(normalized.slice(last - 1, length)))) | ||
) { | ||
@@ -654,3 +647,4 @@ primary += 'L' | ||
next === 'B' && | ||
(index + 1 === last || value.slice(index + 2, index + 4) === 'ER')) | ||
(index + 1 === last || | ||
normalized.slice(index + 2, index + 4) === 'ER')) | ||
) { | ||
@@ -746,3 +740,3 @@ index++ | ||
// Special case `sugar-`. | ||
if (index === 0 && value.slice(1, 5) === 'UGAR') { | ||
if (index === 0 && normalized.slice(1, 5) === 'UGAR') { | ||
primary += 'X' | ||
@@ -757,3 +751,3 @@ secondary += 'S' | ||
// Germanic. | ||
if (hForS.test(value.slice(index + 1, index + 5))) { | ||
if (hForS.test(normalized.slice(index + 1, index + 5))) { | ||
primary += 'S' | ||
@@ -774,3 +768,3 @@ secondary += 'S' | ||
// Bug: Already covered by previous branch | ||
// || value.slice(index, index + 4) === 'SIAN' | ||
// || normalized.slice(index, index + 4) === 'SIAN' | ||
) { | ||
@@ -813,3 +807,3 @@ if (isSlavoGermanic) { | ||
if (nextnext === 'H') { | ||
subvalue = value.slice(index + 3, index + 5) | ||
subvalue = normalized.slice(index + 3, index + 5) | ||
@@ -863,3 +857,3 @@ // Dutch origin, such as `school`, `schooner`. | ||
subvalue = value.slice(index - 2, index) | ||
subvalue = normalized.slice(index - 2, index) | ||
@@ -894,3 +888,3 @@ // French such as `resnais`, `artois`. | ||
subvalue = value.slice(index + 1, index + 3) | ||
subvalue = normalized.slice(index + 1, index + 3) | ||
@@ -975,3 +969,3 @@ if ( | ||
// Maybe a bug? Shouldn't this be general Germanic? | ||
value.slice(0, 3) === 'SCH' || | ||
normalized.slice(0, 3) === 'SCH' || | ||
(index === last && vowels.test(previous)) | ||
@@ -1007,3 +1001,3 @@ ) { | ||
// Bug: IAU and EAU also match by AU | ||
// (/IAU|EAU/.test(value.slice(index - 3, index))) || | ||
// (/IAU|EAU/.test(normalized.slice(index - 3, index))) || | ||
previous === 'U' && | ||
@@ -1010,0 +1004,0 @@ (characters[index - 2] === 'A' || characters[index - 2] === 'O') |
{ | ||
"name": "double-metaphone", | ||
"version": "2.0.0", | ||
"version": "2.0.1", | ||
"description": "Double Metaphone algorithm", | ||
@@ -36,20 +36,18 @@ "license": "MIT", | ||
"devDependencies": { | ||
"@types/node": "^14.0.0", | ||
"@types/tape": "^4.0.0", | ||
"@types/node": "^18.0.0", | ||
"c8": "^7.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^9.0.0", | ||
"remark-preset-wooorm": "^8.0.0", | ||
"rimraf": "^3.0.0", | ||
"tape": "^5.0.0", | ||
"remark-cli": "^11.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"type-coverage": "^2.0.0", | ||
"type-fest": "^3.0.0", | ||
"typescript": "^4.0.0", | ||
"xo": "^0.38.0" | ||
"xo": "^0.52.0" | ||
}, | ||
"scripts": { | ||
"prepack": "npm run build && npm run format", | ||
"build": "rimraf \"*.d.ts\" && tsc && type-coverage", | ||
"build": "tsc --build --clean && tsc --build && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node test.js", | ||
"test-api": "node --conditions development test.js", | ||
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
@@ -72,8 +70,3 @@ }, | ||
"xo": { | ||
"prettier": true, | ||
"rules": { | ||
"complexity": "off", | ||
"no-var": "off", | ||
"prefer-arrow-callback": "off" | ||
} | ||
"prettier": true | ||
}, | ||
@@ -80,0 +73,0 @@ "remarkConfig": { |
117
readme.md
@@ -10,9 +10,38 @@ # double-metaphone | ||
## Contents | ||
* [What is this?](#what-is-this) | ||
* [When should I use this?](#when-should-i-use-this) | ||
* [Install](#install) | ||
* [Use](#use) | ||
* [API](#api) | ||
* [`doubleMetaphone(value)`](#doublemetaphonevalue) | ||
* [CLI](#cli) | ||
* [Types](#types) | ||
* [Compatibility](#compatibility) | ||
* [Related](#related) | ||
* [Contribute](#contribute) | ||
* [Security](#security) | ||
* [License](#license) | ||
## What is this? | ||
This package exposes a phonetic algorithm. | ||
That means it gets a certain string (typically an English word), and turns it | ||
into codes, which can then be compared to other codes (of other words), to | ||
check if they are (likely) pronounced the same. | ||
## When should I use this? | ||
You’re probably dealing with natural language, and know you need this, if | ||
you’re here! | ||
Depending on your goals, you likely want to additionally use a stemmer (such as | ||
[`stemmer`][stemmer]). | ||
## Install | ||
This package is ESM only: Node 12+ is needed to use it and it must be `import`ed | ||
instead of `require`d. | ||
This package is [ESM only][esm]. | ||
In Node.js (version 14.14+, 16.0+), install with [npm][]: | ||
[npm][]: | ||
```sh | ||
@@ -22,7 +51,18 @@ npm install double-metaphone | ||
## API | ||
In Deno with [`esm.sh`][esmsh]: | ||
This package exports the following identifiers: `doubleMetaphone`. | ||
There is no default export. | ||
```js | ||
import {doubleMetaphone} from 'https://esm.sh/double-metaphone@2' | ||
``` | ||
In browsers with [`esm.sh`][esmsh]: | ||
```html | ||
<script type="module"> | ||
import {doubleMetaphone} from 'https://esm.sh/double-metaphone@2?bundle' | ||
</script> | ||
``` | ||
## Use | ||
```js | ||
@@ -40,3 +80,3 @@ import {doubleMetaphone} from 'double-metaphone' | ||
With [stemmer][]: | ||
With [`stemmer`][stemmer]: | ||
@@ -47,6 +87,23 @@ ```js | ||
doubleMetaphone(stemmer('acceptingness')) // => [ 'AKSPTNK', 'AKSPTNK' ] | ||
doubleMetaphone(stemmer('allegrettos')) // => [ 'ALKRT', 'AKRT' ] | ||
doubleMetaphone(stemmer('acceptingness')) // => ['AKSPTNK', 'AKSPTNK'] | ||
doubleMetaphone(stemmer('allegrettos')) // => ['ALKRT', 'AKRT'] | ||
``` | ||
## API | ||
This package exports the identifier `doubleMetaphone`. | ||
There is no default export. | ||
### `doubleMetaphone(value)` | ||
Get the double metaphone codes from a given value. | ||
###### `value` | ||
Value to use (`string`, required). | ||
##### Returns | ||
Double metaphone codes for `value` (`[string, string]`). | ||
## CLI | ||
@@ -79,17 +136,37 @@ | ||
## Types | ||
This package is fully typed with [TypeScript][]. | ||
It exports no additional types. | ||
## Compatibility | ||
This package is at least compatible with all maintained versions of Node.js. | ||
As of now, that is Node.js 14.14+ and 16.0+. | ||
It also works in Deno and modern browsers. | ||
## Related | ||
* [`metaphone`](https://github.com/words/metaphone) | ||
— Fast Metaphone implementation | ||
— metaphone algorithm | ||
* [`soundex-code`](https://github.com/words/soundex-code) | ||
— Fast Soundex implementation | ||
— soundex algorithm | ||
* [`stemmer`](https://github.com/words/stemmer) | ||
— Porter Stemmer algorithm | ||
— porter stemmer algorithm | ||
* [`dice-coefficient`](https://github.com/words/dice-coefficient) | ||
— Sørensen–Dice coefficient | ||
— sørensen–dice coefficient | ||
* [`levenshtein-edit-distance`](https://github.com/words/levenshtein-edit-distance) | ||
— Levenshtein edit distance | ||
— levenshtein edit distance | ||
* [`syllable`](https://github.com/words/syllable) | ||
— Syllable count in an English word | ||
— syllable count of English words | ||
## Contribute | ||
Yes please! | ||
See [How to Contribute to Open Source][contribute]. | ||
## Security | ||
This package is safe. | ||
## License | ||
@@ -119,2 +196,10 @@ | ||
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c | ||
[esmsh]: https://esm.sh | ||
[typescript]: https://www.typescriptlang.org | ||
[contribute]: https://opensource.guide/how-to-contribute/ | ||
[license]: license | ||
@@ -121,0 +206,0 @@ |
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
34248
6.21%9
-18.18%206
70.25%1
-50%916
-0.22%