dice-coefficient
Advanced tools
Comparing version 2.1.0 to 2.1.1
@@ -7,5 +7,5 @@ #!/usr/bin/env node | ||
/** @type {Object.<string, unknown>} */ | ||
/** @type {Record<string, unknown>} */ | ||
const pack = JSON.parse( | ||
String(fs.readFileSync(new URL('./package.json', import.meta.url))) | ||
String(fs.readFileSync(new URL('package.json', import.meta.url))) | ||
) | ||
@@ -12,0 +12,0 @@ |
/** | ||
* Get the edit-distance according to Dice between two values. | ||
* Get the difference according to Sørensen–Dice. | ||
* | ||
* @param {string|string[]} value | ||
* @param {string|string[]} alternative | ||
* > 👉 **Note**: you can pass bigrams (from [`n-gram`][n-gram]) too, which will | ||
* > improve performance when you are comparing the same values multiple times. | ||
* | ||
* @param {string|Array<string>} value | ||
* Primary value. | ||
* @param {string|Array<string>} other | ||
* Other value. | ||
* @returns {number} | ||
* Difference. | ||
* | ||
* The result is normalized to a number between `0` (completely different) | ||
* and `1` (exactly the same). | ||
*/ | ||
export function diceCoefficient( | ||
value: string | string[], | ||
alternative: string | string[] | ||
value: string | Array<string>, | ||
other: string | Array<string> | ||
): number |
75
index.js
import {bigram} from 'n-gram' | ||
/** | ||
* Get the edit-distance according to Dice between two values. | ||
* Get the difference according to Sørensen–Dice. | ||
* | ||
* @param {string|string[]} value | ||
* @param {string|string[]} alternative | ||
* > 👉 **Note**: you can pass bigrams (from [`n-gram`][n-gram]) too, which will | ||
* > improve performance when you are comparing the same values multiple times. | ||
* | ||
* @param {string|Array<string>} value | ||
* Primary value. | ||
* @param {string|Array<string>} other | ||
* Other value. | ||
* @returns {number} | ||
* Difference. | ||
* | ||
* The result is normalized to a number between `0` (completely different) | ||
* and `1` (exactly the same). | ||
*/ | ||
export function diceCoefficient(value, alternative) { | ||
/** @type {string} */ | ||
let value_ | ||
/** @type {string} */ | ||
let alt | ||
/** @type {string[]} */ | ||
let left | ||
/** @type {string[]} */ | ||
let right | ||
if (Array.isArray(value)) { | ||
left = value.map((valueBigram) => String(valueBigram).toLowerCase()) | ||
} else { | ||
value_ = String(value).toLowerCase() | ||
left = value_.length === 1 ? [value_] : bigram(value_) | ||
} | ||
if (Array.isArray(alternative)) { | ||
right = alternative.map((altBigram) => String(altBigram).toLowerCase()) | ||
} else { | ||
alt = String(alternative).toLowerCase() | ||
right = alt.length === 1 ? [alt] : bigram(alt) | ||
} | ||
export function diceCoefficient(value, other) { | ||
const left = toPairs(value) | ||
const right = toPairs(other) | ||
let index = -1 | ||
let intersections = 0 | ||
/** @type {string} */ | ||
let leftPair | ||
/** @type {string} */ | ||
let rightPair | ||
/** @type {number} */ | ||
let offset | ||
while (++index < left.length) { | ||
leftPair = left[index] | ||
offset = -1 | ||
const leftPair = left[index] | ||
let offset = -1 | ||
while (++offset < right.length) { | ||
rightPair = right[offset] | ||
const rightPair = right[offset] | ||
@@ -62,1 +44,22 @@ if (leftPair === rightPair) { | ||
} | ||
/**
 * Turn a value into a list of lowercased pairs.
 *
 * @param {string|Array<string>} value
 *   Either a string to split into bigrams, or a list of existing bigrams.
 * @returns {Array<string>}
 *   Lowercased pairs.
 */
function toPairs(value) {
  if (Array.isArray(value)) {
    // The callback parameter is named `pair` (not `bigram`) so it does not
    // shadow the imported `bigram` function that is called further down.
    return value.map((pair) => normalize(pair))
  }

  const normal = normalize(value)

  // A single character is kept as its own (only) pair instead of being
  // handed to `bigram`.
  return normal.length === 1 ? [normal] : bigram(normal)
}

/**
 * Coerce a value to a string and lowercase it.
 *
 * @param {string} value
 *   Value to normalize; it is passed through `String()` first.
 * @returns {string}
 *   Lowercased string form of `value`.
 */
function normalize(value) {
  return String(value).toLowerCase()
}
{ | ||
"name": "dice-coefficient", | ||
"version": "2.1.0", | ||
"version": "2.1.1", | ||
"description": "Sørensen–Dice coefficient", | ||
@@ -43,19 +43,17 @@ "license": "MIT", | ||
"devDependencies": { | ||
"@types/tape": "^4.0.0", | ||
"@types/node": "^18.0.0", | ||
"c8": "^7.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^10.0.0", | ||
"remark-cli": "^11.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"rimraf": "^3.0.0", | ||
"tape": "^5.0.0", | ||
"type-coverage": "^2.0.0", | ||
"typescript": "^4.0.0", | ||
"xo": "^0.46.0" | ||
"xo": "^0.52.0" | ||
}, | ||
"scripts": { | ||
"prepack": "npm run build && npm run format", | ||
"build": "rimraf \"*.d.ts\" && tsc && type-coverage", | ||
"build": "tsc --build --clean && tsc --build && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node test.js", | ||
"test-api": "node --conditions development test.js", | ||
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
@@ -62,0 +60,0 @@ }, |
125
readme.md
@@ -10,9 +10,34 @@ # dice-coefficient | ||
## Contents | ||
* [What is this?](#what-is-this) | ||
* [When should I use this?](#when-should-i-use-this) | ||
* [Install](#install) | ||
* [Use](#use) | ||
* [API](#api) | ||
* [`diceCoefficient(value, other)`](#dicecoefficientvalue-other) | ||
* [CLI](#cli) | ||
* [Types](#types) | ||
* [Compatibility](#compatibility) | ||
* [Related](#related) | ||
* [Contribute](#contribute) | ||
* [Security](#security) | ||
* [License](#license) | ||
## What is this? | ||
This package exposes a string similarity algorithm. | ||
That means it gets two strings (typically words), and turns them into a number | ||
between `0` (completely different) and `1` (exactly the same). | ||
## When should I use this? | ||
You’re probably dealing with natural language, and know you need this, if | ||
you’re here! | ||
## Install | ||
This package is ESM only: Node 12+ is needed to use it and it must be `import`ed | ||
instead of `require`d. | ||
This package is [ESM only][esm]. | ||
In Node.js (version 14.14+, 16.0+), install with [npm][]: | ||
[npm][]: | ||
```sh | ||
@@ -22,7 +47,18 @@ npm install dice-coefficient | ||
## API | ||
In Deno with [`esm.sh`][esmsh]: | ||
This package exports the following identifiers: `diceCoefficient`. | ||
There is no default export. | ||
```js | ||
import {diceCoefficient} from 'https://esm.sh/dice-coefficient@2' | ||
``` | ||
In browsers with [`esm.sh`][esmsh]: | ||
```html | ||
<script type="module"> | ||
import {diceCoefficient} from 'https://esm.sh/dice-coefficient@2?bundle' | ||
</script> | ||
``` | ||
## Use | ||
```js | ||
@@ -37,22 +73,29 @@ import {diceCoefficient} from 'dice-coefficient' | ||
Instead of strings you can also pass lists of bigrams. | ||
This can improve performance when processing the same strings repeatedly. | ||
## API | ||
```js | ||
diceCoefficient(['ab', 'bc'], ['xy', 'yz']) // => 0 | ||
diceCoefficient(['ab', 'bc'], ['ab', 'bc']) // => 1 | ||
diceCoefficient(['ab', 'bc'], ['AB', 'BC']) // => 1 | ||
``` | ||
This package exports the identifier `diceCoefficient`. | ||
There is no default export. | ||
See [`n-gram`](https://github.com/words/n-gram) for generating bigrams. | ||
### `diceCoefficient(value, other)` | ||
```js | ||
import {bigram} from 'n-gram' | ||
Get the similarity according to Sørensen–Dice. | ||
const abc = bigram('abc') // => ['ab', 'bc'] | ||
const xyz = bigram('xyz') // => ['xy', 'yz'] | ||
> 👉 **Note**: you can pass bigrams (from [`n-gram`][n-gram]) too, which will | ||
> improve performance when you are comparing the same values multiple times. | ||
diceCoefficient(abc, xyz) // => 0 | ||
``` | ||
###### `value` | ||
Primary value (`string`, `Array<string>`, required). | ||
###### `other` | ||
Other value (`string`, `Array<string>`, required). | ||
##### Returns | ||
Similarity (`number`). | ||
The result is normalized to a number between `0` (completely different) | ||
and `1` (exactly the same). | ||
## CLI | ||
@@ -81,15 +124,35 @@ | ||
## Types | ||
This package is fully typed with [TypeScript][]. | ||
It exports no additional types. | ||
## Compatibility | ||
This package is at least compatible with all maintained versions of Node.js. | ||
As of now, that is Node.js 14.14+ and 16.0+. | ||
It also works in Deno and modern browsers. | ||
## Related | ||
* [`levenshtein-edit-distance`](https://github.com/words/levenshtein-edit-distance) | ||
— Levenshtein edit distance | ||
— levenshtein edit distance | ||
* [`lancaster-stemmer`](https://github.com/words/lancaster-stemmer) | ||
— Lancaster stemming algorithm | ||
— lancaster stemming algorithm | ||
* [`double-metaphone`](https://github.com/words/double-metaphone) | ||
— Double Metaphone implementation | ||
— double metaphone algorithm | ||
* [`soundex-code`](https://github.com/words/soundex-code) | ||
— Fast Soundex implementation | ||
— soundex algorithm | ||
* [`syllable`](https://github.com/words/syllable) | ||
— Syllable count in an English word | ||
— syllable count of English words | ||
## Contribute | ||
Yes please! | ||
See [How to Contribute to Open Source][contribute]. | ||
## Security | ||
This package is safe. | ||
## License | ||
@@ -119,2 +182,10 @@ | ||
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c | ||
[esmsh]: https://esm.sh | ||
[typescript]: https://www.typescriptlang.org | ||
[contribute]: https://opensource.guide/how-to-contribute/ | ||
[license]: license | ||
@@ -125,1 +196,3 @@ | ||
[wiki]: https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient | ||
[n-gram]: https://github.com/words/n-gram |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
11376
8
134
193