Socket
Socket
Sign inDemoInstall

node-nlp

Package Overview
Dependencies
Maintainers
1
Versions
161
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-nlp - npm Package Compare versions

Comparing version 1.2.3 to 1.2.4

coverage/lcov-report/lib/ner/enum-named-entity.js.html

6

.vscode/launch.json

@@ -12,10 +12,4 @@ {

"program": "${workspaceFolder}\\toto.js"
},
{
"type": "node",
"request": "launch",
"name": "Console bot",
"program": "${workspaceFolder}\\examples\\console-bot\\index.js"
}
]
}

@@ -0,1 +1,24 @@

/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const readline = require('readline');

@@ -2,0 +25,0 @@ const { NlpManager } = require('../../lib');

@@ -0,1 +1,24 @@

/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const fs = require('fs');

@@ -2,0 +25,0 @@

@@ -29,9 +29,13 @@ /*

const {
NerManager,
NlpClassifier,
NlpManager,
NlpUtil,
NamedEntity,
NlpExcelReader,
} = require('./nlp');
const {
NerManager,
NamedEntity,
EnumNamedEntity,
RegexNamedEntity,
} = require('./ner');
const { SentimentAnalyzer, SentimentManager } = require('./sentiment');

@@ -56,2 +60,4 @@ const { Evaluator, SimilarSearch } = require('./util');

NamedEntity,
EnumNamedEntity,
RegexNamedEntity,
SentimentAnalyzer,

@@ -58,0 +64,0 @@ SentimentManager,

2

lib/language/language.js

@@ -80,2 +80,3 @@ /*

* @param {Number} limit Limit of results.
* @returns {Object[]} Array of guesses.
*/

@@ -115,2 +116,3 @@ guess(utterance, whitelist, limit) {

* @param {String[]} whitelist Whitelist of accepted languages.
* @return {Object} Best guess.
*/

@@ -117,0 +119,0 @@ guessBest(utterance, whitelist) {

@@ -48,2 +48,3 @@ /*

* @param {Matrix} observations Observations.
* @returns {Matrix} Hypothesis result.
*/

@@ -54,2 +55,10 @@ static hypothesis(theta, observations) {

/**
* Cost function
* @param {Matrix} theta Theta matrix.
* @param {Matrix} observations Observations.
* @param {Matrix} classifications Classification matrix.
* @param {Matrix} srcHypothesis Hypothesis. If not provided is calculated.
* @return {number} Calculated cost based on the hypothesis.
*/
static cost(theta, observations, classifications, srcHypothesis) {

@@ -65,2 +74,9 @@ const hypothesis = srcHypothesis || Mathops.hypothesis(theta, observations);

/**
* Descend the gradient based on the cost function.
* @param {Matrix} srcTheta Theta matrix.
* @param {Vector} srcExamples Examples.
* @param {Matrix} classifications Classification matrix.
* @param {Object} srcOptions Settings for the descent.
*/
static descendGradient(srcTheta, srcExamples, classifications, srcOptions) {

@@ -110,2 +126,7 @@ const options = srcOptions || {};

/**
* Return a vector representing x.
* @param {Number[]} x Input array.
* @returns {Vector} Vector representing x.
*/
static asVector(x) {

@@ -115,2 +136,7 @@ return new Vector(x);

/**
* Returns a matrix representing x.
* @param {Number[][]} x Input array.
* @return {Matrix} Matrix representing x.
*/
static asMatrix(x) {

@@ -120,2 +146,6 @@ return new Matrix(x);

/**
* Function returning 0.
* @returns {number} Returns 0.
*/
static zero() {

@@ -125,2 +155,7 @@ return 0;

/**
* Compute the thetas of the examples and classifications.
* @param {Vector} srcExamples Vector of examples.
* @param {Matrix} srcClassifications Matrix of classifications.
*/
static computeThetas(srcExamples, srcClassifications) {

@@ -127,0 +162,0 @@ const examples = Mathops.asMatrix(srcExamples);

@@ -258,2 +258,9 @@ /*

/**
* Given a matrix and an operation function, applies this operation
* multiplying this matrix with the provided one.
* @param {Matrix} matrix Matrix to operate with this.
* @param {Function} op Multiply operation.
* @returns {Matrix} Result matrix of multiplying both matrix.
*/
mulOp(matrix, op) {

@@ -288,2 +295,4 @@ if (typeof (matrix) === 'number') {

* @param {Matrix} matrix Input matrix.
* @param {Function} activation Optional activation function.
* @returns {Matrix} Multiplication of the matrix.
*/

@@ -290,0 +299,0 @@ multiply(matrix, activation) {

@@ -24,16 +24,12 @@ /*

const NerManager = require('./ner-manager');
const NlpUtil = require('./nlp-util');
const NlpClassifier = require('./nlp-classifier');
const NlpManager = require('./nlp-manager');
const NamedEntity = require('./named-entity');
const NlpExcelReader = require('./nlp-excel-reader');
module.exports = {
NerManager,
NlpUtil,
NlpClassifier,
NlpManager,
NamedEntity,
NlpExcelReader,
};

@@ -37,2 +37,3 @@ /*

this.loadNamedEntities();
this.loadRegexEntities();
this.loadIntents();

@@ -59,2 +60,12 @@ this.loadResponses();

loadRegexEntities() {
const table = this.xdoc.getTable('Regex Entities');
if (table) {
table.data.forEach((row) => {
const languages = row.language.split(',').map(x => x.trim());
this.manager.addRegexEntity(row.entity, languages, row.regex);
});
}
}
loadIntents() {

@@ -61,0 +72,0 @@ this.xdoc.getTable('Intents').data.forEach((row) => {

@@ -27,4 +27,3 @@ /*

const { Language } = require('../language');
const NerManager = require('./ner-manager');
const NamedEntity = require('./named-entity');
const { NerManager } = require('../ner');
const { SentimentManager } = require('../sentiment');

@@ -120,4 +119,10 @@ const NlpUtil = require('./nlp-util');

*/
addRegexEntity(entityName, regex) {
return this.nerManager.addNamedEntity(entityName, regex);
addRegexEntity(entityName, languages, regex) {
const entity = this.nerManager.addNamedEntity(entityName, 'regex');
if (typeof regex === 'string') {
entity.addStrRegex(languages, regex);
} else {
entity.addRegex(languages, regex);
}
return entity;
}

@@ -315,2 +320,3 @@

languageGuessed = true;
throw new Error(locale);
}

@@ -401,5 +407,3 @@ const truncated = NlpUtil.getTruncatedLocale(locale);

clone.intentEntities = this.intentEntities;
clone.ner = {};
clone.ner.namedEntities = this.nerManager.namedEntities;
clone.ner.threshold = this.nerManager.threshold;
clone.nerManager = this.nerManager.save();
clone.classifiers = [];

@@ -438,19 +442,3 @@ clone.responses = this.nlgManager.responses;

this.languages = clone.languages;
const keys = Object.keys(clone.ner.namedEntities);
this.nerManager.namedEntities = {};
for (let i = 0; i < keys.length; i += 1) {
const key = keys[i];
const srcNamedEntity = clone.ner.namedEntities[key];
let namedEntity;
if (srcNamedEntity.regex) {
namedEntity = new NamedEntity(key, srcNamedEntity.regex);
} else {
namedEntity = new NamedEntity(key);
namedEntity.options = srcNamedEntity.options;
}
namedEntity.settings = srcNamedEntity.settings;
this.nerManager.namedEntities[key] = namedEntity;
}
this.nerManager.threshold = clone.ner.threshold;
this.nerManager.load(clone.nerManager);
this.intentEntities = clone.intentEntities;

@@ -457,0 +445,0 @@ this.nlgManager.responses = clone.responses;

@@ -101,3 +101,3 @@ /*

return Natural.PorterStemmerNl;
case 'id': return Natural.PorterStemmerId; // Indonesian
case 'id': return Natural.StemmerId; // Indonesian
case 'ja': return new Natural.StemmerJa(); // Japanese

@@ -140,2 +140,31 @@ case 'da': return new DanishStemmer(NlpUtil.getTokenizer(locale)); // Danish

}
static getCulture(locale) {
switch (locale) {
case 'en': return 'en-us'; // English
case 'fa': return 'fa-ir'; // Farsi
case 'fr': return 'fr-fr'; // French
case 'ru': return 'ru-ru'; // Russian
case 'es': return 'es-es'; // Spanish
case 'it': return 'it-it'; // Italian
case 'nl': return 'nl-nl'; // Dutch
case 'no': return 'no-no'; // Norwegian
case 'pt': return 'pt-br'; // Portuguese
case 'pl': return 'pl-pl'; // Polish
case 'sv': return 'sv-se'; // Swedish
case 'id': return 'id-id'; // Indonesian
case 'ja': return 'ja-jp'; // Japanese
case 'da': return 'da-dk'; // Danish
case 'fi': return 'fi-fi'; // Finnish
case 'de': return 'de-de'; // German
case 'hu': return 'hu-hu'; // Hungarian
case 'ro': return 'ro-ro'; // Romanian
case 'tr': return 'tr-tr'; // Turkish
case 'zh': return 'zh-cn'; // Chinese
default: return 'en-us';
}
}
}

@@ -142,0 +171,0 @@

@@ -130,3 +130,3 @@ /*

if (!atWhiteSpace) {
result.push({ start: lastIndex, end: currentIndex, len: currentIndex - lastIndex });
result.push({ start: lastIndex, end: currentIndex - 1, len: currentIndex - lastIndex });
atWhiteSpace = true;

@@ -141,3 +141,3 @@ }

if (!atWhiteSpace) {
result.push({ start: lastIndex, end: currentIndex, len: currentIndex - lastIndex });
result.push({ start: lastIndex, end: currentIndex - 1, len: currentIndex - lastIndex });
}

@@ -164,3 +164,4 @@ return result;

start: 0,
end: str1len,
end: str1len - 1,
len: str1len,
levenshtein: this.getSimilarity(str1, str2),

@@ -176,2 +177,3 @@ };

end: 0,
len: 0,
levenshtein: undefined,

@@ -182,3 +184,3 @@ accuracy: 0,

for (let j = i; j < wordPositionsLen; j += 1) {
const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end);
const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end + 1);
const levenshtein = this.getSimilarity(str3, str2);

@@ -189,2 +191,3 @@ if (best.levenshtein === undefined || levenshtein < best.levenshtein) {

best.end = wordPositions[j].end;
best.len = (best.end - best.start) + 1;
}

@@ -198,2 +201,107 @@ }

/**
* Given two strings, search all the occurences of the second inside the first,
* where the accuracy is at least as good as the threshold.
* @param {String} str1 First string.
* @param {String} str2 Second string.
* @param {Object[]} words1 Array of positions of the words of the first string.
* If not provided this will be built.
* @returns {Object[]} List of occurences.
*/
getBestSubstringList(str1, str2, words1, threshold = 1) {
const str1len = str1.length;
const str2len = str2.length;
const result = [];
if (str1len <= str2len) {
const levenshtein = this.getSimilarity(str1, str2);
const accuracy = (str2len - levenshtein) / str2len;
if (accuracy >= threshold) {
result.push({
start: 0,
end: str1len - 1,
len: str1len,
levenshtein,
accuracy,
});
}
return result;
}
const wordPositions = words1 || this.getWordPositions(str1);
const wordPositionsLen = wordPositions.length;
for (let i = 0; i < wordPositionsLen; i += 1) {
for (let j = i; j < wordPositionsLen; j += 1) {
const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end + 1);
const levenshtein = this.getSimilarity(str3, str2);
const accuracy = (str2len - levenshtein) / str2len;
if (accuracy >= threshold) {
result.push({
start: wordPositions[i].start,
end: wordPositions[j].end,
len: (wordPositions[j].end - wordPositions[i].start) + 1,
levenshtein,
accuracy,
});
}
}
}
return result;
}
reduceEdges(edges) {
for (let i = 0, l = edges.length; i < l; i += 1) {
const edge = edges[i];
if (!edge.discarded) {
for (let j = i + 1; j < l; j += 1) {
const other = edges[j];
if (!other.discarded) {
if (other.start <= edge.end && other.end >= edge.start) {
if (other.accuracy < edge.accuracy) {
other.discarded = true;
} else if (other.accuracy > edge.accuracy) {
edge.discarded = true;
} else if (other.len <= edge.len) {
other.discarded = true;
} else {
edge.discarded = true;
}
}
}
}
}
}
const result = [];
for (let i = 0, l = edges.length; i < l; i += 1) {
if (!edges[i].discarded) {
result.push(edges[i]);
}
}
return result;
}
getEdgesFromEntity(str, entity, language, entityName, threshold = 1, srcWordPositions) {
const wordPositions = srcWordPositions || this.getWordPositions(str);
const locale = entity.getLocaleRules ? entity.getLocaleRules(language) : entity[language];
const result = [];
if (!locale) {
return result;
}
const optionKeys = Object.keys(locale);
for (let i = 0, li = optionKeys.length; i < li; i += 1) {
const optionName = optionKeys[i];
const texts = locale[optionName];
for (let j = 0, lj = texts.length; j < lj; j += 1) {
const current = this.getBestSubstringList(str, texts[j], wordPositions, threshold);
for (let k = 0, lk = current.length; k < lk; k += 1) {
const item = current[k];
item.option = optionName;
item.sourceText = texts[j];
item.entity = entityName || entity.name;
item.utteranceText = str.substring(item.start, item.end + 1);
result.push(item);
}
}
}
return this.reduceEdges(result);
}
/**
* Given an utterance and an array of entities with options, search the

@@ -206,32 +314,24 @@ * best option for each entity and return the results.

*/
getBestEntity(str, entities, locale, whitelist) {
getEdgesFromEntities(str, entities, language, whitelist, threshold = 1) {
const result = [];
const wordPositions = this.getWordPositions(str);
const entityKeys = Object.keys(entities);
for (let i = 0; i < entityKeys.length; i += 1) {
const entity = entities[entityKeys[i]];
if (!whitelist || whitelist.indexOf(entity.name) > -1) {
let best;
for (let j = 0; j < entity.options.length; j += 1) {
const option = entity.options[j];
if (option.texts[locale]) {
const texts = option.texts[locale];
for (let k = 0; k < texts.length; k += 1) {
const current = this.getBestSubstring(str, texts[k], wordPositions);
if (best === undefined || current.levenshtein < best.levenshtein) {
best = current;
best.option = option.name;
best.sourceText = texts[k];
best.entity = entity.name;
best.utteranceText = str.substring(best.start, best.end);
}
}
}
}
if (best) {
result.push(best);
}
for (let i = 0, l = entityKeys.length; i < l; i += 1) {
const entityName = entityKeys[i];
if (!whitelist || whitelist.indexOf(entityName) !== -1) {
const edges = this.getEdgesFromEntity(
str,
entities[entityName],
language,
entityName,
threshold,
wordPositions,
);
edges.forEach((srcEdge) => {
const edge = srcEdge;
result.push(edge);
});
}
}
return result;
return this.reduceEdges(result);
}

@@ -238,0 +338,0 @@ }

@@ -68,2 +68,5 @@ /*

const row = block[index];
if (!row) {
return true;
}
for (let i = 0; i < row.length; i += 1) {

@@ -70,0 +73,0 @@ if (row[i]) {

{
"name": "node-nlp",
"version": "1.2.3",
"version": "1.2.4",
"description": "Library for NLU (Natural Language Understanding) done in Node.js",

@@ -40,2 +40,3 @@ "main": "lib/index.js",

"dependencies": {
"@microsoft/recognizers-text-suite": "^1.0.1",
"escodegen": "^1.10.0",

@@ -59,3 +60,5 @@ "esprima": "^4.0.0",

"logistic regression",
"Natural"
"Natural",
"entity extraction",
"named entity recognition"
],

@@ -62,0 +65,0 @@ "config": {

@@ -51,9 +51,4 @@ /*

expect(food.type).toEqual('enum');
expect(hero.options[0].name).toEqual('spiderman');
expect(hero.options[1].name).toEqual('ironman');
expect(hero.options[2].name).toEqual('hulk');
expect(hero.options[3].name).toEqual('thor');
expect(food.options[0].name).toEqual('burguer');
expect(food.options[1].name).toEqual('pizza');
expect(food.options[2].name).toEqual('pasta');
expect(hero.locales.en).toBeDefined();
expect(hero.locales.es).toBeDefined();
});

@@ -60,0 +55,0 @@ test('It should create the classifiers for the languages', () => {

@@ -147,4 +147,4 @@ /*

manager.removeNamedEntityText('hero', 'iron man', 'en', 'iron-man');
const ironman = manager.nerManager.getNamedEntityOption('hero', 'iron man');
expect(ironman.texts.en).toHaveLength(1);
const ironman = manager.nerManager.getNamedEntity('hero', false);
expect(ironman.locales.en['iron man']).toEqual(['iron man']);
});

@@ -361,3 +361,3 @@

test('Should search for entities', () => {
const manager = new NlpManager();
const manager = new NlpManager({ ner: { builtins: [] } });
manager.addLanguage(['en']);

@@ -382,3 +382,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);

test('Should search for entities if the language is specified', () => {
const manager = new NlpManager();
const manager = new NlpManager({ ner: { builtins: [] } });
manager.addLanguage(['en']);

@@ -535,3 +535,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);

test('Should allow to save, load and all should be working', () => {
let manager = new NlpManager();
let manager = new NlpManager({ ner: { builtins: [] } });
manager.addLanguage(['en']);

@@ -544,3 +544,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);

manager.addNamedEntityText('food', 'pasta', ['en'], ['Pasta', 'spaghetti']);
manager.addRegexEntity('mail', /\b(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,3})\b/ig);
manager.addRegexEntity('mail', 'en', /\b(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,3})\b/ig);
manager.addDocument('en', 'I saw %hero% eating %food%', 'sawhero');

@@ -591,10 +591,14 @@ manager.addDocument('en', 'I have seen %hero%, he was eating %food%', 'sawhero');

expect(food.type).toEqual('enum');
expect(hero.options[0].name).toEqual('spiderman');
expect(hero.options[1].name).toEqual('ironman');
expect(hero.options[2].name).toEqual('hulk');
expect(hero.options[3].name).toEqual('thor');
expect(food.options[0].name).toEqual('burguer');
expect(food.options[1].name).toEqual('pizza');
expect(food.options[2].name).toEqual('pasta');
expect(food.locales.en).toEqual({
burguer: ['burguer', 'hamburguer'],
pasta: ['pasta', 'spaghetti'],
pizza: ['pizza'],
});
expect(food.locales.es).toEqual({
burguer: ['hamburguesa'],
pasta: ['pasta', 'spaghetti'],
pizza: ['pizza'],
});
});
test('It should create the classifiers for the languages', () => {

@@ -601,0 +605,0 @@ const manager = new NlpManager();

@@ -53,3 +53,3 @@ /*

expect(NlpUtil.getStemmer('nl')).toBe(Natural.PorterStemmerNl); // Dutch
expect(NlpUtil.getStemmer('id')).toBe(Natural.PorterStemmerId); // Indonesian
expect(NlpUtil.getStemmer('id')).toBe(Natural.StemmerId); // Indonesian
expect(NlpUtil.getStemmer('ja')).toBeInstanceOf(Natural.StemmerJa); // Japanese

@@ -98,3 +98,3 @@ expect(NlpUtil.getStemmer('da').constructor.name).toEqual('DanishStemmer'); // Danish

expect(NlpUtil.getStemmer('nl').constructor.name).toEqual('DutchStemmer'); // Dutch
expect(NlpUtil.getStemmer('id')).toBe(Natural.PorterStemmerId); // Indonesian
expect(NlpUtil.getStemmer('id')).toBe(Natural.StemmerId); // Indonesian
expect(NlpUtil.getStemmer('ja')).toBeInstanceOf(Natural.StemmerJa); // Japanese

@@ -157,2 +157,30 @@ expect(NlpUtil.getStemmer('da').constructor.name).toEqual('DanishStemmer'); // Danish

});
describe('Get culture', () => {
test('Should return correct culture for the locale', () => {
expect(NlpUtil.getCulture('en')).toEqual('en-us'); // english
expect(NlpUtil.getCulture('fa')).toEqual('fa-ir'); // farsi
expect(NlpUtil.getCulture('fr')).toEqual('fr-fr'); // french
expect(NlpUtil.getCulture('ru')).toEqual('ru-ru'); // russian
expect(NlpUtil.getCulture('es')).toEqual('es-es'); // spanish
expect(NlpUtil.getCulture('it')).toEqual('it-it'); // italian
expect(NlpUtil.getCulture('nl')).toEqual('nl-nl'); // dutch
expect(NlpUtil.getCulture('no')).toEqual('no-no'); // norwegian
expect(NlpUtil.getCulture('pt')).toEqual('pt-br'); // portuguese
expect(NlpUtil.getCulture('pl')).toEqual('pl-pl'); // polish
expect(NlpUtil.getCulture('sv')).toEqual('sv-se'); // swedish
expect(NlpUtil.getCulture('id')).toEqual('id-id'); // indonesian
expect(NlpUtil.getCulture('ja')).toEqual('ja-jp'); // japanese
expect(NlpUtil.getCulture('da')).toEqual('da-dk'); // danish
expect(NlpUtil.getCulture('fi')).toEqual('fi-fi'); // finnish
expect(NlpUtil.getCulture('de')).toEqual('de-de'); // german
expect(NlpUtil.getCulture('hu')).toEqual('hu-hu'); // hungarian
expect(NlpUtil.getCulture('ro')).toEqual('ro-ro'); // romanian
expect(NlpUtil.getCulture('tr')).toEqual('tr-tr'); // turkish
expect(NlpUtil.getCulture('zh')).toEqual('zh-cn'); // Chinese
});
test('If the locale is not recognized return default english', () => {
expect(NlpUtil.getCulture('aa')).toEqual('en-us'); // english
});
});
});

@@ -115,3 +115,3 @@ /*

expect(result).toHaveLength(1);
expect(result[0]).toEqual({ start: 0, end: 5, len: 5 });
expect(result[0]).toEqual({ start: 0, end: 4, len: 5 });
});

@@ -124,3 +124,3 @@ test('Should get position of only one word even if surrounded by non alphanumeric chars', () => {

expect(result).toHaveLength(1);
expect(result[0]).toEqual({ start: 5, end: 10, len: 5 });
expect(result[0]).toEqual({ start: 5, end: 9, len: 5 });
});

@@ -132,5 +132,5 @@ test('Should get position of several words', () => {

expect(result).toHaveLength(3);
expect(result[0]).toEqual({ start: 2, end: 7, len: 5 });
expect(result[1]).toEqual({ start: 11, end: 19, len: 8 });
expect(result[2]).toEqual({ start: 23, end: 32, len: 9 });
expect(result[0]).toEqual({ start: 2, end: 6, len: 5 });
expect(result[1]).toEqual({ start: 11, end: 18, len: 8 });
expect(result[2]).toEqual({ start: 23, end: 31, len: 9 });
});

@@ -142,4 +142,4 @@ test('Should get position of words on long texts', () => {

expect(result).toHaveLength(26);
expect(result[0]).toEqual({ start: 0, end: 5, len: 5 });
expect(result[25]).toEqual({ start: 188, end: 194, len: 6 });
expect(result[0]).toEqual({ start: 0, end: 4, len: 5 });
expect(result[25]).toEqual({ start: 188, end: 193, len: 6 });
});

@@ -157,3 +157,4 @@ });

start: 6,
end: 24,
end: 23,
len: 18,
levenshtein: 0,

@@ -171,3 +172,4 @@ accuracy: 1,

start: 6,
end: 30,
end: 29,
len: 24,
levenshtein: 2,

@@ -185,3 +187,4 @@ accuracy: 0.9166666666666666,

start: 0,
end: 16,
end: 15,
len: 16,
levenshtein: 8,

@@ -192,60 +195,477 @@ accuracy: 0.6666666666666666,

});
describe('Get best entity', () => {
test('', () => {
describe('Reduce edges', () => {
test('It should do nothing if edges are empty', () => {
const similar = new SimilarSearch();
const edges = [];
const result = similar.reduceEdges(edges);
expect(result).toEqual([]);
});
test('If two edges collide, only the best accuracy remains', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 1,
end: 10,
len: 9,
accuracy: 0.9,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
]);
});
test('Edges can overlap in the left', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 0,
end: 9,
len: 9,
accuracy: 0.9,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
]);
});
test('Edges can overlap in the right', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 2,
end: 11,
len: 9,
accuracy: 0.9,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
]);
});
test('One edge can contain other', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 0,
end: 11,
len: 11,
accuracy: 0.9,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
]);
});
test('If both have same accuracy, return largest one', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
]);
});
test('If both have same accuracy, return largest one even if goes first', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
]);
});
test('If there are more than 2 edges overlaped, decide 1', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 9,
end: 18,
len: 9,
accuracy: 1,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
]);
});
test('Should respect non overlaped edges', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 12,
end: 20,
len: 8,
accuracy: 1,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 12,
end: 20,
len: 8,
accuracy: 1,
},
]);
});
test('When there are different groups of overlaped edges, return one per group', () => {
const similar = new SimilarSearch();
const edges = [
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 12,
end: 20,
len: 8,
accuracy: 1,
},
{
start: 1,
end: 10,
len: 9,
accuracy: 1,
},
{
start: 12,
end: 21,
len: 9,
accuracy: 1,
},
];
const result = similar.reduceEdges(edges);
expect(result).toEqual([
{
start: 0,
end: 11,
len: 11,
accuracy: 1,
},
{
start: 12,
end: 21,
len: 9,
accuracy: 1,
},
]);
});
});
describe('Get best substring list', () => {
test('If not threshold is defined, then search for exact occurences', () => {
const similar = new SimilarSearch();
const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
const text2 = 'interdum ultricies';
const result = similar.getBestSubstringList(text1, text2);
expect(result).toHaveLength(1);
expect(result[0]).toEqual({
start: 6,
end: 23,
len: 18,
levenshtein: 0,
accuracy: 1,
});
});
test('If there are more than 1 occurence search exact, should return all', () => {
const similar = new SimilarSearch();
const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
const text2 = 'interdum';
const result = similar.getBestSubstringList(text1, text2);
expect(result).toHaveLength(2);
expect(result[0]).toEqual({
start: 6,
end: 13,
len: 8,
levenshtein: 0,
accuracy: 1,
});
expect(result[1]).toEqual({
start: 73,
end: 80,
len: 8,
levenshtein: 0,
accuracy: 1,
});
});
test('Should get more than 1 occurence when searching with threshold', () => {
const similar = new SimilarSearch();
const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
const text2 = 'internum';
const result = similar.getBestSubstringList(text1, text2, undefined, 0.8);
expect(result).toHaveLength(2);
expect(result[0]).toEqual({
start: 6,
end: 13,
len: 8,
levenshtein: 1,
accuracy: 0.875,
});
expect(result[1]).toEqual({
start: 73,
end: 80,
len: 8,
levenshtein: 1,
accuracy: 0.875,
});
});
test('Should return 0 to length element in array when the substring is longer than the string and accuracy is at least threshold', () => {
const similar = new SimilarSearch();
const text1 = 'dumaultriciesbne';
const text2 = 'interdumaultriciesbneque';
const result = similar.getBestSubstringList(text1, text2, undefined, 0.6);
expect(result).toBeDefined();
expect(result).toEqual([{
start: 0,
end: 15,
len: 16,
levenshtein: 8,
accuracy: 0.6666666666666666,
}]);
});
test('Should return empty array when the substring is longer than the string and accuracy is lower than threshold', () => {
const similar = new SimilarSearch();
const text1 = 'dumaultriciesbne';
const text2 = 'interdumaultriciesbneque';
const result = similar.getBestSubstringList(text1, text2, undefined, 0.7);
expect(result).toBeDefined();
expect(result).toEqual([]);
});
});
describe('Get edges from entity', () => {
test('It should get the edges from an utterance', () => {
const similar = new SimilarSearch({ normalize: true });
const text1 = 'I saw spederman eating spaghetti in the city';
const entity = {
en: {
spiderman: ['Spiderman', 'Spider-man'],
'iron man': ['iron man', 'iron-man'],
thor: ['Thor'],
},
};
const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en', 'entity', 0.8);
expect(bestEntity).toBeDefined();
expect(bestEntity).toHaveLength(1);
expect(bestEntity[0].start).toEqual(6);
expect(bestEntity[0].end).toEqual(14);
expect(bestEntity[0].levenshtein).toEqual(1);
expect(bestEntity[0].accuracy).toEqual(0.8888888888888888);
expect(bestEntity[0].option).toEqual('spiderman');
expect(bestEntity[0].sourceText).toEqual('Spiderman');
expect(bestEntity[0].utteranceText).toEqual('spederman');
});
test('It no threshold is provided, then is 1', () => {
const similar = new SimilarSearch({ normalize: true });
const text1 = 'I saw spiderman eating iron-men in the city';
const entity = {
en: {
spiderman: ['Spiderman', 'Spider-man'],
'iron man': ['iron man', 'iron-man'],
thor: ['Thor'],
},
};
const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en');
expect(bestEntity).toBeDefined();
expect(bestEntity).toHaveLength(1);
expect(bestEntity[0].start).toEqual(6);
expect(bestEntity[0].end).toEqual(14);
expect(bestEntity[0].levenshtein).toEqual(0);
expect(bestEntity[0].accuracy).toEqual(1);
expect(bestEntity[0].option).toEqual('spiderman');
expect(bestEntity[0].sourceText).toEqual('Spiderman');
expect(bestEntity[0].utteranceText).toEqual('spiderman');
});
test('It can return several occurances of options', () => {
const similar = new SimilarSearch({ normalize: true });
const text1 = 'I saw spiderman eating iron-men in the city spederman';
const entity = {
en: {
spiderman: ['Spiderman', 'Spider-man'],
'iron man': ['iron man', 'iron-man'],
thor: ['Thor'],
},
};
const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en', 'entity', 0.8);
expect(bestEntity).toBeDefined();
expect(bestEntity).toHaveLength(3);
expect(bestEntity[0].start).toEqual(6);
expect(bestEntity[0].end).toEqual(14);
expect(bestEntity[0].levenshtein).toEqual(0);
expect(bestEntity[0].accuracy).toEqual(1);
expect(bestEntity[0].option).toEqual('spiderman');
expect(bestEntity[0].sourceText).toEqual('Spiderman');
expect(bestEntity[0].utteranceText).toEqual('spiderman');
expect(bestEntity[1].start).toEqual(44);
expect(bestEntity[1].end).toEqual(52);
expect(bestEntity[1].levenshtein).toEqual(1);
expect(bestEntity[1].accuracy).toEqual(0.8888888888888888);
expect(bestEntity[1].option).toEqual('spiderman');
expect(bestEntity[1].sourceText).toEqual('Spiderman');
expect(bestEntity[1].utteranceText).toEqual('spederman');
expect(bestEntity[2].start).toEqual(23);
expect(bestEntity[2].end).toEqual(30);
expect(bestEntity[2].levenshtein).toEqual(1);
expect(bestEntity[2].accuracy).toEqual(0.875);
expect(bestEntity[2].option).toEqual('iron man');
expect(bestEntity[2].sourceText).toEqual('iron-man');
expect(bestEntity[2].utteranceText).toEqual('iron-men');
});
// Asking for a locale the entity defines no options for ('es' here, while the
// entity only has 'en') must produce an empty array.
test('If locale does not exists return empty array', () => {
  const similar = new SimilarSearch({ normalize: true });
  const text1 = 'I saw spiderman eating iron-men in the city spederman';
  const entity = {
    en: {
      spiderman: ['Spiderman', 'Spider-man'],
      'iron man': ['iron man', 'iron-man'],
      thor: ['Thor'],
    },
  };
  // The entity name is the fourth argument and the threshold the fifth, as in
  // the other getEdgesFromEntity calls of this suite. Passing 0.8 fourth would
  // put the threshold in the entity-name slot and leave the threshold unset.
  const bestEntity = similar.getEdgesFromEntity(text1, entity, 'es', 'entity', 0.8);
  expect(bestEntity).toEqual([]);
});
});
describe('Get edges from entities', () => {
// Runs the search over two entities ('hero' and 'food') for locale 'en' and
// expects two edges back: a one-edit match starting at offset 6 and an exact
// match starting at offset 23.
// NOTE(review): this block holds two declarations of `bestEntity`, repeated
// `.end` expectations with different values, and `@@ ... @@` hunk headers.
// That looks like the removed/added lines of a rendered diff shown without
// +/- markers — confirm against the real test file before running it.
test('It should get the edges from an utterance', () => {
const similar = new SimilarSearch({ normalize: true });
const text1 = 'I saw spederman eating spaghetti in the city';
const entities = {
hero: {
name: 'hero',
options: [
{
name: 'spiderman',
texts: {
en: ['Spiderman', 'Spider-man'],
},
},
{
name: 'iron man',
texts: {
en: ['iron man', 'iron-man'],
},
},
{
name: 'thor',
texts: {
en: ['Thor'],
},
},
],
en: {
spiderman: ['Spiderman', 'Spider-man'],
'iron man': ['iron man', 'iron-man'],
thor: ['Thor'],
},
},
food: {
name: 'food',
options: [
{
name: 'burguer',
texts: {
en: ['Burguer', 'Hamburguer'],
},
},
{
name: 'pizza',
texts: {
en: ['pizza'],
},
},
{
name: 'pasta',
texts: {
en: ['Pasta', 'spaghetti'],
},
},
],
en: {
burguer: ['Burguer', 'Hamburguer'],
pizza: ['pizza'],
pasta: ['Pasta', 'spaguetti', 'spaghetti'],
},
},
};
const bestEntity = similar.getBestEntity(text1, entities, 'en');
const bestEntity = similar.getEdgesFromEntities(text1, entities, 'en', undefined, 0.8);
expect(bestEntity).toBeDefined();
expect(bestEntity).toHaveLength(2);
expect(bestEntity[0].start).toEqual(6);
expect(bestEntity[0].end).toEqual(15);
expect(bestEntity[0].end).toEqual(14);
expect(bestEntity[0].levenshtein).toEqual(1);

@@ -258,3 +678,3 @@ expect(bestEntity[0].accuracy).toEqual(0.8888888888888888);

expect(bestEntity[1].start).toEqual(23);
expect(bestEntity[1].end).toEqual(32);
expect(bestEntity[1].end).toEqual(31);
expect(bestEntity[1].levenshtein).toEqual(0);

@@ -267,3 +687,65 @@ expect(bestEntity[1].accuracy).toEqual(1);

});
// With the threshold argument omitted, only the exact match ('spaghetti') is
// expected back; the one-edit 'spederman' must not be reported.
test('It no threshold is provided then is 1', () => {
  const search = new SimilarSearch({ normalize: true });
  const utterance = 'I saw spederman eating spaghetti in the city';
  const heroLocales = {
    en: {
      spiderman: ['Spiderman', 'Spider-man'],
      'iron man': ['iron man', 'iron-man'],
      thor: ['Thor'],
    },
  };
  const foodLocales = {
    en: {
      burguer: ['Burguer', 'Hamburguer'],
      pizza: ['pizza'],
      pasta: ['Pasta', 'spaguetti', 'spaghetti'],
    },
  };
  const entities = { hero: heroLocales, food: foodLocales };
  // Field-by-field description of the single edge that must be returned.
  const expectedEdge = {
    start: 23,
    end: 31,
    levenshtein: 0,
    accuracy: 1,
    option: 'pasta',
    sourceText: 'spaghetti',
    entity: 'food',
    utteranceText: 'spaghetti',
  };

  const edges = search.getEdgesFromEntities(utterance, entities, 'en');

  expect(edges).toBeDefined();
  expect(edges).toHaveLength(1);
  Object.entries(expectedEdge).forEach(([field, value]) => {
    expect(edges[0][field]).toEqual(value);
  });
});
// Passing ['hero'] as the fourth argument must restrict the search to that
// entity: the exact 'spaghetti' match under 'food' is not expected back, only
// the one-edit 'spederman' match under 'hero'.
test('If whitelist of entities is provided, check only those entities', () => {
  const search = new SimilarSearch({ normalize: true });
  const utterance = 'I saw spederman eating spaghetti in the city';
  const heroLocales = {
    en: {
      spiderman: ['Spiderman', 'Spider-man'],
      'iron man': ['iron man', 'iron-man'],
      thor: ['Thor'],
    },
  };
  const foodLocales = {
    en: {
      burguer: ['Burguer', 'Hamburguer'],
      pizza: ['pizza'],
      pasta: ['Pasta', 'spaguetti', 'spaghetti'],
    },
  };
  const entities = { hero: heroLocales, food: foodLocales };
  // Field-by-field description of the single edge that must be returned.
  const expectedEdge = {
    start: 6,
    end: 14,
    levenshtein: 1,
    accuracy: 0.8888888888888888,
    option: 'spiderman',
    sourceText: 'Spiderman',
    entity: 'hero',
    utteranceText: 'spederman',
  };

  const edges = search.getEdgesFromEntities(utterance, entities, 'en', ['hero'], 0.8);

  expect(edges).toBeDefined();
  expect(edges).toHaveLength(1);
  Object.entries(expectedEdge).forEach(([field, value]) => {
    expect(edges[0][field]).toEqual(value);
  });
});
});
});

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc