node-nlp - npm Package Compare versions

coverage/lcov-report/lib/ner/enum-named-entity.js.html

coverage/lcov-report/lib/ner/index.html

coverage/lcov-report/lib/ner/index.js.html

coverage/lcov-report/lib/ner/named-entity.js.html

coverage/lcov-report/lib/ner/ner-manager.js.html

coverage/lcov-report/lib/ner/regex-named-entity.js.html

lib/ner/enum-named-entity.js

lib/ner/index.js

lib/ner/named-entity.js

lib/ner/ner-manager.js

lib/ner/regex-named-entity.js

test/ner/builtins/number-builtin.test.js

test/ner/enum-named-entity.test.js

test/ner/named-entity.test.js

test/ner/ner-manager.test.js

test/ner/regex-named-entity.test.js

6

.vscode/launch.json

		@@ -12,10 +12,4 @@ {
		"program": "${workspaceFolder}\\toto.js"
		},
		{
		"type": "node",
		"request": "launch",
		"name": "Console bot",
		"program": "${workspaceFolder}\\examples\\console-bot\\index.js"
		}
		]
		}

23

examples/console-bot/index.js

		@@ -0,1 +1,24 @@
		/*
		* Copyright (c) AXA Shared Services Spain S.A.
		*
		* Permission is hereby granted, free of charge, to any person obtaining
		* a copy of this software and associated documentation files (the
		* "Software"), to deal in the Software without restriction, including
		* without limitation the rights to use, copy, modify, merge, publish,
		* distribute, sublicense, and/or sell copies of the Software, and to
		* permit persons to whom the Software is furnished to do so, subject to
		* the following conditions:
		*
		* The above copyright notice and this permission notice shall be
		* included in all copies or substantial portions of the Software.
		*
		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
		* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
		* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		*/

		const readline = require('readline');
		@@ -2,0 +25,0 @@ const { NlpManager } = require('../../lib');

23

examples/console-bot/train-nlp.js

		@@ -0,1 +1,24 @@
		/*
		* Copyright (c) AXA Shared Services Spain S.A.
		*
		* Permission is hereby granted, free of charge, to any person obtaining
		* a copy of this software and associated documentation files (the
		* "Software"), to deal in the Software without restriction, including
		* without limitation the rights to use, copy, modify, merge, publish,
		* distribute, sublicense, and/or sell copies of the Software, and to
		* permit persons to whom the Software is furnished to do so, subject to
		* the following conditions:
		*
		* The above copyright notice and this permission notice shall be
		* included in all copies or substantial portions of the Software.
		*
		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
		* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
		* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		*/

		const fs = require('fs');
		@@ -2,0 +25,0 @@

10

lib/index.js

		@@ -29,9 +29,13 @@ /*
		const {
		NerManager,
		NlpClassifier,
		NlpManager,
		NlpUtil,
		NamedEntity,
		NlpExcelReader,
		} = require('./nlp');
		const {
		NerManager,
		NamedEntity,
		EnumNamedEntity,
		RegexNamedEntity,
		} = require('./ner');
		const { SentimentAnalyzer, SentimentManager } = require('./sentiment');
		@@ -56,2 +60,4 @@ const { Evaluator, SimilarSearch } = require('./util');
		NamedEntity,
		EnumNamedEntity,
		RegexNamedEntity,
		SentimentAnalyzer,
		@@ -58,0 +64,0 @@ SentimentManager,

2

lib/language/language.js

		@@ -80,2 +80,3 @@ /*
		* @param {Number} limit Limit of results.
		* @returns {Object[]} Array of guesses.
		*/
		@@ -115,2 +116,3 @@ guess(utterance, whitelist, limit) {
		* @param {String[]} whitelist Whitelist of accepted languages.
		* @return {Object} Best guess.
		*/
		@@ -117,0 +119,0 @@ guessBest(utterance, whitelist) {

35

lib/math/mathops.js

		@@ -48,2 +48,3 @@ /*
		* @param {Matrix} observations Observations.
		* @returns {Matrix} Hypothesis result.
		*/
		@@ -54,2 +55,10 @@ static hypothesis(theta, observations) {

		/**
		* Cost function
		* @param {Matrix} theta Theta matrix.
		* @param {Matrix} observations Observations.
		* @param {Matrix} classifications Classification matrix.
		* @param {Matrix} srcHypothesis Hypothesis. If not provided is calculated.
		* @return {number} Calculated cost based on the hypothesis.
		*/
		static cost(theta, observations, classifications, srcHypothesis) {
		@@ -65,2 +74,9 @@ const hypothesis = srcHypothesis \|\| Mathops.hypothesis(theta, observations);

		/**
		* Descend the gradient based on the cost function.
		* @param {Matrix} srcTheta Theta matrix.
		* @param {Vector} srcExamples Examples.
		* @param {Matrix} classifications Classification matrix.
		* @param {Object} srcOptions Settings for the descent.
		*/
		static descendGradient(srcTheta, srcExamples, classifications, srcOptions) {
		@@ -110,2 +126,7 @@ const options = srcOptions \|\| {};

		/**
		* Return a vector representing x.
		* @param {Number[]} x Input array.
		* @returns {Vector} Vector representing x.
		*/
		static asVector(x) {
		@@ -115,2 +136,7 @@ return new Vector(x);

		/**
		* Returns a matrix representing x.
		* @param {Number[][]} x Input array.
		* @return {Matrix} Matrix representing x.
		*/
		static asMatrix(x) {
		@@ -120,2 +146,6 @@ return new Matrix(x);

		/**
		* Function returning 0.
		* @returns {number} Returns 0.
		*/
		static zero() {
		@@ -125,2 +155,7 @@ return 0;

		/**
		* Compute the thetas of the examples and classifications.
		* @param {Vector} srcExamples Vector of examples.
		* @param {Matrix} srcClassifications Matrix of classifications.
		*/
		static computeThetas(srcExamples, srcClassifications) {
		@@ -127,0 +162,0 @@ const examples = Mathops.asMatrix(srcExamples);

9

lib/math/matrix.js

		@@ -258,2 +258,9 @@ /*

		/**
		* Given a matrix and an operation function, applies this operation
		* multiplying this matrix with the provided one.
		* @param {Matrix} matrix Matrix to operate with this.
		* @param {Function} op Multiply operation.
		* @returns {Matrix} Result matrix of multiplying both matrix.
		*/
		mulOp(matrix, op) {
		@@ -288,2 +295,4 @@ if (typeof (matrix) === 'number') {
		* @param {Matrix} matrix Input matrix.
		* @param {Function} activation Optional activation function.
		* @returns {Matrix} Multiplication of the matrix.
		*/
		@@ -290,0 +299,0 @@ multiply(matrix, activation) {

4

lib/nlp/index.js

		@@ -24,16 +24,12 @@ /*

		const NerManager = require('./ner-manager');
		const NlpUtil = require('./nlp-util');
		const NlpClassifier = require('./nlp-classifier');
		const NlpManager = require('./nlp-manager');
		const NamedEntity = require('./named-entity');
		const NlpExcelReader = require('./nlp-excel-reader');

		module.exports = {
		NerManager,
		NlpUtil,
		NlpClassifier,
		NlpManager,
		NamedEntity,
		NlpExcelReader,
		};

11

lib/nlp/nlp-excel-reader.js

		@@ -37,2 +37,3 @@ /*
		this.loadNamedEntities();
		this.loadRegexEntities();
		this.loadIntents();
		@@ -59,2 +60,12 @@ this.loadResponses();

		loadRegexEntities() {
		const table = this.xdoc.getTable('Regex Entities');
		if (table) {
		table.data.forEach((row) => {
		const languages = row.language.split(',').map(x => x.trim());
		this.manager.addRegexEntity(row.entity, languages, row.regex);
		});
		}
		}

		loadIntents() {
		@@ -61,0 +72,0 @@ this.xdoc.getTable('Intents').data.forEach((row) => {

36

lib/nlp/nlp-manager.js

		@@ -27,4 +27,3 @@ /*
		const { Language } = require('../language');
		const NerManager = require('./ner-manager');
		const NamedEntity = require('./named-entity');
		const { NerManager } = require('../ner');
		const { SentimentManager } = require('../sentiment');
		@@ -120,4 +119,10 @@ const NlpUtil = require('./nlp-util');
		*/
		addRegexEntity(entityName, regex) {
		return this.nerManager.addNamedEntity(entityName, regex);
		addRegexEntity(entityName, languages, regex) {
		const entity = this.nerManager.addNamedEntity(entityName, 'regex');
		if (typeof regex === 'string') {
		entity.addStrRegex(languages, regex);
		} else {
		entity.addRegex(languages, regex);
		}
		return entity;
		}
		@@ -315,2 +320,3 @@
		languageGuessed = true;
		throw new Error(locale);
		}
		@@ -401,5 +407,3 @@ const truncated = NlpUtil.getTruncatedLocale(locale);
		clone.intentEntities = this.intentEntities;
		clone.ner = {};
		clone.ner.namedEntities = this.nerManager.namedEntities;
		clone.ner.threshold = this.nerManager.threshold;
		clone.nerManager = this.nerManager.save();
		clone.classifiers = [];
		@@ -438,19 +442,3 @@ clone.responses = this.nlgManager.responses;
		this.languages = clone.languages;
		const keys = Object.keys(clone.ner.namedEntities);
		this.nerManager.namedEntities = {};
		for (let i = 0; i < keys.length; i += 1) {
		const key = keys[i];
		const srcNamedEntity = clone.ner.namedEntities[key];
		let namedEntity;
		if (srcNamedEntity.regex) {
		namedEntity = new NamedEntity(key, srcNamedEntity.regex);
		} else {
		namedEntity = new NamedEntity(key);
		namedEntity.options = srcNamedEntity.options;
		}
		namedEntity.settings = srcNamedEntity.settings;
		this.nerManager.namedEntities[key] = namedEntity;
		}

		this.nerManager.threshold = clone.ner.threshold;
		this.nerManager.load(clone.nerManager);
		this.intentEntities = clone.intentEntities;
		@@ -457,0 +445,0 @@ this.nlgManager.responses = clone.responses;

31

lib/nlp/nlp-util.js

		@@ -101,3 +101,3 @@ /*
		return Natural.PorterStemmerNl;
		case 'id': return Natural.PorterStemmerId; // Indonesian
		case 'id': return Natural.StemmerId; // Indonesian
		case 'ja': return new Natural.StemmerJa(); // Japanese
		@@ -140,2 +140,31 @@ case 'da': return new DanishStemmer(NlpUtil.getTokenizer(locale)); // Danish
		}

		static getCulture(locale) {
		switch (locale) {
		case 'en': return 'en-us'; // English
		case 'fa': return 'fa-ir'; // Farsi
		case 'fr': return 'fr-fr'; // French
		case 'ru': return 'ru-ru'; // Russian
		case 'es': return 'es-es'; // Spanish
		case 'it': return 'it-it'; // Italian
		case 'nl': return 'nl-nl'; // Dutch
		case 'no': return 'no-no'; // Norwegian
		case 'pt': return 'pt-br'; // Portuguese
		case 'pl': return 'pl-pl'; // Polish
		case 'sv': return 'sv-se'; // Swedish
		case 'id': return 'id-id'; // Indonesian
		case 'ja': return 'ja-jp'; // Japanese

		case 'da': return 'da-dk'; // Danish
		case 'fi': return 'fi-fi'; // Finnish
		case 'de': return 'de-de'; // German
		case 'hu': return 'hu-hu'; // Hungarian
		case 'ro': return 'ro-ro'; // Romanian
		case 'tr': return 'tr-tr'; // Turkish

		case 'zh': return 'zh-cn'; // Chinese

		default: return 'en-us';
		}
		}
		}
		@@ -142,0 +171,0 @@

158

lib/util/similar-search.js

		@@ -130,3 +130,3 @@ /*
		if (!atWhiteSpace) {
		result.push({ start: lastIndex, end: currentIndex, len: currentIndex - lastIndex });
		result.push({ start: lastIndex, end: currentIndex - 1, len: currentIndex - lastIndex });
		atWhiteSpace = true;
		@@ -141,3 +141,3 @@ }
		if (!atWhiteSpace) {
		result.push({ start: lastIndex, end: currentIndex, len: currentIndex - lastIndex });
		result.push({ start: lastIndex, end: currentIndex - 1, len: currentIndex - lastIndex });
		}
		@@ -164,3 +164,4 @@ return result;
		start: 0,
		end: str1len,
		end: str1len - 1,
		len: str1len,
		levenshtein: this.getSimilarity(str1, str2),
		@@ -176,2 +177,3 @@ };
		end: 0,
		len: 0,
		levenshtein: undefined,
		@@ -182,3 +184,3 @@ accuracy: 0,
		for (let j = i; j < wordPositionsLen; j += 1) {
		const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end);
		const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end + 1);
		const levenshtein = this.getSimilarity(str3, str2);
		@@ -189,2 +191,3 @@ if (best.levenshtein === undefined \|\| levenshtein < best.levenshtein) {
		best.end = wordPositions[j].end;
		best.len = (best.end - best.start) + 1;
		}
		@@ -198,2 +201,107 @@ }
		/**
		* Given two strings, search all the occurences of the second inside the first,
		* where the accuracy is at least as good as the threshold.
		* @param {String} str1 First string.
		* @param {String} str2 Second string.
		* @param {Object[]} words1 Array of positions of the words of the first string.
		* If not provided this will be built.
		* @returns {Object[]} List of occurences.
		*/
		getBestSubstringList(str1, str2, words1, threshold = 1) {
		const str1len = str1.length;
		const str2len = str2.length;
		const result = [];
		if (str1len <= str2len) {
		const levenshtein = this.getSimilarity(str1, str2);
		const accuracy = (str2len - levenshtein) / str2len;
		if (accuracy >= threshold) {
		result.push({
		start: 0,
		end: str1len - 1,
		len: str1len,
		levenshtein,
		accuracy,
		});
		}
		return result;
		}
		const wordPositions = words1 \|\| this.getWordPositions(str1);
		const wordPositionsLen = wordPositions.length;
		for (let i = 0; i < wordPositionsLen; i += 1) {
		for (let j = i; j < wordPositionsLen; j += 1) {
		const str3 = str1.substring(wordPositions[i].start, wordPositions[j].end + 1);
		const levenshtein = this.getSimilarity(str3, str2);
		const accuracy = (str2len - levenshtein) / str2len;
		if (accuracy >= threshold) {
		result.push({
		start: wordPositions[i].start,
		end: wordPositions[j].end,
		len: (wordPositions[j].end - wordPositions[i].start) + 1,
		levenshtein,
		accuracy,
		});
		}
		}
		}
		return result;
		}

		reduceEdges(edges) {
		for (let i = 0, l = edges.length; i < l; i += 1) {
		const edge = edges[i];
		if (!edge.discarded) {
		for (let j = i + 1; j < l; j += 1) {
		const other = edges[j];
		if (!other.discarded) {
		if (other.start <= edge.end && other.end >= edge.start) {
		if (other.accuracy < edge.accuracy) {
		other.discarded = true;
		} else if (other.accuracy > edge.accuracy) {
		edge.discarded = true;
		} else if (other.len <= edge.len) {
		other.discarded = true;
		} else {
		edge.discarded = true;
		}
		}
		}
		}
		}
		}
		const result = [];
		for (let i = 0, l = edges.length; i < l; i += 1) {
		if (!edges[i].discarded) {
		result.push(edges[i]);
		}
		}
		return result;
		}

		getEdgesFromEntity(str, entity, language, entityName, threshold = 1, srcWordPositions) {
		const wordPositions = srcWordPositions \|\| this.getWordPositions(str);
		const locale = entity.getLocaleRules ? entity.getLocaleRules(language) : entity[language];
		const result = [];
		if (!locale) {
		return result;
		}
		const optionKeys = Object.keys(locale);
		for (let i = 0, li = optionKeys.length; i < li; i += 1) {
		const optionName = optionKeys[i];
		const texts = locale[optionName];
		for (let j = 0, lj = texts.length; j < lj; j += 1) {
		const current = this.getBestSubstringList(str, texts[j], wordPositions, threshold);
		for (let k = 0, lk = current.length; k < lk; k += 1) {
		const item = current[k];
		item.option = optionName;
		item.sourceText = texts[j];
		item.entity = entityName \|\| entity.name;
		item.utteranceText = str.substring(item.start, item.end + 1);
		result.push(item);
		}
		}
		}
		return this.reduceEdges(result);
		}

		/**
		* Given an utterance and an array of entities with options, search the
		@@ -206,32 +314,24 @@ * best option for each entity and return the results.
		*/
		getBestEntity(str, entities, locale, whitelist) {
		getEdgesFromEntities(str, entities, language, whitelist, threshold = 1) {
		const result = [];
		const wordPositions = this.getWordPositions(str);
		const entityKeys = Object.keys(entities);
		for (let i = 0; i < entityKeys.length; i += 1) {
		const entity = entities[entityKeys[i]];
		if (!whitelist \|\| whitelist.indexOf(entity.name) > -1) {
		let best;
		for (let j = 0; j < entity.options.length; j += 1) {
		const option = entity.options[j];
		if (option.texts[locale]) {
		const texts = option.texts[locale];
		for (let k = 0; k < texts.length; k += 1) {
		const current = this.getBestSubstring(str, texts[k], wordPositions);
		if (best === undefined \|\| current.levenshtein < best.levenshtein) {
		best = current;
		best.option = option.name;
		best.sourceText = texts[k];
		best.entity = entity.name;
		best.utteranceText = str.substring(best.start, best.end);
		}
		}
		}
		}
		if (best) {
		result.push(best);
		}
		for (let i = 0, l = entityKeys.length; i < l; i += 1) {
		const entityName = entityKeys[i];
		if (!whitelist \|\| whitelist.indexOf(entityName) !== -1) {
		const edges = this.getEdgesFromEntity(
		str,
		entities[entityName],
		language,
		entityName,
		threshold,
		wordPositions,
		);
		edges.forEach((srcEdge) => {
		const edge = srcEdge;
		result.push(edge);
		});
		}
		}
		return result;
		return this.reduceEdges(result);
		}
		@@ -238,0 +338,0 @@ }

3

lib/xtables/xdoc.js

		@@ -68,2 +68,5 @@ /*
		const row = block[index];
		if (!row) {
		return true;
		}
		for (let i = 0; i < row.length; i += 1) {
		@@ -70,0 +73,0 @@ if (row[i]) {

7

package.json

		{
		"name": "node-nlp",
		"version": "1.2.3",
		"version": "1.2.4",
		"description": "Library for NLU (Natural Language Understanding) done in Node.js",
		@@ -40,2 +40,3 @@ "main": "lib/index.js",
		"dependencies": {
		"@microsoft/recognizers-text-suite": "^1.0.1",
		"escodegen": "^1.10.0",
		@@ -59,3 +60,5 @@ "esprima": "^4.0.0",
		"logistic regression",
		"Natural"
		"Natural",
		"entity extraction",
		"named entity recognition"
		],
		@@ -62,0 +65,0 @@ "config": {

9

test/nlp/nlp-excel-reader.test.js

		@@ -51,9 +51,4 @@ /*
		expect(food.type).toEqual('enum');
		expect(hero.options[0].name).toEqual('spiderman');
		expect(hero.options[1].name).toEqual('ironman');
		expect(hero.options[2].name).toEqual('hulk');
		expect(hero.options[3].name).toEqual('thor');
		expect(food.options[0].name).toEqual('burguer');
		expect(food.options[1].name).toEqual('pizza');
		expect(food.options[2].name).toEqual('pasta');
		expect(hero.locales.en).toBeDefined();
		expect(hero.locales.es).toBeDefined();
		});
		@@ -60,0 +55,0 @@ test('It should create the classifiers for the languages', () => {

30

test/nlp/nlp-manager.test.js

		@@ -147,4 +147,4 @@ /*
		manager.removeNamedEntityText('hero', 'iron man', 'en', 'iron-man');
		const ironman = manager.nerManager.getNamedEntityOption('hero', 'iron man');
		expect(ironman.texts.en).toHaveLength(1);
		const ironman = manager.nerManager.getNamedEntity('hero', false);
		expect(ironman.locales.en['iron man']).toEqual(['iron man']);
		});
		@@ -361,3 +361,3 @@
		test('Should search for entities', () => {
		const manager = new NlpManager();
		const manager = new NlpManager({ ner: { builtins: [] } });
		manager.addLanguage(['en']);
		@@ -382,3 +382,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);
		test('Should search for entities if the language is specified', () => {
		const manager = new NlpManager();
		const manager = new NlpManager({ ner: { builtins: [] } });
		manager.addLanguage(['en']);
		@@ -535,3 +535,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);
		test('Should allow to save, load and all should be working', () => {
		let manager = new NlpManager();
		let manager = new NlpManager({ ner: { builtins: [] } });
		manager.addLanguage(['en']);
		@@ -544,3 +544,3 @@ manager.addNamedEntityText('hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man']);
		manager.addNamedEntityText('food', 'pasta', ['en'], ['Pasta', 'spaghetti']);
		manager.addRegexEntity('mail', /\b(\w[-._\w]\w@\w[-._\w]\w\.\w{2,3})\b/ig);
		manager.addRegexEntity('mail', 'en', /\b(\w[-._\w]\w@\w[-._\w]\w\.\w{2,3})\b/ig);
		manager.addDocument('en', 'I saw %hero% eating %food%', 'sawhero');
		@@ -591,10 +591,14 @@ manager.addDocument('en', 'I have seen %hero%, he was eating %food%', 'sawhero');
		expect(food.type).toEqual('enum');
		expect(hero.options[0].name).toEqual('spiderman');
		expect(hero.options[1].name).toEqual('ironman');
		expect(hero.options[2].name).toEqual('hulk');
		expect(hero.options[3].name).toEqual('thor');
		expect(food.options[0].name).toEqual('burguer');
		expect(food.options[1].name).toEqual('pizza');
		expect(food.options[2].name).toEqual('pasta');
		expect(food.locales.en).toEqual({
		burguer: ['burguer', 'hamburguer'],
		pasta: ['pasta', 'spaghetti'],
		pizza: ['pizza'],
		});
		expect(food.locales.es).toEqual({
		burguer: ['hamburguesa'],
		pasta: ['pasta', 'spaghetti'],
		pizza: ['pizza'],
		});
		});

		test('It should create the classifiers for the languages', () => {
		@@ -601,0 +605,0 @@ const manager = new NlpManager();

32

test/nlp/nlp-util.test.js

		@@ -53,3 +53,3 @@ /*
		expect(NlpUtil.getStemmer('nl')).toBe(Natural.PorterStemmerNl); // Dutch
		expect(NlpUtil.getStemmer('id')).toBe(Natural.PorterStemmerId); // Indonesian
		expect(NlpUtil.getStemmer('id')).toBe(Natural.StemmerId); // Indonesian
		expect(NlpUtil.getStemmer('ja')).toBeInstanceOf(Natural.StemmerJa); // Japanese
		@@ -98,3 +98,3 @@ expect(NlpUtil.getStemmer('da').constructor.name).toEqual('DanishStemmer'); // Danish
		expect(NlpUtil.getStemmer('nl').constructor.name).toEqual('DutchStemmer'); // Dutch
		expect(NlpUtil.getStemmer('id')).toBe(Natural.PorterStemmerId); // Indonesian
		expect(NlpUtil.getStemmer('id')).toBe(Natural.StemmerId); // Indonesian
		expect(NlpUtil.getStemmer('ja')).toBeInstanceOf(Natural.StemmerJa); // Japanese
		@@ -157,2 +157,30 @@ expect(NlpUtil.getStemmer('da').constructor.name).toEqual('DanishStemmer'); // Danish
		});

		describe('Get culture', () => {
		test('Should return correct culture for the locale', () => {
		expect(NlpUtil.getCulture('en')).toEqual('en-us'); // english
		expect(NlpUtil.getCulture('fa')).toEqual('fa-ir'); // farsi
		expect(NlpUtil.getCulture('fr')).toEqual('fr-fr'); // french
		expect(NlpUtil.getCulture('ru')).toEqual('ru-ru'); // russian
		expect(NlpUtil.getCulture('es')).toEqual('es-es'); // spanish
		expect(NlpUtil.getCulture('it')).toEqual('it-it'); // italian
		expect(NlpUtil.getCulture('nl')).toEqual('nl-nl'); // dutch
		expect(NlpUtil.getCulture('no')).toEqual('no-no'); // norwegian
		expect(NlpUtil.getCulture('pt')).toEqual('pt-br'); // portuguese
		expect(NlpUtil.getCulture('pl')).toEqual('pl-pl'); // polish
		expect(NlpUtil.getCulture('sv')).toEqual('sv-se'); // swedish
		expect(NlpUtil.getCulture('id')).toEqual('id-id'); // indonesian
		expect(NlpUtil.getCulture('ja')).toEqual('ja-jp'); // japanese
		expect(NlpUtil.getCulture('da')).toEqual('da-dk'); // danish
		expect(NlpUtil.getCulture('fi')).toEqual('fi-fi'); // finnish
		expect(NlpUtil.getCulture('de')).toEqual('de-de'); // german
		expect(NlpUtil.getCulture('hu')).toEqual('hu-hu'); // hungarian
		expect(NlpUtil.getCulture('ro')).toEqual('ro-ro'); // romanian
		expect(NlpUtil.getCulture('tr')).toEqual('tr-tr'); // turkish
		expect(NlpUtil.getCulture('zh')).toEqual('zh-cn'); // Chinese
		});
		test('If the locale is not recognized return default english', () => {
		expect(NlpUtil.getCulture('aa')).toEqual('en-us'); // english
		});
		});
		});

598

test/util/similar_search.test.js

		@@ -115,3 +115,3 @@ /*
		expect(result).toHaveLength(1);
		expect(result[0]).toEqual({ start: 0, end: 5, len: 5 });
		expect(result[0]).toEqual({ start: 0, end: 4, len: 5 });
		});
		@@ -124,3 +124,3 @@ test('Should get position of only one word even if surrounded by non alphanumeric chars', () => {
		expect(result).toHaveLength(1);
		expect(result[0]).toEqual({ start: 5, end: 10, len: 5 });
		expect(result[0]).toEqual({ start: 5, end: 9, len: 5 });
		});
		@@ -132,5 +132,5 @@ test('Should get position of several words', () => {
		expect(result).toHaveLength(3);
		expect(result[0]).toEqual({ start: 2, end: 7, len: 5 });
		expect(result[1]).toEqual({ start: 11, end: 19, len: 8 });
		expect(result[2]).toEqual({ start: 23, end: 32, len: 9 });
		expect(result[0]).toEqual({ start: 2, end: 6, len: 5 });
		expect(result[1]).toEqual({ start: 11, end: 18, len: 8 });
		expect(result[2]).toEqual({ start: 23, end: 31, len: 9 });
		});
		@@ -142,4 +142,4 @@ test('Should get position of words on long texts', () => {
		expect(result).toHaveLength(26);
		expect(result[0]).toEqual({ start: 0, end: 5, len: 5 });
		expect(result[25]).toEqual({ start: 188, end: 194, len: 6 });
		expect(result[0]).toEqual({ start: 0, end: 4, len: 5 });
		expect(result[25]).toEqual({ start: 188, end: 193, len: 6 });
		});
		@@ -157,3 +157,4 @@ });
		start: 6,
		end: 24,
		end: 23,
		len: 18,
		levenshtein: 0,
		@@ -171,3 +172,4 @@ accuracy: 1,
		start: 6,
		end: 30,
		end: 29,
		len: 24,
		levenshtein: 2,
		@@ -185,3 +187,4 @@ accuracy: 0.9166666666666666,
		start: 0,
		end: 16,
		end: 15,
		len: 16,
		levenshtein: 8,
		@@ -192,60 +195,477 @@ accuracy: 0.6666666666666666,
		});

		describe('Get best entity', () => {
		test('', () => {
		describe('Reduce edges', () => {
		test('It should do nothing if edges are empty', () => {
		const similar = new SimilarSearch();
		const edges = [];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([]);
		});
		test('If two edges collide, only the best accuracy remains', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 0.9,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		]);
		});
		test('Edges can overlap in the left', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 0,
		end: 9,
		len: 9,
		accuracy: 0.9,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		]);
		});
		test('Edges can overlap in the right', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 2,
		end: 11,
		len: 9,
		accuracy: 0.9,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		]);
		});
		test('One edge can contain other', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 0.9,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		]);
		});
		test('If both have same accuracy, return largest one', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		]);
		});
		test('If both have same accuracy, return largest one even if goes first', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		]);
		});
		test('If there are more than 2 edges overlaped, decide 1', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 9,
		end: 18,
		len: 9,
		accuracy: 1,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		]);
		});
		test('Should respect non overlaped edges', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 12,
		end: 20,
		len: 8,
		accuracy: 1,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 12,
		end: 20,
		len: 8,
		accuracy: 1,
		},
		]);
		});
		test('When there are different groups of overlaped edges, return one per group', () => {
		const similar = new SimilarSearch();
		const edges = [
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 12,
		end: 20,
		len: 8,
		accuracy: 1,
		},
		{
		start: 1,
		end: 10,
		len: 9,
		accuracy: 1,
		},
		{
		start: 12,
		end: 21,
		len: 9,
		accuracy: 1,
		},
		];
		const result = similar.reduceEdges(edges);
		expect(result).toEqual([
		{
		start: 0,
		end: 11,
		len: 11,
		accuracy: 1,
		},
		{
		start: 12,
		end: 21,
		len: 9,
		accuracy: 1,
		},
		]);
		});
		});
		describe('Get best substring list', () => {
		test('If not threshold is defined, then search for exact occurences', () => {
		const similar = new SimilarSearch();
		const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
		const text2 = 'interdum ultricies';
		const result = similar.getBestSubstringList(text1, text2);
		expect(result).toHaveLength(1);
		expect(result[0]).toEqual({
		start: 6,
		end: 23,
		len: 18,
		levenshtein: 0,
		accuracy: 1,
		});
		});
		test('If there are more than 1 occurence search exact, should return all', () => {
		const similar = new SimilarSearch();
		const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
		const text2 = 'interdum';
		const result = similar.getBestSubstringList(text1, text2);
		expect(result).toHaveLength(2);
		expect(result[0]).toEqual({
		start: 6,
		end: 13,
		len: 8,
		levenshtein: 0,
		accuracy: 1,
		});
		expect(result[1]).toEqual({
		start: 73,
		end: 80,
		len: 8,
		levenshtein: 0,
		accuracy: 1,
		});
		});
		test('Should get more than 1 occurence when searching with threshold', () => {
		const similar = new SimilarSearch();
		const text1 = 'Morbi interdum ultricies neque varius condimentum. Donec volutpat turpis interdum metus ultricies vulputate. Duis ultricies rhoncus sapien, sit amet fermentum risus imperdiet vitae. Ut et lectus';
		const text2 = 'internum';
		const result = similar.getBestSubstringList(text1, text2, undefined, 0.8);
		expect(result).toHaveLength(2);
		expect(result[0]).toEqual({
		start: 6,
		end: 13,
		len: 8,
		levenshtein: 1,
		accuracy: 0.875,
		});
		expect(result[1]).toEqual({
		start: 73,
		end: 80,
		len: 8,
		levenshtein: 1,
		accuracy: 0.875,
		});
		});
		test('Should return 0 to length element in array when the substring is longer than the string and accuracy is at least threshold', () => {
		const similar = new SimilarSearch();
		const text1 = 'dumaultriciesbne';
		const text2 = 'interdumaultriciesbneque';
		const result = similar.getBestSubstringList(text1, text2, undefined, 0.6);
		expect(result).toBeDefined();
		expect(result).toEqual([{
		start: 0,
		end: 15,
		len: 16,
		levenshtein: 8,
		accuracy: 0.6666666666666666,
		}]);
		});
		// Same inputs as the 0.6-threshold case, but a 0.7 threshold is expected to
		// reject the only candidate and yield an empty list.
		test('Should return empty array when the substring is longer than the string and accuracy is lower than threshold', () => {
		  const searcher = new SimilarSearch();
		  const shortText = 'dumaultriciesbne';
		  const longPattern = 'interdumaultriciesbneque';
		  const matches = searcher.getBestSubstringList(shortText, longPattern, undefined, 0.7);
		  expect(matches).toBeDefined();
		  expect(matches).toEqual([]);
		});
		});
		// Tests for SimilarSearch#getEdgesFromEntity: locating the options of a single
		// enum-style entity inside an utterance. Expected `end` values are the index
		// of the last matched character (e.g. "spederman" spans 6..14).
		describe('Get edges from entity', () => {
		  /**
		   * Builds a fresh copy of the entity fixture used by every test in this
		   * suite: English texts keyed by option name. A new object is returned on
		   * each call so tests never share state.
		   * @returns {{en: Object<string, string[]>}} Entity with only an `en` locale.
		   */
		  const buildHeroEntity = () => ({
		    en: {
		      spiderman: ['Spiderman', 'Spider-man'],
		      'iron man': ['iron man', 'iron-man'],
		      thor: ['Thor'],
		    },
		  });

		  test('It should get the edges from an utterance', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spederman eating spaghetti in the city';
		    const entity = buildHeroEntity();
		    const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en', 'entity', 0.8);
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(1);
		    expect(bestEntity[0].start).toEqual(6);
		    expect(bestEntity[0].end).toEqual(14);
		    expect(bestEntity[0].levenshtein).toEqual(1);
		    expect(bestEntity[0].accuracy).toEqual(0.8888888888888888);
		    expect(bestEntity[0].option).toEqual('spiderman');
		    expect(bestEntity[0].sourceText).toEqual('Spiderman');
		    expect(bestEntity[0].utteranceText).toEqual('spederman');
		  });

		  test('It no threshold is provided, then is 1', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    // "iron-men" is one edit away from "iron-man", so only the exact
		    // "spiderman" hit is expected when no threshold argument is passed.
		    const text1 = 'I saw spiderman eating iron-men in the city';
		    const entity = buildHeroEntity();
		    const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en');
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(1);
		    expect(bestEntity[0].start).toEqual(6);
		    expect(bestEntity[0].end).toEqual(14);
		    expect(bestEntity[0].levenshtein).toEqual(0);
		    expect(bestEntity[0].accuracy).toEqual(1);
		    expect(bestEntity[0].option).toEqual('spiderman');
		    expect(bestEntity[0].sourceText).toEqual('Spiderman');
		    expect(bestEntity[0].utteranceText).toEqual('spiderman');
		  });

		  test('It can return several occurances of options', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spiderman eating iron-men in the city spederman';
		    const entity = buildHeroEntity();
		    const bestEntity = similar.getEdgesFromEntity(text1, entity, 'en', 'entity', 0.8);
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(3);
		    // Both "spiderman" hits are expected before the "iron man" hit, even
		    // though the latter appears earlier in the utterance.
		    expect(bestEntity[0].start).toEqual(6);
		    expect(bestEntity[0].end).toEqual(14);
		    expect(bestEntity[0].levenshtein).toEqual(0);
		    expect(bestEntity[0].accuracy).toEqual(1);
		    expect(bestEntity[0].option).toEqual('spiderman');
		    expect(bestEntity[0].sourceText).toEqual('Spiderman');
		    expect(bestEntity[0].utteranceText).toEqual('spiderman');
		    expect(bestEntity[1].start).toEqual(44);
		    expect(bestEntity[1].end).toEqual(52);
		    expect(bestEntity[1].levenshtein).toEqual(1);
		    expect(bestEntity[1].accuracy).toEqual(0.8888888888888888);
		    expect(bestEntity[1].option).toEqual('spiderman');
		    expect(bestEntity[1].sourceText).toEqual('Spiderman');
		    expect(bestEntity[1].utteranceText).toEqual('spederman');
		    expect(bestEntity[2].start).toEqual(23);
		    expect(bestEntity[2].end).toEqual(30);
		    expect(bestEntity[2].levenshtein).toEqual(1);
		    expect(bestEntity[2].accuracy).toEqual(0.875);
		    expect(bestEntity[2].option).toEqual('iron man');
		    expect(bestEntity[2].sourceText).toEqual('iron-man');
		    expect(bestEntity[2].utteranceText).toEqual('iron-men');
		  });

		  test('If locale does not exists return empty array', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spiderman eating iron-men in the city spederman';
		    const entity = buildHeroEntity();
		    // Pass the entity name before the threshold, matching the argument order
		    // of the other calls in this suite, so 0.8 is really used as the threshold.
		    const bestEntity = similar.getEdgesFromEntity(text1, entity, 'es', 'entity', 0.8);
		    expect(bestEntity).toEqual([]);
		  });
		});
		// Tests for SimilarSearch#getEdgesFromEntities: locating options of several
		// entities inside one utterance. Expected `end` values are the index of the
		// last matched character (e.g. "spaghetti" spans 23..31).
		describe('Get edges from entities', () => {
		  test('It should get the edges from an utterance', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spederman eating spaghetti in the city';
		    // Each entity carries both an `options` list and a per-locale map of
		    // option name -> texts.
		    const entities = {
		      hero: {
		        name: 'hero',
		        options: [
		          {
		            name: 'spiderman',
		            texts: {
		              en: ['Spiderman', 'Spider-man'],
		            },
		          },
		          {
		            name: 'iron man',
		            texts: {
		              en: ['iron man', 'iron-man'],
		            },
		          },
		          {
		            name: 'thor',
		            texts: {
		              en: ['Thor'],
		            },
		          },
		        ],
		        en: {
		          spiderman: ['Spiderman', 'Spider-man'],
		          'iron man': ['iron man', 'iron-man'],
		          thor: ['Thor'],
		        },
		      },
		      food: {
		        name: 'food',
		        options: [
		          {
		            name: 'burguer',
		            texts: {
		              en: ['Burguer', 'Hamburguer'],
		            },
		          },
		          {
		            name: 'pizza',
		            texts: {
		              en: ['pizza'],
		            },
		          },
		          {
		            name: 'pasta',
		            texts: {
		              en: ['Pasta', 'spaghetti'],
		            },
		          },
		        ],
		        en: {
		          burguer: ['Burguer', 'Hamburguer'],
		          pizza: ['pizza'],
		          pasta: ['Pasta', 'spaguetti', 'spaghetti'],
		        },
		      },
		    };
		    // No whitelist (undefined) and a 0.8 threshold: the misspelled hero and
		    // the exact food match are both expected.
		    const bestEntity = similar.getEdgesFromEntities(text1, entities, 'en', undefined, 0.8);
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(2);
		    expect(bestEntity[0].start).toEqual(6);
		    expect(bestEntity[0].end).toEqual(14);
		    expect(bestEntity[0].levenshtein).toEqual(1);
		    expect(bestEntity[0].accuracy).toEqual(0.8888888888888888);
		    expect(bestEntity[1].start).toEqual(23);
		    expect(bestEntity[1].end).toEqual(31);
		    expect(bestEntity[1].levenshtein).toEqual(0);
		    expect(bestEntity[1].accuracy).toEqual(1);
		  });

		  test('It no threshold is provided then is 1', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spederman eating spaghetti in the city';
		    const entities = {
		      hero: {
		        en: {
		          spiderman: ['Spiderman', 'Spider-man'],
		          'iron man': ['iron man', 'iron-man'],
		          thor: ['Thor'],
		        },
		      },
		      food: {
		        en: {
		          burguer: ['Burguer', 'Hamburguer'],
		          pizza: ['pizza'],
		          pasta: ['Pasta', 'spaguetti', 'spaghetti'],
		        },
		      },
		    };
		    // Without a threshold argument only the exact "spaghetti" match is
		    // expected; the misspelled "spederman" must not be reported.
		    const bestEntity = similar.getEdgesFromEntities(text1, entities, 'en');
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(1);
		    expect(bestEntity[0].start).toEqual(23);
		    expect(bestEntity[0].end).toEqual(31);
		    expect(bestEntity[0].levenshtein).toEqual(0);
		    expect(bestEntity[0].accuracy).toEqual(1);
		    expect(bestEntity[0].option).toEqual('pasta');
		    expect(bestEntity[0].sourceText).toEqual('spaghetti');
		    expect(bestEntity[0].entity).toEqual('food');
		    expect(bestEntity[0].utteranceText).toEqual('spaghetti');
		  });

		  test('If whitelist of entities is provided, check only those entities', () => {
		    const similar = new SimilarSearch({ normalize: true });
		    const text1 = 'I saw spederman eating spaghetti in the city';
		    const entities = {
		      hero: {
		        en: {
		          spiderman: ['Spiderman', 'Spider-man'],
		          'iron man': ['iron man', 'iron-man'],
		          thor: ['Thor'],
		        },
		      },
		      food: {
		        en: {
		          burguer: ['Burguer', 'Hamburguer'],
		          pizza: ['pizza'],
		          pasta: ['Pasta', 'spaguetti', 'spaghetti'],
		        },
		      },
		    };
		    // Only 'hero' is whitelisted, so the exact "spaghetti" match from the
		    // 'food' entity must be absent from the result.
		    const bestEntity = similar.getEdgesFromEntities(text1, entities, 'en', ['hero'], 0.8);
		    expect(bestEntity).toBeDefined();
		    expect(bestEntity).toHaveLength(1);
		    expect(bestEntity[0].start).toEqual(6);
		    expect(bestEntity[0].end).toEqual(14);
		    expect(bestEntity[0].levenshtein).toEqual(1);
		    expect(bestEntity[0].accuracy).toEqual(0.8888888888888888);
		    expect(bestEntity[0].option).toEqual('spiderman');
		    expect(bestEntity[0].sourceText).toEqual('Spiderman');
		    expect(bestEntity[0].entity).toEqual('hero');
		    expect(bestEntity[0].utteranceText).toEqual('spederman');
		  });
		});
		});

coverage/lcov-report/lib/nlp/named-entity.js.html

coverage/lcov-report/lib/nlp/ner-manager.js.html

lib/nlp/named-entity.js

lib/nlp/ner-manager.js

model.nlp

test/nlp/named-entity.test.js

test/nlp/ner-manager.test.js

.eslintignore