@nlpjs/ner
Advanced tools
Comparing version 4.0.0-rc.4 to 4.0.0-rc.5
{ | ||
"name": "@nlpjs/ner", | ||
"version": "4.0.0-rc.4", | ||
"version": "4.0.0-rc.5", | ||
"description": "Named Entity Recognition", | ||
@@ -28,7 +28,7 @@ "author": { | ||
"dependencies": { | ||
"@nlpjs/core": "^4.0.0-rc.4", | ||
"@nlpjs/core": "^4.0.0-rc.5", | ||
"@nlpjs/language": "^4.0.0-rc.4", | ||
"@nlpjs/similarity": "^4.0.0-rc.4" | ||
}, | ||
"gitHead": "390f1aec656eb1aef3334a35835e53b0f56f2bbb" | ||
"gitHead": "73426d8b37d556bbf112c03e0b4b123819c427a7" | ||
} |
@@ -150,2 +150,3 @@ /* | ||
} | ||
const maxLevenshtein = str2len * (1 - threshold); | ||
const wordPositions = words1 || this.getWordPositions(str1); | ||
@@ -170,2 +171,8 @@ const wordPositionsLen = wordPositions.length; | ||
} | ||
if ( | ||
str3.length - wordPositions[0].len >= | ||
str2.length + maxLevenshtein | ||
) { | ||
break; | ||
} | ||
} | ||
@@ -184,29 +191,96 @@ } | ||
extractFromRule(text, rule, words, threshold) { | ||
const edges = []; | ||
/**
 * Normalizes a string so that it can be used as a lookup key.
 *
 * The string is decomposed to Unicode NFD, every code unit in the range
 * U+0300 to U+036F (combining diacritical marks) is removed, and the result
 * is lowercased.
 *
 * @param {string} str - Text to normalize.
 * @returns {string} The text without combining marks, in lower case.
 */
normalize(str) { | ||
return str | ||
.normalize('NFD') | ||
.replace(/[\u0300-\u036f]/g, '') | ||
.toLowerCase(); | ||
} | ||
/**
 * Builds a lookup table for a rule and stores it on `rule.dict`.
 *
 * Each text of each entry in `rule.rules` is normalized with
 * `this.normalize` and used as a key; the value is the list of entries of
 * `rule.rules` that contain that text. An entry is appended once per
 * matching text, so an entry listing the same normalized text twice appears
 * twice in the list.
 *
 * @param {object} rule - Rule whose `rules[].texts` are indexed. Mutated:
 *   `rule.dict` is overwritten with the new table.
 * @returns {void}
 */
buildRuleDict(rule) { | ||
const dict = {}; | ||
for (let i = 0; i < rule.rules.length; i += 1) { | ||
const current = rule.rules[i]; | ||
for (let j = 0; j < current.texts.length; j += 1) { | ||
// NOTE(review): the next five lines reference `text`, `words` and
// `threshold`, none of which exist in this method. They look like removed
// lines of the previous extractFromRule body that the diff rendering
// interleaved here — confirm against the published source.
const newEdges = this.getBestSubstringList( | ||
text, | ||
current.texts[j], | ||
words, | ||
current.threshold || threshold | ||
const key = this.normalize(current.texts[j]); | ||
// NOTE(review): `dict` is a plain object literal, so a key equal to an
// inherited property name (e.g. "constructor") is already truthy here and
// the `push` below would then be called on a non-array value.
if (!dict[key]) { | ||
dict[key] = []; | ||
} | ||
dict[key].push(current); | ||
} | ||
} | ||
rule.dict = dict; | ||
} | ||
/**
 * Finds exact matches of a rule's texts inside an utterance.
 *
 * The utterance is normalized with `this.normalize`, then every contiguous
 * run of words (from word `i` through word `j`) is cut out of the
 * normalized text and looked up in `rule.dict`. One edge is produced for
 * every rule entry registered under that key.
 *
 * @param {string} srcText - Original utterance.
 * @param {Array<{start: number, end: number}>} [words] - Word positions
 *   (inclusive `start`/`end` offsets). When falsy they are computed from the
 *   normalized text with `this.getWordPositions`.
 * @param {object} rule - Rule providing `name`, `type` and `dict`;
 *   `rule.dict` is read without a guard, so it must already exist.
 * @returns {Array<object>} Edges with `accuracy: 1`, `levenshtein: 0`,
 *   `start`, `end`, `len`, `entity`, `type`, `option`, `sourceText` and
 *   `utteranceText`.
 */
getBestExact(srcText, words, rule) { | ||
const text = this.normalize(srcText); | ||
const wordPositions = words || this.getWordPositions(text); | ||
const wordPositionsLen = wordPositions.length; | ||
const result = []; | ||
// Enumerate every word span i..j; the number of lookups grows with the
// square of the word count.
for (let i = 0; i < wordPositionsLen; i += 1) { | ||
for (let j = i; j < wordPositionsLen; j += 1) { | ||
// `end` is inclusive, hence the + 1 for substring's exclusive bound.
const str = text.substring( | ||
wordPositions[i].start, | ||
wordPositions[j].end + 1 | ||
); | ||
// NOTE(review): the following twelve lines reference `newEdges` and
// `current`, which are not defined in this method. They look like removed
// lines of the previous extractFromRule body that the diff rendering
// interleaved here — confirm against the published source.
for (let k = 0; k < newEdges.length; k += 1) { | ||
edges.push({ | ||
...newEdges[k], | ||
entity: rule.name, | ||
type: rule.type, | ||
option: rule.rules[i].option, | ||
sourceText: current.texts[j], | ||
utteranceText: text.substring( | ||
newEdges[k].start, | ||
newEdges[k].end + 1 | ||
), | ||
}); | ||
// NOTE(review): this is a truthiness check on a plain-object lookup, so an
// utterance span equal to an inherited property name (e.g. "constructor")
// would also pass it.
if (rule.dict[str]) { | ||
const subrule = rule.dict[str]; | ||
for (let k = 0; k < subrule.length; k += 1) { | ||
result.push({ | ||
accuracy: 1, | ||
start: wordPositions[i].start, | ||
end: wordPositions[j].end, | ||
len: wordPositions[j].end - wordPositions[i].start + 1, | ||
levenshtein: 0, | ||
entity: rule.name, | ||
type: rule.type, | ||
option: subrule[k].option, | ||
// Here sourceText is the normalized utterance span (the dictionary key),
// not the rule text as originally written.
sourceText: str, | ||
// The same offsets index both the normalized and the original text; this
// assumes normalization does not shift character positions — TODO confirm
// for input that is already decomposed.
utteranceText: srcText.substring( | ||
wordPositions[i].start, | ||
wordPositions[j].end + 1 | ||
), | ||
}); | ||
} | ||
} | ||
} | ||
} | ||
return result; | ||
} | ||
/**
 * Extracts all edges of one rule from an utterance.
 *
 * With `threshold >= 1` the exact path is used: `rule.dict` is built on
 * first use via `this.buildRuleDict` and the edges come from
 * `this.getBestExact`. Otherwise every text of every rule entry is matched
 * with `this.getBestSubstringList`, and each resulting edge is extended with
 * the entity name, type, option, the rule text and the matched utterance
 * text.
 *
 * @param {string} text - Utterance to search.
 * @param {object} rule - Rule with `name`, `type` and `rules[]` (each entry
 *   having `texts`, `option` and optionally `threshold`).
 * @param {Array<object>} words - Word positions forwarded to the matchers.
 * @param {number} threshold - Default similarity threshold.
 * @returns {Array<object>} The collected edges.
 */
extractFromRule(text, rule, words, threshold) { | ||
const edges = []; | ||
// Only the `threshold` argument selects the exact path; a per-entry
// `threshold` on a rule entry is not consulted in this branch.
if (threshold >= 1) { | ||
// The dictionary is cached on the rule and, in the code visible here, is
// only built while `rule.dict` is falsy.
if (!rule.dict) { | ||
this.buildRuleDict(rule); | ||
} | ||
const newEdges = this.getBestExact(text, words, rule); | ||
for (let i = 0; i < newEdges.length; i += 1) { | ||
edges.push(newEdges[i]); | ||
} | ||
} else { | ||
for (let i = 0; i < rule.rules.length; i += 1) { | ||
const current = rule.rules[i]; | ||
for (let j = 0; j < current.texts.length; j += 1) { | ||
// `||` means a per-entry threshold of 0 also falls back to the default.
const newEdges = this.getBestSubstringList( | ||
text, | ||
current.texts[j], | ||
words, | ||
current.threshold || threshold | ||
); | ||
for (let k = 0; k < newEdges.length; k += 1) { | ||
edges.push({ | ||
...newEdges[k], | ||
entity: rule.name, | ||
type: rule.type, | ||
option: rule.rules[i].option, | ||
sourceText: current.texts[j], | ||
// Edge offsets are inclusive, hence the + 1.
utteranceText: text.substring( | ||
newEdges[k].start, | ||
newEdges[k].end + 1 | ||
), | ||
}); | ||
} | ||
} | ||
} | ||
} | ||
return edges; | ||
@@ -225,3 +299,3 @@ } | ||
wordPositions, | ||
0.8 | ||
input.threshold || 0.8 | ||
); | ||
@@ -228,0 +302,0 @@ for (let j = 0; j < newEdges.length; j += 1) { |
@@ -327,4 +327,5 @@ /* | ||
process(input) { | ||
return this.runPipeline( | ||
async process(srcInput) { | ||
const input = { threshold: this.settings.threshold || 0.8, ...srcInput }; | ||
const result = await this.runPipeline( | ||
input, | ||
@@ -335,2 +336,4 @@ input.locale | ||
); | ||
delete result.threshold; | ||
return result; | ||
} | ||
@@ -337,0 +340,0 @@ |
@@ -38,3 +38,3 @@ /* | ||
) { | ||
other.discarded = true; | ||
// Do nothing! entities have same priority | ||
} else if ( | ||
@@ -41,0 +41,0 @@ (useMaxLength || |
44628
5.79%1288
6.18%Updated