Comparing version 0.1.0 to 0.2.0
{ | ||
"name": "wink-ner", | ||
"version": "0.1.0", | ||
"description": "Named Entity Recognition", | ||
"version": "0.2.0", | ||
"description": "Language agnostic named entity recognizer", | ||
"keywords": [ | ||
@@ -6,0 +6,0 @@ "NER", |
@@ -228,2 +228,42 @@ // wink-ner | ||
// ### addUniWordEntity
/**
 *
 * Registers a single-word entity in `uniWordEntities`, keyed by the first
 * element of `words`. Whatever was stored under that key is replaced by a
 * clone of `entity`; a truthy `wordCounts` found on the replaced entry is
 * carried over to the new one.
 *
 * @param {string[]} words — from entity's text; only `words[ 0 ]` is read.
 * @param {object} entity — to be added.
 * @return {undefined} nothing!
 * @private
 */
var addUniWordEntity = function ( words, entity ) {
  const key = words[ 0 ];
  const existing = uniWordEntities[ key ];
  // Capture the word counts (if any) before the entry gets overwritten.
  const savedCounts = ( existing ) ? existing.wordCounts : undefined;
  // Latest value overrides previous value, if any.
  const fresh = cloneEntity( entity );
  if ( savedCounts ) fresh.wordCounts = savedCounts;
  uniWordEntities[ key ] = fresh;
}; // addUniWordEntity()
// ### addMultiWordEntity
/**
 *
 * Processes a multi-word entity and adds it to `multiWordEntities`.
 *
 * Three things happen:
 * 1. The entry for the first word in `uniWordEntities` is created if missing,
 *    and the number of words in this entity is recorded (once) in that
 *    entry's `wordCounts` array.
 * 2. A clone of `entity` is stored in `multiWordEntities` under the words
 *    joined by a single space.
 * 3. If every word is exactly one character long, the entity is treated as an
 *    acronym and is additionally registered as a uni-word entity under the
 *    concatenated characters (e.g. `[ 'u', 's', 'a' ]` also adds `'usa'`).
 *
 * @param {string} text — property of entity. Retained for interface
 * compatibility; acronym detection inspects `words` directly so that it does
 * not depend on how white space in `text` was collapsed.
 * @param {string[]} words — from entity's text.
 * @param {object} entity — to be added.
 * @return {undefined} nothing!
 * @private
 */
var addMultiWordEntity = function ( text, words, entity ) {
  const firstWord = words[ 0 ];
  // Reuse the existing entry for the first word, or start a prototype-less one.
  const head = uniWordEntities[ firstWord ] || Object.create( null );
  uniWordEntities[ firstWord ] = head;
  head.wordCounts = head.wordCounts || [];
  // Record each distinct word count only once.
  if ( head.wordCounts.indexOf( words.length ) === -1 ) head.wordCounts.push( words.length );
  multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity );
  // An acronym is an entity whose every word is a single character. Checking
  // the words themselves (instead of comparing counts against `text.length`)
  // keeps detection correct even when `text` contains extra white space.
  const isAcronym = words.length > 0 && words.every( ( word ) => word.length === 1 );
  if ( isAcronym ) addUniWordEntity( [ words.join( '' ) ], entity );
}; // addMultiWordEntity()
// ### learn | ||
@@ -243,2 +283,6 @@ /** | ||
* | ||
* Acronyms must be added with a space between each character; for example, USA | ||
* should be added as `'u s a'` — this ensures correct detection of | ||
* `U S A` or `U. S. A.` or `U.S.A.` as `USA`. | ||
* | ||
* @param {object[]} entities — where each element defines an entity via | ||
@@ -270,2 +314,9 @@ * two mandatory properties viz. `text` and `entityType` as described later. | ||
* @example | ||
* var trainingData = [ | ||
* { text: 'manchester united', entityType: 'club' }, | ||
* { text: 'manchester', entityType: 'city' }, | ||
* { text: 'uk', entityType: 'country' } | ||
* ]; | ||
* learn( trainingData ); | ||
* // -> 3 | ||
*/ | ||
@@ -275,22 +326,16 @@ var learn = function ( entities ) { | ||
// declarations in the beginning. | ||
let length = 0; | ||
for ( let i = 0, imax = entities.length; i < imax; i += 1 ) { | ||
const entity = entities[ i ]; | ||
const text = entity.text; | ||
const entityType = entity.text; | ||
let wordCounts; | ||
// Normalize after removing extra white spaces; required for acronyms processing. | ||
const text = normalize( ( entity.text ).trim().replace( /\s+/, ' ' ) ); | ||
const entityType = ( entity.text ).trim().replace( /\s+/, ' ' ); | ||
// Add if `text` and `entityType` are defined. | ||
if ( text && entityType ) { | ||
const words = normalize( text.trim() ).split( /\s+/ ); | ||
const firstWord = words[ 0 ]; | ||
const words = text.split( /\s+/ ); | ||
length += 1; | ||
if ( words.length === 1 ) { | ||
// Process uni-word entity. | ||
// Latest value overrides previous value, if any. | ||
if ( uniWordEntities[ firstWord ] ) wordCounts = uniWordEntities[ firstWord ].wordCounts; | ||
uniWordEntities[ firstWord ] = cloneEntity( entity ); | ||
if ( wordCounts ) uniWordEntities[ firstWord ].wordCounts = wordCounts; | ||
addUniWordEntity( words, entity ); | ||
} else { | ||
// Process multi-word entity. | ||
uniWordEntities[ firstWord ] = uniWordEntities[ firstWord ] || Object.create( null ); | ||
uniWordEntities[ firstWord ].wordCounts = uniWordEntities[ firstWord ].wordCounts || []; | ||
if ( uniWordEntities[ firstWord ].wordCounts.indexOf( words.length ) === -1 ) uniWordEntities[ firstWord ].wordCounts.push( words.length ); | ||
multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity ); | ||
addMultiWordEntity( text, words, entity ); | ||
} | ||
@@ -305,3 +350,3 @@ } | ||
return entities.length; | ||
return length; | ||
}; // learn() | ||
@@ -308,0 +353,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
57502
526