wink-ner - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0

package.json

		{
		"name": "wink-ner",
		"version": "0.1.0",
		"description": "Named Entity Recognition",
		"version": "0.2.0",
		"description": "Language agnostic named entity recognizer",
		"keywords": [
		@@ -6,0 +6,0 @@ "NER",

src/wink-ner.js

		@@ -228,2 +228,42 @@ // wink-ner

		// ### addUniWordEntity
		/**
		*
		* Registers a single-word entity in `uniWordEntities`, keyed by the first
		* element of `words`. A clone of `entity` replaces whatever was stored under
		* that key; if the replaced entry carried a truthy `wordCounts` property, that
		* same value is re-attached to the new entry.
		*
		* @param {string[]} words — from entity's text; only `words[ 0 ]` is used.
		* @param {object} entity — to be cloned and stored.
		* @return {undefined} nothing!
		* @private
		*/
		var addUniWordEntity = function ( words, entity ) {
		  const key = words[ 0 ];
		  const previous = uniWordEntities[ key ];
		  // Remember the existing `wordCounts` (if any) before the entry is overwritten.
		  const carriedCounts = ( previous ) ? previous.wordCounts : undefined;
		  const replacement = cloneEntity( entity );
		  // Latest definition wins over the earlier one.
		  uniWordEntities[ key ] = replacement;
		  if ( carriedCounts ) replacement.wordCounts = carriedCounts;
		}; // addUniWordEntity()

		// ### addMultiWordEntity
		/**
		*
		* Processes a multi-word entity and adds it to `multiWordEntities`.
		*
		* Three things happen:
		* 1. The number of words is recorded (once) in the `wordCounts` array kept on
		*    the `uniWordEntities` entry of the first word; the entry and the array
		*    are created if missing.
		* 2. A clone of `entity` is stored in `multiWordEntities` under the words
		*    joined by a single space.
		* 3. If the text looks like an acronym, the words joined without spaces are
		*    additionally registered as a uni-word entity via `addUniWordEntity`.
		*
		* @param {string} text — property of entity.
		* @param {string[]} words — from entity's text.
		* @param {object} entity — to be added.
		* @return {undefined} nothing!
		* @private
		*/
		var addMultiWordEntity = function ( text, words, entity ) {
		  const firstWord = words[ 0 ];
		  const wordCount = words.length;
		  // Ensure an entry exists for the first word; it may or may not also be
		  // a uni-word entity in its own right.
		  const head = uniWordEntities[ firstWord ] || Object.create( null );
		  uniWordEntities[ firstWord ] = head;
		  head.wordCounts = head.wordCounts || [];
		  // Record each distinct word count only once.
		  if ( head.wordCounts.indexOf( wordCount ) === -1 ) head.wordCounts.push( wordCount );
		  multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity );
		  // The expression is a simple arithmetic formulation to detect acronyms:
		  // `n` single-character words separated by `n - 1` single spaces give a text
		  // of length `2n - 1`, i.e. `n === ( text.length + 1 ) / 2`. This presumes
		  // that `text` has exactly one space between words — confirm at call sites.
		  if ( wordCount === ( ( text.length + 1 ) / 2 ) ) addUniWordEntity( [ words.join( '' ) ], entity );
		}; // addMultiWordEntity()

		// ### learn
		@@ -243,2 +283,6 @@ /**
		*
		* Acronyms must be added with a space between each character; for example, USA
		* should be added as `'u s a'` — this ensures correct detection of
		* `U S A` or `U. S. A.` or `U.S.A.` as `USA`.
		*
		* @param {object[]} entities — where each element defines an entity via
		@@ -270,2 +314,9 @@ * two mandatory properties viz. `text` and `entityType` as described later.
		* @example
		* var trainingData = [
		* { text: 'manchester united', entityType: 'club' },
		* { text: 'manchester', entityType: 'city' },
		* { text: 'uk', entityType: 'country' }
		* ];
		* learn( trainingData );
		* // -> 3
		*/
		@@ -275,22 +326,16 @@ var learn = function ( entities ) {
		// declarations in the beginning.
		let length = 0;
		for ( let i = 0, imax = entities.length; i < imax; i += 1 ) {
		const entity = entities[ i ];
		const text = entity.text;
		const entityType = entity.text;
		let wordCounts;
		// Normalize after removing extra white spaces; required for acronyms processing.
		const text = normalize( ( entity.text ).trim().replace( /\s+/, ' ' ) );
		const entityType = ( entity.text ).trim().replace( /\s+/, ' ' );
		// Add if `text` and `entityType` are defined.
		if ( text && entityType ) {
		const words = normalize( text.trim() ).split( /\s+/ );
		const firstWord = words[ 0 ];
		const words = text.split( /\s+/ );
		length += 1;
		if ( words.length === 1 ) {
		// Process uni-word entity.
		// Latest value overrides previous value, if any.
		if ( uniWordEntities[ firstWord ] ) wordCounts = uniWordEntities[ firstWord ].wordCounts;
		uniWordEntities[ firstWord ] = cloneEntity( entity );
		if ( wordCounts ) uniWordEntities[ firstWord ].wordCounts = wordCounts;
		addUniWordEntity( words, entity );
		} else {
		// Process multi-word entity.
		uniWordEntities[ firstWord ] = uniWordEntities[ firstWord ] \|\| Object.create( null );
		uniWordEntities[ firstWord ].wordCounts = uniWordEntities[ firstWord ].wordCounts \|\| [];
		if ( uniWordEntities[ firstWord ].wordCounts.indexOf( words.length ) === -1 ) uniWordEntities[ firstWord ].wordCounts.push( words.length );
		multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity );
		addMultiWordEntity( text, words, entity );
		}
		@@ -305,3 +350,3 @@ }

		return entities.length;
		return length;
		}; // learn()
		@@ -308,0 +353,0 @@

wink-ner - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics