Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

wink-ner

Package Overview
Dependencies
Maintainers
3
Versions
12
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

wink-ner - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0

4

package.json
{
"name": "wink-ner",
"version": "0.1.0",
"description": "Named Entity Recognition",
"version": "0.2.0",
"description": "Language agnostic named entity recognizer",
"keywords": [

@@ -6,0 +6,0 @@ "NER",

@@ -228,2 +228,42 @@ // wink-ner

// ### addUniWordEntity
/**
 *
 * Registers a single-word entity in `uniWordEntities`, keyed by the first
 * element of `words`. A clone of `entity` replaces whatever was stored under
 * that key before; the only thing carried over from the earlier entry is its
 * `wordCounts` property, and only when that property is truthy.
 *
 * @param {string[]} words — from entity's text; only `words[ 0 ]` is used.
 * @param {object} entity — to be cloned and stored.
 * @return {undefined} nothing!
 * @private
 */
var addUniWordEntity = function ( words, entity ) {
  const key = words[ 0 ];
  const previous = uniWordEntities[ key ];
  // Capture the counts first, because the entry is overwritten right below.
  const carriedCounts = ( previous ) ? previous.wordCounts : undefined;
  // The most recently added entity wins over any earlier one.
  uniWordEntities[ key ] = cloneEntity( entity );
  if ( carriedCounts ) uniWordEntities[ key ].wordCounts = carriedCounts;
}; // addUniWordEntity()
// ### addMultiWordEntity
/**
 *
 * Processes a multi-word entity and adds it to `multiWordEntities`, keyed by
 * its words joined with a single space. The word count is also recorded in the
 * `wordCounts` array kept in `uniWordEntities` under the entity's first word;
 * that entry is created if absent and each count is stored only once.
 *
 * If every word is a single character, the entity is treated as an acronym
 * and is additionally registered as a uni-word entity under the concatenated
 * characters (e.g. `'u s a'` also yields `'usa'`).
 *
 * @param {string} text — property of entity; kept for call compatibility, it
 * is no longer consulted because acronym detection works off `words`.
 * @param {string[]} words — from entity's text.
 * @param {object} entity — to be added.
 * @return {undefined} nothing!
 * @private
 */
var addMultiWordEntity = function ( text, words, entity ) {
  const firstWord = words[ 0 ];
  const head = uniWordEntities[ firstWord ] || Object.create( null );
  uniWordEntities[ firstWord ] = head;
  head.wordCounts = head.wordCounts || [];
  if ( head.wordCounts.indexOf( words.length ) === -1 ) head.wordCounts.push( words.length );
  multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity );
  // Acronym detection: every word must be exactly one character long. This is
  // checked on `words` rather than via arithmetic on `text.length`, which only
  // holds when `text` is strictly single-space separated and would otherwise
  // miss acronyms (extra inner spaces) or misfire (leading space).
  const isAcronym = words.length > 0 && words.every( ( word ) => word.length === 1 );
  if ( isAcronym ) addUniWordEntity( [ words.join( '' ) ], entity );
}; // addMultiWordEntity()
// ### learn

@@ -243,2 +283,6 @@ /**

*
* Acronyms must be added with space between each character; for example USA
* should be added as `'u s a'` — this ensures correct detection of
* `U S A` or `U. S. A.` or `U.S.A.` as `USA`.
*
* @param {object[]} entities — where each element defines an entity via

@@ -270,2 +314,9 @@ * two mandatory properties viz. `text` and `entityType` as described later.

* @example
* var trainingData = [
* { text: 'manchester united', entityType: 'club' },
* { text: 'manchester', entityType: 'city' },
* { text: 'uk', entityType: 'country' }
* ];
* learn( trainingData );
* // -> 3
*/

@@ -275,22 +326,16 @@ var learn = function ( entities ) {

// declarations in the beginning.
let length = 0;
for ( let i = 0, imax = entities.length; i < imax; i += 1 ) {
const entity = entities[ i ];
const text = entity.text;
const entityType = entity.text;
let wordCounts;
// Normalize after removing extra white spaces; required for acronyms processing.
const text = normalize( ( entity.text ).trim().replace( /\s+/, ' ' ) );
const entityType = ( entity.text ).trim().replace( /\s+/, ' ' );
// Add if `text` and `entityType` are defined.
if ( text && entityType ) {
const words = normalize( text.trim() ).split( /\s+/ );
const firstWord = words[ 0 ];
const words = text.split( /\s+/ );
length += 1;
if ( words.length === 1 ) {
// Process uni-word entity.
// Latest value overrides previous value, if any.
if ( uniWordEntities[ firstWord ] ) wordCounts = uniWordEntities[ firstWord ].wordCounts;
uniWordEntities[ firstWord ] = cloneEntity( entity );
if ( wordCounts ) uniWordEntities[ firstWord ].wordCounts = wordCounts;
addUniWordEntity( words, entity );
} else {
// Process multi-word entity.
uniWordEntities[ firstWord ] = uniWordEntities[ firstWord ] || Object.create( null );
uniWordEntities[ firstWord ].wordCounts = uniWordEntities[ firstWord ].wordCounts || [];
if ( uniWordEntities[ firstWord ].wordCounts.indexOf( words.length ) === -1 ) uniWordEntities[ firstWord ].wordCounts.push( words.length );
multiWordEntities[ words.join( ' ' ) ] = cloneEntity( entity );
addMultiWordEntity( text, words, entity );
}

@@ -305,3 +350,3 @@ }

return entities.length;
return length;
}; // learn()

@@ -308,0 +353,0 @@

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc