wink-naive-bayes-text-classifier
Advanced tools
Comparing version 1.1.1 to 1.1.2
{ | ||
"name": "wink-naive-bayes-text-classifier", | ||
"version": "1.1.1", | ||
"version": "1.1.2", | ||
"description": "Configurable Naive Bayes Classifier for text with cross-validation support", | ||
@@ -33,15 +33,15 @@ "keywords": [ | ||
"devDependencies": { | ||
"chai": "^3.5.0", | ||
"chai": "^4.1.0", | ||
"coveralls": "^2.11.15", | ||
"docco": "^0.7.0", | ||
"eslint": "^3.13.1", | ||
"eslint": "^4.3.0", | ||
"istanbul": "^0.4.5", | ||
"jshint": "^2.9.4", | ||
"mocha": "^3.1.0", | ||
"jshint": "^2.9.5", | ||
"mocha": "^3.4.2", | ||
"mocha-lcov-reporter": "^1.2.0", | ||
"wink-nlp-utils": "^1.0.2" | ||
"wink-nlp-utils": "^1.2.1" | ||
}, | ||
"dependencies": { | ||
"wink-helpers": "^1.0.0" | ||
"wink-helpers": "^1.1.1" | ||
} | ||
} |
@@ -77,3 +77,3 @@ | ||
#### defineConfig( config ) | ||
Defines the configuration from the `config` object. This object must define 2 properties viz. (a) `considerOnlyPresence` and `smoothingFactor`. The `considerOnlyPresence` must be a boolean — true indicates a binarized model; default value is false. The `smoothingFactor` defines the value for additive smoothing; its default value is 0.5. The `defineConfig()` must be called before attempting to learn. | ||
Defines the configuration from the `config` object. This object must define 2 properties viz. (a) `considerOnlyPresence` and (b) `smoothingFactor`. The `considerOnlyPresence` must be a boolean — true indicates a binarized model; default value is false. The `smoothingFactor` defines the value for additive smoothing; its default value is **1**. The `defineConfig()` must be called before attempting to learn. | ||
@@ -104,4 +104,4 @@ #### learn( input, label ) | ||
[ | ||
[ 'prepay', 12.052329801050746 ], | ||
[ 'autoloan', -0.5258305619141872 ] | ||
[ 'prepay', 6.169686751688911 ], | ||
[ 'autoloan', -6.169686751688911 ] | ||
] | ||
@@ -108,0 +108,0 @@ ``` |
@@ -85,3 +85,4 @@ // wink-naive-bayes-text-classifier | ||
config.considerOnlyPresence = false; | ||
config.smoothingFactor = 0.5; | ||
// Default smoothingFactor is set to Laplace add+1 smoothing. | ||
config.smoothingFactor = 1; | ||
@@ -106,14 +107,15 @@ // ### Private functions | ||
var logLikelihood = function ( w, label ) { | ||
// If there is a **non-zero** `smoothingFactor`, then use the regular | ||
// formula for computation. When it is **0**, in that case if the `w` | ||
// is not found in vocabulary, return 0; otherwise perform add-1. | ||
// Note, a 0 `smoothingFactor` can lead to `unknown` prediction if none | ||
// of the words are found in the vocabulary. | ||
// To avoid recomputation. | ||
var clw = ( count[ label ][ w ] || 0 ); | ||
return ( | ||
( config.smoothingFactor > 0 ) ? | ||
( Math.log2( ( ( count[ label ][ w ] || 0 ) + config.smoothingFactor ) ) - | ||
Math.log2( words[ label ] + ( voc.size * config.smoothingFactor ) ) ) : | ||
voc.has( w ) ? ( Math.log2( ( ( count[ label ][ w ] || 0 ) + 1 ) ) - | ||
Math.log2( ( words[ label ] + voc.size ) ) ) : | ||
0 | ||
// Numerator will never be **0** due to smoothing. | ||
( Math.log2( ( clw + config.smoothingFactor ) ) - | ||
Math.log2( words[ label ] + ( voc.size * config.smoothingFactor ) ) ) : | ||
// Numerator will be 0 if `w` is not found under the `label`. | ||
( clw ) ? | ||
// Non-zero numerator means normal handling | ||
( Math.log2( clw ) - Math.log2( ( words[ label ] + voc.size ) ) ) : | ||
// Zero numerator: return **0**. | ||
0 | ||
); | ||
@@ -124,3 +126,3 @@ }; // logLikelihood() | ||
// Computes the 1+ smoothed log likelihood `( w | label )`. | ||
// Computes the pre-definable smoothed inverse log likelihood `( w | label )`. | ||
var inverseLogLikelihood = function ( w, label ) { | ||
@@ -141,9 +143,15 @@ // Index and temporary label. | ||
} | ||
// No need to perform `voc.has( w )` check as `odds()` will not call the | ||
// `inverseLogLikelihood()` if `logLikelihood()` returns a **0**. It does | ||
// so to avoid recomputation. See comments in `logLikelihood()`. | ||
return ( Math.log2( ( clw + ( config.smoothingFactor || 1 ) ) ) - | ||
Math.log2( ( wl + ( voc.size * ( config.smoothingFactor || 1 ) ) ) ) | ||
return ( | ||
( config.smoothingFactor > 0 ) ? | ||
// Numerator will never be **0** due to smoothing. | ||
( Math.log2( ( clw + config.smoothingFactor ) ) - | ||
Math.log2( wl + ( voc.size * config.smoothingFactor ) ) ) : | ||
// Numerator may be 0. | ||
( clw ) ? | ||
// Non-zero numerator means normal handling | ||
( Math.log2( clw ) - Math.log2( ( wl + voc.size ) ) ) : | ||
// Zero numerator: return **0**. | ||
0 | ||
); | ||
}; // inverseLogLikelihood() | ||
@@ -167,2 +175,9 @@ | ||
// Filter unknown tokens. | ||
var ivTokens = tokens.filter( function ( e ) { | ||
return voc.has( e ); | ||
} ); | ||
// No known tokens means simply return **0**. | ||
if ( ivTokens.length === 0 ) return 0; | ||
// Compute `samplesNotInLabel`. | ||
@@ -176,6 +191,6 @@ for ( i = 0; i < labelCount; i += 1 ) { | ||
// Update them for the given tokens for `label` | ||
for ( i = 0, imax = tokens.length; i < imax; i += 1 ) { | ||
lh += logLikelihood( tokens[ i ], label ); | ||
for ( i = 0, imax = ivTokens.length; i < imax; i += 1 ) { | ||
lh += logLikelihood( ivTokens[ i ], label ); | ||
// If `lh` is **0** then ilh will be zero - avoid computation. | ||
ilh += ( lh === 0 ) ? 0 : inverseLogLikelihood( tokens[ i ], label ); | ||
ilh += ( lh === 0 ) ? 0 : inverseLogLikelihood( ivTokens[ i ], label ); | ||
} | ||
@@ -216,4 +231,11 @@ | ||
cfg.considerOnlyPresence : false; | ||
config.smoothingFactor = ( isNaN( cfg.smoothingFactor ) ) ? | ||
0 : Math.max( Math.min( cfg.smoothingFactor, 1 ), 0 ); | ||
// If smoothing factor is undefined, set it to Laplace add+1 smoothing. | ||
var sf = ( cfg.smoothingFactor === undefined ) ? 1 : parseFloat( cfg.smoothingFactor ); | ||
// Throw error for a value beyond 0-1 or NaN. | ||
if ( isNaN( sf ) || ( sf < 0 ) || ( sf > 1 ) ) { | ||
throw Error( 'winkNBTC: smoothing factor must be a number between 0 & 1, instead found: ' + JSON.stringify( sf ) ); | ||
} | ||
// All good, set smoothingFactor as `sf`. | ||
config.smoothingFactor = sf; | ||
return true; | ||
@@ -220,0 +242,0 @@ }; // defineConfig() |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
62060
518
Updated wink-helpers@^1.1.1