Socket
Socket
Sign inDemoInstall

wink-naive-bayes-text-classifier

Package Overview
Dependencies
Maintainers
3
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

wink-naive-bayes-text-classifier - npm Package Compare versions

Comparing version 1.1.4 to 1.2.0

.jsdoc.json

31

package.json
{
"name": "wink-naive-bayes-text-classifier",
"version": "1.1.4",
"version": "1.2.0",
"description": "Configurable Naive Bayes Classifier for text with cross-validation support",

@@ -15,7 +15,7 @@ "keywords": [

"scripts": {
"pretest": "npm run lint && npm run hint",
"pretest": "npm run lint && npm run docs",
"test": "istanbul cover _mocha ./test/",
"coveralls": "istanbul cover _mocha --report lcovonly -- -R spec && cat ./coverage/lcov.info | coveralls && rm -rf ./coverage",
"docs": "docco -o ./docs/ ./src/*.js",
"hint": "jshint ./src/*.js ./test/*.js",
"sourcedocs": "docker -i src -o ./sourcedocs --sidebar no",
"docs": "jsdoc src/*.js -c .jsdoc.json",
"lint": "eslint ./src/*.js ./test/*.js"

@@ -28,21 +28,22 @@ },

"author": "Sanjaya Kumar Saxena",
"license": "AGPL-3.0",
"license": "AGPL-3.0-only",
"bugs": {
"url": "https://github.com/winkjs/wink-naive-bayes-text-classifier/issues"
},
"homepage": "https://github.com/winkjs/wink-naive-bayes-text-classifier#readme",
"homepage": "http://winkjs.org/wink-naive-bayes-text-classifier",
"devDependencies": {
"chai": "^4.1.2",
"coveralls": "^3.0.0",
"docco": "^0.8.0",
"eslint": "^4.13.1",
"istanbul": "^0.4.5",
"jshint": "^2.9.5",
"mocha": "^4.0.1",
"chai": "^4.2.0",
"coveralls": "^3.0.2",
"docdash": "^1.0.0",
"docker": "^1.0.0",
"eslint": "^5.6.1",
"istanbul": "^1.1.0-alpha.1",
"jsdoc": "^3.5.5",
"mocha": "^5.2.0",
"mocha-lcov-reporter": "^1.3.0"
},
"dependencies": {
"wink-helpers": "^1.2.0",
"wink-nlp-utils": "^1.7.0"
"wink-helpers": "^1.5.0",
"wink-nlp-utils": "^1.9.1"
}
}

@@ -6,9 +6,9 @@

### [![Build Status](https://api.travis-ci.org/winkjs/wink-naive-bayes-text-classifier.svg?branch=master)](https://travis-ci.org/winkjs/wink-naive-bayes-text-classifier) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-naive-bayes-text-classifier/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-naive-bayes-text-classifier?branch=master) [![Inline docs](http://inch-ci.org/github/winkjs/wink-naive-bayes-text-classifier.svg?branch=master)](http://inch-ci.org/github/winkjs/wink-naive-bayes-text-classifier) [![dependencies Status](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier/status.svg)](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier) [![devDependencies Status](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier/dev-status.svg)](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier?type=dev)
### [![Build Status](https://api.travis-ci.org/winkjs/wink-naive-bayes-text-classifier.svg?branch=master)](https://travis-ci.org/winkjs/wink-naive-bayes-text-classifier) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-naive-bayes-text-classifier/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-naive-bayes-text-classifier?branch=master) [![Inline docs](http://inch-ci.org/github/winkjs/wink-naive-bayes-text-classifier.svg?branch=master)](http://inch-ci.org/github/winkjs/wink-naive-bayes-text-classifier) [![dependencies Status](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier/status.svg)](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier) [![devDependencies Status](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier/dev-status.svg)](https://david-dm.org/winkjs/wink-naive-bayes-text-classifier?type=dev) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby)
<img align="right" src="https://decisively.github.io/wink-logos/logo-title.png" width="100px" >
Classify text, analyse sentiments, recognize user intents for chatbot using **`wink-naive-bayes-text-classifier`**. It is a part of [wink](https://www.npmjs.com/~sanjaya) — a growing family of high quality packages for Statistical Analysis, Natural Language Processing and Machine Learning in NodeJS.
Classify text, analyse sentiments, recognize user intents for chatbot using **`wink-naive-bayes-text-classifier`**. It is a part of [wink](http://winkjs.org/) — a growing family of high quality packages for Statistical Analysis, Natural Language Processing and Machine Learning in NodeJS.
It's [API](#api) offers a rich set of features:
Its [API](http://winkjs.org/wink-naive-bayes-text-classifier/NaiveBayesTextClassifier.html) offers a rich set of features:

@@ -22,3 +22,3 @@ 1. Configure text preparation task such as **amplify negation**, **tokenize**, **stem**, **remove stop words**, and **propagate negation** using [wink-nlp-utils](https://www.npmjs.com/package/wink-nlp-utils) or any other package of your choice.

## Installation
### Installation
Use [npm](https://www.npmjs.com/package/wink-naive-bayes-text-classifier) to install:

@@ -30,3 +30,3 @@ ```

## Example
### Example
```javascript

@@ -72,65 +72,13 @@

## API
### Documentation
Check out the [Naive Bayes Text Classifier](http://winkjs.org/wink-naive-bayes-text-classifier/) API documentation to learn more.
#### definePrepTasks( tasks )
Defines the text preparation `tasks` to transform raw incoming text into an array of tokens required during `learn()`, `evaluate()` and `predict()` operations. The `tasks` should be an array of functions. The first function in this array must accept a string as input; and the last function must return an array of tokens as JavaScript Strings. Each function must accept one input argument and return a single value. `definePrepTasks` returns the count of `tasks`.
As illustrated in the usage, [wink-nlp-utils](https://www.npmjs.com/package/wink-nlp-utils) offers a rich set of such functions.
#### defineConfig( config )
Defines the configuration from the `config` object. This object must define 2 properties viz. (a) `considerOnlyPresence` and (b) `smoothingFactor`. The `considerOnlyPresence` must be a boolean — true indicates a binarized model; default value is false. The `smoothingFactor` defines the value for additive smoothing; its default value is **1**. The `defineConfig()` must be called before attempting to learn.
#### learn( input, label )
Simply learns that the `input` belongs to the `label`. If the input is a JavaScript String, then `definePrepTasks()` must be called before learning.
#### consolidate()
Consolidates the learning. It is a prerequisite for `evaluate()` and/or `predict()`.
#### evaluate( input, label )
It is used to evaluate the learning against a test data set. The `input` is used to predict the label, which is compared with the `label` to populate a confusion matrix.
#### metrics()
It computes a detailed metrics consisting of macro-averaged *precision*, *recall* and *f-measure* along with their label-wise values and the *confusion matrix*.
#### predict( input )
Predicts the label for the `input`. If it is unable to predict then it returns a value **`'unknown'`**.
#### computeOdds( input )
Computes the log base-2 of odds of every label for the `input`; and returns the array of `[ label, odds ]` in descending
order of `odds`. Here is an example of the returned array:
```javascript
[
[ 'prepay', 6.169686751688911 ],
[ 'autoloan', -6.169686751688911 ]
]
```
If it is unable to make prediction then it returns a value **`[ [ 'unknown', 0 ] ]`**.
#### exportJSON()
The learning can be exported as JSON text that may be saved in a file.
#### importJSON( json )
An existing JSON learning can be imported for prediction. It is essential to `definePrepTasks()` and `consolidate()` before attempting to predict.
#### stats()
Returns basic stats of learning in terms of count of samples under each label, total words, and the size of vocabulary.
#### reset()
It completely resets the classifier by re-initializing all the learning related variables, except the preparatory tasks. It is useful during cross fold-validation.
## Need Help?
### Need Help?
If you spot a bug and the same has not yet been reported, raise a new [issue](https://github.com/winkjs/wink-naive-bayes-text-classifier/issues) or consider fixing it and sending a pull request.
## Copyright & License
**wink-naive-bayes-text-classifier** is copyright 2017 [GRAYPE Systems Private Limited](http://graype.in/).
### Copyright & License
**wink-naive-bayes-text-classifier** is copyright 2017-18 [GRAYPE Systems Private Limited](http://graype.in/).
It is licensed under the terms of the GNU Affero General Public License as published by the Free
Software Foundation, version 3 of the License.

@@ -48,3 +48,15 @@ // wink-naive-bayes-text-classifier

// cross-validation.
var textNBC = function () {
/**
*
* Creates an instance of a {@link NaiveBayesTextClassifier}.
*
* @return {NaiveBayesTextClassifier} object containing a set of API methods for tasks like configuration,
* data ingestion, learning, and prediction etc.
* @example
* // Load wink Naive Bayes Text Classifier.
* var naiveBayesTextClassifier = require( 'wink-naive-bayes-text-classifier' );
* // Create your instance of classifier.
* var myClassifier = naiveBayesTextClassifier();
*/
var naiveBayesTextClassifier = function () {
// Total samples encountered under each label during learning.

@@ -78,3 +90,8 @@ var samples = Object.create( null );

var fmeasure = Object.create( null );
// Returned!
/**
* @classdesc Naive Bayes Text Classifier class.
* @class NaiveBayesTextClassifier
* @hideconstructor
*/
var methods = Object.create( null );

@@ -93,5 +110,11 @@ // Define unknown prediction.

// #### Prepare Input
// Prepares the `input` by building a pipeline of tasks defined in the variable
// `pTasks` via `definePrepTasks()`
/**
*
* Prepares the `input` by building a pipeline of tasks defined in the variable
* `pTasks` via `definePrepTasks()`.
*
* @param {string} input usually a text
* @return {string[]} tokens.
* @private
*/
var prepareInput = function ( input ) {

@@ -107,3 +130,11 @@ var processedInput = input;

// Computes the pre-definable smoothed log likelihood `( w | label )`.
/**
*
* Computes the pre-definable smoothed log likelihood `( w | label )`.
*
* @param {string} w word or token.
* @param {string} label i.e. class.
* @return {number} smoothed log likelihood.
* @private
*/
var logLikelihood = function ( w, label ) {

@@ -128,3 +159,11 @@ // To avoid recomputation.

// Computes the pre-definable smoothed inverse log likelihood `( w | label )`.
/**
*
* Computes the pre-definable smoothed inverse log likelihood `( w | label )`.
*
* @param {string} w word or token.
* @param {string} label i.e. class.
* @return {number} smoothed inverse log likelihood.
* @private
*/
var inverseLogLikelihood = function ( w, label ) {

@@ -163,2 +202,11 @@ // Index and temporary label.

// Computes the odds for `( tokens | label )`.
/**
*
* Computes the odds for `( tokens | label )`.
*
* @param {string[]} tokens of the sentence.
* @param {string} label i.e. class of sentence.
* @return {number} odds for `( tokens | label )`.
* @private
*/
var odds = function ( tokens, label ) {

@@ -214,10 +262,21 @@ // Total number of samples encountered during training.

// Defines the `considerOnlyPresence` and `smoothingFactor` parameters. The
// `considerOnlyPresence` is a boolean parameter. An incorrect value is
// forced to `false`. Setting `considerOnlyPresence` to `true` ignores
// the frequency of each token and instead only considers its presence.
// The `smoothingFactor` can have any value between 0 and 1. If the input
// value > 1 then it is set to **1** and if it is < 0 then it is set to **0**.
// The config can not be set once the learning has started.
/**
*
* Defines the configuration for naive bayes text classifier. This
* must be called before attempting to [learn](#learn); in other words it can not be
* set once learning has started.
*
* @method NaiveBayesTextClassifier#defineConfig
* @param {object} cfg defines the configuration in terms of the following
* parameters:
* @param {boolean} [considerOnlyPresence=false] true indicates a binarized model.
* @param {number} [smoothingFactor=1] defines the value for additive smoothing.
* It can have any value between 0 and 1.
* @return {boolean} Always true.
* @example
* myClassifier.defineConfig( { considerOnlyPresence: true, smoothingFactor: 0.5 } );
* // -> true
* @throws Error if `cfg` is not a valid Javascript object, or `smoothingFactor` is invalid,
* or an attempt to define configuration is made after learning starts.
*/
var defineConfig = function ( cfg ) {

@@ -246,8 +305,31 @@ if ( learned ) {

// Defines the `tasks` required to prepare the input for `learn()` and `predict()`
// The `tasks` should be an array of functions; using these functions a simple
// pipeline is built to serially transform the input to the output.
// It validates the `tasks` before updating the `pTasks`.
// If validation fails it throws error; otherwise it sets the
// `pTasks` and returns length of `pTask` array.
// It sets the `pTasks` and returns length of `pTask` array.
/**
* Defines the text preparation `tasks` to transform raw incoming
* text into tokens required during
* [`learn()`](#learn), [`evaluate()`](#evaluate) and [`predict()`](#predict) operations.
* The `tasks` should be an array of functions;
* using these functions a simple pipeline is built to serially transform the
* input to the output.
*
* @method NaiveBayesTextClassifier#definePrepTasks
* @param {function[]} tasks the first function
* in this array must accept a string as input and the last function must
* return tokens i.e. array of strings. Please refer to example.
* @return {number} The number of functions in the `tasks` array.
* @example
* // Load wink NLP utilities
* var nlp = require( 'wink-nlp-utils' );
* // Define the text preparation tasks.
* myClassifier.definePrepTasks( [
* // Simple tokenizer to convert input text into tokens
* nlp.string.tokenize0,
* // Removes stop words from the input tokens
* nlp.tokens.removeWords,
* // Stems each token into its base form
* nlp.tokens.stem
* ] );
* // -> 3
* @throws Error if `tasks` is not an array of functions.
*/
var definePrepTasks = function ( tasks ) {

@@ -272,2 +354,17 @@ if ( !helpers.array.isArray( tasks ) ) {

// If learning was successful then it returns `true`.
/**
*
* Learns from the example pair of `input` and its `label`.
*
* @method NaiveBayesTextClassifier#learn
* @param {string|string[]} input if it is a string, then [`definePrepTasks()`](#definePrepTasks)
* must be called before learning so that `input` string is transformed
* into tokens on the fly.
* @param {string} label of class to which `input` belongs.
* @return {boolean} Always true.
* @example
* myClassifier.learn( 'I need loan for a new vehicle', 'autoloan' );
* // -> true
* @throws Error if learnings have been already [consolidated](#consolidate).
*/
var learn = function ( input, label ) {

@@ -300,2 +397,15 @@ // No point in learning further, if learnings so far have been consolidated.

// 2. Initializes the confusion matrix and metrics.
/**
*
* Consolidates the learning. It is a prerequisite for [`evaluate()`](#evaluate)
* and/or [`predict()`](#predict).
*
* @method NaiveBayesTextClassifier#consolidate
* @return {boolean} Always true.
* @example
* myClassifier.consolidate();
* // -> true
* @throws Error if training data belongs to only a single class label or
* the training data is too small for learning.
*/
var consolidate = function () {

@@ -337,2 +447,17 @@ var row, col;

// is actually the **log2** of odds.
/**
* Computes the log base-2 of odds of every label for the input; and returns
* the array of `[ label, odds ]` in descending order of odds.
*
* @method NaiveBayesTextClassifier#computeOdds
* @param {String|String[]} input is either text or tokens determined by the
* choice of [`preparatory tasks`](#definePrepTasks).
* @return {array[]} Array of `[ label, odds ]` in descending order of odds.
* @example
* myClassifier.computeOdds( 'I want to pay my car loan early' );
* // -> [
* //      [ 'prepay', 6.169686751688911 ],
* //      [ 'autoloan', -6.169686751688911 ]
* //    ]
*/
var computeOdds = function ( input ) {

@@ -364,2 +489,15 @@ // Predict only if learnings have been consolidated!

// It throws error if the learnings have not been consolidated.
/**
*
* Predicts the class label for the `input`. If it is unable to predict then it
* returns a value **`unknown`**.
*
* @method NaiveBayesTextClassifier#predict
* @param {String|String[]} input is either text or tokens determined by the
* choice of [`preparatory tasks`](#definePrepTasks).
* @return {String} The predicted class label for the `input`.
* @example
* myClassifier.predict( 'I want to pay my car loan early' );
* // -> prepay
*/
var predict = function ( input ) {

@@ -369,7 +507,27 @@ // Contains label & the corresponding odds pairs.

return ( allOdds[ 0 ][ 0 ] );
};
}; // predict()
// #### Stats
// Returns basic stats of learning.
/**
* Returns basic stats of learning in terms of count of samples under
* each label, total words, and the size of vocabulary.
*
* @method NaiveBayesTextClassifier#stats
* @return {object} An object containing count of samples under
* each label, total words, and the size of vocabulary.
* @example
* myClassifier.stats();
* // -> {
* // labelWiseSamples: {
* // autoloan: 5,
* // prepay: 4
* // },
* // labelWiseWords: {
* // autoloan: 36,
* // prepay: 26
* // },
* // vocabulary: 24
* // };
*/
var stats = function () {

@@ -387,3 +545,3 @@ return (

);
}; // predict()
}; // stats()

@@ -393,2 +551,12 @@ // #### Export JSON

// Returns the learnings, without any consolidation check, in JSON format.
/**
* Exports the learning as a JSON, which may be saved as a text file for
* later use via [`importJSON()`](#importjson).
*
* @method NaiveBayesTextClassifier#exportJSON
* @return {string} Learning in JSON format.
* @example
* myClassifier.exportJSON();
* // returns JSON.
*/
var exportJSON = function ( ) {

@@ -408,2 +576,13 @@ var vocArray = [];

// validation.
/**
* It completely resets the classifier by re-initializing all the learning
* related variables, except the preparatory tasks. It is useful during
* cross fold-validation.
*
* @method NaiveBayesTextClassifier#reset
* @return {boolean} Always true.
* @example
* myClassifier.reset();
* // -> true
*/
var reset = function () {

@@ -433,2 +612,12 @@ // Reset values of variables that are associated with learning; Therefore

// returns `true`. Note, importing leads to resetting the classifier.
/**
* Imports an existing JSON learning for prediction.
* It is essential to [`definePrepTasks()`](#definepreptasks) and
* [`consolidate()`](#consolidate) before attempting to predict.
*
* @method NaiveBayesTextClassifier#importJSON
* @param {JSON} json containing learnings as exported by [`exportJSON`](#exportjson).
* @return {boolean} Always true.
* @throws Error if `json` is invalid.
*/
var importJSON = function ( json ) {

@@ -476,5 +665,20 @@ if ( !json ) {

// then it throws error; errors may be thrown by the `predict()`. If
// prediction fails (nunknown), then it does not uppdate
// prediction fails (unknown), then it does not update
// the confusion matrix and returns `false`; otherwise it updates the matrix
// and returns `true`.
/**
*
* Evaluates the learning against a test data set.
* The `input` is used to predict the class label, which is compared with the
* actual class `label` to populate confusion matrix incrementally.
*
* @method NaiveBayesTextClassifier#evaluate
* @param {String|String[]} input is either text or tokens determined by the
* choice of [`preparatory tasks`](#definePrepTasks).
* @param {string} label of class to which `input` belongs.
* @return {boolean} `true` if the confusion matrix was updated; `false` if the prediction failed (unknown label).
* @example
* myClassifier.evaluate( 'can i close my loan', 'prepay' );
* // -> true
*/
var evaluate = function ( input, label ) {

@@ -503,2 +707,43 @@ // In case of unknown label, indicate failure

// it returns an object containing summary metrics along with the details.
/**
*
* Computes detailed metrics consisting of macro-averaged precision, recall
* and f-measure along with their label-wise values and the confusion matrix.
*
* @method NaiveBayesTextClassifier#metrics
* @return {object} Detailed metrics.
* @example
* // Assuming that evaluation has been already carried out
* JSON.stringify( myClassifier.metrics(), null, 2 );
* // -> {
* // "avgPrecision": 0.75,
* // "avgRecall": 0.75,
* // "avgFMeasure": 0.6667,
* // "details": {
* // "confusionMatrix": {
* // "prepay": {
* // "prepay": 1,
* // "autoloan": 1
* // },
* // "autoloan": {
* // "prepay": 0,
* // "autoloan": 1
* // }
* // },
* // "precision": {
* // "prepay": 0.5,
* // "autoloan": 1
* // },
* // "recall": {
* // "prepay": 1,
* // "autoloan": 0.5
* // },
* // "fmeasure": {
* // "prepay": 0.6667,
* // "autoloan": 0.6667
* // }
* // }
* // }
* @throws Error if attempt to generate metrics is made prior to proper evaluation.
*/
var metrics = function () {

@@ -586,5 +831,5 @@ if ( !evaluated ) {

return ( methods );
};
}; // naiveBayesTextClassifier()
// Export textNBC.
module.exports = textNBC;
module.exports = naiveBayesTextClassifier;
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc