nlp-toolkit - npm Package Compare versions

Comparing version 0.2.3 to 0.2.4

examples/bayes_cross_validation_from_file.js

		@@ -34,4 +34,7 @@ /**
		}))
		.on('data', function (sentence) {
		console.log(sentence);
		.pipe(nlp.crossValidation({
		classifiers: [nlp.classifiers.bayes]
		}))
		.on('data', function (data) {
		console.log(nlp.calculate.accuracy(data));
		})
		@@ -38,0 +41,0 @@ .on('error', function (err) {

examples/texts/sentiment.txt

		@@ -1,1 +0,4 @@
		positive\|amazing, awesome movie!! Yeah!! Oh boy.
		positive\|amazing, awesome movie!! Yeah!! Oh boy.
		positive\|Sweet, this is incredibly, amazing, perfect, great!!
		negative\|terrible, shitty thing. Damn. Sucks!!
		positive\|awesome, cool, amazing!! Yay.

lib/stemmer.js

		@@ -91,3 +91,3 @@ /**
		function stemWords(text, lang) {
		if (!stemmerLookup.hasOwnProperty(lang)) {
		if (!stemmerLookup.hasOwnProperty(lang) && lang !== 'default') {
		console.log(lang + ' is not a supported language for stemming.');
		@@ -94,0 +94,0 @@ return text;

lib/validateClassifier.js

		@@ -20,4 +20,2 @@ /**

		debug('validateClassifier');

		options = options \|\| {};
		@@ -29,7 +27,15 @@ var trainingSet = options.trainingSet \|\| [];

		debug('trainingSet.length', trainingSet.length);
		debug('testSet.length', testSet.length);

		trainingSet.forEach(function (sentence) {
		_classifier.learn(sentence, sentence.feature);
		if (sentence) {
		_classifier.learn(sentence, sentence.feature);
		}
		});

		return testSet.map(function (sentence) {
		if (!sentence) {
		return false;
		}
		return {
		@@ -39,2 +45,4 @@ feature: sentence.feature,
		};
		}).filter(function (sentence) {
		return !!sentence;
		});
		@@ -41,0 +49,0 @@

package.json

		@@ -26,3 +26,3 @@ {
		},
		"version": "0.2.3",
		"version": "0.2.4",
		"keywords": [
		@@ -29,0 +29,0 @@ "nlp",

README.md

		@@ -34,1 +34,83 @@ # Natural Language Processing Toolkit for node.js #

		## Modules ##

		### Tokenizer ###

		```javascript
		.pipe(nlp.tokenizer(options))
		```

		`options`:

		\| attribute \| type \| description \|
		\|-----------\|------\|-------------\|
		\| characters \| RegExp \| regular expression that describes which characters to strip off (default `/[^\w]/g`). \|
		\| separator \| RegExp \| regular expression that describes where to split words (default `/\s/g`). \|
		\| eliminateNumbers\| boolean \| discard tokens that only contain numbers (default `false`). \|
		\| toLowerCase \| boolean \| transform every token to lower case (default `true`). \|
		\| emptyStrings \| boolean \| keep empty strings when previous processing steps leave a token with length === 0 (default `false`). \|

		The tokenizer also works in a non-stream context:

		```javascript
		var tokens = nlp.tokenizer(string, options);
		```

		### Stopwords ###

		```javascript
		.pipe(nlp.stopwords(options))
		```

		`options`:

		\| attribute \| type \| description \|
		\|-----------\|------\|-------------\|
		\| defaultLang \| string \| default language if processed object does not provide a `lang` attribute (default `en`). \|
		\| additionalWords \| object \| add additional stopwords to the list of stopwords \|

		`additionalWords`:

		\| attribute \| type \| description \|
		\|-----------\|------\|-------------\|
		\| all \| array \| list of stopwords to add to every language \|
		\| default \| array \| list of stopwords if language is not supported \|
		\| _lang_ \| array \| list of stopwords specific to _lang_ \|

		Supported languages: `da, de, en, es, fi, fr, hu, it, nl, no, pt, ro, ru, se, tr`.

		Stopwords also work in a non-stream context:

		```javascript
		nlp.stopwords(sentence, options)
		.then(function (tokens) {})
		.catch(function (err) { console.error(err); });
		```

		### Stemmer ###

		```javascript
		.pipe(nlp.stemmer(options))
		```

		`options`:

		\| attribute \| type \| description \|
		\|-----------\|------\|-------------\|
		\| defaultStemmer \| string \| default stemmer for language if processed object does not provide a `lang` attribute (default `en`). \|

		Supported languages: `da, de, en, es, fi, fr, hu, it, nl, no, pt, ro, ru, se, tr`.

		The stemmer also works in a non-stream context:

		```javascript
		var tokens = nlp.stemmer(sentence, options);
		```

		This module uses the stemmer implementation of [Snowball-Stemmer](https://github.com/shibukawa/snowball-stemmer.jsx).

		### Frequency Distribution ###

		```javascript
		.pipe(nlp.frequency())
		```

		@@ -1,1 +0,4 @@
		positive\|amazing, awesome movie!! Yeah!! Oh boy.
		positive\|amazing, awesome movie!! Yeah!! Oh boy.
		positive\|Sweet, this is incredibly, amazing, perfect, great!!
		negative\|terrible, shitty thing. Damn. Sucks!!
		positive\|awesome, cool, amazing!! Yay.

nlp-toolkit - npm Package Compare versions

Improved metrics