nlp-toolkit - npm Package Compare versions

Comparing version 0.2.2 to 0.2.3

examples/no_streaming_context.js

examples/tokenize_stem_freqDist.js

		@@ -20,3 +20,5 @@ /**
		.pipe(nlp.stopwords({
		words: ['mr', 'mrs', 'miss']
		additionalWords: {
		all: ['mr', 'mrs', 'miss']
		}
		}))
		@@ -23,0 +25,0 @@ .pipe(nlp.stemmer())

lib/stemmer.js

		@@ -52,2 +52,3 @@ /**
		var DEFAULT_STEMMER = 'en';
		var stemmerCache = {};

		@@ -58,10 +59,15 @@
		*/
		function stemmer(options) {
		function stemmer(text, options) {

		if (!options && Object.prototype.toString.call(text) !== '[object Array]') {
		options = text;
		text = '';
		}

		options = options || {};
		options.defaultStemmer = options.defaultStemmer || DEFAULT_STEMMER;

		var stemmerCache = {
		default: new stemmerLookup[options.defaultStemmer]()
		};
		if (!stemmerCache.default) {
		stemmerCache.default = new stemmerLookup[options.defaultStemmer]();
		}

		@@ -82,2 +88,19 @@ var getLang = function () {

		if (text) {
		return stemWords(text, options.lang || options.defaultStemmer);
		}

		/**
		 * Stems every token of `text` with the stemmer registered for `lang`.
		 *
		 * A stemmer instance is constructed from `stemmerLookup[lang]` the first
		 * time a language is requested and kept in `stemmerCache` for later calls.
		 *
		 * @param {Array} text - Tokens to stem; each one is handed to `stemWord`.
		 * @param {string} lang - Key into `stemmerLookup` selecting the stemmer.
		 * @returns {Array} A new array of stemmed tokens, or `text` itself
		 *     (unchanged) when `lang` has no own entry in `stemmerLookup`.
		 */
		function stemWords(text, lang) {
			var supported = stemmerLookup.hasOwnProperty(lang);

			if (supported) {
				// Build the stemmer for this language once and reuse it afterwards.
				if (!stemmerCache.hasOwnProperty(lang)) {
					stemmerCache[lang] = new stemmerLookup[lang]();
				}

				var stemToken = function (word) {
					return stemmerCache[lang].stemWord(word);
				};

				return text.map(stemToken);
			}

			// Unknown language: report it and hand the tokens back untouched.
			console.log(lang + ' is not a supported language for stemming.');
			return text;
		}

		debug('defaultStemmer', options.defaultStemmer);
		@@ -92,11 +115,4 @@

		var lang = getLang(chunk);
		if (!stemmerCache.hasOwnProperty(lang)) {
		stemmerCache[lang] = new stemmerLookup[lang]();
		}
		var tokens = stemWords(_chunk, getLang(chunk));

		var tokens = _chunk.map(function (token) {
		return stemmerCache[lang].stemWord(token);
		});

		var response;
		@@ -103,0 +119,0 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {

lib/stopwords.js

		@@ -24,2 +24,3 @@ /**
		var DEFAULT_LANG = 'en';
		var stopwordsCache = {};

		@@ -30,4 +31,9 @@
		*/
		function stopwords(options) {
		function stopwords(text, options) {

		if (!options && Object.prototype.toString.call(text) !== '[object Array]') {
		options = text;
		text = '';
		}

		options = options || {};
		@@ -47,6 +53,2 @@ options.defaultLang = options.defaultLang || DEFAULT_LANG;

		var stopwordsCache = {
		default: getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default')
		};

		var getLang = function () {
		@@ -66,5 +68,24 @@ return 'default';

		stopwordsCache.default = getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default');

		if (text) {
		return removeStopwords(text, options.lang || options.defaultLang);
		}

		debug('defaultLang', options.defaultLang);
		debug('defaultFilename', options.defaultFilename);

		/**
		 * Drops stopwords from `text` for the given language.
		 *
		 * The per-language loader is created through `getStopwordsWrapper` on
		 * first use and stored in `stopwordsCache`; later calls for the same
		 * language reuse that stored loader.
		 *
		 * @param {Array} text - Tokens to filter.
		 * @param {string} lang - Language key used for the cache entry and for
		 *     `getFilename`.
		 * @returns {Promise} Result of chaining `.then` on the loader's return
		 *     value; resolves to the tokens that are not own keys of the loaded
		 *     stopword map.
		 */
		function removeStopwords(text, lang) {
			var hasLoader = stopwordsCache.hasOwnProperty(lang);

			// Lazily create the loader the first time this language is requested.
			if (!hasLoader) {
				stopwordsCache[lang] = getStopwordsWrapper(getFilename(lang), options.additionalWords, lang);
			}

			return stopwordsCache[lang]().then(function (stopwordMap) {
				return text.filter(function (word) {
					return !stopwordMap.hasOwnProperty(word);
				});
			});
		}

		return through2.obj(function (chunk, enc, callback) {
		@@ -77,14 +98,4 @@

		var lang = getLang(chunk);
		if (!stopwordsCache.hasOwnProperty(lang)) {
		stopwordsCache[lang] = getStopwordsWrapper(getFilename(lang), options.additionalWords, lang);
		}

		stopwordsCache[lang]()
		.then(function (stopwordsCache) {

		var tokens = _chunk.filter(function (token) {
		return !stopwordsCache.hasOwnProperty(token);
		});

		removeStopwords(_chunk, getLang(chunk))
		.then(function (tokens) {
		var response;
		@@ -97,14 +108,12 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {
		}

		return callback(null, response);
		})
		.catch(callback);

		})
		.catch(function (err) {
		return callback(err);
		});
		});

		}

		function getStopwordsWrapper(filename, additionalWords, lang) {
		var stopwordsCache;
		var singleStopwordsCache;
		var _words = [].concat(additionalWords.all);
		@@ -114,4 +123,4 @@ _words = _words.concat(additionalWords[lang] || additionalWords.default);
		return new Promise(function (resolve, reject) {
		if (stopwordsCache) {
		return resolve(stopwordsCache);
		if (singleStopwordsCache) {
		return resolve(singleStopwordsCache);
		}
		@@ -125,7 +134,7 @@ var _stopwordsCache = {};
		.on('end', function () {
		stopwordsCache = _stopwordsCache;
		singleStopwordsCache = _stopwordsCache;
		_words.forEach(function (_word) {
		stopwordsCache[_word] = 1;
		singleStopwordsCache[_word] = 1;
		})
		return resolve(stopwordsCache);
		return resolve(singleStopwordsCache);
		})
		@@ -132,0 +141,0 @@ .on('error', function (err) {

lib/tokenizer.js

		@@ -29,4 +29,9 @@ /**
		*/
		function tokenizer(options) {
		function tokenizer(text, options) {

		if (!options && typeof text === 'object') {
		options = text;
		text = '';
		}

		options = options \|\| {};
		@@ -45,2 +50,6 @@ options.characters = (options.characters instanceof RegExp) ? options.characters : DEFAULT_CHARACTERS;

		if (text) {
		return tokenize(text, options);
		}

		return through2.obj(function (chunk, enc, callback) {
		@@ -54,14 +63,3 @@

		var tokens = _chunk.split(options.separator).map(function (token) {
		token = token.replace(options.characters, '');
		if (options.eliminateNumbers) {
		token = token.replace(/^\d+$/, '');
		}
		if (options.toLowerCase) {
		token = token.toLowerCase();
		}
		return token;
		}).filter(function (token) {
		return !!token || options.emptyStrings;
		});
		var tokens = tokenize(_chunk, options);

		@@ -83,3 +81,22 @@ var response;

		/**
		 * Splits `text` into tokens and normalizes each token.
		 *
		 * @param {string} text - Input to split.
		 * @param {Object} options - Tokenizing options:
		 *     `separator` is passed to `String.prototype.split`;
		 *     `characters` is a pattern whose matches are removed from each token;
		 *     `eliminateNumbers`, when truthy, empties tokens made only of digits;
		 *     `toLowerCase`, when truthy, lowercases each token;
		 *     `emptyStrings`, when truthy, keeps empty tokens in the result.
		 * @returns {Array} The normalized tokens.
		 */
		function tokenize(text, options) {

			var tokens = text.split(options.separator).map(function (token) {
				token = token.replace(options.characters, '');
				if (options.eliminateNumbers) {
					// An all-digit token becomes '' so the filter below can drop it.
					token = token.replace(/^\d+$/, '');
				}
				if (options.toLowerCase) {
					token = token.toLowerCase();
				}
				return token;
			}).filter(function (token) {
				// Logical OR restored: the escaped form `\|\|` is not valid JavaScript.
				return !!token || options.emptyStrings;
			});

			return tokens;

		}


		/**
		@@ -86,0 +103,0 @@ * EXPORTS.

package.json

		@@ -26,3 +26,3 @@ {
		},
		"version": "0.2.2",
		"version": "0.2.3",
		"keywords": [
		@@ -29,0 +29,0 @@ "nlp",

nlp-toolkit - npm Package Compare versions

New alerts

Improved metrics

Worsened metrics