Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

nlp-toolkit

Package Overview
Dependencies
Maintainers
1
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

nlp-toolkit - npm Package Compare versions

Comparing version 0.2.0 to 0.2.1

examples/dynamic_language.js

2

examples/bayes_cross_validation_from_file.js

@@ -19,3 +19,3 @@ /**

*/
fs.createReadStream('./sentiment.txt')
fs.createReadStream('./texts/sentiment.txt')
.pipe(es.split())

@@ -22,0 +22,0 @@ .pipe(nlp.prepare(function (sentence) {

@@ -16,3 +16,3 @@ /**

*/
fs.createReadStream('./pride_prejudice.txt')
fs.createReadStream('./texts/pride_prejudice.txt')
.pipe(es.split())

@@ -19,0 +19,0 @@ .pipe(nlp.tokenizer())

@@ -35,14 +35,2 @@ /**

});
// var __xxx = _results.reduce(function (p, c) {
// p.total += c.total;
// p.positive += c.positive;
// p.negative += c.negative;
// return p;
// }, {
// total: 0,
// positive: 0,
// negative: 0
// });
// __xxx.accuracy = Math.round(__xxx.positive * 100 / __xxx.total) / 100;
// results[results.length] = __xxx;
});

@@ -49,0 +37,0 @@ return callback();

@@ -60,11 +60,26 @@ /**

options = options || {};
options.lang = options.lang || DEFAULT_STEMMER;
if (!stemmerLookup.hasOwnProperty(options.lang)) {
throw new Error('Stemmer for ' + options.lang + ' does not exist.');
options.defaultStemmer = options.defaultStemmer || DEFAULT_STEMMER;
var stemmerCache = {
default: new stemmerLookup[options.defaultStemmer]()
};
var getLang = function () {
return 'default';
};
if (options.lang) {
if (typeof options.lang === 'function') {
getLang = options.lang;
} else {
getLang = function () {
return options.lang;
};
}
}
var stemmer = new stemmerLookup[options.lang]();
debug('lang', options.lang);
debug('defaultStemmer', options.defaultStemmer);
return through2.obj(function (chunk, enc, callback) {
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk;

@@ -74,5 +89,12 @@ if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') {

}
var lang = getLang(chunk);
if (!stemmerCache.hasOwnProperty(lang)) {
stemmerCache[lang] = new stemmerLookup[lang]();
}
var tokens = _chunk.map(function (token) {
return stemmer.stemWord(token);
return stemmerCache[lang].stemWord(token);
});
var response;

@@ -85,3 +107,5 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {

}
return callback(null, response);
});

@@ -88,0 +112,0 @@ }

@@ -32,20 +32,55 @@ /**

options = options || {};
var lang = options.lang || DEFAULT_LANG;
var filename = options.filename || path.resolve(__dirname, './stopwords/' + lang + '.txt');
var words = [].concat(options.words || []);
var getStopwords = getStopwordsWrapper(filename);
options.defaultLang = options.defaultLang || DEFAULT_LANG;
options.defaultFilename = options.defaultFilename || getFilename(options.defaultLang);
options.additionalWords = options.additionalWords || {};
debug('lang', lang);
debug('filename', filename);
if (Object.prototype.toString.call(options.additionalWords) === '[object Array]') {
options.additionalWords = {
all: options.additionalWords
};
} else {
options.additionalWords.all = (options.additionalWords.all) ? [].concat(options.additionalWords.all) : [];
options.additionalWords.default = (options.additionalWords.default) ? [].concat(options.additionalWords.default) : [];
}
var stopwordsCache = {
default: getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default')
};
var getLang = function () {
return 'default';
};
if (options.lang) {
if (typeof options.lang === 'function') {
getLang = options.lang;
} else {
getLang = function () {
return options.lang;
};
}
}
debug('defaultLang', options.defaultLang);
debug('defaultFilename', options.defaultFilename);
return through2.obj(function (chunk, enc, callback) {
getStopwords()
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk;
if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') {
return callback(new Error('Chunk is not an array ' + JSON.stringify(chunk)));
}
var lang = getLang(chunk);
if (!stopwordsCache.hasOwnProperty(lang)) {
stopwordsCache[lang] = getStopwordsWrapper(getFilename(lang), options.additionalWords, lang);
}
stopwordsCache[lang]()
.then(function (stopwordsCache) {
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk;
if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') {
return callback(new Error('Chunk is not an array ' + JSON.stringify(chunk)));
}
var tokens = _chunk.filter(function (token) {
return !stopwordsCache.hasOwnProperty(token) && words.indexOf(token) === -1;
return !stopwordsCache.hasOwnProperty(token);
});
var response;

@@ -58,3 +93,5 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {

}
return callback(null, response);
})

@@ -67,4 +104,6 @@ .catch(function (err) {

function getStopwordsWrapper(filename) {
function getStopwordsWrapper(filename, additionalWords, lang) {
var stopwordsCache;
var _words = [].concat(additionalWords.all);
_words = _words.concat(additionalWords[lang] || additionalWords.default);
return function getStopwords() {

@@ -83,2 +122,5 @@ return new Promise(function (resolve, reject) {

stopwordsCache = _stopwordsCache;
_words.forEach(function (_word) {
stopwordsCache[_word] = 1;
})
return resolve(stopwordsCache);

@@ -93,2 +135,6 @@ })

/**
 * Build the absolute path of the stopword list for a language.
 *
 * The path is resolved against this module's directory and has the form
 * `<module dir>/stopwords/<lang>.txt`.
 *
 * @param {string} lang - Language identifier used as the file's base name.
 * @returns {string} Absolute path to the stopword file for `lang`.
 */
function getFilename(lang) {
    var relativeLocation = './stopwords/' + lang + '.txt';
    var absoluteLocation = path.resolve(__dirname, relativeLocation);
    return absoluteLocation;
}
/**

@@ -95,0 +141,0 @@ * EXPORTS.

@@ -26,3 +26,3 @@ {

},
"version": "0.2.0",
"version": "0.2.1",
"keywords": [

@@ -29,0 +29,0 @@ "nlp",

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc