Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

nlp-toolkit

Package Overview
Dependencies
Maintainers
1
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

nlp-toolkit - npm Package Compare versions

Comparing version 0.2.2 to 0.2.3

examples/no_streaming_context.js

4

examples/tokenize_stem_freqDist.js

@@ -20,3 +20,5 @@ /**

.pipe(nlp.stopwords({
words: ['mr', 'mrs', 'miss']
additionalWords: {
all: ['mr', 'mrs', 'miss']
}
}))

@@ -23,0 +25,0 @@ .pipe(nlp.stemmer())

@@ -52,2 +52,3 @@ /**

var DEFAULT_STEMMER = 'en';
var stemmerCache = {};

@@ -58,10 +59,15 @@

*/
function stemmer(options) {
function stemmer(text, options) {
if (!options && Object.prototype.toString.call(text) !== '[object Array]') {
options = text;
text = '';
}
options = options || {};
options.defaultStemmer = options.defaultStemmer || DEFAULT_STEMMER;
var stemmerCache = {
default: new stemmerLookup[options.defaultStemmer]()
};
if (!stemmerCache.default) {
stemmerCache.default = new stemmerLookup[options.defaultStemmer]();
}

@@ -82,2 +88,19 @@ var getLang = function () {

if (text) {
return stemWords(text, options.lang || options.defaultStemmer);
}
/**
 * Stems every token in `text` with the stemmer registered for `lang`.
 *
 * When `lang` is not an own key of `stemmerLookup`, a message is logged and
 * the input is returned untouched. Otherwise the stemmer instance for `lang`
 * is created on first use, stored in `stemmerCache`, and reused afterwards.
 *
 * @param {Array} text - Tokens to stem.
 * @param {string} lang - Key into `stemmerLookup` / `stemmerCache`.
 * @returns {Array} A new array of stemmed tokens, or `text` itself when the
 *   language is unsupported.
 */
function stemWords(text, lang) {
    var isSupported = stemmerLookup.hasOwnProperty(lang);
    if (!isSupported) {
        console.log(lang + ' is not a supported language for stemming.');
        return text;
    }

    // Lazily build the stemmer for this language the first time it is needed.
    var isCached = stemmerCache.hasOwnProperty(lang);
    if (!isCached) {
        var Stemmer = stemmerLookup[lang];
        stemmerCache[lang] = new Stemmer();
    }

    var stemOne = function (word) {
        return stemmerCache[lang].stemWord(word);
    };
    return text.map(stemOne);
}
debug('defaultStemmer', options.defaultStemmer);

@@ -92,11 +115,4 @@

var lang = getLang(chunk);
if (!stemmerCache.hasOwnProperty(lang)) {
stemmerCache[lang] = new stemmerLookup[lang]();
}
var tokens = stemWords(_chunk, getLang(chunk));
var tokens = _chunk.map(function (token) {
return stemmerCache[lang].stemWord(token);
});
var response;

@@ -103,0 +119,0 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {

@@ -24,2 +24,3 @@ /**

var DEFAULT_LANG = 'en';
var stopwordsCache = {};

@@ -30,4 +31,9 @@

*/
function stopwords(options) {
function stopwords(text, options) {
if (!options && Object.prototype.toString.call(text) !== '[object Array]') {
options = text;
text = '';
}
options = options || {};

@@ -47,6 +53,2 @@ options.defaultLang = options.defaultLang || DEFAULT_LANG;

var stopwordsCache = {
default: getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default')
};
var getLang = function () {

@@ -66,5 +68,24 @@ return 'default';

stopwordsCache.default = getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default');
if (text) {
return removeStopwords(text, options.lang || options.defaultLang);
}
debug('defaultLang', options.defaultLang);
debug('defaultFilename', options.defaultFilename);
/**
 * Filters stopwords for `lang` out of `text`.
 *
 * The per-language loader is created on first use via `getStopwordsWrapper`
 * and stored in `stopwordsCache`; calling it yields a promise for an object
 * whose own keys are the stopwords.
 *
 * @param {Array} text - Tokens to filter.
 * @param {string} lang - Key into `stopwordsCache`.
 * @returns {Promise<Array>} Resolves to the tokens of `text` that are not own
 *   keys of the loaded stopword object.
 */
function removeStopwords(text, lang) {
    var hasLoader = stopwordsCache.hasOwnProperty(lang);
    if (!hasLoader) {
        var filename = getFilename(lang);
        stopwordsCache[lang] = getStopwordsWrapper(filename, options.additionalWords, lang);
    }

    var loadStopwords = stopwordsCache[lang];
    return loadStopwords().then(function (stopwordSet) {
        var kept = text.filter(function (word) {
            var isStopword = stopwordSet.hasOwnProperty(word);
            return !isStopword;
        });
        return kept;
    });
}
return through2.obj(function (chunk, enc, callback) {

@@ -77,14 +98,4 @@

var lang = getLang(chunk);
if (!stopwordsCache.hasOwnProperty(lang)) {
stopwordsCache[lang] = getStopwordsWrapper(getFilename(lang), options.additionalWords, lang);
}
stopwordsCache[lang]()
.then(function (stopwordsCache) {
var tokens = _chunk.filter(function (token) {
return !stopwordsCache.hasOwnProperty(token);
});
removeStopwords(_chunk, getLang(chunk))
.then(function (tokens) {
var response;

@@ -97,14 +108,12 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') {

}
return callback(null, response);
})
.catch(callback);
})
.catch(function (err) {
return callback(err);
});
});
}
function getStopwordsWrapper(filename, additionalWords, lang) {
var stopwordsCache;
var singleStopwordsCache;
var _words = [].concat(additionalWords.all);

@@ -114,4 +123,4 @@ _words = _words.concat(additionalWords[lang] || additionalWords.default);

return new Promise(function (resolve, reject) {
if (stopwordsCache) {
return resolve(stopwordsCache);
if (singleStopwordsCache) {
return resolve(singleStopwordsCache);
}

@@ -125,7 +134,7 @@ var _stopwordsCache = {};

.on('end', function () {
stopwordsCache = _stopwordsCache;
singleStopwordsCache = _stopwordsCache;
_words.forEach(function (_word) {
stopwordsCache[_word] = 1;
singleStopwordsCache[_word] = 1;
})
return resolve(stopwordsCache);
return resolve(singleStopwordsCache);
})

@@ -132,0 +141,0 @@ .on('error', function (err) {

@@ -29,4 +29,9 @@ /**

*/
function tokenizer(options) {
function tokenizer(text, options) {
if (!options && typeof text === 'object') {
options = text;
text = '';
}
options = options || {};

@@ -45,2 +50,6 @@ options.characters = (options.characters instanceof RegExp) ? options.characters : DEFAULT_CHARACTERS;

if (text) {
return tokenize(text, options);
}
return through2.obj(function (chunk, enc, callback) {

@@ -54,14 +63,3 @@

var tokens = _chunk.split(options.separator).map(function (token) {
token = token.replace(options.characters, '');
if (options.eliminateNumbers) {
token = token.replace(/^\d+$/, '');
}
if (options.toLowerCase) {
token = token.toLowerCase();
}
return token;
}).filter(function (token) {
return !!token || options.emptyStrings;
});
var tokens = tokenize(_chunk, options);

@@ -83,3 +81,22 @@ var response;

/**
 * Splits `text` into cleaned tokens.
 *
 * Each piece produced by splitting on `options.separator` has the characters
 * matching `options.characters` removed, is blanked when it consists solely
 * of digits and `options.eliminateNumbers` is set, and is lower-cased when
 * `options.toLowerCase` is set. Empty results are dropped unless
 * `options.emptyStrings` is truthy.
 *
 * @param {string} text - Input to tokenize.
 * @param {Object} options - Tokenizer settings (`separator`, `characters`,
 *   `eliminateNumbers`, `toLowerCase`, `emptyStrings`).
 * @returns {Array<string>} The cleaned tokens in their original order.
 */
function tokenize(text, options) {
    var pieces = text.split(options.separator);
    var result = [];

    for (var i = 0; i < pieces.length; i += 1) {
        var cleaned = pieces[i].replace(options.characters, '');

        if (options.eliminateNumbers) {
            cleaned = cleaned.replace(/^\d+$/, '');
        }
        if (options.toLowerCase) {
            cleaned = cleaned.toLowerCase();
        }

        // Keep empty tokens only when the caller explicitly asked for them.
        if (cleaned || options.emptyStrings) {
            result.push(cleaned);
        }
    }

    return result;
}
/**

@@ -86,0 +103,0 @@ * EXPORTS.

@@ -26,3 +26,3 @@ {

},
"version": "0.2.2",
"version": "0.2.3",
"keywords": [

@@ -29,0 +29,0 @@ "nlp",

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc