nlp-toolkit
Advanced tools
Comparing version 0.2.0 to 0.2.1
@@ -19,3 +19,3 @@ /** | ||
*/ | ||
fs.createReadStream('./sentiment.txt') | ||
fs.createReadStream('./texts/sentiment.txt') | ||
.pipe(es.split()) | ||
@@ -22,0 +22,0 @@ .pipe(nlp.prepare(function (sentence) { |
@@ -16,3 +16,3 @@ /** | ||
*/ | ||
fs.createReadStream('./pride_prejudice.txt') | ||
fs.createReadStream('./texts/pride_prejudice.txt') | ||
.pipe(es.split()) | ||
@@ -19,0 +19,0 @@ .pipe(nlp.tokenizer()) |
@@ -35,14 +35,2 @@ /** | ||
}); | ||
// var __xxx = _results.reduce(function (p, c) { | ||
// p.total += c.total; | ||
// p.positive += c.positive; | ||
// p.negative += c.negative; | ||
// return p; | ||
// }, { | ||
// total: 0, | ||
// positive: 0, | ||
// negative: 0 | ||
// }); | ||
// __xxx.accuracy = Math.round(__xxx.positive * 100 / __xxx.total) / 100; | ||
// results[results.length] = __xxx; | ||
}); | ||
@@ -49,0 +37,0 @@ return callback(); |
@@ -60,11 +60,26 @@ /** | ||
options = options || {}; | ||
options.lang = options.lang || DEFAULT_STEMMER; | ||
if (!stemmerLookup.hasOwnProperty(options.lang)) { | ||
throw new Error('Stemmer for ' + options.lang + ' does not exist.'); | ||
options.defaultStemmer = options.defaultStemmer || DEFAULT_STEMMER; | ||
var stemmerCache = { | ||
default: new stemmerLookup[options.defaultStemmer]() | ||
}; | ||
var getLang = function () { | ||
return 'default'; | ||
}; | ||
if (options.lang) { | ||
if (typeof options.lang === 'function') { | ||
getLang = options.lang; | ||
} else { | ||
getLang = function () { | ||
return options.lang; | ||
}; | ||
} | ||
} | ||
var stemmer = new stemmerLookup[options.lang](); | ||
debug('lang', options.lang); | ||
debug('defaultStemmer', options.defaultStemmer); | ||
return through2.obj(function (chunk, enc, callback) { | ||
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk; | ||
@@ -74,5 +89,12 @@ if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') { | ||
} | ||
var lang = getLang(chunk); | ||
if (!stemmerCache.hasOwnProperty(lang)) { | ||
stemmerCache[lang] = new stemmerLookup[lang](); | ||
} | ||
var tokens = _chunk.map(function (token) { | ||
return stemmer.stemWord(token); | ||
return stemmerCache[lang].stemWord(token); | ||
}); | ||
var response; | ||
@@ -85,3 +107,5 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') { | ||
} | ||
return callback(null, response); | ||
}); | ||
@@ -88,0 +112,0 @@ } |
@@ -32,20 +32,55 @@ /** | ||
options = options || {}; | ||
var lang = options.lang || DEFAULT_LANG; | ||
var filename = options.filename || path.resolve(__dirname, './stopwords/' + lang + '.txt'); | ||
var words = [].concat(options.words || []); | ||
var getStopwords = getStopwordsWrapper(filename); | ||
options.defaultLang = options.defaultLang || DEFAULT_LANG; | ||
options.defaultFilename = options.defaultFilename || getFilename(options.defaultLang); | ||
options.additionalWords = options.additionalWords || {}; | ||
debug('lang', lang); | ||
debug('filename', filename); | ||
if (Object.prototype.toString.call(options.additionalWords) === '[object Array]') { | ||
options.additionalWords = { | ||
all: options.additionalWords | ||
}; | ||
} else { | ||
options.additionalWords.all = (options.additionalWords.all) ? [].concat(options.additionalWords.all) : []; | ||
options.additionalWords.default = (options.additionalWords.default) ? [].concat(options.additionalWords.default) : []; | ||
} | ||
var stopwordsCache = { | ||
default: getStopwordsWrapper(options.defaultFilename, options.additionalWords, 'default') | ||
}; | ||
var getLang = function () { | ||
return 'default'; | ||
}; | ||
if (options.lang) { | ||
if (typeof options.lang === 'function') { | ||
getLang = options.lang; | ||
} else { | ||
getLang = function () { | ||
return options.lang; | ||
}; | ||
} | ||
} | ||
debug('defaultLang', options.defaultLang); | ||
debug('defaultFilename', options.defaultFilename); | ||
return through2.obj(function (chunk, enc, callback) { | ||
getStopwords() | ||
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk; | ||
if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') { | ||
return callback(new Error('Chunk is not an array ' + JSON.stringify(chunk))); | ||
} | ||
var lang = getLang(chunk); | ||
if (!stopwordsCache.hasOwnProperty(lang)) { | ||
stopwordsCache[lang] = getStopwordsWrapper(getFilename(lang), options.additionalWords, lang); | ||
} | ||
stopwordsCache[lang]() | ||
.then(function (stopwordsCache) { | ||
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk; | ||
if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') { | ||
return callback(new Error('Chunk is not an array ' + JSON.stringify(chunk))); | ||
} | ||
var tokens = _chunk.filter(function (token) { | ||
return !stopwordsCache.hasOwnProperty(token) && words.indexOf(token) === -1; | ||
return !stopwordsCache.hasOwnProperty(token); | ||
}); | ||
var response; | ||
@@ -58,3 +93,5 @@ if (Object.prototype.toString.call(chunk) !== '[object Array]') { | ||
} | ||
return callback(null, response); | ||
}) | ||
@@ -67,4 +104,6 @@ .catch(function (err) { | ||
function getStopwordsWrapper(filename) { | ||
function getStopwordsWrapper(filename, additionalWords, lang) { | ||
var stopwordsCache; | ||
var _words = [].concat(additionalWords.all); | ||
_words = _words.concat(additionalWords[lang] || additionalWords.default); | ||
return function getStopwords() { | ||
@@ -83,2 +122,5 @@ return new Promise(function (resolve, reject) { | ||
stopwordsCache = _stopwordsCache; | ||
_words.forEach(function (_word) { | ||
stopwordsCache[_word] = 1; | ||
}) | ||
return resolve(stopwordsCache); | ||
@@ -93,2 +135,6 @@ }) | ||
function getFilename(lang) { | ||
return path.resolve(__dirname, './stopwords/' + lang + '.txt'); | ||
} | ||
/** | ||
@@ -95,0 +141,0 @@ * EXPORTS. |
@@ -26,3 +26,3 @@ { | ||
}, | ||
"version": "0.2.0", | ||
"version": "0.2.1", | ||
"keywords": [ | ||
@@ -29,0 +29,0 @@ "nlp", |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
752727
46
867
0
6