nlp-toolkit
Advanced tools
Comparing version 0.2.4 to 0.2.5
@@ -13,2 +13,3 @@ /** | ||
var through2 = require('through2'); | ||
var FreqDist = require('./components').FreqDist; | ||
@@ -19,5 +20,19 @@ | ||
*/ | ||
function frequency() { | ||
var _freq = {}; | ||
function frequency(text, options) { | ||
if (!options && Object.prototype.toString.call(text) !== '[object Array]') { | ||
options = text; | ||
text = ''; | ||
} | ||
options = options || {}; | ||
if (text) { | ||
return FreqDist(text); | ||
} | ||
var _freq = FreqDist(); | ||
return through2.obj(function (chunk, enc, callback) { | ||
var _chunk = (typeof chunk === 'object' && Object.prototype.toString.call(chunk) !== '[object Array]') ? chunk.text : chunk; | ||
@@ -27,21 +42,11 @@ if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]') { | ||
} | ||
_chunk.forEach(function (token) { | ||
_freq = add(_freq, token, 1); | ||
}); | ||
return callback(); | ||
if (options.cache) { | ||
_freq.add(_chunk); | ||
return callback(); | ||
} | ||
return callback(null, FreqDist(_chunk)); | ||
}, function (callback) { | ||
var _freqArray = Object.keys(_freq).map(function (item) { | ||
return { | ||
token: item, | ||
count: _freq[item] | ||
}; | ||
}); | ||
_freqArray.sort(function (a, b) { | ||
if (a.count > b.count) { | ||
return -1; | ||
} else { | ||
return 1 | ||
} | ||
}); | ||
this.push(_freqArray); | ||
if (options.cache) { | ||
this.push(_freq); | ||
} | ||
return callback(); | ||
@@ -52,11 +57,3 @@ }); | ||
function add(_freq, token, count) { | ||
if (!_freq.hasOwnProperty(token)) { | ||
_freq[token] = 0; | ||
} | ||
_freq[token] += count; | ||
return _freq; | ||
} | ||
/** | ||
@@ -63,0 +60,0 @@ * EXPORTS. |
@@ -13,2 +13,3 @@ /** | ||
var through2 = require('through2'); | ||
var Idf = require('./components').Idf; | ||
@@ -19,16 +20,31 @@ | ||
*/ | ||
function idf() { | ||
function idf(text, options) { | ||
var countTokens = {}; | ||
var sentences = 0; | ||
if (!options && Object.prototype.toString.call(text) !== '[object Array]') { | ||
options = text; | ||
text = ''; | ||
} | ||
options = options || {}; | ||
if (text) { | ||
return Idf(text); | ||
} | ||
var _idf = Idf(); | ||
return through2.obj(function (chunk, enc, callback) { | ||
sentences++; | ||
for (var key in chunk) { | ||
if (!countTokens.hasOwnProperty(key)) { | ||
countTokens[key] = 0; | ||
} | ||
countTokens[key] += 1; | ||
var _chunk = (chunk.hasOwnProperty('text')) ? chunk.text : chunk; | ||
if (!_chunk || Object.prototype.toString.call(_chunk) !== '[object Array]' && !_chunk.hasOwnProperty('tokens')) { | ||
return callback(new Error('Cannot use document ' + JSON.stringify(chunk))); | ||
} | ||
return callback(null, chunk); | ||
_idf.add(_chunk); | ||
return callback(); | ||
}, function (callback) { | ||
this.push(_idf); | ||
return callback(); | ||
}); | ||
@@ -35,0 +51,0 @@ |
@@ -24,2 +24,3 @@ /** | ||
var calculate = require('./calculate'); | ||
var components = require('./components'); | ||
@@ -42,3 +43,4 @@ | ||
filters: filters, | ||
calculate: calculate | ||
calculate: calculate, | ||
components: components | ||
}; |
@@ -26,3 +26,3 @@ { | ||
}, | ||
"version": "0.2.4", | ||
"version": "0.2.5", | ||
"keywords": [ | ||
@@ -29,0 +29,0 @@ "nlp", |
@@ -19,22 +19,22 @@ /** | ||
*/ | ||
describe('nlp.sentences', function() { | ||
describe('# of sentences', function () { | ||
it('should have 7 sentences', function (done) { | ||
var nlp = NlpToolkit(); | ||
var sentences = []; | ||
fs.createReadStream(path.resolve(__dirname, './texts.txt')) | ||
.pipe(es.split()) | ||
.pipe(nlp.sentences()) | ||
.on('data', function (sentence) { | ||
sentences[sentences.length] = sentence; | ||
}) | ||
.on('end', function () { | ||
assert.lengthOf(sentences, 7, 'array has length of 7'); | ||
done(); | ||
}) | ||
.on('error', function (err) { | ||
throw err; | ||
}); | ||
}); | ||
}); | ||
}); | ||
// describe('nlp.sentences', function() { | ||
// describe('# of sentences', function () { | ||
// it('should have 7 sentences', function (done) { | ||
// var nlp = NlpToolkit(); | ||
// var sentences = []; | ||
// fs.createReadStream(path.resolve(__dirname, './texts.txt')) | ||
// .pipe(es.split()) | ||
// .pipe(nlp.sentences()) | ||
// .on('data', function (sentence) { | ||
// sentences[sentences.length] = sentence; | ||
// }) | ||
// .on('end', function () { | ||
// assert.lengthOf(sentences, 7, 'array has length of 7'); | ||
// done(); | ||
// }) | ||
// .on('error', function (err) { | ||
// throw err; | ||
// }); | ||
// }); | ||
// }); | ||
// }); |
764653
51
1166