natural-content
Advanced tools
Comparing version 1.0.4 to 1.0.5
45
index.js
@@ -106,8 +106,10 @@ var _ = require('underscore'); | ||
if (stats) { | ||
// update the number of documents for this word | ||
stats.nbrDocsByWords[word] = stats.nbrDocsByWords[word] ? ++stats.nbrDocsByWords[word] : 1; | ||
// Calculate sum & register the tf for min & max computation | ||
if (stats.words.has(word) && stats.words.get(word).tfs) { | ||
var wordStat = stats.words.get(word); | ||
if (stats.has(word) && stats.get(word).tfs) { | ||
var wordStat = stats.get(word); | ||
// update the number of documents for this word | ||
wordStat.nbrDocsByWords++ ; | ||
// Add the tf in the list of all tfs for this word | ||
wordStat.tfs.push(tfs[word]); | ||
@@ -118,3 +120,3 @@ wordStat.tfSum += tfs[word]; | ||
var newWordStat = initWordStat(word, tfs[word]); | ||
stats.words.set(word, newWordStat); | ||
stats.set(word, newWordStat); | ||
} | ||
@@ -149,7 +151,7 @@ } | ||
_.keys(document.tfs).forEach(function(word) { | ||
var idf = Math.log(nbrDocs/stats.nbrDocsByWords[word]) + 1; | ||
var idf = Math.log(nbrDocs/stats.get(word).nbrDocsByWords) + 1; | ||
tfIdf[word] = document.tfs[word] * idf; | ||
if (stats.words.has(word) && stats.words.get(word).tfIdfs && stats.words.get(word).idfs) { | ||
var wordStat = stats.words.get(word); | ||
if (stats.has(word) && stats.get(word).tfIdfs && stats.get(word).idfs) { | ||
var wordStat = stats.get(word); | ||
@@ -179,3 +181,3 @@ wordStat.tfIdfs.push(tfIdf[word]); | ||
var result = {}; | ||
var stats = createEmptyStat(); | ||
var stats = new Map(); | ||
@@ -188,16 +190,15 @@ // Calculate the TF of each words for each docs | ||
// Calculate stats : min, max, avg for tf, idf & tf.idf | ||
for (var wordStat of stats.values()) { | ||
// Calculate min, max, avg for tf, idf & tf.idf | ||
for (var wordStat of stats.words.values()) { | ||
wordStat.tfMin = _.min(wordStat.tfs); | ||
wordStat.tfMax = _.max(wordStat.tfs); | ||
wordStat.tfAvg = wordStat.tfSum / stats.nbrDocsByWords[wordStat.word]; | ||
wordStat.tfAvg = wordStat.tfSum / wordStat.nbrDocsByWords; | ||
wordStat.idfMax = _.max(wordStat.idfs); | ||
wordStat.idfAvg = wordStat.idfSum / stats.nbrDocsByWords[wordStat.word]; | ||
wordStat.idfAvg = wordStat.idfSum / wordStat.nbrDocsByWords; | ||
wordStat.tfIdfMin = _.min(wordStat.tfIdfs); | ||
wordStat.tfIdfMax = _.max(wordStat.tfIdfs); | ||
wordStat.tfIdfAvg = wordStat.tfIdfSum / stats.nbrDocsByWords[wordStat.word]; | ||
wordStat.tfIdfAvg = wordStat.tfIdfSum / wordStat.nbrDocsByWords; | ||
@@ -213,13 +214,2 @@ } | ||
/** | ||
* | ||
* Create an empty stat data object | ||
* | ||
*/ | ||
function createEmptyStat() { | ||
return { | ||
nbrDocsByWords : {}, | ||
words : new Map() | ||
}; | ||
} | ||
@@ -230,6 +220,7 @@ /** | ||
*/ | ||
function initWordStat(word, tf) { | ||
function initWordStat(word, tf) { | ||
return { | ||
word : word, | ||
nbrDocsByWords : 1, | ||
tfSum : tf, | ||
@@ -236,0 +227,0 @@ tfs : [tf], |
{ | ||
"name": "natural-content", | ||
"version": "1.0.4", | ||
"version": "1.0.5", | ||
"description": "A set of natural functions like tf.idf, extract words & n-grams, ... (experimental project)", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -61,12 +61,12 @@ var assert = require("assert"); | ||
it("tf.idf for a set of document ", function(){ | ||
it("tf.idf for a set of document ", function() { | ||
var info = natural.getTfIdfs(documents, 1, false, "fr"); | ||
console.log("Word,TF Avg,TF Min,TF Max,IDF Avg,TF.IDF Sum,TF.IDF Avg"); | ||
var sorted = _.sortBy(Array.from(info.stats.values()), function(word) { return -word.tfIdfSum;}); | ||
assert(sorted[0].word === "word1"); | ||
//console.log(sorted); | ||
var sorted = _.sortBy(Array.from(info.stats.words.values()), function(word) { return -word.tfIdfSum;}); | ||
console.log(sorted); | ||
}); | ||
}); |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
0
19784
1110