New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

natural-content

Package Overview
Dependencies
Maintainers
1
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

natural-content - npm Package Compare versions

Comparing version 1.0.0 to 1.0.1

105

index.js

@@ -46,6 +46,6 @@ var _ = require('underscore');

if (withStopWords) {
return _.filter(words, function(word){return (word !== ''); });
return _.filter(words, function(word){return (word !== '') && ! _.isNumber(word); });
}
else {
return _.filter(words, function(word){return (word !== '' && stopwords.indexOf(word) === -1); });
return _.filter(words, function(word){return (word !== '' && ! _.isNumber(word) && stopwords.indexOf(word) === -1); });
}

@@ -87,3 +87,3 @@

*/
function getTf(words, n, nbrDocs) {
function getTf(words, n, stats) {

@@ -98,6 +98,20 @@ var ws = words;

_.keys(count).forEach(function(key) {
tfs[key] = count[key]/max;
if (nbrDocs) {
nbrDocs[key] = nbrDocs[key] ? ++nbrDocs[key] : 1;
_.keys(count).forEach(function(word) {
// Calculate the tf for this word
tfs[word] = count[word]/max;
// Update stats
if (stats) {
// update the number of documents for this word
stats.nbrDocsByWords[word] = stats.nbrDocsByWords[word] ? ++stats.nbrDocsByWords[word] : 1;
// Calculate sum & register the tf for min & max computation
if (stats.words[word]) {
stats.words[word].tfs.push(tfs[word]);
stats.words[word].tfSum += tfs[word];
}
else {
stats.words[word] = initWordStat(tfs[word]);
}
}

@@ -115,2 +129,3 @@

/**

@@ -121,3 +136,3 @@ * Get the tfIdf for each word of a document

* @param the document represented by an term frequency array.
* the function getTf can be used for generating the term frequency array
* the function getTf can be used for generating the term frequency array
* @param the number of document per word (index = word)

@@ -128,16 +143,16 @@ * @param the number of documents

*/
function geTfIdf(document, nbrDocsByWords, nbrDocs, stats) {
function geTfIdf(document, nbrDocs, stats) {
var tfIdf = {};
_.keys(document.tfs).forEach(function(word){
tfIdf[word] = document.tfs[word] * (Math.log(nbrDocs/nbrDocsByWords[word]) + 1);
if (stats[word]) {
stats[word].min = _.min([tfIdf[word], stats[word].min]);
stats[word].max = _.max([tfIdf[word], stats[word].max]);
stats[word].sum += tfIdf[word];
_.keys(document.tfs).forEach(function(word) {
var idf = Math.log(nbrDocs/stats.nbrDocsByWords[word]) + 1;
tfIdf[word] = document.tfs[word] * idf;
if (stats.words[word]) {
stats.words[word].tfIdfs.push(tfIdf[word]);
stats.words[word].tfIdfSum += tfIdf[word];
stats.words[word].idfs.push(idf);
stats.words[word].idfSum += idf;
}
else {
stats[word] = { min : tfIdf[word], max : tfIdf[word], sum : tfIdf[word] };
}
});

@@ -149,2 +164,18 @@

/**
 * Build the statistics record for a word that is seen for the first time.
 *
 * The term-frequency fields are seeded with the supplied value; the idf and
 * tf.idf accumulators start at zero with empty value lists.
 *
 * @param tf the term frequency of the word in the document being processed
 * @return a new record holding its own, unshared arrays
 */
function initWordStat(tf) {
    var wordStat = {};

    // Term frequency: already known for the first document.
    wordStat.tfSum = tf;
    wordStat.tfs = [tf];

    // Inverse document frequency: nothing recorded yet.
    wordStat.idfSum = 0;
    wordStat.idfs = [];

    // tf.idf: nothing recorded yet.
    wordStat.tfIdfSum = 0;
    wordStat.tfIdfs = [];

    return wordStat;
}
/**

@@ -160,15 +191,23 @@ * Get the TF.IDF for each words found in several documents

var result = {};
var nbrDocsByWords = {};
var stats = {};
var stats = createEmptyStat();
// Calculate the TF of each words for each docs
var tfs = _.map(documents, function(content){ return getTf(getWords(content, withStopWords), n, nbrDocsByWords);});
var tfs = _.map(documents, function(document){ return getTf(getWords(document, withStopWords), n, stats);});
// Calculate the tf.idf for each each docs & produce stat per word
var data = _.map(tfs, function(docTfs) {return geTfIdf(docTfs, nbrDocsByWords, documents.length, stats );});
// Calculate the tf.idf for each each docs & produce stats per word
var data = _.map(tfs, function(docTfs) { return geTfIdf(docTfs, documents.length, stats );});
// Calculate the average tf.idf for each word
stats = _.mapObject(stats, function(val, key) {
val.avg = val.sum/nbrDocsByWords[key];
return val;
// Calculate min, max, avg for tf, idf & tf.idf
stats.words = _.mapObject(stats.words, function(word, key){
word.tfMin = _.min(word.tfs);
word.tfMax = _.max(word.tfs);
word.tfAvg = word.tfSum / stats.nbrDocsByWords[key];
word.idfMax = _.max(word.idfs);
word.idfAvg = word.idfSum / stats.nbrDocsByWords[key];
word.tfIdfMin = _.min(word.tfIdfs);
word.tfIdfMax = _.max(word.tfIdfs);
word.tfIdfAvg = word.tfIdfSum / stats.nbrDocsByWords[key];
return word;
});

@@ -179,3 +218,2 @@

result.stats = stats;
result.nbrDocsByWords = nbrDocsByWords;

@@ -185,8 +223,9 @@ return result;

/**
 * Create the empty statistics container used while computing tf.idf values.
 *
 * Both members are dictionaries indexed by word, so they are created as
 * prototype-less objects rather than arrays:
 *  - on an array, the word "length" collides with Array.prototype.length
 *    (storing a stat object there throws a RangeError),
 *  - inherited names such as "constructor" would look like existing entries,
 *  - JSON.stringify drops string-keyed entries of an array.
 *
 * @return an object with two empty dictionaries :
 *         nbrDocsByWords (word => number of documents containing the word)
 *         words (word => statistics record for the word)
 */
function createEmptyStat() {
    return {
        nbrDocsByWords : Object.create(null),
        words : Object.create(null)
    };
}
/*
tf = number of occurrences of the term / number of occurrences of the most frequent term
idf = log(number of docs / number of docs containing the term) + 1
*/
exports.getStatements = getStatements;

@@ -193,0 +232,0 @@ exports.getWords = getWords;

{
"name": "natural-content",
"version": "1.0.0",
"version": "1.0.1",
"description": "A set of natural functions like tf.idf, extract words & n-grams, ... (experimental project)",

@@ -5,0 +5,0 @@ "main": "index.js",

var assert = require("assert");
var _ = require("underscore");
var natural = require("../index.js");

@@ -7,8 +8,8 @@

var documents = [
"word1 word2 word3 word4 word5 word6. word7 word1 word8 word9 word10 word11 word6. word1 word12 word13. word1 word1",
"word2 word7 word8 word9 word10 word11.",
"word1 word2 word3 word4 word5 word6. word7 word1 word8 word9 word10 word11 word6. word1 word12 word13. word1 word1 ",
"word2 word7 word8 word9 word10 word7 word11 word7 word11 word11 word11 word11.",
" word7 word2" ];
it('Statements', function() {
var stats = natural.getStatements("word1 word2 word3 word4 word5 word6. word7 word1 word8 word9 word10 word11 word6. word1 word12 word13");
var stats = natural.getStatements("word1 word2 word3 word4 :word5 word6. word7 word1, word8 word9 word10 word11 word6. word1 word12 word13");
assert(stats.length === 3);

@@ -58,4 +59,12 @@

var info = natural.getTfIdfs(documents, 1, false);
console.log(info);
var info = natural.getTfIdfs(documents, 1, false);
//console.log(info);
//console.log(info.stats.words['word7']);
console.log("Word,TF Avg,TF Min,TF Max,IDF Avg,TF.IDF Sum,TF.IDF Avg");
_.keys(info.stats.words).forEach(function(word) {
//console.log(">> ", info.stats.words[word]);
console.log(word + "," + info.stats.words[word].tfAvg + "," + info.stats.words[word].tfMin + "," + info.stats.words[word].tfMax + "," +
info.stats.words[word].idfAvg + ',' + info.stats.words[word].tfIdfSum + ',' + info.stats.words[word].tfIdfAvg);
});
});

@@ -62,0 +71,0 @@

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc