natural-content
Advanced tools
Comparing version 1.0.14 to 1.0.15
@@ -122,3 +122,3 @@ const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural'); | ||
*/ | ||
function getWords(text, withStopWords, language) { | ||
function getWords(text, withStopWords = false, language = 'fr') { | ||
const words = text.replace(/[\n\r]/g, WORD_SEPARATOR) // Convert end of line | ||
@@ -183,3 +183,3 @@ .replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs | ||
*/ | ||
function getTopKeywords(documents, nbrKeywords) { | ||
function getTopKeywords(documents, nbrKeywords, language = 'fr') { | ||
PorterStemmerFr.attach(); | ||
@@ -189,3 +189,3 @@ | ||
documents.forEach((d) => tfidf.addDocument(d.tokenizeAndStem())); | ||
documents.forEach((d) => tfidf.addDocument(getWords(d, false, language))); | ||
@@ -192,0 +192,0 @@ // Get the 2 first main terms from the stems |
{ | ||
"name": "natural-content", | ||
"version": "1.0.14", | ||
"version": "1.0.15", | ||
"description": "A set of natural functions like tf.idf, extract words & n-grams, remove diacritics, ... (experimental project)", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -27,3 +27,3 @@ const assert = require('assert'); | ||
it('top keywords', () => { | ||
const stats = natural.getTopKeywords([ 'word1 word2 word3 word4 word1 word6 word1 word2 word2 word1 word2 word2 word2 word2' ], 2); | ||
const stats = natural.getTopKeywords([ 'word1 du word2 du word3 du word4 word1 du word6 du word1 du word2 du word2 du word1 word2 word2 word2 word2' ], 2); | ||
@@ -30,0 +30,0 @@ assert(stats.length === 2); |
29704