natural-content
Advanced tools
Comparing version 1.0.13 to 1.0.14
37
index.js
@@ -0,1 +1,2 @@ | ||
const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural'); | ||
const diacritics = require('./lib/diacritics.js'); | ||
@@ -173,2 +174,36 @@ | ||
/**
 * getTopKeywords - Extract the highest-ranked keywords from a set of documents
 * using TfIdf computed on French stems.
 *
 * Each document is tokenized and stemmed, the stems are ranked with TfIdf, and
 * every retained stem is mapped back to a word of the original text through
 * findword().
 *
 * @param {string[]} documents The list of the documents (plain text)
 * @param {number} nbrKeywords The maximum number of keywords to return
 * @returns {string[]} The list of keywords, in the order returned by listTerms
 */
function getTopKeywords(documents, nbrKeywords) {
  // attach() adds tokenizeAndStem() to strings, which is used just below.
  PorterStemmerFr.attach();

  const stemmedDocuments = documents.map((text) => text.tokenizeAndStem());
  const tfidf = new TfIdf();

  stemmedDocuments.forEach((stems) => tfidf.addDocument(stems));

  // Keep the first nbrKeywords stems.
  // NOTE(review): listTerms(0) is called with document index 0 only, so the
  // ranking appears to cover the first document's terms — confirm intended.
  const rankedTerms = tfidf.listTerms(0);
  const topStems = rankedTerms.slice(0, nbrKeywords).map((entry) => entry.term);

  // Tokenize the raw (unstemmed) text so each stem can be turned back into a word.
  const rawText = documents.join('\n');
  const words = new AggressiveTokenizerFr().tokenize(rawText);

  return topStems.map((stem) => findword(stem, words));
}
/**
 * findword - Find the word that a stem most likely comes from
 *
 * Case is ignored, so a lowercase stem can still match a capitalised token.
 * The first token that starts with the stem wins; when no token starts with
 * it, the first token that merely contains the stem is used instead.
 *
 * @param {string} stem The stem to look for
 * @param {string[]} tokens The candidate words
 * @returns {string} The matching token (original casing), or the stem itself
 * when no token matches
 */
function findword(stem, tokens) {
  const needle = stem.toLowerCase();
  let containing = null;

  for (const token of tokens) {
    const candidate = token.toLowerCase();

    // A stem is normally the beginning of the word, so a prefix match is the best answer.
    if (candidate.startsWith(needle)) {
      return token;
    }

    // Remember the first substring match as a fallback (the original matching rule).
    if (containing === null && candidate.includes(needle)) {
      containing = token;
    }
  }

  return containing !== null ? containing : stem;
}
exports.isFirstCharUpperCase = isFirstCharUpperCase; | ||
@@ -187,1 +222,3 @@ | ||
exports.getNgrams = getNgrams; | ||
exports.getTopKeywords = getTopKeywords; |
{ | ||
"name": "natural-content", | ||
"version": "1.0.13", | ||
"version": "1.0.14", | ||
"description": "A set of natural functions like tf.idf, extract words & n-grams, remove diacritics, ... (experimental project)", | ||
@@ -12,2 +12,3 @@ "main": "index.js", | ||
"dependencies": { | ||
"natural": "^0.6.3", | ||
"underscore": "^1.9.1" | ||
@@ -14,0 +15,0 @@ }, |
@@ -26,2 +26,11 @@ const assert = require('assert'); | ||
it('top keywords', () => {
  // One document in which word2 is the most frequent term, followed by word1.
  const corpus = [ 'word1 word2 word3 word4 word1 word6 word1 word2 word2 word1 word2 word2 word2 word2' ];
  const keywords = natural.getTopKeywords(corpus, 2);

  assert.strictEqual(keywords.length, 2);
  assert.strictEqual(keywords[0], 'word2');
});
it('Special caracters', () => { | ||
@@ -28,0 +37,0 @@ const text = 'ceci est un texte en français ! sans caractères spéciaux !§($€) # 123 avant-hier'; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
29639
1230
2
+ Added natural@^0.6.3
+ Added afinn-165@1.0.4 (transitive)
+ Added apparatus@0.0.10 (transitive)
+ Added call-bind@1.0.7 (transitive)
+ Added define-data-property@1.1.4 (transitive)
+ Added es-define-property@1.0.0 (transitive)
+ Added es-errors@1.3.0 (transitive)
+ Added function-bind@1.1.2 (transitive)
+ Added get-intrinsic@1.2.4 (transitive)
+ Added gopd@1.0.1 (transitive)
+ Added has-property-descriptors@1.0.2 (transitive)
+ Added has-proto@1.0.3 (transitive)
+ Added has-symbols@1.0.3 (transitive)
+ Added hasown@2.0.2 (transitive)
+ Added isarray@2.0.5 (transitive)
+ Added json-stable-stringify@1.1.1 (transitive)
+ Added jsonify@0.0.1 (transitive)
+ Added natural@0.6.3 (transitive)
+ Added object-keys@1.1.1 (transitive)
+ Added set-function-length@1.2.2 (transitive)
+ Added sylvester@0.0.12 (transitive)