Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

natural-content

Package Overview
Dependencies
Maintainers
1
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

natural-content - npm Package Compare versions

Comparing version 1.0.13 to 1.0.14

37

index.js

@@ -0,1 +1,2 @@

const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural');
const diacritics = require('./lib/diacritics.js');

@@ -173,2 +174,36 @@

/**
 * getTopKeywords - Pick the top TfIdf-ranked terms and return a matching word for each
 *
 * Every document is stemmed and added to a TfIdf index. The term list of the
 * document at index 0 is truncated to nbrKeywords entries, and each retained
 * stem is mapped to a token of the joined documents through findword().
 *
 * NOTE(review): only the first document's term list is read (listTerms(0)),
 * although all documents are indexed - confirm this is the intended ranking.
 *
 * @param {Array} documents The list of documents (expected to be strings)
 * @param {number} nbrKeywords The maximum number of keywords to return
 * @returns {Array} One word per retained term, or the stem itself when findword() finds no token
 */
function getTopKeywords(documents, nbrKeywords) {
  // attach() must run before tokenizeAndStem() is called on the documents below.
  PorterStemmerFr.attach();

  const index = new TfIdf();
  documents.forEach((doc) => {
    index.addDocument(doc.tokenizeAndStem());
  });

  // Keep the first nbrKeywords entries of document 0's term list.
  const ranked = index.listTerms(0);
  const stems = ranked.slice(0, nbrKeywords).map((entry) => entry.term);

  // Tokenize the raw (unstemmed) text so each stem can be mapped back to a word.
  const frTokenizer = new AggressiveTokenizerFr();
  const words = frTokenizer.tokenize(documents.join('\n'));

  return stems.map((stem) => findword(stem, words));
}
/**
 * findword - Map a stem back to the first token that contains it
 *
 * The comparison ignores letter case so that a capitalised token (e.g. at the
 * start of a sentence) still matches its stem; the token is returned with its
 * original casing.
 * NOTE(review): this assumes the stems handed in are lower-cased by the
 * stemmer - confirm against the stemmer in use.
 *
 * @param {string} stem The stem to look for
 * @param {Array} tokens The candidate words, scanned in order
 * @returns {string} The first token containing the stem, or the stem itself when none does
 */
function findword(stem, tokens) {
  const needle = stem.toLowerCase();
  for (const token of tokens) {
    if (token.toLowerCase().includes(needle)) {
      return token;
    }
  }
  // No token contains the stem: fall back to the stem so callers always get a string.
  return stem;
}
// Expose isFirstCharUpperCase on the module's public exports.
exports.isFirstCharUpperCase = isFirstCharUpperCase;

@@ -187,1 +222,3 @@

// Public API: export getNgrams and getTopKeywords to consumers of this module.
exports.getNgrams = getNgrams;
exports.getTopKeywords = getTopKeywords;

3

package.json
{
"name": "natural-content",
"version": "1.0.13",
"version": "1.0.14",
"description": "A set of natural functions like tf.idf, extract words & n-grams, remove diacritics, ... (experimental project)",

@@ -12,2 +12,3 @@ "main": "index.js",

"dependencies": {
"natural": "^0.6.3",
"underscore": "^1.9.1"

@@ -14,0 +15,0 @@ },

@@ -26,2 +26,11 @@ const assert = require('assert');

// getTopKeywords should return exactly the requested number of keywords,
// with the dominant word of the document ('word2') in first position.
it('top keywords', () => {
  const corpus = [ 'word1 word2 word3 word4 word1 word6 word1 word2 word2 word1 word2 word2 word2 word2' ];
  const stats = natural.getTopKeywords(corpus, 2);
  // strictEqual reports the actual and expected values when a check fails.
  assert.strictEqual(stats.length, 2);
  assert.strictEqual(stats[0], 'word2');
});
it('Special caracters', () => {

@@ -28,0 +37,0 @@ const text = 'ceci est un texte en français ! sans caractères spéciaux !§($€) # 123 avant-hier';

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc