Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

natural-content

Package Overview
Dependencies
Maintainers
1
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

natural-content - npm Package Compare versions

Comparing version 1.2.2 to 1.2.3

79

index.js

@@ -18,2 +18,11 @@ const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural');

/**
 * Check whether a statement contains an acronym, i.e. a run of at least two
 * consecutive uppercase ASCII letters (A-Z).
 *
 * The run may appear anywhere in the input, including after a line break.
 * Only unaccented A-Z letters are considered; accented capitals do not count.
 *
 * @param {string} statement the statement
 * @returns {boolean} true if the statement contains an acronym
 */
function containsAcronym(statement) {
  // Deliberately unanchored: a leading `^(.*)` cannot cross a line terminator,
  // which would hide any acronym located after the first line of the input.
  return /[A-Z]{2,}/.test(statement);
}
/**
* getStatements - Get all statements from a text

@@ -26,21 +35,21 @@ *

return text.replace(/[\n\r]/g, WORD_SEPARATOR) // Convert end of line
.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/ /gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/ +/g, WORD_SEPARATOR) // remove multiple spaces
.replace('...', STATEMENT_SEPARATOR)
.replace(/[.]{3}/g, `.${ STATEMENT_SEPARATOR }`)
.replace(/[.]/g, `.${ STATEMENT_SEPARATOR }`)
.replace(/[!]/g, `!${ STATEMENT_SEPARATOR }`)
.replace(/[?]/g, `?${ STATEMENT_SEPARATOR }`)
.split(STATEMENT_SEPARATOR)
.reduce((result, t) => {
if (t.trim() === '') {
return result;
}
.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/&nbsp;/gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/ +/g, WORD_SEPARATOR) // remove multiple spaces
.replace('...', STATEMENT_SEPARATOR)
.replace(/[.]{3}/g, `.${STATEMENT_SEPARATOR}`)
.replace(/[.]/g, `.${STATEMENT_SEPARATOR}`)
.replace(/[!]/g, `!${STATEMENT_SEPARATOR}`)
.replace(/[?]/g, `?${STATEMENT_SEPARATOR}`)
.split(STATEMENT_SEPARATOR)
.reduce((result, t) => {
if (t.trim() === '') {
return result;
}
result.push(t.trim());
result.push(t.trim());
return result;
}, []);
return result;
}, []);
}

@@ -60,6 +69,6 @@

const cleanText = text.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/[\n\r]/g, WORD_SEPARATOR)
.replace(/&nbsp;/gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/[|&’«»'"\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR);
.replace(/[\n\r]/g, WORD_SEPARATOR)
.replace(/&nbsp;/gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/[|&’«»'"\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR);

@@ -95,6 +104,6 @@ const lower = cleanText.toLowerCase();

return text.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/[\n\r]/g, WORD_SEPARATOR)
.replace(/[\n]/g, WORD_SEPARATOR)
.replace(/\s+/g, WORD_SEPARATOR)
.trim();
.replace(/[\n\r]/g, WORD_SEPARATOR)
.replace(/[\n]/g, WORD_SEPARATOR)
.replace(/\s+/g, WORD_SEPARATOR)
.trim();
}

@@ -132,9 +141,9 @@

const words = text.replace(/[\n\r]/g, WORD_SEPARATOR) // Convert end of line
.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/&nbsp;/gi, WORD_SEPARATOR) // remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/['’«»";:,.\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR) // Remove punctuations
.replace(/\s+/g, WORD_SEPARATOR) // remove multiple spaces
.toLowerCase()
.split(WORD_SEPARATOR);
.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
.replace(/&nbsp;/gi, WORD_SEPARATOR) // remove HTML entities, only non breaking space
.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
.replace(/['’«»";:,.\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR) // Remove punctuations
.replace(/\s+/g, WORD_SEPARATOR) // remove multiple spaces
.toLowerCase()
.split(WORD_SEPARATOR);

@@ -148,3 +157,3 @@ // Remove empty string

const { stopwords } = require(`./lib/stopwords-${ language.toLowerCase() }`);
const { stopwords } = require(`./lib/stopwords-${language.toLowerCase()}`);

@@ -178,3 +187,3 @@ return words.filter((word) => word !== '' && stopwords.indexOf(removeDiacritics(word)) === -1);

// Convert the ngram array into a ngram string and add it in the result list
result.push(slice.reduce((memo, word) => memo ? `${ memo } ${ word }` : word));
result.push(slice.reduce((memo, word) => memo ? `${memo} ${word}` : word));
}

@@ -222,2 +231,4 @@

exports.containsAcronym = containsAcronym;
exports.getStatements = getStatements;

@@ -224,0 +235,0 @@

{
"name": "natural-content",
"version": "1.2.2",
"version": "1.2.3",
"description": "A set of natural functions like extracting words & n-grams, remove diacritics, get top keywords, ... (experimental project)",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -23,3 +23,4 @@ const assert = require('assert');

// console.log(natural.getStatements(txt));
assert(natural.containsAcronym('this is mister LOL'));
assert(!natural.containsAcronym('this is mister John Smith'));
});

@@ -26,0 +27,0 @@

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc