natural-content - npm Package Compare versions

Comparing version 1.2.2 to 1.2.3

index.js

		@@ -18,2 +18,11 @@ const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural');
		/**
		 * Check whether a statement contains an acronym, defined here as a run of
		 * at least two consecutive uppercase ASCII letters (A-Z) anywhere in the text.
		 *
		 * Note that this is a purely lexical check: a fully uppercase word such as
		 * "HELLO" also counts, and accented uppercase letters are not considered.
		 *
		 * @param {string} statement the statement
		 * @returns {boolean} true if the statement contains an acronym
		 */
		function containsAcronym(statement) {
			// The pattern is deliberately unanchored: a leading `^(.*)` would stop at
			// the first line terminator (`.` does not match newlines), so acronyms
			// located after a line break would be missed.
			return /[A-Z]{2,}/.test(statement);
		}

		/**
		* getStatements - Get all statements from a text
		@@ -26,21 +35,21 @@ *
		return text.replace(/[\n\r]/g, WORD_SEPARATOR) // Convert end of line
		.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/ /gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/ +/g, WORD_SEPARATOR) // remove multiple spaces
		.replace('...', STATEMENT_SEPARATOR)
		.replace(/[.]{3}/g, `.${ STATEMENT_SEPARATOR }`)
		.replace(/[.]/g, `.${ STATEMENT_SEPARATOR }`)
		.replace(/[!]/g, `!${ STATEMENT_SEPARATOR }`)
		.replace(/[?]/g, `?${ STATEMENT_SEPARATOR }`)
		.split(STATEMENT_SEPARATOR)
		.reduce((result, t) => {
		if (t.trim() === '') {
		return result;
		}
		.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/ /gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/ +/g, WORD_SEPARATOR) // remove multiple spaces
		.replace('...', STATEMENT_SEPARATOR)
		.replace(/[.]{3}/g, `.${STATEMENT_SEPARATOR}`)
		.replace(/[.]/g, `.${STATEMENT_SEPARATOR}`)
		.replace(/[!]/g, `!${STATEMENT_SEPARATOR}`)
		.replace(/[?]/g, `?${STATEMENT_SEPARATOR}`)
		.split(STATEMENT_SEPARATOR)
		.reduce((result, t) => {
		if (t.trim() === '') {
		return result;
		}

		result.push(t.trim());
		result.push(t.trim());

		return result;
		}, []);
		return result;
		}, []);
		}
		@@ -60,6 +69,6 @@
		const cleanText = text.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/[\n\r]/g, WORD_SEPARATOR)
		.replace(/ /gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/[\|&’«»'"\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR);
		.replace(/[\n\r]/g, WORD_SEPARATOR)
		.replace(/ /gi, WORD_SEPARATOR)// remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/[\|&’«»'"\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR);

		@@ -95,6 +104,6 @@ const lower = cleanText.toLowerCase();
		return text.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/[\n\r]/g, WORD_SEPARATOR)
		.replace(/[\n]/g, WORD_SEPARATOR)
		.replace(/\s+/g, WORD_SEPARATOR)
		.trim();
		.replace(/[\n\r]/g, WORD_SEPARATOR)
		.replace(/[\n]/g, WORD_SEPARATOR)
		.replace(/\s+/g, WORD_SEPARATOR)
		.trim();
		}
		@@ -132,9 +141,9 @@
		const words = text.replace(/[\n\r]/g, WORD_SEPARATOR) // Convert end of line
		.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/ /gi, WORD_SEPARATOR) // remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/['’«»";:,.\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR) // Remove punctuations
		.replace(/\s+/g, WORD_SEPARATOR) // remove multiple spaces
		.toLowerCase()
		.split(WORD_SEPARATOR);
		.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs
		.replace(/ /gi, WORD_SEPARATOR) // remove HTML entities, only non breaking space
		.replace(/(<([^>]+)>)/ig, WORD_SEPARATOR) // remove HTML tags
		.replace(/['’«»";:,.\/(\/)\/!\/?\\-]/g, WORD_SEPARATOR) // Remove punctuations
		.replace(/\s+/g, WORD_SEPARATOR) // remove multiple spaces
		.toLowerCase()
		.split(WORD_SEPARATOR);

		@@ -148,3 +157,3 @@ // Remove empty string

		const { stopwords } = require(`./lib/stopwords-${ language.toLowerCase() }`);
		const { stopwords } = require(`./lib/stopwords-${language.toLowerCase()}`);

		@@ -178,3 +187,3 @@ return words.filter((word) => word !== '' && stopwords.indexOf(removeDiacritics(word)) === -1);
		// Convert the ngram array into a ngram string and add it in the result list
		result.push(slice.reduce((memo, word) => memo ? `${ memo } ${ word }` : word));
		result.push(slice.reduce((memo, word) => memo ? `${memo} ${word}` : word));
		}
		@@ -222,2 +231,4 @@

		exports.containsAcronym = containsAcronym;

		exports.getStatements = getStatements;
		@@ -224,0 +235,0 @@

package.json

		{
		"name": "natural-content",
		"version": "1.2.2",
		"version": "1.2.3",
		"description": "A set of natural functions like extracting words & n-grams, remove diacritics, get top keywords, ... (experimental project)",
		@@ -5,0 +5,0 @@ "main": "index.js",

test/test.js

		@@ -23,3 +23,4 @@ const assert = require('assert');

		// console.log(natural.getStatements(txt));
		assert(natural.containsAcronym('this is mister LOL'));
		assert(!natural.containsAcronym('this is mister John Smith'));
		});
		@@ -26,0 +27,0 @@

natural-content - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics