natural-content
Advanced tools
Comparing version 1.0.15 to 1.1.0
15
index.js
@@ -81,8 +81,8 @@ const { PorterStemmerFr, TfIdf, AggressiveTokenizerFr } = require('natural'); | ||
/** | ||
* removeLineBreaks - Remove line breaks in a text | ||
* removeLineBreaks - Remove line breaks & tabs in a text | ||
* | ||
* @param {type} text the text | ||
* @returns {type} the text without line breaks | ||
* @returns {type} the text without line breaks and tabs | ||
*/ | ||
function removeLineBreaks(text) { | ||
function removeLineBreakTabs(text) { | ||
if (!text) { | ||
@@ -92,3 +92,7 @@ return ''; | ||
return text.replace(/(\r\n|\n|\r)/gm, '').trim(); | ||
return text.replace(/[\t]/g, WORD_SEPARATOR) // Remove Tabs | ||
.replace(/[\n\r]/g, WORD_SEPARATOR) | ||
.replace(/[\n]/g, WORD_SEPARATOR) | ||
.replace(/\s+/g, WORD_SEPARATOR) | ||
.trim(); | ||
} | ||
@@ -182,2 +186,3 @@ | ||
* @param {number} nbrKeywords The number of keywords to return | ||
* @param {number} language The language to use for finding the stopwords | ||
* @returns {Arrays} The list of keywords | ||
@@ -219,3 +224,3 @@ */ | ||
exports.removeLineBreaks = removeLineBreaks; | ||
exports.removeLineBreakTabs = removeLineBreakTabs; | ||
@@ -222,0 +227,0 @@ exports.getWords = getWords; |
{ | ||
"name": "natural-content", | ||
"version": "1.0.15", | ||
"description": "A set of natural functions like tf.idf, extract words & n-grams, remove diacritics, ... (experimental project)", | ||
"version": "1.1.0", | ||
"description": "A set of natural functions like extracting words & n-grams, remove diacritics, get top keywords, ... (experimental project)", | ||
"main": "index.js", | ||
@@ -6,0 +6,0 @@ "scripts": { |
@@ -42,2 +42,8 @@ const assert = require('assert'); | ||
it('Line Break, tabs and spaces', () => { | ||
const text = 'Ceci est un texte en français ! \t sans caractères spéciaux ! \n L\'autre texte\n\r'; | ||
const result = natural.removeLineBreakTabs(text); | ||
assert(result === 'Ceci est un texte en français ! sans caractères spéciaux ! L\'autre texte'); | ||
}); | ||
it('apostrophe', () => { | ||
@@ -44,0 +50,0 @@ const text = 'ceci est un texte en français. l\'été sera chaud. Les conditions d\'utilisation de l\'objet'; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
30312
1240