synonym-optimizer
Advanced tools
Comparing version 2.5.3 to 2.5.4
export declare type Languages = 'en_US' | 'fr_FR' | 'de_DE' | 'it_IT' | string; | ||
export declare function getStandardStopWords(lang: Languages): string[]; | ||
export declare function getStopWords(lang: Languages, stopWordsToAdd: string[], stopWordsToRemove: string[], stopWordsOverride: string[]): string[]; | ||
export declare function extractWords(input: string): string[]; | ||
export declare function extractWords(input: string, lang: Languages): string[]; | ||
interface WordsWithPos { | ||
@@ -20,4 +20,5 @@ [key: string]: number[]; | ||
} | ||
export declare function getStemmedWords(text: string, stopwords: string[], lang: Languages): string[]; | ||
export declare function scoreAlternative(lang: Languages, alternative: string, stopWordsToAdd: string[], stopWordsToRemove: string[], stopWordsOverride: string[], identicals: string[][], debugHolder: DebugHolder): number; | ||
export declare function getBest(lang: Languages, alternatives: string[], stopWordsToAdd: string[], stopWordsToRemove: string[], stopWordsOverride: string[], identicals: string[][]): number; | ||
export {}; |
@@ -12,2 +12,3 @@ "use strict"; | ||
const italianStemmer = require("snowball-stemmer.jsx/dest/italian-stemmer.common.js"); | ||
const rosaenlg_filter_1 = require("rosaenlg-filter"); | ||
const fullySupportedLanguages = ['en_US', 'de_DE', 'fr_FR', 'it_IT']; | ||
@@ -54,3 +55,4 @@ // exported for testing purposes | ||
exports.getStopWords = getStopWords; | ||
function extractWords(input) { | ||
function extractWords(input, lang) { | ||
// console.log(`tokenizing: ${input}`); | ||
const myTokenizer = new tokenizer(); | ||
@@ -65,9 +67,20 @@ myTokenizer.defineConfig({ | ||
const tokenized = myTokenizer.tokenize(input); | ||
//console.log(tokenized); | ||
const res = []; | ||
// console.log(`tokenized: ${tokenized}`); | ||
let res = []; | ||
tokenized.forEach(function (elt) { | ||
if (elt.tag != 'alien') { | ||
// no alien tags and no html elements | ||
if (elt.tag != 'alien' && rosaenlg_filter_1.blockLevelHtmlElts.indexOf(elt.value) == -1 && rosaenlg_filter_1.inlineHtmlElts.indexOf(elt.value) == -1) { | ||
res.push(elt.value); | ||
} | ||
}); | ||
if (lang == 'fr_FR') { | ||
// we just leave [Pp]uisqu [Jj]usqu [Ll]orsqu as they are | ||
const regexp = new RegExp("^(D|d|Q|q|L|l|S|s|J|j|T|t|M|m|N|n)'", 'g'); | ||
res = res.map((elt) => { | ||
return elt.replace(regexp, ''); | ||
}); | ||
// sometimes it results in having empty elements | ||
res = res.filter(elt => elt.length > 0); | ||
} | ||
// console.log(`res: ${res}`); | ||
return res; | ||
@@ -157,9 +170,5 @@ } | ||
exports.getScore = getScore; | ||
function scoreAlternative(lang, alternative, stopWordsToAdd, stopWordsToRemove, stopWordsOverride, identicals, debugHolder) { | ||
// console.log(stemmer.stemWord("baby")); | ||
// console.log(stopWordsToAdd); | ||
const stopwords = getStopWords(lang, stopWordsToAdd, stopWordsToRemove, stopWordsOverride); | ||
// console.log(stopwords); | ||
const filteredAlt = []; | ||
const extractedWords = extractWords(alternative) | ||
function getStemmedWords(text, stopwords, lang) { | ||
// console.log(`getStemmedWords: ${text}`); | ||
const res = extractWords(text, lang) | ||
.map(function (alt) { | ||
@@ -170,7 +179,16 @@ return alt.toLowerCase(); | ||
return !stopwords.includes(alt); | ||
}) | ||
.map(elt => { | ||
return stemWordForLang(elt, lang); | ||
}); | ||
//console.log(extractedWords); | ||
extractedWords.forEach(function (extractedWord) { | ||
filteredAlt.push(stemWordForLang(extractedWord, lang)); | ||
}); | ||
// console.log(`getStemmedWords result: ${res}`); | ||
return res; | ||
} | ||
exports.getStemmedWords = getStemmedWords; | ||
function scoreAlternative(lang, alternative, stopWordsToAdd, stopWordsToRemove, stopWordsOverride, identicals, debugHolder) { | ||
// console.log(stemmer.stemWord("baby")); | ||
// console.log(stopWordsToAdd); | ||
const stopwords = getStopWords(lang, stopWordsToAdd, stopWordsToRemove, stopWordsOverride); | ||
// console.log(stopwords); | ||
const filteredAlt = getStemmedWords(alternative, stopwords, lang); | ||
if (debugHolder) { | ||
@@ -177,0 +195,0 @@ debugHolder.filteredAlt = filteredAlt; |
{ | ||
"name": "synonym-optimizer", | ||
"version": "2.5.3", | ||
"version": "2.5.4", | ||
"description": "Finds the text which has the least number of repetitions", | ||
@@ -60,2 +60,4 @@ "main": "dist/index.js", | ||
"debug": "^4.1.1", | ||
"rosaenlg-filter": "2.5.4", | ||
"sha1": "^1.1.1", | ||
"snowball-stemmer.jsx": "^0.2.3", | ||
@@ -68,3 +70,3 @@ "stopwords-de": "^0.2.0", | ||
}, | ||
"gitHead": "3dcac44372256f9a23f9016675f63205f3b9a9ee" | ||
"gitHead": "7d80b030b06e15b2c25ae11435d1e47ae07071aa" | ||
} |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
23393
261
11
+ Added rosaenlg-filter@2.5.4
+ Added sha1@^1.1.1
+ Added better-title-case@1.0.1 (transitive)
+ Added charenc@0.0.2 (transitive)
+ Added compromise@11.14.3 (transitive)
+ Added crypt@0.0.2 (transitive)
+ Added efrt-unpack@2.2.0 (transitive)
+ Added french-h-muet-aspire@2.5.4 (transitive)
+ Added rosaenlg-filter@2.5.4 (transitive)
+ Added sha1@1.1.1 (transitive)
+ Added titlecase-french@1.0.1 (transitive)