Comparing version 0.0.13 to 0.0.14
@@ -16,2 +16,3 @@ export type FuzzyMatch = { | ||
export declare const replaceSmartQuotes: (s: string) => string; | ||
export declare const removeDiacritics: (x: string) => string; | ||
export declare const simplify: (x: string) => string; | ||
@@ -22,18 +23,18 @@ export declare const approximateSemanticEquality: (x: string, y: string) => boolean; | ||
export declare const concatRegexp: (x: RegExp, y: RegExp) => RegExp; | ||
export declare const regexpEntireString: (x: RegExp) => RegExp; | ||
export declare const regexpEntireString: ({ source, flags }: RegExp) => RegExp; | ||
export declare const stringToRegexp: (x: string) => RegExp; | ||
export declare const caseInsensitive: (x: RegExp) => RegExp; | ||
export declare const caseInsensitive: ({ source, flags }: RegExp) => RegExp; | ||
export declare const regExpOr: (x: RegExp, y: RegExp) => RegExp; | ||
export declare const selectionGroup: (x: RegExp) => RegExp; | ||
export declare const optional: (x: RegExp) => RegExp; | ||
export declare const zeroOrMore: (x: RegExp) => RegExp; | ||
export declare const oneOrMore: (x: RegExp) => RegExp; | ||
export declare const globalize: (x: RegExp) => RegExp; | ||
export declare const regexpTimes: (min: number, max: number, x: RegExp) => RegExp; | ||
export declare const negativeLookBehind: (x: RegExp) => RegExp; | ||
export declare const selectionGroup: ({ source, flags }: RegExp) => RegExp; | ||
export declare const optional: ({ source, flags }: RegExp) => RegExp; | ||
export declare const zeroOrMore: ({ source, flags }: RegExp) => RegExp; | ||
export declare const oneOrMore: ({ source, flags }: RegExp) => RegExp; | ||
export declare const globalize: ({ source, flags }: RegExp) => RegExp; | ||
export declare const regexpTimes: (min: number, max: number, { source, flags }: RegExp) => RegExp; | ||
export declare const negativeLookBehind: ({ source, flags }: RegExp) => RegExp; | ||
export declare const matchesRegexp: (r: RegExp) => (txt: string) => boolean; | ||
export declare const cleanSpeakers: (s: string) => string; | ||
export declare const ngramsOfAtLeastNWords: (n: number) => (s: string) => string[]; | ||
export declare const wholeWord: (r: RegExp) => RegExp; | ||
export declare const wholeWord: ({ source, flags }: RegExp) => RegExp; | ||
export declare const someKewyordMatches: (keywords: string[]) => (x: string) => boolean; | ||
export declare const urlsInText: (text: string) => [] | RegExpMatchArray; |
@@ -91,3 +91,4 @@ import { includedIn, join, letIn, lowercase, max, nonempty, pipe, range, replace, reverse, sort, split, take, trim, trimWhitespace, } from "gamla"; | ||
const replaceDidgitNames = pipe(replace(/\bten\b/g, "10"), replace(/\bnine\b/g, "9"), replace(/\beight\b/g, "8"), replace(/\bseven\b/g, "7"), replace(/\bsix\b/g, "6"), replace(/\bfive\b/g, "5"), replace(/\bfour\b/g, "4"), replace(/\bthree\b/g, "3"), replace(/\btwo\b/g, "2"), replace(/\bone\b/g, "1"), replace(/\bzero\b/g, "0")); | ||
export const simplify = pipe((x) => x.trim(), replaceSmartQuotes, lowercase, replace(/\s/g, " "), replaceDidgitNames, replace(/\[.*\]/, ""), replace(/[*:'"♪]/g, ""), replace(/[,.?!\n-]/g, " "), replace(/\s+/g, " "), replace(/<\/?i>/g, ""), replace(/\bdoctor\b/g, "dr"), (x) => x.trim()); | ||
export const removeDiacritics = (x) => x.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); | ||
export const simplify = pipe((x) => x.trim(), replaceSmartQuotes, lowercase, replace(/\s/g, " "), replaceDidgitNames, replace(/\[.*\]/, ""), replace(/[*:'"♪]/g, ""), replace(/[,.?!\n-]/g, " "), replace(/\s+/g, " "), replace(/<\/?i>/g, ""), replace(/\bdoctor\b/g, "dr"), removeDiacritics, (x) => x.trim()); | ||
const allEnglishWordsAsSet = new Set(englishWords); | ||
@@ -109,3 +110,3 @@ const fixMissingSpaceInOneWord = (x) => allEnglishWordsAsSet.has(x) ? x : letIn(range(1, x.length - 1).find((index) => allEnglishWordsAsSet.has(x.slice(0, index)) && | ||
export const concatRegexp = (x, y) => new RegExp(x.source + y.source, combineFlags(x, y)); | ||
export const regexpEntireString = (x) => new RegExp(`^${x.source}$`, x.flags); | ||
export const regexpEntireString = ({ source, flags }) => new RegExp(`^${source}$`, flags); | ||
const combineFlags = (x, y) => (x.flags + y.flags) | ||
@@ -116,9 +117,7 @@ .split("") | ||
.replace(/(.)(?=.*\1)/g, ""); | ||
const addFlag = (flag) => (x) => new RegExp(x.source, x.flags.includes(flag) | ||
? x.flags | ||
: (x.flags + flag).split("").sort().join("")); | ||
const addFlag = (flag) => ({ source, flags }) => new RegExp(source, flags.includes(flag) ? flags : (flags + flag).split("").sort().join("")); | ||
export const stringToRegexp = (x) => new RegExp(x.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")); | ||
export const caseInsensitive = addFlag("i"); | ||
export const regExpOr = (x, y) => new RegExp(`(?:${bracketIfNeeded(x.source)}|${bracketIfNeeded(y.source)})`, combineFlags(x, y)); | ||
export const selectionGroup = (x) => new RegExp(`(${x.source})`, x.flags); | ||
export const selectionGroup = ({ source, flags }) => new RegExp(`(${source})`, flags); | ||
const bracketIfNeeded = (s) => (s.startsWith("(") && s.endsWith(")")) || | ||
@@ -128,7 +127,7 @@ (s.startsWith("[") && s.endsWith("]")) | ||
: `(?:${s})`; | ||
export const optional = (x) => new RegExp(`${bracketIfNeeded(x.source)}?`, x.flags); | ||
export const zeroOrMore = (x) => new RegExp(`${bracketIfNeeded(x.source)}*`, x.flags); | ||
export const oneOrMore = (x) => new RegExp(`${bracketIfNeeded(x.source)}+`, x.flags); | ||
export const optional = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}?`, flags); | ||
export const zeroOrMore = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}*`, flags); | ||
export const oneOrMore = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}+`, flags); | ||
export const globalize = addFlag("g"); | ||
export const regexpTimes = (min, max, x) => new RegExp(`${bracketIfNeeded(x.source)}{${min},${max}}`, x.flags); | ||
export const regexpTimes = (min, max, { source, flags }) => new RegExp(`${bracketIfNeeded(source)}{${min},${max}}`, flags); | ||
const namePrefix = ["ms", "mrs", "mr", "dr", "prof"] | ||
@@ -151,3 +150,3 @@ .map((x) => new RegExp(`${x}\\.?`)) | ||
const speakerInEnd = [hyphen, /\s*/, personName, /$/].reduce(concatRegexp); | ||
export const negativeLookBehind = (x) => new RegExp(`(?<!${x.source})`, x.flags); | ||
export const negativeLookBehind = ({ source, flags }) => new RegExp(`(?<!${source})`, flags); | ||
const splitSentences = split(/(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[,!.?:])\s/); | ||
@@ -166,3 +165,3 @@ export const matchesRegexp = (r) => (txt) => r.test(txt); | ||
}; | ||
export const wholeWord = (r) => new RegExp(`(^|${boundry.source})${r.source}($|${boundry.source})`, r.flags); | ||
export const wholeWord = ({ source, flags }) => new RegExp(`(^|${boundry.source})${source}($|${boundry.source})`, flags); | ||
const containsPhrase = (str) => (re) => re.test(str); | ||
@@ -169,0 +168,0 @@ const strToRegexp = (s) => new RegExp(s); |
{ | ||
"name": "silly-nlp", | ||
"version": "0.0.13", | ||
"version": "0.0.14", | ||
"description": "Silly nlp utils", | ||
@@ -5,0 +5,0 @@ "repository": { |
@@ -16,2 +16,3 @@ export type FuzzyMatch = { | ||
export declare const replaceSmartQuotes: (s: string) => string; | ||
export declare const removeDiacritics: (x: string) => string; | ||
export declare const simplify: (x: string) => string; | ||
@@ -22,18 +23,18 @@ export declare const approximateSemanticEquality: (x: string, y: string) => boolean; | ||
export declare const concatRegexp: (x: RegExp, y: RegExp) => RegExp; | ||
export declare const regexpEntireString: (x: RegExp) => RegExp; | ||
export declare const regexpEntireString: ({ source, flags }: RegExp) => RegExp; | ||
export declare const stringToRegexp: (x: string) => RegExp; | ||
export declare const caseInsensitive: (x: RegExp) => RegExp; | ||
export declare const caseInsensitive: ({ source, flags }: RegExp) => RegExp; | ||
export declare const regExpOr: (x: RegExp, y: RegExp) => RegExp; | ||
export declare const selectionGroup: (x: RegExp) => RegExp; | ||
export declare const optional: (x: RegExp) => RegExp; | ||
export declare const zeroOrMore: (x: RegExp) => RegExp; | ||
export declare const oneOrMore: (x: RegExp) => RegExp; | ||
export declare const globalize: (x: RegExp) => RegExp; | ||
export declare const regexpTimes: (min: number, max: number, x: RegExp) => RegExp; | ||
export declare const negativeLookBehind: (x: RegExp) => RegExp; | ||
export declare const selectionGroup: ({ source, flags }: RegExp) => RegExp; | ||
export declare const optional: ({ source, flags }: RegExp) => RegExp; | ||
export declare const zeroOrMore: ({ source, flags }: RegExp) => RegExp; | ||
export declare const oneOrMore: ({ source, flags }: RegExp) => RegExp; | ||
export declare const globalize: ({ source, flags }: RegExp) => RegExp; | ||
export declare const regexpTimes: (min: number, max: number, { source, flags }: RegExp) => RegExp; | ||
export declare const negativeLookBehind: ({ source, flags }: RegExp) => RegExp; | ||
export declare const matchesRegexp: (r: RegExp) => (txt: string) => boolean; | ||
export declare const cleanSpeakers: (s: string) => string; | ||
export declare const ngramsOfAtLeastNWords: (n: number) => (s: string) => string[]; | ||
export declare const wholeWord: (r: RegExp) => RegExp; | ||
export declare const wholeWord: ({ source, flags }: RegExp) => RegExp; | ||
export declare const someKewyordMatches: (keywords: string[]) => (x: string) => boolean; | ||
export declare const urlsInText: (text: string) => [] | RegExpMatchArray; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.urlsInText = exports.someKewyordMatches = exports.wholeWord = exports.ngramsOfAtLeastNWords = exports.cleanSpeakers = exports.matchesRegexp = exports.negativeLookBehind = exports.regexpTimes = exports.globalize = exports.oneOrMore = exports.zeroOrMore = exports.optional = exports.selectionGroup = exports.regExpOr = exports.caseInsensitive = exports.stringToRegexp = exports.regexpEntireString = exports.concatRegexp = exports.quotedTexts = exports.isStopWord = exports.approximateSemanticEquality = exports.simplify = exports.replaceSmartQuotes = exports.equivalence = exports.topByCount = exports.appearMoreThan = exports.majority = exports.suffixesWithPrefix = exports.prefixesWithSuffix = exports.capitalizedSuffix = exports.capitalizedPrefix = exports.paragraphToSentences = exports.fuzzySearch = void 0; | ||
exports.urlsInText = exports.someKewyordMatches = exports.wholeWord = exports.ngramsOfAtLeastNWords = exports.cleanSpeakers = exports.matchesRegexp = exports.negativeLookBehind = exports.regexpTimes = exports.globalize = exports.oneOrMore = exports.zeroOrMore = exports.optional = exports.selectionGroup = exports.regExpOr = exports.caseInsensitive = exports.stringToRegexp = exports.regexpEntireString = exports.concatRegexp = exports.quotedTexts = exports.isStopWord = exports.approximateSemanticEquality = exports.simplify = exports.removeDiacritics = exports.replaceSmartQuotes = exports.equivalence = exports.topByCount = exports.appearMoreThan = exports.majority = exports.suffixesWithPrefix = exports.prefixesWithSuffix = exports.capitalizedSuffix = exports.capitalizedPrefix = exports.paragraphToSentences = exports.fuzzySearch = void 0; | ||
const gamla_1 = require("gamla"); | ||
@@ -100,3 +100,5 @@ const filter_js_1 = require("./deps/deno.land/x/gamla@43.0.0/src/filter.js"); | ||
const replaceDidgitNames = (0, gamla_1.pipe)((0, gamla_1.replace)(/\bten\b/g, "10"), (0, gamla_1.replace)(/\bnine\b/g, "9"), (0, gamla_1.replace)(/\beight\b/g, "8"), (0, gamla_1.replace)(/\bseven\b/g, "7"), (0, gamla_1.replace)(/\bsix\b/g, "6"), (0, gamla_1.replace)(/\bfive\b/g, "5"), (0, gamla_1.replace)(/\bfour\b/g, "4"), (0, gamla_1.replace)(/\bthree\b/g, "3"), (0, gamla_1.replace)(/\btwo\b/g, "2"), (0, gamla_1.replace)(/\bone\b/g, "1"), (0, gamla_1.replace)(/\bzero\b/g, "0")); | ||
exports.simplify = (0, gamla_1.pipe)((x) => x.trim(), exports.replaceSmartQuotes, gamla_1.lowercase, (0, gamla_1.replace)(/\s/g, " "), replaceDidgitNames, (0, gamla_1.replace)(/\[.*\]/, ""), (0, gamla_1.replace)(/[*:'"♪]/g, ""), (0, gamla_1.replace)(/[,.?!\n-]/g, " "), (0, gamla_1.replace)(/\s+/g, " "), (0, gamla_1.replace)(/<\/?i>/g, ""), (0, gamla_1.replace)(/\bdoctor\b/g, "dr"), (x) => x.trim()); | ||
const removeDiacritics = (x) => x.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); | ||
exports.removeDiacritics = removeDiacritics; | ||
exports.simplify = (0, gamla_1.pipe)((x) => x.trim(), exports.replaceSmartQuotes, gamla_1.lowercase, (0, gamla_1.replace)(/\s/g, " "), replaceDidgitNames, (0, gamla_1.replace)(/\[.*\]/, ""), (0, gamla_1.replace)(/[*:'"♪]/g, ""), (0, gamla_1.replace)(/[,.?!\n-]/g, " "), (0, gamla_1.replace)(/\s+/g, " "), (0, gamla_1.replace)(/<\/?i>/g, ""), (0, gamla_1.replace)(/\bdoctor\b/g, "dr"), exports.removeDiacritics, (x) => x.trim()); | ||
const allEnglishWordsAsSet = new Set(englishWords_js_1.englishWords); | ||
@@ -121,3 +123,3 @@ const fixMissingSpaceInOneWord = (x) => allEnglishWordsAsSet.has(x) ? x : (0, gamla_1.letIn)((0, gamla_1.range)(1, x.length - 1).find((index) => allEnglishWordsAsSet.has(x.slice(0, index)) && | ||
exports.concatRegexp = concatRegexp; | ||
const regexpEntireString = (x) => new RegExp(`^${x.source}$`, x.flags); | ||
const regexpEntireString = ({ source, flags }) => new RegExp(`^${source}$`, flags); | ||
exports.regexpEntireString = regexpEntireString; | ||
@@ -129,5 +131,3 @@ const combineFlags = (x, y) => (x.flags + y.flags) | ||
.replace(/(.)(?=.*\1)/g, ""); | ||
const addFlag = (flag) => (x) => new RegExp(x.source, x.flags.includes(flag) | ||
? x.flags | ||
: (x.flags + flag).split("").sort().join("")); | ||
const addFlag = (flag) => ({ source, flags }) => new RegExp(source, flags.includes(flag) ? flags : (flags + flag).split("").sort().join("")); | ||
const stringToRegexp = (x) => new RegExp(x.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")); | ||
@@ -138,3 +138,3 @@ exports.stringToRegexp = stringToRegexp; | ||
exports.regExpOr = regExpOr; | ||
const selectionGroup = (x) => new RegExp(`(${x.source})`, x.flags); | ||
const selectionGroup = ({ source, flags }) => new RegExp(`(${source})`, flags); | ||
exports.selectionGroup = selectionGroup; | ||
@@ -145,10 +145,10 @@ const bracketIfNeeded = (s) => (s.startsWith("(") && s.endsWith(")")) || | ||
: `(?:${s})`; | ||
const optional = (x) => new RegExp(`${bracketIfNeeded(x.source)}?`, x.flags); | ||
const optional = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}?`, flags); | ||
exports.optional = optional; | ||
const zeroOrMore = (x) => new RegExp(`${bracketIfNeeded(x.source)}*`, x.flags); | ||
const zeroOrMore = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}*`, flags); | ||
exports.zeroOrMore = zeroOrMore; | ||
const oneOrMore = (x) => new RegExp(`${bracketIfNeeded(x.source)}+`, x.flags); | ||
const oneOrMore = ({ source, flags }) => new RegExp(`${bracketIfNeeded(source)}+`, flags); | ||
exports.oneOrMore = oneOrMore; | ||
exports.globalize = addFlag("g"); | ||
const regexpTimes = (min, max, x) => new RegExp(`${bracketIfNeeded(x.source)}{${min},${max}}`, x.flags); | ||
const regexpTimes = (min, max, { source, flags }) => new RegExp(`${bracketIfNeeded(source)}{${min},${max}}`, flags); | ||
exports.regexpTimes = regexpTimes; | ||
@@ -172,3 +172,3 @@ const namePrefix = ["ms", "mrs", "mr", "dr", "prof"] | ||
const speakerInEnd = [hyphen, /\s*/, personName, /$/].reduce(exports.concatRegexp); | ||
const negativeLookBehind = (x) => new RegExp(`(?<!${x.source})`, x.flags); | ||
const negativeLookBehind = ({ source, flags }) => new RegExp(`(?<!${source})`, flags); | ||
exports.negativeLookBehind = negativeLookBehind; | ||
@@ -190,3 +190,3 @@ const splitSentences = (0, gamla_1.split)(/(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[,!.?:])\s/); | ||
exports.ngramsOfAtLeastNWords = ngramsOfAtLeastNWords; | ||
const wholeWord = (r) => new RegExp(`(^|${boundry.source})${r.source}($|${boundry.source})`, r.flags); | ||
const wholeWord = ({ source, flags }) => new RegExp(`(^|${boundry.source})${source}($|${boundry.source})`, flags); | ||
exports.wholeWord = wholeWord; | ||
@@ -193,0 +193,0 @@ const containsPhrase = (str) => (re) => re.test(str); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
9551641
3644