New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

@diplodoc/sentenizer

Package Overview
Dependencies
Maintainers
9
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@diplodoc/sentenizer - npm Package Compare versions

Comparing version 0.0.6 to 0.0.7

2

lib/index.d.ts

@@ -1,1 +0,1 @@

export { sentenize } from './sentenize';
export declare function sentenize(text: string): string[];

@@ -25,4 +25,2 @@ var __defProp = Object.defineProperty;

module.exports = __toCommonJS(src_exports);
// src/sentenize.ts
var import_ramda8 = require("ramda");

@@ -33,167 +31,11 @@

// src/constants.ts
// src/constants/markers.ts
var SENTENCE_END_MARKERS = ".?!\u2026";
var QUOTATION_GENERIC_MARKERS = `\xAB"\u201E'`;
var QUOTATION_CLOSE_MARKERS = '\xBB"\u201D\u2019';
var QUOTATION_GENERIC_MARKERS = `"\u201E'`;
var QUOTATION_CLOSE_MARKERS = "\xBB\u201D\u2019";
var BRACKETS_CLOSE_MARKERS = "\\)\\]\\}>";
// src/constants/parameters.ts
var WINDOW_WIDTH = 10;
// src/lenses/index.ts
var import_ramda = require("ramda");
var first = () => (0, import_ramda.lensIndex)(0);
var second = () => (0, import_ramda.lensIndex)(1);
var last = () => (0, import_ramda.lensIndex)(-1);
// src/parsers/index.ts
var firstString = first();
var fst = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(firstString));
var secondString = second();
var snd = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(secondString));
var lastString = last();
var lst = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(lastString));
var sentencePattern = `([^${SENTENCE_END_MARKERS}]*?[${SENTENCE_END_MARKERS}]+)`;
var senteceFlags = "gmu";
var sentenceRegExp = new RegExp(sentencePattern, senteceFlags);
var sentences = (0, import_ramda2.compose)((0, import_ramda2.filter)(Boolean), (0, import_ramda2.split)(sentenceRegExp));
var sentenceDelimitersPattern = `([${SENTENCE_END_MARKERS}]+)$`;
var sentenceDelimitersFlags = "gmu";
var sentenceDelimitersRegExp = new RegExp(sentenceDelimitersPattern, sentenceDelimitersFlags);
var words = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.replace)(sentenceDelimitersRegExp)(""));
var delimiters = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(sentenceDelimitersRegExp));
var fstTokenPattern = /^\s*([^\s]+?)(?=\s|$)/;
var fstTokenFlags = "mu";
var fstTokenRegExp = new RegExp(fstTokenPattern, fstTokenFlags);
var fstToken = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(fstTokenRegExp));
var fstWord = (0, import_ramda2.compose)(fstToken, words);
var lstTokenPattern = /([^\s]+)\s*$/;
var lstTokenFlags = "mu";
var lstTokenRegExp = new RegExp(lstTokenPattern, lstTokenFlags);
var lstToken = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(lstTokenRegExp));
var nonAlphaStartPattern = /^[^\wа-яА-Я]*/;
var nonAlphaStartFlags = "gmu";
var nonAlphaStartRegExp = new RegExp(nonAlphaStartPattern, nonAlphaStartFlags);
var omitNonAlphaStart = (0, import_ramda2.replace)(nonAlphaStartRegExp, "");
var lstWord = (0, import_ramda2.compose)(lstToken, words);
// Builds an extractor for the leading window of a string.
// `width` is the maximum window length (defaults to WINDOW_WIDTH). The returned
// function yields the first match of the window pattern, falling back to "".
var fstChars = (width = WINDOW_WIDTH) => {
  // [\s\S] (rather than `.`) lets the window run across line breaks.
  const leadingWindow = new RegExp(`^[\\s\\S]{0,${width}}`, "gmu");
  const allMatches = (0, import_ramda2.match)(leadingWindow);
  return (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, allMatches);
};
// Builds an extractor for a trailing window of a string: the returned function
// yields the first match of `.{0,width}$` (width defaults to WINDOW_WIDTH),
// falling back to "".
// NOTE(review): unlike fstChars, this pattern uses `.`, which does not match line
// terminators, and the "m" flag lets `$` match at the end of every line. For input
// that contains a newline the first match is therefore the tail of the FIRST line,
// not the tail of the whole string - confirm whether that is intended.
var lstChars = (width = WINDOW_WIDTH) => {
const lstCharsPattern = `.{0,${width}}$`;
const lstCharsFlags = "gmu";
const lstCharsRegExp = new RegExp(lstCharsPattern, lstCharsFlags);
// `fst` picks the first of the global matches.
return (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(lstCharsRegExp));
};
var spacePrefixPattern = /^\s/;
var spacePrefixFlags = "gmu";
var spacePrefixRegExp = new RegExp(spacePrefixPattern, spacePrefixFlags);
var spacePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(spacePrefixRegExp));
var spaceSuffixPattern = /\s$/;
var spaceSuffixFlags = "mu";
var spaceSuffixRegExp = new RegExp(spaceSuffixPattern, spaceSuffixFlags);
var spaceSuffix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(spaceSuffixRegExp));
var quotationGenericPrefixPattern = `^([${QUOTATION_GENERIC_MARKERS}]+)`;
var quotationGenericPrefixFlags = "mu";
var quotationGenericPrefixRegExp = new RegExp(
quotationGenericPrefixPattern,
quotationGenericPrefixFlags
);
var quotationGenericPrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(quotationGenericPrefixRegExp));
var quotationClosePrefixPattern = `^([${QUOTATION_CLOSE_MARKERS}]+)`;
var quotationClosePrefixFlags = "mu";
var quotationClosePrefixRegExp = new RegExp(
quotationClosePrefixPattern,
quotationClosePrefixFlags
);
var quotationClosePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(quotationClosePrefixRegExp));
var delimiterPrefixPattern = `^([${SENTENCE_END_MARKERS}]+)`;
var delimiterPrefixFlags = "mu";
var delimiterPrefixRegExp = new RegExp(delimiterPrefixPattern, delimiterPrefixFlags);
var delimiterPrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(delimiterPrefixRegExp));
var bracketsClosePrefixPattern = `^([${BRACKETS_CLOSE_MARKERS}]+)`;
var bracketsClosePrefixFlags = "mu";
var bracketsClosePrefixRegExp = new RegExp(bracketsClosePrefixPattern, bracketsClosePrefixFlags);
var bracketsClosePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(bracketsClosePrefixRegExp));
var spacesPattern = /^(\s+)$/;
var spacesFlags = "gmu";
var spacesRegExp = new RegExp(spacesPattern, spacesFlags);
var spaces = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(spacesRegExp));
var dotSuffixPattern = /[^.](\.)$/;
var dotSuffixFlags = "mu";
var dotSuffixRegExp = new RegExp(dotSuffixPattern, dotSuffixFlags);
var dotSuffix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(dotSuffixRegExp));
// src/rules/base.ts
var import_ramda5 = require("ramda");
// src/utilities/list.ts
var import_ramda3 = require("ramda");
var lenLte = (len) => (0, import_ramda3.compose)((0, import_ramda3.curry)((0, import_ramda3.flip)(import_ramda3.lte))(len), import_ramda3.length);
var allEqual = (0, import_ramda3.compose)(lenLte(1), import_ramda3.uniq);
var lengthNonZero = (0, import_ramda3.compose)(Boolean, import_ramda3.length);
// src/utilities/string.ts
var import_ramda4 = require("ramda");
var charAt = (0, import_ramda4.invoker)(1, "charAt");
var notAlpha = (0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.toLower, import_ramda4.toUpper]));
var hasAlpha = (0, import_ramda4.compose)(import_ramda4.not, notAlpha);
// True when the first character of the given string is a lowercase letter.
// A `not(match(/\n/))` guard is deliberately absent: Ramda's `match` returns an
// array (empty when nothing matches) and `not` of any array is false, so such a
// guard makes allPass fail for every input. A leading newline is already rejected
// by the hasAlpha check below, because "\n" has identical lower/upper forms.
var startsWithLower = (0, import_ramda4.allPass)([
  // The first character has distinct lower/upper forms, i.e. it is a cased letter.
  (0, import_ramda4.compose)(hasAlpha, charAt(0)),
  // The first character equals its own lowercase form.
  (0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.identity, import_ramda4.toLower]), charAt(0))
]);
var isUpper = (0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.toUpper, import_ramda4.identity]));
// src/rules/base.ts
// Pair rules. Each rule below receives a list [left, right] and returns a boolean:
// zipWith(call, [leftPredicate, rightPredicate]) applies the first predicate to the
// left element and the second to the right element, and all(Boolean) requires every
// result to be truthy. `always(true)` marks a side the rule does not inspect.

// True when the spaceSuffix / spacePrefix parser yields a non-empty match.
var isSpaceSuffix = (0, import_ramda5.compose)(lengthNonZero, spaceSuffix);
var isSpacePrefix = (0, import_ramda5.compose)(lengthNonZero, spacePrefix);
// Both sides are first passed through `words` (trailing sentence delimiters removed);
// then left must have a whitespace suffix and right a whitespace prefix.
var spaceBothSides = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [isSpaceSuffix, isSpacePrefix]),
(0, import_ramda5.map)(words)
);
// After `words`, the right side has no whitespace prefix.
var rightLacksSpacePrefix = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(import_ramda5.not, isSpacePrefix)]),
(0, import_ramda5.map)(words)
);
// The first token of the right side satisfies startsWithLower.
var rightStartsWithLowercase = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(startsWithLower, fstToken)])
);
// The first token of the right side begins with sentence-end marker(s).
var rightDelimiterPrefix = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(lengthNonZero, delimiterPrefix, fstToken)])
);
// The right side matches quotationGenericPrefix.
// NOTE(review): applied to the right string as-is, without fstToken, unlike the
// close-quotation and bracket rules below - confirm this asymmetry is intended.
var rightQuotationGenericPrefix = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(lengthNonZero, quotationGenericPrefix)])
);
// The first token of the right side begins with closing quotation marker(s).
var rightQuotationClosePrefix = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(lengthNonZero, quotationClosePrefix, fstToken)])
);
// The first token of the right side begins with closing bracket marker(s).
var rightBracketsClosePrefix = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(lengthNonZero, bracketsClosePrefix, fstToken)])
);
// The right side yields a non-empty match of the `spaces` parser.
var rightOnlySpaces = (0, import_ramda5.compose)(
(0, import_ramda5.all)(Boolean),
(0, import_ramda5.zipWith)(import_ramda5.call, [(0, import_ramda5.always)(true), (0, import_ramda5.compose)(lengthNonZero, spaces)])
);
// src/rules/initials.ts
var import_ramda6 = require("ramda");
var isLeftDotDelimiter = (0, import_ramda6.compose)(lengthNonZero, dotSuffix);
var isLeftSingleLetter = (0, import_ramda6.compose)((0, import_ramda6.equals)(1), import_ramda6.length, lstWord);
var isLeftUpper = (0, import_ramda6.compose)(allEqual, (0, import_ramda6.juxt)([import_ramda6.toUpper, import_ramda6.identity]), lstWord);
var leftHasAlpha = (0, import_ramda6.compose)(hasAlpha, lstWord);
var isLeftInitials = (0, import_ramda6.allPass)([isLeftDotDelimiter, isLeftSingleLetter, isLeftUpper, leftHasAlpha]);
var leftInitials = (0, import_ramda6.compose)((0, import_ramda6.all)(Boolean), (0, import_ramda6.zipWith)(import_ramda6.call, [isLeftInitials, (0, import_ramda6.always)(true)]));
// src/rules/abbreviations.ts
var import_ramda7 = require("ramda");
// src/constants/abbreviations.ts

@@ -444,3 +286,153 @@ var INITIALS = {

// src/lenses/index.ts
var import_ramda = require("ramda");
var first = () => (0, import_ramda.lensIndex)(0);
var second = () => (0, import_ramda.lensIndex)(1);
var last = () => (0, import_ramda.lensIndex)(-1);
// src/parsers/index.ts
// fst / snd / lst read the element at index 0 / 1 / -1 of a list through a lens,
// substituting "" when that element is null or undefined.
var firstString = first();
var fst = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(firstString));
var secondString = second();
var snd = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(secondString));
var lastString = last();
var lst = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.view)(lastString));
// A chunk is a lazy run of non-marker characters followed by one or more sentence-end
// markers. Splitting on a pattern with a capturing group keeps the matched chunks in
// the result; filter(Boolean) then drops the empty strings between them.
var sentencePattern = `([^${SENTENCE_END_MARKERS}]*?[${SENTENCE_END_MARKERS}]+)`;
// NOTE(review): "senteceFlags" is a misspelling of "sentenceFlags"; harmless, but a
// rename has to touch this declaration and its single use on the next line.
var senteceFlags = "gmu";
var sentenceRegExp = new RegExp(sentencePattern, senteceFlags);
var sentences = (0, import_ramda2.compose)((0, import_ramda2.filter)(Boolean), (0, import_ramda2.split)(sentenceRegExp));
// Run of sentence-end markers at the end of a line ("m" flag) - every line with "g".
var sentenceDelimitersPattern = `([${SENTENCE_END_MARKERS}]+)$`;
var sentenceDelimitersFlags = "gmu";
var sentenceDelimitersRegExp = new RegExp(sentenceDelimitersPattern, sentenceDelimitersFlags);
// words: the input with those trailing marker runs removed.
var words = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), (0, import_ramda2.replace)(sentenceDelimitersRegExp)(""));
// delimiters: the first trailing marker run, or "" when there is none.
var delimiters = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(sentenceDelimitersRegExp));
// fstToken: capture group 1 of the first match - the shortest run of non-whitespace
// that follows optional whitespace at a line start and is followed by whitespace or
// a line end.
var fstTokenPattern = /^\s*([^\s]+?)(?=\s|$)/;
var fstTokenFlags = "mu";
var fstTokenRegExp = new RegExp(fstTokenPattern, fstTokenFlags);
var fstToken = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(fstTokenRegExp));
// fstWord: first token after trailing sentence delimiters have been stripped.
var fstWord = (0, import_ramda2.compose)(fstToken, words);
// lstToken: capture group 1 of the first match - a run of non-whitespace followed by
// optional whitespace and a line end. Because of the "m" flag, for multi-line input
// this is the last token of the first line that has one.
var lstTokenPattern = /([^\s]+)\s*$/;
var lstTokenFlags = "mu";
var lstTokenRegExp = new RegExp(lstTokenPattern, lstTokenFlags);
var lstToken = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(lstTokenRegExp));
// omitNonAlphaStart: removes leading characters that are neither \w nor in the
// Cyrillic ranges а-я / А-Я.
// NOTE(review): those ranges do not include ё (U+0451) / Ё (U+0401) - confirm
// whether words starting with that letter matter here.
var nonAlphaStartPattern = /^[^\wа-яА-Я]*/;
var nonAlphaStartFlags = "gmu";
var nonAlphaStartRegExp = new RegExp(nonAlphaStartPattern, nonAlphaStartFlags);
var omitNonAlphaStart = (0, import_ramda2.replace)(nonAlphaStartRegExp, "");
// lstWord: last token after trailing sentence delimiters have been stripped.
var lstWord = (0, import_ramda2.compose)(lstToken, words);
// Builds an extractor for the leading window of a string.
// `width` is the maximum window length (defaults to WINDOW_WIDTH). The returned
// function yields the first match of the window pattern, falling back to "".
var fstChars = (width = WINDOW_WIDTH) => {
  // [\s\S] (rather than `.`) lets the window run across line breaks.
  const leadingWindow = new RegExp(`^[\\s\\S]{0,${width}}`, "gmu");
  const allMatches = (0, import_ramda2.match)(leadingWindow);
  return (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, allMatches);
};
// Builds an extractor for a trailing window of a string: the returned function
// yields the first match of `.{0,width}$` (width defaults to WINDOW_WIDTH),
// falling back to "".
// NOTE(review): unlike fstChars, this pattern uses `.`, which does not match line
// terminators, and the "m" flag lets `$` match at the end of every line. For input
// that contains a newline the first match is therefore the tail of the FIRST line,
// not the tail of the whole string - confirm whether that is intended.
var lstChars = (width = WINDOW_WIDTH) => {
const lstCharsPattern = `.{0,${width}}$`;
const lstCharsFlags = "gmu";
const lstCharsRegExp = new RegExp(lstCharsPattern, lstCharsFlags);
// `fst` picks the first of the global matches.
return (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(lstCharsRegExp));
};
// spacePrefix / spaceSuffix: the single whitespace character at the very start / end
// of the string (no flags, so the anchors refer to the whole string), or "".
var spacePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(/^\s/));
var spaceSuffix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(/\s$/));
// The *Prefix parsers below share one shape: a character class built from a marker
// constant, anchored with ^ under the "m" flag (so ^ also matches at line starts),
// returning capture group 1 of the first match via `snd`, or "" when nothing matches.

// Run of generic quotation markers.
var quotationGenericPrefixPattern = `^([${QUOTATION_GENERIC_MARKERS}]+)`;
var quotationGenericPrefixFlags = "mu";
var quotationGenericPrefixRegExp = new RegExp(
quotationGenericPrefixPattern,
quotationGenericPrefixFlags
);
var quotationGenericPrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(quotationGenericPrefixRegExp));
// Run of closing quotation markers.
var quotationClosePrefixPattern = `^([${QUOTATION_CLOSE_MARKERS}]+)`;
var quotationClosePrefixFlags = "mu";
var quotationClosePrefixRegExp = new RegExp(
quotationClosePrefixPattern,
quotationClosePrefixFlags
);
var quotationClosePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(quotationClosePrefixRegExp));
// Run of sentence-end markers.
var delimiterPrefixPattern = `^([${SENTENCE_END_MARKERS}]+)`;
var delimiterPrefixFlags = "mu";
var delimiterPrefixRegExp = new RegExp(delimiterPrefixPattern, delimiterPrefixFlags);
var delimiterPrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(delimiterPrefixRegExp));
// Run of closing bracket markers.
var bracketsClosePrefixPattern = `^([${BRACKETS_CLOSE_MARKERS}]+)`;
var bracketsClosePrefixFlags = "mu";
var bracketsClosePrefixRegExp = new RegExp(bracketsClosePrefixPattern, bracketsClosePrefixFlags);
var bracketsClosePrefix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(bracketsClosePrefixRegExp));
// spaces: first whitespace-only match spanning from a line start to a line end
// ("g" and "m" flags), or "".
var spacesPattern = /^(\s+)$/;
var spacesFlags = "gmu";
var spacesRegExp = new RegExp(spacesPattern, spacesFlags);
var spaces = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), fst, (0, import_ramda2.match)(spacesRegExp));
// dotSuffix: captures a "." at a line end only when it is preceded by a character
// other than "." - so "..." and a lone "." yield "".
var dotSuffixPattern = /[^.](\.)$/;
var dotSuffixFlags = "mu";
var dotSuffixRegExp = new RegExp(dotSuffixPattern, dotSuffixFlags);
var dotSuffix = (0, import_ramda2.compose)((0, import_ramda2.defaultTo)(""), snd, (0, import_ramda2.match)(dotSuffixRegExp));
// src/rules/base.ts
var import_ramda5 = require("ramda");
// src/utilities/list.ts
var import_ramda3 = require("ramda");
// lenLte(len) builds a predicate that is true when its argument's length is <= len.
// flip(lte) swaps the operands, so the curried `len` ends up on the right-hand side.
var lenLte = (len) => (0, import_ramda3.compose)((0, import_ramda3.curry)((0, import_ramda3.flip)(import_ramda3.lte))(len), import_ramda3.length);
// True when the list holds at most one distinct value (so also true for an empty list).
var allEqual = (0, import_ramda3.compose)(lenLte(1), import_ramda3.uniq);
// True when the argument's length is non-zero.
var lengthNonZero = (0, import_ramda3.compose)(Boolean, import_ramda3.length);
// src/utilities/string.ts
var import_ramda4 = require("ramda");
// charAt(i) returns a function that calls `.charAt(i)` on its argument.
var charAt = (0, import_ramda4.invoker)(1, "charAt");
// notAlpha: the string's lowercase and uppercase forms are identical, i.e. it
// contains no cased letters (true for "", digits, punctuation, whitespace).
var notAlpha = (0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.toLower, import_ramda4.toUpper]));
// hasAlpha: negation of notAlpha.
var hasAlpha = (0, import_ramda4.compose)(import_ramda4.not, notAlpha);
// True when the first character is a cased letter that equals its lowercase form.
var startsWithLower = (0, import_ramda4.allPass)([
(0, import_ramda4.compose)(hasAlpha, charAt(0)),
(0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.identity, import_ramda4.toLower]), charAt(0))
]);
// True when the first character is a cased letter that equals its uppercase form.
var startsWithUpper = (0, import_ramda4.allPass)([
(0, import_ramda4.compose)(hasAlpha, charAt(0)),
(0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.identity, import_ramda4.toUpper]), charAt(0))
]);
// The patterns below carry no flags, so ^ and $ refer to the whole string.
// True when the string begins with "\n".
var startsWithNewline = (0, import_ramda4.compose)(lengthNonZero, (0, import_ramda4.match)(/^\n/));
// True when the string begins with "\n\n".
var startsWithHardbreak = (0, import_ramda4.compose)(lengthNonZero, (0, import_ramda4.match)(/^\n\n/));
// True when the string ends with "\n\n".
var endsWithHardbreak = (0, import_ramda4.compose)(lengthNonZero, (0, import_ramda4.match)(/\n\n$/));
// True when the string equals its own uppercase form (also true for strings
// without any cased letters).
var isUpper = (0, import_ramda4.compose)(allEqual, (0, import_ramda4.juxt)([import_ramda4.toUpper, import_ramda4.identity]));
// src/rules/base.ts
var isSpaceSuffix = (0, import_ramda5.compose)(lengthNonZero, spaceSuffix);
var isSpacePrefix = (0, import_ramda5.compose)(lengthNonZero, spacePrefix);
var log = (name, action) => {
return (...args) => {
const result = action(...args);
if (process.env.DEBUG) {
console.log(name, args, result);
}
return result;
};
};
var _ = (0, import_ramda5.always)(true);
// Rule factory.
//   name          - label handed to `log` for tracing
//   [left, right] - predicates applied to the left and right element of a pair
//   remap         - mapped over both elements before the predicates run
//                   (identity by default)
// The resulting rule takes a [left, right] list and is true only when every
// predicate result is truthy.
var rule = (name, [left, right], remap = import_ramda5.identity) => {
  const prepareSides = (0, import_ramda5.map)(remap);
  const applyPredicates = (0, import_ramda5.zipWith)(import_ramda5.call, [left, right]);
  const everyTruthy = (0, import_ramda5.all)(Boolean);
  const check = (0, import_ramda5.compose)(everyTruthy, applyPredicates, prepareSides);
  return log(name, check);
};
// Pair rules built with `rule(name, [leftPredicate, rightPredicate], remap)`.
// `_` is the always-true predicate and marks a side the rule does not inspect.

// After `words` (trailing sentence delimiters removed): left ends with whitespace
// and right starts with whitespace.
var spaceBothSides = rule("spaceBothSides", [isSpaceSuffix, isSpacePrefix], words);
// After `words`: right does not start with whitespace.
var rightLacksSpacePrefix = rule("rightLacksSpacePrefix", [_, (0, import_ramda5.compose)(import_ramda5.not, isSpacePrefix)], words);
// The first token of right starts with a lowercase letter.
var rightStartsWithLowercase = rule("rightStartsWithLowercase", [_, (0, import_ramda5.compose)(startsWithLower, fstToken)]);
// The first token of right begins with sentence-end marker(s).
var rightDelimiterPrefix = rule("rightDelimiterPrefix", [_, (0, import_ramda5.compose)(lengthNonZero, delimiterPrefix, fstToken)]);
// Right matches quotationGenericPrefix.
// NOTE(review): applied to the right string as-is, without fstToken, unlike the
// close-quotation and bracket rules below - confirm this asymmetry is intended.
var rightQuotationGenericPrefix = rule("rightQuotationGenericPrefix", [_, (0, import_ramda5.compose)(lengthNonZero, quotationGenericPrefix)]);
// The first token of right begins with closing quotation marker(s).
var rightQuotationClosePrefix = rule("rightQuotationClosePrefix", [_, (0, import_ramda5.compose)(lengthNonZero, quotationClosePrefix, fstToken)]);
// The first token of right begins with closing bracket marker(s).
var rightBracketsClosePrefix = rule("rightBracketsClosePrefix", [_, (0, import_ramda5.compose)(lengthNonZero, bracketsClosePrefix, fstToken)]);
// Right yields a non-empty match of the `spaces` parser.
var rightOnlySpaces = rule("rightOnlySpaces", [_, (0, import_ramda5.compose)(lengthNonZero, spaces)]);
// Left ends with "\n\n".
var leftEndsWithHardbreak = rule("leftEndsWithHardbreak", [endsWithHardbreak, _]);
// Right starts with "\n\n".
var rightStartsWithHardbreak = rule("rightStartsWithHardbreak", [_, startsWithHardbreak]);
// Pair rule: the right side begins with a newline and its first token starts with
// an uppercase letter.
// startsWithUpper is applied to the first token (fstToken skips leading whitespace),
// not to the raw string: the raw string's first character is the newline itself,
// which has no case, so testing it directly can never succeed together with
// startsWithNewline. This mirrors how rightStartsWithLowercase uses fstToken.
var rightStartsNewlineUppercased = rule("rightStartsNewlineUppercased", [_, (0, import_ramda5.allPass)([startsWithNewline, (0, import_ramda5.compose)(startsWithUpper, fstToken)])]);
// src/rules/initials.ts
var import_ramda6 = require("ramda");
// True when dotSuffix finds a single trailing "." (not part of a longer dot run).
var isLeftDotDelimiter = (0, import_ramda6.compose)(lengthNonZero, dotSuffix);
// True when the last word (lstWord) is exactly one character long.
var isLeftSingleLetter = (0, import_ramda6.compose)((0, import_ramda6.equals)(1), import_ramda6.length, lstWord);
// True when the last word equals its own uppercase form.
var isLeftUpper = (0, import_ramda6.compose)(allEqual, (0, import_ramda6.juxt)([import_ramda6.toUpper, import_ramda6.identity]), lstWord);
// True when the last word contains a cased letter.
var leftHasAlpha = (0, import_ramda6.compose)(hasAlpha, lstWord);
// A string passes when all four checks above hold: a single uppercase letter
// followed by a dot.
var isLeftInitials = (0, import_ramda6.allPass)([isLeftDotDelimiter, isLeftSingleLetter, isLeftUpper, leftHasAlpha]);
// Pair rule over [left, right]: left must pass isLeftInitials; right is not inspected.
var leftInitials = (0, import_ramda6.compose)((0, import_ramda6.all)(Boolean), (0, import_ramda6.zipWith)(import_ramda6.call, [isLeftInitials, (0, import_ramda6.always)(true)]));
// src/rules/abbreviations.ts
var import_ramda7 = require("ramda");
var fst2 = (0, import_ramda7.compose)((0, import_ramda7.defaultTo)(""), (0, import_ramda7.view)(first()));

@@ -504,3 +496,3 @@ var snd2 = (0, import_ramda7.compose)((0, import_ramda7.defaultTo)(""), (0, import_ramda7.view)(second()));

// src/sentenize.ts
// src/index.ts
var leftPreprocessor = lstChars(20);

@@ -523,24 +515,32 @@ var rightPreprocessor = fstChars(20);

]);
var breakCondition = (0, import_ramda8.anyPass)([
leftEndsWithHardbreak,
rightStartsWithHardbreak,
rightStartsNewlineUppercased
]);
var join2 = (0, import_ramda8.compose)(joinCondition, (0, import_ramda8.zipWith)(import_ramda8.call, sidesPreprocessors));
function processor(text) {
const chunks = sentences(text);
let left = null;
var breaks = (0, import_ramda8.compose)(breakCondition, (0, import_ramda8.zipWith)(import_ramda8.call, sidesPreprocessors));
function sentenize(text) {
const parts = text.split(/(\n{2,})/);
const parsed = [];
for (let i = 0; i < chunks.length; i++) {
if (!left) {
left = chunks[i];
continue;
for (const part of parts) {
const chunks = sentences(part);
let left = null;
for (const right of chunks) {
if (!left) {
left = right;
continue;
}
if (!breaks([left, right]) && join2([left, right])) {
left += right;
} else {
parsed.push(left);
left = right;
}
}
if (join2([left, chunks[i]])) {
left += chunks[i];
} else {
if (left)
parsed.push(left);
left = chunks[i];
}
}
if (left)
parsed.push(left);
return parsed;
}
var sentenize = processor;
// Annotate the CommonJS export names for ESM import in node:

@@ -547,0 +547,0 @@ 0 && (module.exports = {

import { Pred } from 'ramda';
declare const pairAbbreviation: Pred<any[]> | ((obj: string[]) => boolean);
declare const leftAbbreviation: (obj: string[]) => boolean;
declare const leftPairsTailAbbreviation: Pred<any[]> | ((obj: string[]) => boolean);
export { pairAbbreviation, leftAbbreviation, leftPairsTailAbbreviation };
export declare const pairAbbreviation: Pred<any[]> | ((obj: string[]) => boolean);
export declare const leftAbbreviation: (obj: string[]) => boolean;
export declare const leftPairsTailAbbreviation: Pred<any[]> | ((obj: string[]) => boolean);

@@ -1,9 +0,11 @@

declare const spaceBothSides: (list: readonly string[]) => boolean;
declare const rightLacksSpacePrefix: (list: readonly string[]) => boolean;
declare const rightStartsWithLowercase: (list2: readonly unknown[]) => boolean;
declare const rightDelimiterPrefix: (list2: readonly unknown[]) => boolean;
declare const rightQuotationGenericPrefix: (list2: readonly unknown[]) => boolean;
declare const rightQuotationClosePrefix: (list2: readonly unknown[]) => boolean;
declare const rightBracketsClosePrefix: (list2: readonly unknown[]) => boolean;
declare const rightOnlySpaces: (list2: readonly unknown[]) => boolean;
export { spaceBothSides, rightLacksSpacePrefix, rightStartsWithLowercase, rightDelimiterPrefix, rightQuotationGenericPrefix, rightQuotationClosePrefix, rightBracketsClosePrefix, rightOnlySpaces, };
export declare const spaceBothSides: (...args: any[]) => any;
export declare const rightLacksSpacePrefix: (...args: any[]) => any;
export declare const rightStartsWithLowercase: (...args: any[]) => any;
export declare const rightDelimiterPrefix: (...args: any[]) => any;
export declare const rightQuotationGenericPrefix: (...args: any[]) => any;
export declare const rightQuotationClosePrefix: (...args: any[]) => any;
export declare const rightBracketsClosePrefix: (...args: any[]) => any;
export declare const rightOnlySpaces: (...args: any[]) => any;
export declare const leftEndsWithHardbreak: (...args: any[]) => any;
export declare const rightStartsWithHardbreak: (...args: any[]) => any;
export declare const rightStartsNewlineUppercased: (...args: any[]) => any;

@@ -1,3 +0,3 @@

export { spaceBothSides, rightLacksSpacePrefix, rightStartsWithLowercase, rightDelimiterPrefix, rightQuotationGenericPrefix, rightQuotationClosePrefix, rightBracketsClosePrefix, rightOnlySpaces, } from './base';
export { leftInitials } from './initials';
export { leftAbbreviation, pairAbbreviation, leftPairsTailAbbreviation } from './abbreviations';
export * from './base';
export * from './initials';
export * from './abbreviations';

@@ -1,2 +0,1 @@

declare const leftInitials: (list2: readonly unknown[]) => boolean;
export { leftInitials };
export declare const leftInitials: (list2: readonly unknown[]) => boolean;

@@ -1,2 +0,2 @@

export { lenLte, allEqual, lengthNonZero } from './list';
export { charAt, notAlpha, hasAlpha, startsWithLower, isUpper } from './string';
export * from './list';
export * from './string';

@@ -1,4 +0,3 @@

declare const lenLte: (len: number) => (...args: any[][]) => boolean;
declare const allEqual: (list: readonly unknown[]) => boolean;
declare const lengthNonZero: (list: any) => boolean;
export { lenLte, allEqual, lengthNonZero };
export declare const lenLte: (len: number) => (...args: any[][]) => boolean;
export declare const allEqual: (list: readonly unknown[]) => boolean;
export declare const lengthNonZero: (list: any) => boolean;
import { Pred } from 'ramda';
declare const charAt: (...args: unknown[]) => any;
declare const notAlpha: (str: string) => boolean;
declare const hasAlpha: (str: string) => boolean;
declare const startsWithLower: Pred<any[]>;
declare const isUpper: (a: string) => boolean;
export { charAt, notAlpha, hasAlpha, startsWithLower, isUpper };
export declare const charAt: (...args: unknown[]) => any;
export declare const notAlpha: (str: string) => boolean;
export declare const hasAlpha: (str: string) => boolean;
export declare const startsWithLower: Pred<any[]>;
export declare const startsWithUpper: Pred<any[]>;
export declare const startsWithNewline: Pred<any[]>;
export declare const startsWithHardbreak: Pred<any[]>;
export declare const endsWithHardbreak: Pred<any[]>;
export declare const isUpper: (a: string) => boolean;
{
"name": "@diplodoc/sentenizer",
"version": "0.0.6",
"version": "0.0.7",
"description": "text segmentation into sentences",

@@ -5,0 +5,0 @@ "homepage": "https://github.com/diplodoc-platform/sentenizer",

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc