Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@diplodoc/sentenizer

Package Overview
Dependencies
Maintainers
9
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@diplodoc/sentenizer - npm Package Compare versions

Comparing version 0.0.7 to 0.0.8

97

lib/index.js

@@ -5,5 +5,5 @@ var __defProp = Object.defineProperty;

var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all3) => {
for (var name in all3)
__defProp(target, name, { get: all3[name], enumerable: true });
var __export = (target, all2) => {
for (var name in all2)
__defProp(target, name, { get: all2[name], enumerable: true });
};

@@ -26,3 +26,3 @@ var __copyProps = (to, from, except, desc) => {

module.exports = __toCommonJS(src_exports);
var import_ramda8 = require("ramda");
var import_ramda7 = require("ramda");

@@ -184,4 +184,6 @@ // src/parsers/index.ts

// гос. экзамены
"\u043E\u0442\u043C": true
"\u043E\u0442\u043C": true,
// от отм. 0.000
"\u0434\u043E\u0431": true
// доб. 1243 (телефон)
};

@@ -396,2 +398,6 @@ var TAIL = {

// src/rules/base.ts
var isLeftDotDelimiter = (0, import_ramda5.compose)(lengthNonZero, dotSuffix);
var isLeftSingleLetter = (0, import_ramda5.compose)((0, import_ramda5.equals)(1), import_ramda5.length, lstWord);
var isLeftUpper = (0, import_ramda5.compose)(allEqual, (0, import_ramda5.juxt)([import_ramda5.toUpper, import_ramda5.identity]), lstWord);
var leftHasAlpha = (0, import_ramda5.compose)(hasAlpha, lstWord);
var isSpaceSuffix = (0, import_ramda5.compose)(lengthNonZero, spaceSuffix);

@@ -427,62 +433,55 @@ var isSpacePrefix = (0, import_ramda5.compose)(lengthNonZero, spacePrefix);

var rightStartsNewlineUppercased = rule("rightStartsNewlineUppercased", [_, (0, import_ramda5.allPass)([startsWithNewline, startsWithUpper])]);
var leftInitials = rule("leftInitials", [(0, import_ramda5.allPass)([isLeftDotDelimiter, isLeftSingleLetter, isLeftUpper, leftHasAlpha]), _]);
// src/rules/initials.ts
// src/rules/abbreviations.ts
var import_ramda6 = require("ramda");
var isLeftDotDelimiter = (0, import_ramda6.compose)(lengthNonZero, dotSuffix);
var isLeftSingleLetter = (0, import_ramda6.compose)((0, import_ramda6.equals)(1), import_ramda6.length, lstWord);
var isLeftUpper = (0, import_ramda6.compose)(allEqual, (0, import_ramda6.juxt)([import_ramda6.toUpper, import_ramda6.identity]), lstWord);
var leftHasAlpha = (0, import_ramda6.compose)(hasAlpha, lstWord);
var isLeftInitials = (0, import_ramda6.allPass)([isLeftDotDelimiter, isLeftSingleLetter, isLeftUpper, leftHasAlpha]);
var leftInitials = (0, import_ramda6.compose)((0, import_ramda6.all)(Boolean), (0, import_ramda6.zipWith)(import_ramda6.call, [isLeftInitials, (0, import_ramda6.always)(true)]));
// src/rules/abbreviations.ts
var import_ramda7 = require("ramda");
var fst2 = (0, import_ramda7.compose)((0, import_ramda7.defaultTo)(""), (0, import_ramda7.view)(first()));
var snd2 = (0, import_ramda7.compose)((0, import_ramda7.defaultTo)(""), (0, import_ramda7.view)(second()));
var isDotDelimiter = (0, import_ramda7.compose)(lengthNonZero, dotSuffix);
var hash = (0, import_ramda7.compose)(import_ramda7.toLower, (0, import_ramda7.join)("."));
var insidePairAbbreviationMap = (0, import_ramda7.anyPass)([
(0, import_ramda7.prop)(import_ramda7.__, HEAD_PAIR),
(0, import_ramda7.prop)(import_ramda7.__, TAIL_PAIR),
(0, import_ramda7.prop)(import_ramda7.__, OTHER_PAIR)
var fst2 = (0, import_ramda6.compose)((0, import_ramda6.defaultTo)(""), (0, import_ramda6.view)(first()));
var snd2 = (0, import_ramda6.compose)((0, import_ramda6.defaultTo)(""), (0, import_ramda6.view)(second()));
var isDotDelimiter = (0, import_ramda6.compose)(lengthNonZero, dotSuffix);
var hash = (0, import_ramda6.compose)(import_ramda6.toLower, (0, import_ramda6.join)("."));
var insidePairAbbreviationMap = (0, import_ramda6.anyPass)([
(0, import_ramda6.prop)(import_ramda6.__, HEAD_PAIR),
(0, import_ramda6.prop)(import_ramda6.__, TAIL_PAIR),
(0, import_ramda6.prop)(import_ramda6.__, OTHER_PAIR)
]);
var isPairAbbreviation = (0, import_ramda7.compose)(
var isPairAbbreviation = (0, import_ramda6.compose)(
insidePairAbbreviationMap,
hash,
(0, import_ramda7.zipWith)(import_ramda7.call, [
(0, import_ramda7.compose)(omitNonAlphaStart, lstWord, lstToken),
(0, import_ramda7.compose)(fstWord, fstToken)
(0, import_ramda6.zipWith)(import_ramda6.call, [
(0, import_ramda6.compose)(omitNonAlphaStart, lstWord, lstToken),
(0, import_ramda6.compose)(fstWord, fstToken)
])
);
var pairAbbreviation = (0, import_ramda7.allPass)([
(0, import_ramda7.compose)(isDotDelimiter, lstToken, fst2),
var pairAbbreviation = (0, import_ramda6.allPass)([
(0, import_ramda6.compose)(isDotDelimiter, lstToken, fst2),
isPairAbbreviation
]);
var insideAbbreviationMap = (0, import_ramda7.anyPass)([
var insideAbbreviationMap = (0, import_ramda6.anyPass)([
// @ts-ignore
(0, import_ramda7.prop)(import_ramda7.__, INITIALS),
(0, import_ramda6.prop)(import_ramda6.__, INITIALS),
// @ts-ignore
(0, import_ramda7.prop)(import_ramda7.__, HEAD),
(0, import_ramda6.prop)(import_ramda6.__, HEAD),
// @ts-ignore
(0, import_ramda7.prop)(import_ramda7.__, TAIL),
(0, import_ramda6.prop)(import_ramda6.__, TAIL),
// @ts-ignore
(0, import_ramda7.prop)(import_ramda7.__, OTHER)
(0, import_ramda6.prop)(import_ramda6.__, OTHER)
]);
var isLeftAbbreviation = (0, import_ramda7.compose)(
var isLeftAbbreviation = (0, import_ramda6.compose)(
insideAbbreviationMap,
omitNonAlphaStart,
import_ramda7.toLower,
import_ramda6.toLower,
lstWord,
lstToken
);
var leftAbbreviation = (0, import_ramda7.compose)(
(0, import_ramda7.allPass)([(0, import_ramda7.compose)(isDotDelimiter, lstToken), isLeftAbbreviation]),
var leftAbbreviation = (0, import_ramda6.compose)(
(0, import_ramda6.allPass)([(0, import_ramda6.compose)(isDotDelimiter, lstToken), isLeftAbbreviation]),
fst2
);
var rightLowercaseOrCaps = (0, import_ramda7.compose)((0, import_ramda7.anyPass)([startsWithLower, isUpper]), fstWord, snd2);
var isCaps = (0, import_ramda6.allPass)([isUpper, (0, import_ramda6.compose)((0, import_ramda6.lt)(1), import_ramda6.length)]);
var rightLowercaseOrCaps = (0, import_ramda6.compose)((0, import_ramda6.anyPass)([startsWithLower, isCaps]), fstWord, snd2);
var before = (s) => (t) => s.slice(0, Math.max(s.indexOf(t), 0));
var isLeftPairsTail = (left) => {
const rest = before(left);
const head = (0, import_ramda7.compose)(words, lstWord, rest, lstWord, lstToken);
return (0, import_ramda7.or)(
const head = (0, import_ramda6.compose)(words, lstWord, rest, lstWord, lstToken);
return (0, import_ramda6.or)(
isPairAbbreviation([head(left), lstWord(left)]),

@@ -492,5 +491,5 @@ isPairAbbreviation(lstWord(left).split("."))

};
var leftPairsTailAbbreviation = (0, import_ramda7.allPass)([
(0, import_ramda7.compose)(isDotDelimiter, lstToken, fst2),
(0, import_ramda7.compose)(isLeftPairsTail, fst2),
var leftPairsTailAbbreviation = (0, import_ramda6.allPass)([
(0, import_ramda6.compose)(isDotDelimiter, lstToken, fst2),
(0, import_ramda6.compose)(isLeftPairsTail, fst2),
rightLowercaseOrCaps

@@ -503,3 +502,3 @@ ]);

var sidesPreprocessors = [leftPreprocessor, rightPreprocessor];
var joinCondition = (0, import_ramda8.anyPass)([
var joinCondition = (0, import_ramda7.anyPass)([
spaceBothSides,

@@ -518,3 +517,3 @@ rightLacksSpacePrefix,

]);
var breakCondition = (0, import_ramda8.anyPass)([
var breakCondition = (0, import_ramda7.anyPass)([
leftEndsWithHardbreak,

@@ -524,6 +523,6 @@ rightStartsWithHardbreak,

]);
var join2 = (0, import_ramda8.compose)(joinCondition, (0, import_ramda8.zipWith)(import_ramda8.call, sidesPreprocessors));
var breaks = (0, import_ramda8.compose)(breakCondition, (0, import_ramda8.zipWith)(import_ramda8.call, sidesPreprocessors));
var join2 = (0, import_ramda7.compose)(joinCondition, (0, import_ramda7.zipWith)(import_ramda7.call, sidesPreprocessors));
var breaks = (0, import_ramda7.compose)(breakCondition, (0, import_ramda7.zipWith)(import_ramda7.call, sidesPreprocessors));
function sentenize(text) {
const parts = text.split(/(\n{2,})/);
const parts = text.split(/((?:\n\s*){2,})/);
const parsed = [];

@@ -530,0 +529,0 @@ for (const part of parts) {

@@ -12,1 +12,2 @@ export declare const spaceBothSides: (...args: any[]) => any;

export declare const rightStartsNewlineUppercased: (...args: any[]) => any;
export declare const leftInitials: (...args: any[]) => any;
export * from './base';
export * from './initials';
export * from './abbreviations';
{
"name": "@diplodoc/sentenizer",
"version": "0.0.7",
"version": "0.0.8",
"description": "text segmentation into sentences",

@@ -5,0 +5,0 @@ "homepage": "https://github.com/diplodoc-platform/sentenizer",

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc