Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

sentence-splitter

Package Overview
Dependencies
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sentence-splitter - npm Package Compare versions

Comparing version 3.2.3 to 4.0.0

module/logger.d.ts

3

lib/logger.d.ts

@@ -1,4 +0,5 @@

import { SourceCode } from "./parser/SourceCode";
import { SourceCode } from "./parser/SourceCode.js";
export declare function seekLog(offset: number, current?: string | boolean): void;
export declare function nodeLog(message: string, sourceCode?: SourceCode): void;
export declare function debugLog(...message: any[]): void;
//# sourceMappingURL=logger.d.ts.map
"use strict";
var __spreadArray = (this && this.__spreadArray) || function (to, from) {
for (var i = 0, il = from.length, j = to.length; i < il; i++, j++)
to[j] = from[i];
return to;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.debugLog = exports.nodeLog = exports.seekLog = void 0;
const isDebug = typeof process === "object" && process?.env?.DEBUG === "sentence-splitter";
function seekLog(offset, current) {
if (process.env.DEBUG !== "sentence-splitter") {
if (!isDebug) {
return;

@@ -17,3 +13,3 @@ }

function nodeLog(message, sourceCode) {
if (process.env.DEBUG !== "sentence-splitter") {
if (!isDebug) {
return;

@@ -25,3 +21,3 @@ }

}
var currentNode = sourceCode.readNode();
const currentNode = sourceCode.readNode();
if (!currentNode) {

@@ -31,22 +27,26 @@ console.log("sentence-splitter: " + message);

}
var RowLength = 50;
var currentChar = (sourceCode.read() || "").replace(/\n/g, "\\n");
var nodeValue = currentNode.raw.replace(/\n/g, "\\n");
console.log("sentence-splitter: " + sourceCode.offset + " " + message + " |" + currentChar + "| " + " ".repeat(RowLength - currentChar.length - message.length) + nodeValue);
const RowLength = 50;
const currentChar = (sourceCode.read() || "").replace(/\n/g, "\\n");
const nodeValue = currentNode.raw.replace(/\n/g, "\\n");
console.log("sentence-splitter: " +
sourceCode.offset +
" " +
message +
" |" +
currentChar +
"| " +
" ".repeat(RowLength - currentChar.length - message.length) +
nodeValue);
}
exports.nodeLog = nodeLog;
function debugLog() {
var message = [];
for (var _i = 0; _i < arguments.length; _i++) {
message[_i] = arguments[_i];
}
if (process.env.DEBUG !== "sentence-splitter") {
function debugLog(...message) {
if (!isDebug) {
return;
}
console.log.apply(console, __spreadArray(["sentence-splitter: "], message.map(function (m) {
console.log("sentence-splitter: ", ...message.map((m) => {
// make one line if it is multiline
return typeof m === "string" ? m.replace(/\n/g, "\\n") : m;
})));
}));
}
exports.debugLog = debugLog;
//# sourceMappingURL=logger.js.map
{
"name": "sentence-splitter",
"version": "3.2.2",
"version": "4.0.0",
"description": "split {japanese, english} text into sentences.",

@@ -10,9 +10,9 @@ "keywords": [

],
"homepage": "https://github.com/azu/sentence-splitter",
"homepage": "https://github.com/textlint-rule/sentence-splitter",
"bugs": {
"url": "https://github.com/azu/sentence-splitter/issues"
"url": "https://github.com/textlint-rule/sentence-splitter/issues"
},
"repository": {
"type": "git",
"url": "git+https://github.com/azu/sentence-splitter.git"
"url": "git+https://github.com/textlint-rule/sentence-splitter.git"
},

@@ -64,4 +64,2 @@ "license": "MIT",

"devDependencies": {
"@parcel/transformer-typescript-tsc": "^2.8.3",
"@snowpack/plugin-typescript": "^1.1.1",
"@textlint/markdown-to-ast": "^13.2.0",

@@ -68,0 +66,0 @@ "@types/mocha": "^10.0.1",

@@ -1,4 +0,4 @@

import { SourceCode } from "./SourceCode";
import { Language } from "./lang/LanguageInterface";
import { AbstractMarker } from "./AbstractMarker";
import { SourceCode } from "./SourceCode.js";
import { Language } from "./lang/LanguageInterface.js";
import { AbstractMarker } from "./AbstractMarker.js";
/**

@@ -21,1 +21,2 @@ * abbreviation marker

}
//# sourceMappingURL=AbbrMarker.d.ts.map
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbbrMarker = void 0;
var English_1 = require("./lang/English");
var isCapitalized = function (text) {
const English_js_1 = require("./lang/English.js");
const isCapitalized = (text) => {
if (!text || text.length === 0) {

@@ -11,3 +11,3 @@ return false;

};
var compareNoCaseSensitive = function (a, b) {
const compareNoCaseSensitive = (a, b) => {
return a.toLowerCase() === b.toLowerCase();

@@ -18,5 +18,4 @@ };

*/
var AbbrMarker = /** @class */ (function () {
function AbbrMarker(lang) {
if (lang === void 0) { lang = English_1.English; }
class AbbrMarker {
constructor(lang = English_js_1.English) {
this.lang = lang;

@@ -31,12 +30,11 @@ }

*/
AbbrMarker.prototype.getWord = function (sourceCode, startIndex) {
if (startIndex === void 0) { startIndex = 0; }
var whiteSpace = /\s/;
var prevChar = sourceCode.read(-1);
getWord(sourceCode, startIndex = 0) {
const whiteSpace = /\s/;
const prevChar = sourceCode.read(-1);
if (prevChar && !whiteSpace.test(prevChar)) {
return "";
}
var word = "";
var count = startIndex;
var char = "";
let word = "";
let count = startIndex;
let char = "";
while ((char = sourceCode.read(count))) {

@@ -50,7 +48,7 @@ if (whiteSpace.test(char)) {

return word;
};
AbbrMarker.prototype.getPrevWord = function (sourceCode) {
var whiteSpace = /\s/;
var count = -1;
var char = "";
}
getPrevWord(sourceCode) {
const whiteSpace = /\s/;
let count = -1;
let char = "";
while ((char = sourceCode.read(count))) {

@@ -69,8 +67,8 @@ if (!whiteSpace.test(char)) {

return this.getWord(sourceCode, count + 1);
};
AbbrMarker.prototype.mark = function (sourceCode) {
}
mark(sourceCode) {
if (sourceCode.isInContextRange()) {
return;
}
var currentWord = this.getWord(sourceCode);
const currentWord = this.getWord(sourceCode);
if (currentWord.length === 0) {

@@ -86,3 +84,3 @@ return;

// Example: Yahoo!
var isMatchedEXCALAMATION_WORDS = this.lang.EXCALAMATION_WORDS.some(function (abbr) {
const isMatchedEXCALAMATION_WORDS = this.lang.EXCALAMATION_WORDS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);

@@ -95,3 +93,3 @@ });

// Example: Mr. Fuji
var isMatchedPREPOSITIVE_ABBREVIATIONS = this.lang.PREPOSITIVE_ABBREVIATIONS.some(function (abbr) {
const isMatchedPREPOSITIVE_ABBREVIATIONS = this.lang.PREPOSITIVE_ABBREVIATIONS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);

@@ -103,7 +101,7 @@ });

// ABBREVIATIONS
var isMatched = this.lang.ABBREVIATIONS.some(function (abbr) {
const isMatched = this.lang.ABBREVIATIONS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);
});
var prevWord = this.getPrevWord(sourceCode);
var nextWord = this.getWord(sourceCode, currentWord.length + 1);
const prevWord = this.getPrevWord(sourceCode);
const nextWord = this.getWord(sourceCode, currentWord.length + 1);
// console.log("prevWord", prevWord);

@@ -124,6 +122,5 @@ // console.log("currentWord", currentWord);

}
};
return AbbrMarker;
}());
}
}
exports.AbbrMarker = AbbrMarker;
//# sourceMappingURL=AbbrMarker.js.map

@@ -1,4 +0,5 @@

import { SourceCode } from "./SourceCode";
import { SourceCode } from "./SourceCode.js";
export declare abstract class AbstractMarker {
abstract mark(source: SourceCode): void;
}
//# sourceMappingURL=AbstractMarker.d.ts.map
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbstractMarker = void 0;
var AbstractMarker = /** @class */ (function () {
function AbstractMarker() {
}
return AbstractMarker;
}());
class AbstractMarker {
}
exports.AbstractMarker = AbstractMarker;
//# sourceMappingURL=AbstractMarker.js.map

@@ -1,2 +0,2 @@

import { SourceCode } from "./SourceCode";
import { SourceCode } from "./SourceCode.js";
export declare abstract class AbstractParser {

@@ -10,1 +10,2 @@ /**

}
//# sourceMappingURL=AbstractParser.d.ts.map
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbstractParser = void 0;
var AbstractParser = /** @class */ (function () {
function AbstractParser() {
}
return AbstractParser;
}());
class AbstractParser {
}
exports.AbstractParser = AbstractParser;
//# sourceMappingURL=AbstractParser.js.map

@@ -1,4 +0,4 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { AbstractMarker } from "./AbstractMarker";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";
import { AbstractMarker } from "./AbstractMarker.js";
export interface AnyValueParserOptions {

@@ -21,1 +21,2 @@ parsers: AbstractParser[];

}
//# sourceMappingURL=AnyValueParser.d.ts.map
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnyValueParser = void 0;
var logger_1 = require("../logger");
const logger_js_1 = require("../logger.js");
/**
* Any value without `parsers`
*/
var AnyValueParser = /** @class */ (function () {
class AnyValueParser {
/**
* Eat any value without `parsers.test`
*/
function AnyValueParser(options) {
constructor(options) {
this.parsers = options.parsers;
this.markers = options.markers;
}
AnyValueParser.prototype.test = function (sourceCode) {
test(sourceCode) {
if (sourceCode.hasEnd) {
return false;
}
return this.parsers.every(function (parser) { return !parser.test(sourceCode); });
};
AnyValueParser.prototype.seek = function (sourceCode) {
var currentNode = sourceCode.readNode();
return this.parsers.every((parser) => !parser.test(sourceCode));
}
seek(sourceCode) {
const currentNode = sourceCode.readNode();
if (!currentNode) {
// Text mode
while (this.test(sourceCode)) {
this.markers.forEach(function (marker) { return marker.mark(sourceCode); });
this.markers.forEach((marker) => marker.mark(sourceCode));
sourceCode.peek();

@@ -33,15 +33,14 @@ }

// node - should not over next node
var isInCurrentNode = function () {
var currentOffset = sourceCode.offset;
const isInCurrentNode = () => {
const currentOffset = sourceCode.offset;
return currentNode.range[0] <= currentOffset && currentOffset < currentNode.range[1];
};
while (isInCurrentNode() && this.test(sourceCode)) {
logger_1.seekLog(sourceCode.offset, sourceCode.read());
this.markers.forEach(function (marker) { return marker.mark(sourceCode); });
(0, logger_js_1.seekLog)(sourceCode.offset, sourceCode.read());
this.markers.forEach((marker) => marker.mark(sourceCode));
sourceCode.peek();
}
};
return AnyValueParser;
}());
}
}
exports.AnyValueParser = AnyValueParser;
//# sourceMappingURL=AnyValueParser.js.map

@@ -1,2 +0,3 @@

import { Language } from "./LanguageInterface";
import type { Language } from "./LanguageInterface.js";
export declare const English: Language;
//# sourceMappingURL=English.d.ts.map

@@ -1,2 +0,2 @@

export declare type Language = {
export type Language = {
ABBREVIATIONS: string[];

@@ -6,1 +6,2 @@ PREPOSITIVE_ABBREVIATIONS: string[];

};
//# sourceMappingURL=LanguageInterface.d.ts.map

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";
/**

@@ -10,1 +10,2 @@ * New Line Parser

}
//# sourceMappingURL=NewLineParser.d.ts.map

@@ -7,7 +7,5 @@ "use strict";

*/
var NewLineParser = /** @class */ (function () {
function NewLineParser() {
}
NewLineParser.prototype.test = function (sourceCode) {
var string = sourceCode.read();
class NewLineParser {
test(sourceCode) {
const string = sourceCode.read();
if (!string) {

@@ -17,11 +15,10 @@ return false;

return /[\r\n]/.test(string);
};
NewLineParser.prototype.seek = function (sourceCode) {
}
seek(sourceCode) {
while (this.test(sourceCode)) {
sourceCode.peek();
}
};
return NewLineParser;
}());
}
}
exports.NewLineParser = NewLineParser;
//# sourceMappingURL=NewLineParser.js.map

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractMarker } from "./AbstractMarker";
import { SourceCode } from "./SourceCode.js";
import { AbstractMarker } from "./AbstractMarker.js";
/**

@@ -18,1 +18,2 @@ * Mark pair character

}
//# sourceMappingURL=PairMaker.d.ts.map
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.PairMaker = void 0;
var logger_1 = require("../logger");
// @ts-ignore
var object_values_1 = __importDefault(require("object_values"));
const logger_js_1 = require("../logger.js");
/**

@@ -19,19 +14,18 @@ * Mark pair character

*/
var PairMaker = /** @class */ (function () {
function PairMaker() {
var _a;
this.pairs = (_a = {},
_a["\""] = "\"",
_a["\u300C"] = "\u300D",
_a["\uFF08"] = "\uFF09",
_a["("] = ")",
_a["\u300E"] = "\u300F",
_a["\u3010"] = "\u3011",
_a["\u300A"] = "\u300B",
_a);
class PairMaker {
constructor() {
this.pairs = {
[`"`]: `"`,
[`「`]: `」`,
[`(`]: `)`,
[`(`]: `)`,
[`『`]: `』`,
[`【`]: `】`,
[`《`]: `》`
};
this.pairKeys = Object.keys(this.pairs);
this.pairValues = object_values_1.default(this.pairs);
this.pairValues = Object.values(this.pairs);
}
PairMaker.prototype.mark = function (sourceCode) {
var string = sourceCode.read();
mark(sourceCode) {
const string = sourceCode.read();
if (!string) {

@@ -43,6 +37,6 @@ return;

if (!sourceCode.isInContext()) {
var keyIndex = this.pairKeys.indexOf(string);
const keyIndex = this.pairKeys.indexOf(string);
if (keyIndex !== -1) {
var key = this.pairKeys[keyIndex];
logger_1.debugLog("PairMaker -> enterContext: " + key + " ", { keyIndex: keyIndex });
const key = this.pairKeys[keyIndex];
(0, logger_js_1.debugLog)(`PairMaker -> enterContext: ${key} `, { keyIndex });
sourceCode.enterContext(key);

@@ -53,13 +47,12 @@ }

// check that string is end mark?
var valueIndex = this.pairValues.indexOf(string);
const valueIndex = this.pairValues.indexOf(string);
if (valueIndex !== -1) {
var key = this.pairKeys[valueIndex];
logger_1.debugLog("PairMaker -> leaveContext: " + this.pairValues[valueIndex] + " ", { valueIndex: valueIndex });
const key = this.pairKeys[valueIndex];
(0, logger_js_1.debugLog)(`PairMaker -> leaveContext: ${this.pairValues[valueIndex]} `, { valueIndex });
sourceCode.leaveContext(key);
}
}
};
return PairMaker;
}());
}
}
exports.PairMaker = PairMaker;
//# sourceMappingURL=PairMaker.js.map

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";
export declare const DefaultOptions: {

@@ -23,1 +23,2 @@ separatorCharacters: string[];

}
//# sourceMappingURL=SeparatorParser.d.ts.map

@@ -18,4 +18,4 @@ "use strict";

*/
var SeparatorParser = /** @class */ (function () {
function SeparatorParser(options) {
class SeparatorParser {
constructor(options) {
this.options = options;

@@ -25,3 +25,3 @@ this.separatorCharacters =

}
SeparatorParser.prototype.test = function (sourceCode) {
test(sourceCode) {
if (sourceCode.isInContext()) {

@@ -33,4 +33,4 @@ return false;

}
var firstChar = sourceCode.read();
var nextChar = sourceCode.read(1);
const firstChar = sourceCode.read();
const nextChar = sourceCode.read(1);
if (!firstChar) {

@@ -54,11 +54,10 @@ return false;

return true;
};
SeparatorParser.prototype.seek = function (sourceCode) {
}
seek(sourceCode) {
while (this.test(sourceCode)) {
sourceCode.peek();
}
};
return SeparatorParser;
}());
}
}
exports.SeparatorParser = SeparatorParser;
//# sourceMappingURL=SeparatorParser.js.map
import { TxtNode, TxtParentNode } from "@textlint/ast-node-types";
import { AbstractParser } from "./AbstractParser";
import { AbstractParser } from "./AbstractParser.js";
export declare class SourceCode {

@@ -47,3 +47,3 @@ private index;

*/
readNode(over?: number): false | TxtNode;
readNode(over?: number): false | import("@textlint/ast-node-types/lib/src/NodeType.js").Content;
/**

@@ -81,1 +81,2 @@ * Increment current index

}
//# sourceMappingURL=SourceCode.d.ts.map
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.SourceCode = void 0;
var structured_source_1 = __importDefault(require("structured-source"));
var SourceCode = /** @class */ (function () {
function SourceCode(input) {
const structured_source_1 = require("structured-source");
class SourceCode {
constructor(input) {
this.index = 0;

@@ -15,3 +12,3 @@ this.contexts = [];

this.textCharacters = input.split("");
this.source = new structured_source_1.default(input);
this.source = new structured_source_1.StructuredSource(input);
this.startOffset = 0;

@@ -28,7 +25,7 @@ this.firstChildPadding = 0;

// before line count of Paragraph node
var lineBreaks = Array.from(new Array(this.sourceNode.loc.start.line - 1)).fill("\n");
const lineBreaks = Array.from(new Array(this.sourceNode.loc.start.line - 1)).fill("\n");
// filled with dummy text
var offset = Array.from(new Array(this.startOffset - lineBreaks.length)).fill("∯");
const offset = Array.from(new Array(this.startOffset - lineBreaks.length)).fill("∯");
this.textCharacters = offset.concat(lineBreaks, input.raw.split(""));
this.source = new structured_source_1.default(this.textCharacters.join(""));
this.source = new structured_source_1.StructuredSource(this.textCharacters.join(""));
if (this.sourceNode.children[0]) {

@@ -45,44 +42,40 @@ // Header Node's children does not start with index 0

}
SourceCode.prototype.markContextRange = function (range) {
markContextRange(range) {
this.contextRanges.push(range);
};
SourceCode.prototype.isInContextRange = function () {
var offset = this.offset;
return this.contextRanges.some(function (range) {
}
isInContextRange() {
const offset = this.offset;
return this.contextRanges.some((range) => {
return range[0] <= offset && offset < range[1];
});
};
SourceCode.prototype.enterContext = function (context) {
}
enterContext(context) {
this.contexts.push(context);
};
SourceCode.prototype.isInContext = function (context) {
}
isInContext(context) {
if (!context) {
return this.contexts.length > 0;
}
return this.contexts.some(function (targetContext) { return targetContext === context; });
};
SourceCode.prototype.leaveContext = function (context) {
var index = this.contexts.lastIndexOf(context);
return this.contexts.some((targetContext) => targetContext === context);
}
leaveContext(context) {
const index = this.contexts.lastIndexOf(context);
if (index !== -1) {
this.contexts.splice(index, 1);
}
};
Object.defineProperty(SourceCode.prototype, "offset", {
/**
* Return current offset value
* @returns {number}
*/
get: function () {
return this.index + this.firstChildPadding;
},
enumerable: false,
configurable: true
});
}
/**
* Return current offset value
* @returns {number}
*/
get offset() {
return this.index + this.firstChildPadding;
}
/**
* Return current position object.
* It includes line, column, offset.
*/
SourceCode.prototype.now = function () {
var indexWithChildrenOffset = this.offset;
var position = this.source.indexToPosition(indexWithChildrenOffset);
now() {
const indexWithChildrenOffset = this.offset;
const position = this.source.indexToPosition(indexWithChildrenOffset);
return {

@@ -93,14 +86,10 @@ line: position.line,

};
};
Object.defineProperty(SourceCode.prototype, "hasEnd", {
/**
* Return true, no more read char
*/
get: function () {
return this.read() === false;
},
enumerable: false,
configurable: true
});
}
/**
* Return true, no more read char
*/
get hasEnd() {
return this.read() === false;
}
/**
* read char

@@ -110,5 +99,4 @@ * if can not read, return empty string

*/
SourceCode.prototype.read = function (over) {
if (over === void 0) { over = 0; }
var index = this.offset + over;
read(over = 0) {
const index = this.offset + over;
if (index < this.startOffset) {

@@ -121,3 +109,3 @@ return false;

return false;
};
}
/**

@@ -128,12 +116,11 @@ * read node

*/
SourceCode.prototype.readNode = function (over) {
if (over === void 0) { over = 0; }
readNode(over = 0) {
if (!this.sourceNode) {
return false;
}
var index = this.offset + over;
const index = this.offset + over;
if (index < this.startOffset) {
return false;
}
var matchNodeList = this.sourceNode.children.filter(function (node) {
const matchNodeList = this.sourceNode.children.filter((node) => {
// <p>[node]</p>

@@ -152,29 +139,29 @@ // ^

return false;
};
}
/**
* Increment current index
*/
SourceCode.prototype.peek = function () {
peek() {
this.index += 1;
};
}
/**
* Increment node range
*/
SourceCode.prototype.peekNode = function (node) {
peekNode(node) {
this.index += node.range[1] - node.range[0];
};
}
/**
* Seek and Peek
*/
SourceCode.prototype.seekNext = function (parser) {
var startPosition = this.now();
seekNext(parser) {
const startPosition = this.now();
parser.seek(this);
var endPosition = this.now();
var value = this.sliceRange(startPosition.offset, endPosition.offset);
const endPosition = this.now();
const value = this.sliceRange(startPosition.offset, endPosition.offset);
return {
value: value,
startPosition: startPosition,
endPosition: endPosition
value,
startPosition,
endPosition
};
};
}
/**

@@ -186,8 +173,7 @@ * Slice text form the range.

*/
SourceCode.prototype.sliceRange = function (start, end) {
sliceRange(start, end) {
return this.textCharacters.slice(start, end).join("");
};
return SourceCode;
}());
}
}
exports.SourceCode = SourceCode;
//# sourceMappingURL=SourceCode.js.map

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";
/**

@@ -10,1 +10,2 @@ * Space parser

}
//# sourceMappingURL=SpaceParser.d.ts.map

@@ -7,7 +7,5 @@ "use strict";

*/
var SpaceParser = /** @class */ (function () {
function SpaceParser() {
}
SpaceParser.prototype.test = function (sourceCode) {
var string = sourceCode.read();
class SpaceParser {
test(sourceCode) {
const string = sourceCode.read();
if (!string) {

@@ -18,11 +16,10 @@ return false;

return /[^\S\n\r]/.test(string);
};
SpaceParser.prototype.seek = function (sourceCode) {
}
seek(sourceCode) {
while (this.test(sourceCode)) {
sourceCode.peek();
}
};
return SpaceParser;
}());
}
}
exports.SpaceParser = SpaceParser;
//# sourceMappingURL=SpaceParser.js.map

@@ -1,26 +0,22 @@

import { TxtNode, TxtParentNode, TxtTextNode } from "@textlint/ast-node-types";
import { SourceCode } from "./parser/SourceCode";
import { AbstractParser } from "./parser/AbstractParser";
import { SeparatorParserOptions } from "./parser/SeparatorParser";
export declare const Syntax: {
WhiteSpace: string;
Punctuation: string;
Sentence: string;
Str: string;
import type { AnyTxtNode, TxtParentNode, TxtStrNode, TxtTextNode } from "@textlint/ast-node-types";
import { SourceCode } from "./parser/SourceCode.js";
import { AbstractParser } from "./parser/AbstractParser.js";
import { SeparatorParserOptions } from "./parser/SeparatorParser.js";
export declare const SentenceSplitterSyntax: {
readonly WhiteSpace: "WhiteSpace";
readonly Punctuation: "Punctuation";
readonly Sentence: "Sentence";
readonly Str: "Str";
};
export interface ToTypeNode<T extends string> extends TxtTextNode {
readonly type: T;
}
export interface WhiteSpaceNode extends TxtTextNode {
export type TxtSentenceNode = Omit<TxtParentNode, "type"> & {
readonly type: "Sentence";
};
export type TxtWhiteSpaceNode = Omit<TxtTextNode, "type"> & {
readonly type: "WhiteSpace";
}
export interface PunctuationNode extends TxtTextNode {
};
export type TxtPunctuationNode = Omit<TxtTextNode, "type"> & {
readonly type: "Punctuation";
}
export interface StrNode extends TxtTextNode {
readonly type: "Str";
}
export interface SentenceNode extends TxtParentNode {
readonly type: "Sentence";
}
};
export type SentenceSplitterTxtNode = TxtSentenceNode | TxtWhiteSpaceNode | TxtPunctuationNode | TxtStrNode | AnyTxtNode;
export type SentenceSplitterTxtNodeType = (typeof SentenceSplitterSyntax)[keyof typeof SentenceSplitterSyntax];
export declare class SplitParser {

@@ -31,5 +27,5 @@ private nodeList;

constructor(text: string | TxtParentNode);
get current(): TxtParentNode | undefined;
pushNodeToCurrent(node: TxtNode): void;
open(parentNode: TxtParentNode): void;
get current(): TxtSentenceNode | undefined;
pushNodeToCurrent(node: SentenceSplitterTxtNode): void;
open(parentNode: TxtSentenceNode): void;
isOpened(): boolean;

@@ -44,3 +40,3 @@ nextLine(parser: AbstractParser): {

close(parser: AbstractParser): void;
toList(): (TxtParentNode | TxtNode)[];
toList(): SentenceSplitterTxtNode[];
}

@@ -56,6 +52,3 @@ export interface splitOptions {

*/
export declare function split(text: string, options?: splitOptions): (TxtParentNode | TxtNode)[];
export interface SentenceParentNode extends TxtNode {
children: Array<TxtNode | TxtTextNode | SentenceNode>;
}
export declare function split(text: string, options?: splitOptions): SentenceSplitterTxtNode[];
/**

@@ -66,42 +59,3 @@ * Convert Paragraph Node to Paragraph node that convert children to Sentence node

*/
export declare function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): SentenceParentNode;
/**
* WhiteSpace is space or linebreak
*/
export declare function createWhiteSpaceNode(text: string, startPosition: {
line: number;
column: number;
offset: number;
}, endPosition: {
line: number;
column: number;
offset: number;
}): ToTypeNode<"WhiteSpace">;
export declare function createPunctuationNode(text: string, startPosition: {
line: number;
column: number;
offset: number;
}, endPosition: {
line: number;
column: number;
offset: number;
}): PunctuationNode;
export declare function createTextNode(text: string, startPosition: {
line: number;
column: number;
offset: number;
}, endPosition: {
line: number;
column: number;
offset: number;
}): StrNode;
export declare function createEmptySentenceNode(): SentenceNode;
export declare function createNode<T extends string>(type: T, text: string, startPosition: {
line: number;
column: number;
offset: number;
}, endPosition: {
line: number;
column: number;
offset: number;
}): ToTypeNode<T>;
export declare function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): SentenceSplitterTxtNode;
//# sourceMappingURL=sentence-splitter.d.ts.map

@@ -1,26 +0,14 @@

// LICENSE : MIT
"use strict";
var __assign = (this && this.__assign) || function () {
__assign = Object.assign || function(t) {
for (var s, i = 1, n = arguments.length; i < n; i++) {
s = arguments[i];
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
t[p] = s[p];
}
return t;
};
return __assign.apply(this, arguments);
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createNode = exports.createEmptySentenceNode = exports.createTextNode = exports.createPunctuationNode = exports.createWhiteSpaceNode = exports.splitAST = exports.split = exports.SplitParser = exports.Syntax = void 0;
var ast_node_types_1 = require("@textlint/ast-node-types");
var SourceCode_1 = require("./parser/SourceCode");
var NewLineParser_1 = require("./parser/NewLineParser");
var SpaceParser_1 = require("./parser/SpaceParser");
var SeparatorParser_1 = require("./parser/SeparatorParser");
var AnyValueParser_1 = require("./parser/AnyValueParser");
var AbbrMarker_1 = require("./parser/AbbrMarker");
var PairMaker_1 = require("./parser/PairMaker");
var logger_1 = require("./logger");
exports.Syntax = {
exports.splitAST = exports.split = exports.SplitParser = exports.SentenceSplitterSyntax = void 0;
const ast_node_types_1 = require("@textlint/ast-node-types");
const SourceCode_js_1 = require("./parser/SourceCode.js");
const NewLineParser_js_1 = require("./parser/NewLineParser.js");
const SpaceParser_js_1 = require("./parser/SpaceParser.js");
const SeparatorParser_js_1 = require("./parser/SeparatorParser.js");
const AnyValueParser_js_1 = require("./parser/AnyValueParser.js");
const AbbrMarker_js_1 = require("./parser/AbbrMarker.js");
const PairMaker_js_1 = require("./parser/PairMaker.js");
const logger_js_1 = require("./logger.js");
exports.SentenceSplitterSyntax = {
WhiteSpace: "WhiteSpace",

@@ -31,17 +19,13 @@ Punctuation: "Punctuation",

};
var SplitParser = /** @class */ (function () {
function SplitParser(text) {
class SplitParser {
constructor(text) {
this.nodeList = [];
this.results = [];
this.source = new SourceCode_1.SourceCode(text);
this.source = new SourceCode_js_1.SourceCode(text);
}
Object.defineProperty(SplitParser.prototype, "current", {
get: function () {
return this.nodeList[this.nodeList.length - 1];
},
enumerable: false,
configurable: true
});
SplitParser.prototype.pushNodeToCurrent = function (node) {
var current = this.current;
get current() {
return this.nodeList[this.nodeList.length - 1];
}
pushNodeToCurrent(node) {
const current = this.current;
if (current) {

@@ -54,30 +38,30 @@ current.children.push(node);

}
};
}
// open with ParentNode
SplitParser.prototype.open = function (parentNode) {
open(parentNode) {
this.nodeList.push(parentNode);
};
SplitParser.prototype.isOpened = function () {
}
isOpened() {
return this.nodeList.length > 0;
};
SplitParser.prototype.nextLine = function (parser) {
var _a = this.source.seekNext(parser), value = _a.value, startPosition = _a.startPosition, endPosition = _a.endPosition;
}
nextLine(parser) {
const { value, startPosition, endPosition } = this.source.seekNext(parser);
this.pushNodeToCurrent(createWhiteSpaceNode(value, startPosition, endPosition));
return endPosition;
};
SplitParser.prototype.nextSpace = function (parser) {
var _a = this.source.seekNext(parser), value = _a.value, startPosition = _a.startPosition, endPosition = _a.endPosition;
this.pushNodeToCurrent(createNode("WhiteSpace", value, startPosition, endPosition));
};
SplitParser.prototype.nextValue = function (parser) {
var _a = this.source.seekNext(parser), value = _a.value, startPosition = _a.startPosition, endPosition = _a.endPosition;
}
nextSpace(parser) {
const { value, startPosition, endPosition } = this.source.seekNext(parser);
this.pushNodeToCurrent(createWhiteSpaceNode(value, startPosition, endPosition));
}
nextValue(parser) {
const { value, startPosition, endPosition } = this.source.seekNext(parser);
this.pushNodeToCurrent(createTextNode(value, startPosition, endPosition));
};
}
// close current Node and remove it from list
SplitParser.prototype.close = function (parser) {
var _a = this.source.seekNext(parser), value = _a.value, startPosition = _a.startPosition, endPosition = _a.endPosition;
close(parser) {
const { value, startPosition, endPosition } = this.source.seekNext(parser);
if (startPosition.offset !== endPosition.offset) {
this.pushNodeToCurrent(createPunctuationNode(value, startPosition, endPosition));
}
var currentNode = this.nodeList.pop();
const currentNode = this.nodeList.pop();
if (!currentNode) {

@@ -89,29 +73,30 @@ return;

}
var firstChildNode = currentNode.children[0];
var endNow = this.source.now();
const firstChildNode = currentNode.children[0];
const endNow = this.source.now();
currentNode.loc = {
start: firstChildNode.loc.start,
end: nowToLoc(endNow)
end: {
line: endNow.line,
column: endNow.column
}
};
var rawValue = this.source.sliceRange(firstChildNode.range[0], endNow.offset);
const rawValue = this.source.sliceRange(firstChildNode.range[0], endNow.offset);
currentNode.range = [firstChildNode.range[0], endNow.offset];
currentNode.raw = rawValue;
this.results.push(currentNode);
};
SplitParser.prototype.toList = function () {
}
toList() {
return this.results;
};
return SplitParser;
}());
}
}
exports.SplitParser = SplitParser;
var createParsers = function (options) {
if (options === void 0) { options = {}; }
var newLine = new NewLineParser_1.NewLineParser();
var space = new SpaceParser_1.SpaceParser();
var separator = new SeparatorParser_1.SeparatorParser(options.SeparatorParser);
var abbrMarker = new AbbrMarker_1.AbbrMarker();
var pairMaker = new PairMaker_1.PairMaker();
const createParsers = (options = {}) => {
const newLine = new NewLineParser_js_1.NewLineParser();
const space = new SpaceParser_js_1.SpaceParser();
const separator = new SeparatorParser_js_1.SeparatorParser(options.SeparatorParser);
const abbrMarker = new AbbrMarker_js_1.AbbrMarker();
const pairMaker = new PairMaker_js_1.PairMaker();
// anyValueParser has multiple parsers and markers.
// anyValueParser eats any value until it reaches a value that another parser matches.
var anyValueParser = new AnyValueParser_1.AnyValueParser({
const anyValueParser = new AnyValueParser_js_1.AnyValueParser({
parsers: [newLine, separator],

@@ -121,7 +106,7 @@ markers: [abbrMarker, pairMaker]

return {
newLine: newLine,
space: space,
separator: separator,
abbrMarker: abbrMarker,
anyValueParser: anyValueParser
newLine,
space,
separator,
abbrMarker,
anyValueParser
};

@@ -133,5 +118,5 @@ };

function split(text, options) {
var _a = createParsers(options), newLine = _a.newLine, space = _a.space, separator = _a.separator, anyValueParser = _a.anyValueParser;
var splitParser = new SplitParser(text);
var sourceCode = splitParser.source;
const { newLine, space, separator, anyValueParser } = createParsers(options);
const splitParser = new SplitParser(text);
const sourceCode = splitParser.source;
while (!sourceCode.hasEnd) {

@@ -164,7 +149,7 @@ if (newLine.test(sourceCode)) {

function splitAST(paragraphNode, options) {
var _a = createParsers(options), newLine = _a.newLine, space = _a.space, separator = _a.separator, anyValueParser = _a.anyValueParser;
var splitParser = new SplitParser(paragraphNode);
var sourceCode = splitParser.source;
const { newLine, space, separator, anyValueParser } = createParsers(options);
const splitParser = new SplitParser(paragraphNode);
const sourceCode = splitParser.source;
while (!sourceCode.hasEnd) {
var currentNode = sourceCode.readNode();
const currentNode = sourceCode.readNode();
if (!currentNode) {

@@ -175,11 +160,11 @@ break;

if (space.test(sourceCode)) {
logger_1.nodeLog("space", sourceCode);
(0, logger_js_1.nodeLog)("space", sourceCode);
splitParser.nextSpace(space);
}
else if (separator.test(sourceCode)) {
logger_1.nodeLog("separator", sourceCode);
(0, logger_js_1.nodeLog)("separator", sourceCode);
splitParser.close(separator);
}
else if (newLine.test(sourceCode)) {
logger_1.nodeLog("newline", sourceCode);
(0, logger_js_1.nodeLog)("newline", sourceCode);
splitParser.nextLine(newLine);

@@ -189,6 +174,6 @@ }

if (!splitParser.isOpened()) {
logger_1.nodeLog("open -> createEmptySentenceNode()");
(0, logger_js_1.nodeLog)("open -> createEmptySentenceNode()");
splitParser.open(createEmptySentenceNode());
}
logger_1.nodeLog("other str value", sourceCode);
(0, logger_js_1.nodeLog)("other str value", sourceCode);
splitParser.nextValue(anyValueParser);

@@ -198,3 +183,3 @@ }

else if (currentNode.type === ast_node_types_1.ASTNodeTypes.Break) {
logger_1.nodeLog("break", sourceCode);
(0, logger_js_1.nodeLog)("break", sourceCode);
// Break

@@ -207,6 +192,6 @@ // https://github.com/azu/sentence-splitter/issues/23

if (!splitParser.isOpened()) {
logger_1.nodeLog("open -> createEmptySentenceNode()");
(0, logger_js_1.nodeLog)("open -> createEmptySentenceNode()");
splitParser.open(createEmptySentenceNode());
}
logger_1.nodeLog("other node", sourceCode);
(0, logger_js_1.nodeLog)("other node", sourceCode);
splitParser.pushNodeToCurrent(currentNode);

@@ -216,7 +201,10 @@ sourceCode.peekNode(currentNode);

}
logger_1.nodeLog("end separator");
(0, logger_js_1.nodeLog)("end separator");
// It follows some text that does not end with a period.
// TODO: space is correct?
splitParser.close(space);
return __assign(__assign({}, paragraphNode), { children: splitParser.toList() });
return {
...paragraphNode,
children: splitParser.toList()
};
}

@@ -228,34 +216,51 @@ exports.splitAST = splitAST;

function createWhiteSpaceNode(text, startPosition, endPosition) {
return createNode("WhiteSpace", text, startPosition, endPosition);
return {
type: exports.SentenceSplitterSyntax.WhiteSpace,
raw: text,
value: text,
loc: {
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},
range: [startPosition.offset, endPosition.offset]
};
}
exports.createWhiteSpaceNode = createWhiteSpaceNode;
function createPunctuationNode(text, startPosition, endPosition) {
return createNode("Punctuation", text, startPosition, endPosition);
}
exports.createPunctuationNode = createPunctuationNode;
function createTextNode(text, startPosition, endPosition) {
return createNode("Str", text, startPosition, endPosition);
}
exports.createTextNode = createTextNode;
function createEmptySentenceNode() {
return {
type: "Sentence",
raw: "",
type: exports.SentenceSplitterSyntax.Punctuation,
raw: text,
value: text,
loc: {
start: { column: NaN, line: NaN },
end: { column: NaN, line: NaN }
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},
range: [NaN, NaN],
children: []
range: [startPosition.offset, endPosition.offset]
};
}
exports.createEmptySentenceNode = createEmptySentenceNode;
function createNode(type, text, startPosition, endPosition) {
function createTextNode(text, startPosition, endPosition) {
return {
type: type,
type: exports.SentenceSplitterSyntax.Str,
raw: text,
value: text,
loc: {
start: nowToLoc(startPosition),
end: nowToLoc(endPosition)
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},

@@ -265,9 +270,14 @@ range: [startPosition.offset, endPosition.offset]

}
exports.createNode = createNode;
function nowToLoc(now) {
function createEmptySentenceNode() {
return {
line: now.line,
column: now.column
type: exports.SentenceSplitterSyntax.Sentence,
raw: "",
loc: {
start: { column: NaN, line: NaN },
end: { column: NaN, line: NaN }
},
range: [NaN, NaN],
children: []
};
}
//# sourceMappingURL=sentence-splitter.js.map
{
"name": "sentence-splitter",
"version": "3.2.3",
"version": "4.0.0",
"description": "split {japanese, english} text into sentences.",

@@ -10,17 +10,31 @@ "keywords": [

],
"homepage": "https://github.com/azu/sentence-splitter",
"homepage": "https://github.com/textlint-rule/sentence-splitter",
"bugs": {
"url": "https://github.com/azu/sentence-splitter/issues"
"url": "https://github.com/textlint-rule/sentence-splitter/issues"
},
"repository": {
"type": "git",
"url": "git+https://github.com/azu/sentence-splitter.git"
"url": "git+https://github.com/textlint-rule/sentence-splitter.git"
},
"license": "MIT",
"author": "azu",
"main": "lib/sentence-splitter.js",
"types": "lib/sentence-splitter.d.ts",
"bin": {
"sentence-splitter": "./bin/cmd.js"
"sideEffects": false,
"type": "module",
"exports": {
".": {
"import": {
"types": "./module/sentence-splitter.d.ts",
"default": "./module/sentence-splitter.js"
},
"require": {
"types": "./lib/sentence-splitter.d.ts",
"default": "./lib/sentence-splitter.js"
},
"default": "./lib/sentence-splitter.js"
},
"./package.json": "./package.json"
},
"main": "./lib/sentence-splitter.js",
"module": "./module/sentence-splitter.js",
"types": "./module/sentence-splitter.d.ts",
"directories": {

@@ -30,23 +44,26 @@ "test": "test"

"files": [
"bin",
"lib",
"src"
"bin/",
"lib/",
"module/",
"src/"
],
"scripts": {
"build": "tsc -p .",
"watch": "tsc -p . --watch",
"build": "tsc -p . && tsc -p ./tsconfig.cjs.json && tsconfig-to-dual-package",
"clean": "git clean -fx lib/ module/",
"createInputJson": "ts-node scripts/generate-input-json-from-_input-md.ts",
"dev": "vite",
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,css}\"",
"postcommit": "git reset",
"precommit": "lint-staged",
"prepare": "git config --local core.hooksPath .githooks",
"prepublishOnly": "npm run clean && npm run build",
"prepublish": "npm run --if-present build",
"test": "mocha \"test/**/*.{js,ts}\"",
"createInputJson": "ts-node scripts/generate-input-json-from-_input-md.ts",
"site:build": "vite build",
"test": "mocha",
"updateSnapshot": "UPDATE_SNAPSHOT=1 npm test",
"prettier": "prettier --write '**/*.{js,jsx,ts,tsx,css}'",
"precommit": "lint-staged",
"postcommit": "git reset",
"dev": "vite",
"site:build": "vite build"
"watch": "tsc -p . --watch"
},
"lint-staged": {
"*.{js,jsx,ts,tsx,css}": [
"prettier --write",
"git add"
"prettier --write"
]

@@ -56,28 +73,24 @@ },

"printWidth": 120,
"tabWidth": 4
"singleQuote": false,
"tabWidth": 4,
"trailingComma": "none"
},
"dependencies": {
"@textlint/ast-node-types": "^4.4.2",
"concat-stream": "^2.0.0",
"object_values": "^0.1.2",
"structured-source": "^3.0.2"
"@textlint/ast-node-types": "^13.2.0",
"structured-source": "^4.0.0"
},
"devDependencies": {
"@parcel/transformer-typescript-tsc": "^2.0.0-alpha.3",
"@snowpack/plugin-typescript": "^1.1.1",
"@textlint/markdown-to-ast": "^6.1.6",
"@types/mocha": "^8.0.4",
"@types/node": "^14.14.10",
"@types/structured-source": "^3.0.0",
"husky": "^4.3.0",
"lint-staged": "^10.5.2",
"mocha": "^8.2.1",
"power-assert": "^1.6.1",
"prettier": "^2.2.1",
"ts-node": "^9.1.0",
"ts-node-test-register": "^9.0.0",
"typescript": "^4.1.2",
"vite": "^2.3.3"
"@textlint/markdown-to-ast": "^13.2.0",
"@types/mocha": "^10.0.1",
"@types/node": "^18.13.0",
"lint-staged": "^13.1.1",
"mocha": "^10.2.0",
"prettier": "^2.8.4",
"ts-node": "^10.9.1",
"tsconfig-to-dual-package": "^1.1.1",
"typescript": "^4.9.5",
"vite": "^4.1.1"
},
"packageManager": "yarn@1.22.19",
"email": "azuciao@gmail.com"
}

@@ -9,14 +9,2 @@ # sentence-splitter

**Requirements:**
- `Array.from`
- `Array#fill`
### CLI
$ npm install -g sentence-splitter
$ echo "This is a pen. But, this is not pen" | sentence-splitter
This is a pen.
But This is not pen
## Usage

@@ -43,300 +31,18 @@

*/
export declare function split(text: string, options?: splitOptions): (TxtParentNode | TxtNode)[];
export declare function split(text: string, options?: splitOptions): SentenceSplitterTxtNode[];
/**
* Convert Paragraph Node to Paragraph node that convert children to Sentence node
* This Node is based on TxtAST.
* Convert Paragraph Node to Sentence node.
* Paragraph Node is defined in textlint's TxtAST.
* See https://github.com/textlint/textlint/blob/master/docs/txtnode.md
*/
export declare function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): TxtParentNode;
export declare function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): SentenceSplitterTxtNode;
```
`TxtParentNode` and `TxtNode` is defined
in [TxtAST](https://github.com/textlint/textlint/blob/master/docs/txtnode.md "TxtAST").
See also [TxtAST](https://github.com/textlint/textlint/blob/master/docs/txtnode.md "TxtAST").
### Example
```js
import { split, Syntax } from "sentence-splitter";
- Online playground: <https://sentence-splitter.netlify.app/>
let sentences = split(`There it is! I found it.
Hello World. My name is Jonas.`);
console.log(JSON.stringify(sentences, null, 4));
/*
{
"type": "Paragraph",
"children": [
{
"type": "Sentence",
"raw": "There it is!",
"value": "There it is!",
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 1,
"column": 12
}
},
"range": [
0,
12
],
"children": [
{
"type": "Str",
"raw": "There it is",
"value": "There it is",
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 1,
"column": 11
}
},
"range": [
0,
11
]
},
{
"type": "Punctuation",
"raw": "!",
"value": "!",
"loc": {
"start": {
"line": 1,
"column": 11
},
"end": {
"line": 1,
"column": 12
}
},
"range": [
11,
12
]
}
]
},
{
"type": "WhiteSpace",
"raw": " ",
"value": " ",
"loc": {
"start": {
"line": 1,
"column": 12
},
"end": {
"line": 1,
"column": 13
}
},
"range": [
12,
13
]
},
{
"type": "Sentence",
"raw": "I found it.\nHello World.",
"value": "I found it.\nHello World.",
"loc": {
"start": {
"line": 1,
"column": 13
},
"end": {
"line": 2,
"column": 12
}
},
"range": [
13,
37
],
"children": [
{
"type": "Str",
"raw": "I found it.",
"value": "I found it.",
"loc": {
"start": {
"line": 1,
"column": 13
},
"end": {
"line": 1,
"column": 24
}
},
"range": [
13,
24
]
},
{
"type": "WhiteSpace",
"raw": "\n",
"value": "\n",
"loc": {
"start": {
"line": 1,
"column": 24
},
"end": {
"line": 2,
"column": 0
}
},
"range": [
24,
25
]
},
{
"type": "Str",
"raw": "Hello World",
"value": "Hello World",
"loc": {
"start": {
"line": 2,
"column": 0
},
"end": {
"line": 2,
"column": 11
}
},
"range": [
25,
36
]
},
{
"type": "Punctuation",
"raw": ".",
"value": ".",
"loc": {
"start": {
"line": 2,
"column": 11
},
"end": {
"line": 2,
"column": 12
}
},
"range": [
36,
37
]
}
]
},
{
"type": "WhiteSpace",
"raw": " ",
"value": " ",
"loc": {
"start": {
"line": 2,
"column": 12
},
"end": {
"line": 2,
"column": 13
}
},
"range": [
37,
38
]
},
{
"type": "Sentence",
"raw": "My name is Jonas.",
"value": "My name is Jonas.",
"loc": {
"start": {
"line": 2,
"column": 13
},
"end": {
"line": 2,
"column": 30
}
},
"range": [
38,
55
],
"children": [
{
"type": "Str",
"raw": "My name is Jonas",
"value": "My name is Jonas",
"loc": {
"start": {
"line": 2,
"column": 13
},
"end": {
"line": 2,
"column": 29
}
},
"range": [
38,
54
]
},
{
"type": "Punctuation",
"raw": ".",
"value": ".",
"loc": {
"start": {
"line": 2,
"column": 29
},
"end": {
"line": 2,
"column": 30
}
},
"range": [
54,
55
]
}
]
}
],
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 2,
"column": 30
}
},
"range": [
0,
55
],
"raw": "There it is! I found it.\nHello World. My name is Jonas."
}
*/
```
## Node

@@ -348,3 +54,3 @@

- `Str`: Str node has `value`
- `Str`: Str node has `value`. It is the same as TxtAST's `Str` node.
- `Sentence`: Sentence Node has `Str`, `WhiteSpace`, or `Punctuation` nodes as children

@@ -354,8 +60,8 @@ - `WhiteSpace`: WhiteSpace Node has `\n`.

Get these `Syntax` constants value from the module:
Get these `SentenceSplitterSyntax` constant values from the module:
```js
import { Syntax } from "sentence-splitter";
import { SentenceSplitterSyntax } from "sentence-splitter";
console.log(Syntax.Sentence);// "Sentence"
console.log(SentenceSplitterSyntax.Sentence);// "Sentence"
```

@@ -366,17 +72,11 @@

```ts
export interface WhiteSpaceNode extends TxtTextNode {
export type TxtSentenceNode = Omit<TxtParentNode, "type"> & {
readonly type: "Sentence";
};
export type TxtWhiteSpaceNode = Omit<TxtTextNode, "type"> & {
readonly type: "WhiteSpace";
}
export interface PunctuationNode extends TxtTextNode {
};
export type TxtPunctuationNode = Omit<TxtTextNode, "type"> & {
readonly type: "Punctuation";
}
export interface StrNode extends TxtTextNode {
readonly type: "Str";
}
export interface SentenceNode extends TxtParentNode {
readonly type: "Sentence";
}
};
```

@@ -390,11 +90,16 @@

- Example: <https://sentence-splitter.netlify.app/#This%20is%201st%20sentence.%20This%20is%202nd%20sentence.>
> This is 1st sentence. This is 2nd sentence.
```
<WhiteSpace />
<Sentence>
<Str />
<Punctuation />
<Str />
<Punctuation />
<Str /> |This is 1st sentence|
<Punctuation /> |.|
</Sentence>
<WhiteSpace /> | |
<Sentence>
<Str /> |This is 2nd sentence|
<Punctuation /> |.|
</Sentence>
<WhiteSpace />
```

@@ -405,9 +110,9 @@

### in textlint rule
### For textlint rule
You can use `splitAST` in textlint rule.
`splitAST` function can preverse original AST's position unlike `split` function.
You can use `splitAST` in a textlint rule.
Unlike the `split` function, the `splitAST` function preserves the original AST's positions.
```ts
import { splitAST, Syntax as SentenceSyntax } from "sentence-splitter";
import { splitAST, SentenceSplitterSyntax } from "sentence-splitter";

@@ -418,4 +123,4 @@ export default function(context, options = {}) {

[Syntax.Paragraph](node) {
const resultNode = splitAST(node);
const sentenceNodes = resultNode.children.filter(childNode => childNode.type === SentenceSyntax.Sentence);
const parsedNode = splitAST(node);
const sentenceNodes = parsedNode.children.filter(childNode => childNode.type === SentenceSplitterSyntax.Sentence);
console.log(sentenceNodes); // => Sentence nodes

@@ -427,3 +132,3 @@ }

Example
Examples

@@ -434,3 +139,3 @@ - [textlint-ja/textlint-rule-max-ten: textlint rule that limit maxinum ten(、) count of sentence.](https://github.com/textlint-ja/textlint-rule-max-ten)

This library use ["Golden Rule" test](test/pragmatic_segmenter/test.ts) of `pragmatic_segmenter`.
This library uses the ["Golden Rule" test](test/pragmatic_segmenter/test.ts) of `pragmatic_segmenter` for testing.

@@ -437,0 +142,0 @@ - [diasks2/pragmatic_segmenter: Pragmatic Segmenter is a rule-based sentence boundary detection gem that works out-of-the-box across many languages.](https://github.com/diasks2/pragmatic_segmenter "diasks2/pragmatic_segmenter: Pragmatic Segmenter is a rule-based sentence boundary detection gem that works out-of-the-box across many languages.")

@@ -1,5 +0,6 @@

import { SourceCode } from "./parser/SourceCode";
import { SourceCode } from "./parser/SourceCode.js";
const isDebug = typeof process === "object" && process?.env?.DEBUG === "sentence-splitter";
export function seekLog(offset: number, current?: string | boolean) {
if (process.env.DEBUG !== "sentence-splitter") {
if (!isDebug) {
return;

@@ -11,3 +12,3 @@ }

export function nodeLog(message: string, sourceCode?: SourceCode) {
if (process.env.DEBUG !== "sentence-splitter") {
if (!isDebug) {
return;

@@ -28,14 +29,27 @@ }

const nodeValue = currentNode.raw.replace(/\n/g, "\\n");
console.log("sentence-splitter: " + sourceCode.offset + " " + message + " |" + currentChar + "| " + " ".repeat(RowLength - currentChar.length - message.length) + nodeValue);
console.log(
"sentence-splitter: " +
sourceCode.offset +
" " +
message +
" |" +
currentChar +
"| " +
" ".repeat(RowLength - currentChar.length - message.length) +
nodeValue
);
}
export function debugLog(...message: any[]) {
if (process.env.DEBUG !== "sentence-splitter") {
if (!isDebug) {
return;
}
console.log("sentence-splitter: ", ...message.map((m) => {
// make one line if it is multiline
return typeof m === "string" ? m.replace(/\n/g, "\\n") : m;
}));
console.log(
"sentence-splitter: ",
...message.map((m) => {
// make one line if it is multiline
return typeof m === "string" ? m.replace(/\n/g, "\\n") : m;
})
);
}

@@ -1,5 +0,5 @@

import { SourceCode } from "./SourceCode";
import { Language } from "./lang/LanguageInterface";
import { English } from "./lang/English";
import { AbstractMarker } from "./AbstractMarker";
import { SourceCode } from "./SourceCode.js";
import { Language } from "./lang/LanguageInterface.js";
import { English } from "./lang/English.js";
import { AbstractMarker } from "./AbstractMarker.js";

@@ -87,3 +87,3 @@ const isCapitalized = (text: string) => {

// Example: Yahoo!
const isMatchedEXCALAMATION_WORDS = this.lang.EXCALAMATION_WORDS.some(abbr => {
const isMatchedEXCALAMATION_WORDS = this.lang.EXCALAMATION_WORDS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);

@@ -96,3 +96,3 @@ });

// Example: Mr. Fuji
const isMatchedPREPOSITIVE_ABBREVIATIONS = this.lang.PREPOSITIVE_ABBREVIATIONS.some(abbr => {
const isMatchedPREPOSITIVE_ABBREVIATIONS = this.lang.PREPOSITIVE_ABBREVIATIONS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);

@@ -104,3 +104,3 @@ });

// ABBREVIATIONS
const isMatched = this.lang.ABBREVIATIONS.some(abbr => {
const isMatched = this.lang.ABBREVIATIONS.some((abbr) => {
return compareNoCaseSensitive(abbr, currentWord);

@@ -107,0 +107,0 @@ });

@@ -1,2 +0,2 @@

import { SourceCode } from "./SourceCode";
import { SourceCode } from "./SourceCode.js";

@@ -3,0 +3,0 @@ export abstract class AbstractMarker {

@@ -1,2 +0,2 @@

import { SourceCode } from "./SourceCode";
import { SourceCode } from "./SourceCode.js";

@@ -3,0 +3,0 @@ export abstract class AbstractParser {

@@ -1,5 +0,5 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { AbstractMarker } from "./AbstractMarker";
import { seekLog } from "../logger";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";
import { AbstractMarker } from "./AbstractMarker.js";
import { seekLog } from "../logger.js";

@@ -30,3 +30,3 @@ export interface AnyValueParserOptions {

}
return this.parsers.every(parser => !parser.test(sourceCode));
return this.parsers.every((parser) => !parser.test(sourceCode));
}

@@ -39,3 +39,3 @@

while (this.test(sourceCode)) {
this.markers.forEach(marker => marker.mark(sourceCode));
this.markers.forEach((marker) => marker.mark(sourceCode));
sourceCode.peek();

@@ -52,3 +52,3 @@ }

seekLog(sourceCode.offset, sourceCode.read());
this.markers.forEach(marker => marker.mark(sourceCode));
this.markers.forEach((marker) => marker.mark(sourceCode));
sourceCode.peek();

@@ -55,0 +55,0 @@ }

@@ -1,2 +0,2 @@

import { Language } from "./LanguageInterface";
import type { Language } from "./LanguageInterface.js";

@@ -3,0 +3,0 @@ export const English: Language = {

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";

@@ -4,0 +4,0 @@ /**

@@ -1,6 +0,4 @@

import { SourceCode } from "./SourceCode";
import { AbstractMarker } from "./AbstractMarker";
import { debugLog } from "../logger";
// @ts-ignore
import values from "object_values";
import { SourceCode } from "./SourceCode.js";
import { AbstractMarker } from "./AbstractMarker.js";
import { debugLog } from "../logger.js";

@@ -27,3 +25,3 @@ /**

private pairKeys = Object.keys(this.pairs);
private pairValues = values(this.pairs);
private pairValues = Object.values(this.pairs);

@@ -30,0 +28,0 @@ mark(sourceCode: SourceCode): void {

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";

@@ -4,0 +4,0 @@ export const DefaultOptions = {

import { TxtNode, TxtParentNode } from "@textlint/ast-node-types";
import { AbstractParser } from "./AbstractParser";
import StructureSource from "structured-source";
import { AbstractParser } from "./AbstractParser.js";
import { StructuredSource } from "structured-source";

@@ -18,3 +18,3 @@ export class SourceCode {

this.textCharacters = input.split("");
this.source = new StructureSource(input);
this.source = new StructuredSource(input);
this.startOffset = 0;

@@ -34,3 +34,3 @@ this.firstChildPadding = 0;

this.textCharacters = offset.concat(lineBreaks, input.raw.split(""));
this.source = new StructureSource(this.textCharacters.join(""));
this.source = new StructuredSource(this.textCharacters.join(""));
if (this.sourceNode.children[0]) {

@@ -53,3 +53,3 @@ // Header Node's children does not start with index 0

const offset = this.offset;
return this.contextRanges.some(range => {
return this.contextRanges.some((range) => {
return range[0] <= offset && offset < range[1];

@@ -67,3 +67,3 @@ });

}
return this.contexts.some(targetContext => targetContext === context);
return this.contexts.some((targetContext) => targetContext === context);
}

@@ -136,3 +136,3 @@

}
const matchNodeList = this.sourceNode.children.filter(node => {
const matchNodeList = this.sourceNode.children.filter((node) => {
// <p>[node]</p>

@@ -170,5 +170,3 @@ // ^

*/
seekNext(
parser: AbstractParser
): {
seekNext(parser: AbstractParser): {
value: string;

@@ -175,0 +173,0 @@ startPosition: {

@@ -1,3 +0,3 @@

import { SourceCode } from "./SourceCode";
import { AbstractParser } from "./AbstractParser";
import { SourceCode } from "./SourceCode.js";
import { AbstractParser } from "./AbstractParser.js";

@@ -4,0 +4,0 @@ /**

@@ -1,15 +0,15 @@

// LICENSE : MIT
"use strict";
import { TxtNode, TxtParentNode, ASTNodeTypes, TxtTextNode } from "@textlint/ast-node-types";
import { SourceCode } from "./parser/SourceCode";
import { AbstractParser } from "./parser/AbstractParser";
import { NewLineParser } from "./parser/NewLineParser";
import { SpaceParser } from "./parser/SpaceParser";
import { SeparatorParser, SeparatorParserOptions } from "./parser/SeparatorParser";
import { AnyValueParser } from "./parser/AnyValueParser";
import { AbbrMarker } from "./parser/AbbrMarker";
import { PairMaker } from "./parser/PairMaker";
import { nodeLog } from "./logger";
import type { AnyTxtNode, TxtNode, TxtParentNode, TxtStrNode, TxtTextNode } from "@textlint/ast-node-types";
import { ASTNodeTypes } from "@textlint/ast-node-types";
export const Syntax = {
import { SourceCode } from "./parser/SourceCode.js";
import { AbstractParser } from "./parser/AbstractParser.js";
import { NewLineParser } from "./parser/NewLineParser.js";
import { SpaceParser } from "./parser/SpaceParser.js";
import { SeparatorParser, SeparatorParserOptions } from "./parser/SeparatorParser.js";
import { AnyValueParser } from "./parser/AnyValueParser.js";
import { AbbrMarker } from "./parser/AbbrMarker.js";
import { PairMaker } from "./parser/PairMaker.js";
import { nodeLog } from "./logger.js";
export const SentenceSplitterSyntax = {
WhiteSpace: "WhiteSpace",

@@ -19,27 +19,24 @@ Punctuation: "Punctuation",

Str: "Str"
} as const;
export type TxtSentenceNode = Omit<TxtParentNode, "type"> & {
readonly type: "Sentence";
};
export interface ToTypeNode<T extends string> extends TxtTextNode {
readonly type: T;
}
export interface WhiteSpaceNode extends TxtTextNode {
export type TxtWhiteSpaceNode = Omit<TxtTextNode, "type"> & {
readonly type: "WhiteSpace";
}
};
export interface PunctuationNode extends TxtTextNode {
export type TxtPunctuationNode = Omit<TxtTextNode, "type"> & {
readonly type: "Punctuation";
}
};
export type SentenceSplitterTxtNode =
| TxtSentenceNode
| TxtWhiteSpaceNode
| TxtPunctuationNode
| TxtStrNode
| AnyTxtNode;
export type SentenceSplitterTxtNodeType = (typeof SentenceSplitterSyntax)[keyof typeof SentenceSplitterSyntax];
export interface StrNode extends TxtTextNode {
readonly type: "Str";
}
export interface SentenceNode extends TxtParentNode {
readonly type: "Sentence";
}
export class SplitParser {
private nodeList: TxtParentNode[] = [];
private results: (TxtParentNode | TxtNode)[] = [];
private nodeList: TxtSentenceNode[] = [];
private results: SentenceSplitterTxtNode[] = [];
public source: SourceCode;

@@ -51,7 +48,7 @@

get current(): TxtParentNode | undefined {
get current(): TxtSentenceNode | undefined {
return this.nodeList[this.nodeList.length - 1];
}
pushNodeToCurrent(node: TxtNode) {
pushNodeToCurrent(node: SentenceSplitterTxtNode) {
const current = this.current;

@@ -67,3 +64,3 @@ if (current) {

// open with ParentNode
open(parentNode: TxtParentNode) {
open(parentNode: TxtSentenceNode) {
this.nodeList.push(parentNode);

@@ -84,3 +81,3 @@ }

const { value, startPosition, endPosition } = this.source.seekNext(parser);
this.pushNodeToCurrent(createNode("WhiteSpace", value, startPosition, endPosition));
this.pushNodeToCurrent(createWhiteSpaceNode(value, startPosition, endPosition));
}

@@ -110,3 +107,6 @@

start: firstChildNode.loc.start,
end: nowToLoc(endNow)
end: {
line: endNow.line,
column: endNow.column
}
};

@@ -155,3 +155,3 @@ const rawValue = this.source.sliceRange(firstChildNode.range[0], endNow.offset);

*/
export function split(text: string, options?: splitOptions): (TxtParentNode | TxtNode)[] {
export function split(text: string, options?: splitOptions): SentenceSplitterTxtNode[] {
const { newLine, space, separator, anyValueParser } = createParsers(options);

@@ -177,7 +177,2 @@ const splitParser = new SplitParser(text);

}
export interface SentenceParentNode extends TxtNode {
children: Array<TxtNode | TxtTextNode | SentenceNode>;
}
/**

@@ -188,3 +183,3 @@ * Convert Paragraph Node to Paragraph node that convert children to Sentence node

*/
export function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): SentenceParentNode {
export function splitAST(paragraphNode: TxtParentNode, options?: splitOptions): SentenceSplitterTxtNode {
const { newLine, space, separator, anyValueParser } = createParsers(options);

@@ -246,3 +241,3 @@ const splitParser = new SplitParser(paragraphNode);

*/
export function createWhiteSpaceNode(
function createWhiteSpaceNode(
text: string,

@@ -260,6 +255,21 @@ startPosition: {

) {
return createNode("WhiteSpace", text, startPosition, endPosition);
return {
type: SentenceSplitterSyntax.WhiteSpace,
raw: text,
value: text,
loc: {
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},
range: [startPosition.offset, endPosition.offset]
};
}
export function createPunctuationNode(
function createPunctuationNode(
text: string,

@@ -276,37 +286,22 @@ startPosition: {

}
): PunctuationNode {
return createNode("Punctuation", text, startPosition, endPosition);
}
export function createTextNode(
text: string,
startPosition: {
line: number;
column: number;
offset: number;
},
endPosition: {
line: number;
column: number;
offset: number;
}
): StrNode {
return createNode("Str", text, startPosition, endPosition);
}
export function createEmptySentenceNode(): SentenceNode {
): TxtPunctuationNode {
return {
type: "Sentence",
raw: "",
type: SentenceSplitterSyntax.Punctuation,
raw: text,
value: text,
loc: {
start: { column: NaN, line: NaN },
end: { column: NaN, line: NaN }
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},
range: [NaN, NaN],
children: []
range: [startPosition.offset, endPosition.offset]
};
}
export function createNode<T extends string>(
type: T,
function createTextNode(
text: string,

@@ -323,10 +318,16 @@ startPosition: {

}
): ToTypeNode<T> {
): TxtStrNode {
return {
type: type,
type: SentenceSplitterSyntax.Str,
raw: text,
value: text,
loc: {
start: nowToLoc(startPosition),
end: nowToLoc(endPosition)
start: {
line: startPosition.line,
column: startPosition.column
},
end: {
line: endPosition.line,
column: endPosition.column
}
},

@@ -337,7 +338,13 @@ range: [startPosition.offset, endPosition.offset]

function nowToLoc(now: { line: number; column: number; offset: number }) {
function createEmptySentenceNode(): TxtSentenceNode {
return {
line: now.line,
column: now.column
type: SentenceSplitterSyntax.Sentence,
raw: "",
loc: {
start: { column: NaN, line: NaN },
end: { column: NaN, line: NaN }
} as const,
range: [NaN, NaN] as const,
children: []
};
}

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc