Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

sentence-splitter

Package Overview
Dependencies
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sentence-splitter - npm Package Compare versions

Comparing version 1.1.1 to 1.2.0

17

lib/sentence-splitter.js

@@ -20,3 +20,3 @@ // LICENSE : MIT

charRegExp: /[\.。\?\!?!]/,
whiteSpaceRegExp: /\n/
newLineCharacters: "\n"
};

@@ -31,3 +31,3 @@ var Syntax = exports.Syntax = {

var matchChar = options.charRegExp || defaultOptions.charRegExp;
var whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp;
var newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters;
var src = new _structuredSource2.default(text);

@@ -50,5 +50,7 @@ var createNode = function createNode(type, start, end) {

var isSplitPoint = false;
var newLineCharactersLength = newLineCharacters.length;
for (; currentIndex < text.length; currentIndex++) {
var char = text[currentIndex];
if (whiteSpace.test(char)) {
var whiteTarget = text.slice(currentIndex, currentIndex + newLineCharactersLength);
if (whiteTarget === newLineCharacters) {
// (string)\n

@@ -58,6 +60,9 @@ if (startPoint !== currentIndex) {

}
// string(\n)
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1));
for (var i = 0; i < newLineCharactersLength; i++) {
// string(\n)
var startIndex = currentIndex + i;
results.push(createNode(Syntax.WhiteSpace, startIndex, startIndex + 1));
}
// string\n|
startPoint = currentIndex + 1;
startPoint = currentIndex + newLineCharactersLength;
isSplitPoint = false;

@@ -64,0 +69,0 @@ } else if (matchChar.test(char)) {

@@ -14,3 +14,3 @@ {

},
"version": "1.1.1",
"version": "1.2.0",
"description": "split {japanese, english} text into sentences.",

@@ -17,0 +17,0 @@ "main": "lib/sentence-splitter.js",

@@ -110,2 +110,12 @@ # sentence-splitter

### Options
- `charRegExp`
- default: `/[\.。\?\!?!]/`
- separator of sentences.
- `newLineCharacters`
- default: `"\n"`
- line break mark
- if you are processing Markdown text, set this option to `"\n\n"` (i.e. `newLineCharacters: "\n\n"`)
### Node's type

@@ -118,3 +128,3 @@

### How to know real sentence?
### How to treat real sentence?

@@ -126,3 +136,2 @@ `sentence-splitter` split text into `Sentence` and `WhiteSpace`

Some Markdown parsers treat 1 Sentence + 1 WhiteSpace + 1 Sentence as a single Sentence.
If you want to replicate that behavior, you have to implement the merging yourself.

@@ -143,17 +152,2 @@ ```markdown

"raw": "TextA",
"value": "TextA",
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 1,
"column": 5
}
},
"range": [
0,
5
]
},

@@ -163,17 +157,2 @@ {

"raw": "\n",
"value": "\n",
"loc": {
"start": {
"line": 1,
"column": 5
},
"end": {
"line": 2,
"column": 0
}
},
"range": [
5,
6
]
},

@@ -183,17 +162,2 @@ {

"raw": "TextB",
"value": "TextB",
"loc": {
"start": {
"line": 2,
"column": 0
},
"end": {
"line": 2,
"column": 5
}
},
"range": [
6,
11
]
},

@@ -203,17 +167,2 @@ {

"raw": "\n",
"value": "\n",
"loc": {
"start": {
"line": 2,
"column": 5
},
"end": {
"line": 3,
"column": 0
}
},
"range": [
11,
12
]
},

@@ -223,17 +172,2 @@ {

"raw": "\n",
"value": "\n",
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 4,
"column": 0
}
},
"range": [
12,
13
]
},

@@ -243,17 +177,2 @@ {

"raw": "TextC",
"value": "TextC",
"loc": {
"start": {
"line": 4,
"column": 0
},
"end": {
"line": 4,
"column": 5
}
},
"range": [
13,
18
]
}

@@ -263,2 +182,40 @@ ]

If you want to treat `\n\n` as a separator of sentences, you can use the `newLineCharacters` option.
```js
let text = `TextA
TextB
TextC`;
let sentences = splitSentences(text, {
newLineCharacters: "\n\n" // `\n\n` as a separator
});
console.log(JSON.stringify(sentences, null, 4))
```
Output:
```json
[
{
"type": "Sentence",
"raw": "TextA\nTextB",
},
{
"type": "WhiteSpace",
"raw": "\n",
},
{
"type": "WhiteSpace",
"raw": "\n",
},
{
"type": "Sentence",
"raw": "TextC",
}
]
```
## Tests

@@ -265,0 +222,0 @@

@@ -6,3 +6,3 @@ // LICENSE : MIT

charRegExp: /[\.。\?\!?!]/,
whiteSpaceRegExp: /\n/
newLineCharacters: "\n"
};

@@ -15,3 +15,3 @@ export const Syntax = {

const matchChar = options.charRegExp || defaultOptions.charRegExp;
const whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp;
const newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters;
const src = new StructureSource(text);

@@ -34,5 +34,7 @@ let createNode = (type, start, end)=> {

let isSplitPoint = false;
const newLineCharactersLength = newLineCharacters.length;
for (; currentIndex < text.length; currentIndex++) {
let char = text[currentIndex];
if (whiteSpace.test(char)) {
let whiteTarget = text.slice(currentIndex, currentIndex + newLineCharactersLength);
if (whiteTarget === newLineCharacters) {
// (string)\n

@@ -42,6 +44,9 @@ if (startPoint !== currentIndex) {

}
// string(\n)
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1));
for (let i = 0; i < newLineCharactersLength; i++) {
// string(\n)
let startIndex = currentIndex + i;
results.push(createNode(Syntax.WhiteSpace, startIndex, startIndex + 1));
}
// string\n|
startPoint = currentIndex + 1;
startPoint = currentIndex + newLineCharactersLength;
isSplitPoint = false;

@@ -48,0 +53,0 @@ } else if (matchChar.test(char)) {

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc