sentence-splitter
Advanced tools
Comparing version 1.1.1 to 1.2.0
@@ -20,3 +20,3 @@ // LICENSE : MIT | ||
charRegExp: /[\.。\?\!?!]/, | ||
whiteSpaceRegExp: /\n/ | ||
newLineCharacters: "\n" | ||
}; | ||
@@ -31,3 +31,3 @@ var Syntax = exports.Syntax = { | ||
var matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
var whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp; | ||
var newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters; | ||
var src = new _structuredSource2.default(text); | ||
@@ -50,5 +50,7 @@ var createNode = function createNode(type, start, end) { | ||
var isSplitPoint = false; | ||
var newLineCharactersLength = newLineCharacters.length; | ||
for (; currentIndex < text.length; currentIndex++) { | ||
var char = text[currentIndex]; | ||
if (whiteSpace.test(char)) { | ||
var whiteTarget = text.slice(currentIndex, currentIndex + newLineCharactersLength); | ||
if (whiteTarget === newLineCharacters) { | ||
// (string)\n | ||
@@ -58,6 +60,9 @@ if (startPoint !== currentIndex) { | ||
} | ||
// string(\n) | ||
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1)); | ||
for (var i = 0; i < newLineCharactersLength; i++) { | ||
// string(\n) | ||
var startIndex = currentIndex + i; | ||
results.push(createNode(Syntax.WhiteSpace, startIndex, startIndex + 1)); | ||
} | ||
// string\n| | ||
startPoint = currentIndex + 1; | ||
startPoint = currentIndex + newLineCharactersLength; | ||
isSplitPoint = false; | ||
@@ -64,0 +69,0 @@ } else if (matchChar.test(char)) { |
@@ -14,3 +14,3 @@ { | ||
}, | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"description": "split {japanese, english} text into sentences.", | ||
@@ -17,0 +17,0 @@ "main": "lib/sentence-splitter.js", |
141
README.md
@@ -110,2 +110,12 @@ # sentence-splitter | ||
### Options | ||
- `charRegExp` | ||
- default: `/[\.。\?\!?!]/` | ||
- separator of sentences. | ||
- `newLineCharacters` | ||
- default: `"\n"` | ||
- the string that is treated as a line break | ||
- when handling Markdown text, set `newLineCharacters: "\n\n"` | ||
### Node's type | ||
@@ -118,3 +128,3 @@ | ||
### How to know real sentence? | ||
### How to treat real sentence? | ||
@@ -126,3 +136,2 @@ `sentence-splitter` splits text into `Sentence` and `WhiteSpace` | ||
Some Markdown parsers treat 1 Sentence + 1 WhiteSpace + 1 Sentence as 1 Sentence. | ||
If you want to replicate this behavior, you need to implement it yourself. | ||
@@ -143,17 +152,2 @@ ```markdown | ||
"raw": "TextA", | ||
"value": "TextA", | ||
"loc": { | ||
"start": { | ||
"line": 1, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 1, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
0, | ||
5 | ||
] | ||
}, | ||
@@ -163,17 +157,2 @@ { | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 1, | ||
"column": 5 | ||
}, | ||
"end": { | ||
"line": 2, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
5, | ||
6 | ||
] | ||
}, | ||
@@ -183,17 +162,2 @@ { | ||
"raw": "TextB", | ||
"value": "TextB", | ||
"loc": { | ||
"start": { | ||
"line": 2, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 2, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
6, | ||
11 | ||
] | ||
}, | ||
@@ -203,17 +167,2 @@ { | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 2, | ||
"column": 5 | ||
}, | ||
"end": { | ||
"line": 3, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
11, | ||
12 | ||
] | ||
}, | ||
@@ -223,17 +172,2 @@ { | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 3, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 4, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
12, | ||
13 | ||
] | ||
}, | ||
@@ -243,17 +177,2 @@ { | ||
"raw": "TextC", | ||
"value": "TextC", | ||
"loc": { | ||
"start": { | ||
"line": 4, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 4, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
13, | ||
18 | ||
] | ||
} | ||
@@ -263,2 +182,40 @@ ] | ||
If you want to treat `\n\n` as a separator of sentences, you can use the `newLineCharacters` option. | ||
```js | ||
let text = `TextA | ||
TextB | ||
TextC`; | ||
let sentences = splitSentences(text, { | ||
newLineCharacters: "\n\n" // `\n\n` as a separator | ||
}); | ||
console.log(JSON.stringify(sentences, null, 4)) | ||
``` | ||
Output: | ||
```json | ||
[ | ||
{ | ||
"type": "Sentence", | ||
"raw": "TextA\nTextB" | ||
}, | ||
{ | ||
"type": "WhiteSpace", | ||
"raw": "\n" | ||
}, | ||
{ | ||
"type": "WhiteSpace", | ||
"raw": "\n" | ||
}, | ||
{ | ||
"type": "Sentence", | ||
"raw": "TextC" | ||
} | ||
] | ||
``` | ||
## Tests | ||
@@ -265,0 +222,0 @@ |
@@ -6,3 +6,3 @@ // LICENSE : MIT | ||
charRegExp: /[\.。\?\!?!]/, | ||
whiteSpaceRegExp: /\n/ | ||
newLineCharacters: "\n" | ||
}; | ||
@@ -15,3 +15,3 @@ export const Syntax = { | ||
const matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
const whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp; | ||
const newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters; | ||
const src = new StructureSource(text); | ||
@@ -34,5 +34,7 @@ let createNode = (type, start, end)=> { | ||
let isSplitPoint = false; | ||
const newLineCharactersLength = newLineCharacters.length; | ||
for (; currentIndex < text.length; currentIndex++) { | ||
let char = text[currentIndex]; | ||
if (whiteSpace.test(char)) { | ||
let whiteTarget = text.slice(currentIndex, currentIndex + newLineCharactersLength); | ||
if (whiteTarget === newLineCharacters) { | ||
// (string)\n | ||
@@ -42,6 +44,9 @@ if (startPoint !== currentIndex) { | ||
} | ||
// string(\n) | ||
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1)); | ||
for (let i = 0; i < newLineCharactersLength; i++) { | ||
// string(\n) | ||
let startIndex = currentIndex + i; | ||
results.push(createNode(Syntax.WhiteSpace, startIndex, startIndex + 1)); | ||
} | ||
// string\n| | ||
startPoint = currentIndex + 1; | ||
startPoint = currentIndex + newLineCharactersLength; | ||
isSplitPoint = false; | ||
@@ -48,0 +53,0 @@ } else if (matchChar.test(char)) { |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
18245
177
225