sentence-splitter
Advanced tools
Comparing version 1.1.0 to 1.1.1
@@ -19,5 +19,5 @@ // LICENSE : MIT | ||
var defaultOptions = { | ||
charRegExp: /[\.。\?\!?!]/ | ||
charRegExp: /[\.。\?\!?!]/, | ||
whiteSpaceRegExp: /\n/ | ||
}; | ||
var whiteSpace = /\n/; | ||
var Syntax = exports.Syntax = { | ||
@@ -28,4 +28,6 @@ "WhiteSpace": "WhiteSpace", | ||
function splitSentences(text) { | ||
var options = arguments.length <= 1 || arguments[1] === undefined ? defaultOptions : arguments[1]; | ||
var options = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1]; | ||
var matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
var whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp; | ||
var src = new _structuredSource2.default(text); | ||
@@ -48,3 +50,2 @@ var createNode = function createNode(type, start, end) { | ||
var isSplitPoint = false; | ||
var matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
for (; currentIndex < text.length; currentIndex++) { | ||
@@ -51,0 +52,0 @@ var char = text[currentIndex]; |
@@ -14,3 +14,3 @@ { | ||
}, | ||
"version": "1.1.0", | ||
"version": "1.1.1", | ||
"description": "split {japanese, english} text into sentences.", | ||
@@ -17,0 +17,0 @@ "main": "lib/sentence-splitter.js", |
143
README.md
@@ -22,2 +22,3 @@ # sentence-splitter | ||
"raw": "text.", | ||
"value": "text.", | ||
"loc": { | ||
@@ -41,2 +42,3 @@ "start": { | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
@@ -60,2 +62,3 @@ "start": { | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
@@ -79,2 +82,3 @@ "start": { | ||
"raw": "text", | ||
"value": "text", | ||
"loc": { | ||
@@ -115,2 +119,141 @@ "start": { | ||
## FAQ | ||
### How do I determine the real sentences? | ||
`sentence-splitter` splits text into `Sentence` and `WhiteSpace` nodes. | ||
`sentence-splitter` splits the following text into **3** `Sentence` nodes and **3** `WhiteSpace` nodes. | ||
Some markdown parsers treat 1 Sentence + 1 WhiteSpace + 1 Sentence as a single sentence. | ||
If you want to replicate that behavior, you need to implement the merging logic yourself. | ||
```markdown | ||
TextA | ||
TextB | ||
 | ||
TextC | ||
``` | ||
Output: | ||
```json | ||
[ | ||
{ | ||
"type": "Sentence", | ||
"raw": "TextA", | ||
"value": "TextA", | ||
"loc": { | ||
"start": { | ||
"line": 1, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 1, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
0, | ||
5 | ||
] | ||
}, | ||
{ | ||
"type": "WhiteSpace", | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 1, | ||
"column": 5 | ||
}, | ||
"end": { | ||
"line": 2, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
5, | ||
6 | ||
] | ||
}, | ||
{ | ||
"type": "Sentence", | ||
"raw": "TextB", | ||
"value": "TextB", | ||
"loc": { | ||
"start": { | ||
"line": 2, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 2, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
6, | ||
11 | ||
] | ||
}, | ||
{ | ||
"type": "WhiteSpace", | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 2, | ||
"column": 5 | ||
}, | ||
"end": { | ||
"line": 3, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
11, | ||
12 | ||
] | ||
}, | ||
{ | ||
"type": "WhiteSpace", | ||
"raw": "\n", | ||
"value": "\n", | ||
"loc": { | ||
"start": { | ||
"line": 3, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 4, | ||
"column": 0 | ||
} | ||
}, | ||
"range": [ | ||
12, | ||
13 | ||
] | ||
}, | ||
{ | ||
"type": "Sentence", | ||
"raw": "TextC", | ||
"value": "TextC", | ||
"loc": { | ||
"start": { | ||
"line": 4, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"line": 4, | ||
"column": 5 | ||
} | ||
}, | ||
"range": [ | ||
13, | ||
18 | ||
] | ||
} | ||
] | ||
``` | ||
## Tests | ||
@@ -117,0 +260,0 @@ |
@@ -5,5 +5,5 @@ // LICENSE : MIT | ||
const defaultOptions = { | ||
charRegExp: /[\.。\?\!?!]/ | ||
charRegExp: /[\.。\?\!?!]/, | ||
whiteSpaceRegExp: /\n/ | ||
}; | ||
const whiteSpace = /\n/; | ||
export const Syntax = { | ||
@@ -13,3 +13,5 @@ "WhiteSpace": "WhiteSpace", | ||
}; | ||
export default function splitSentences(text, options = defaultOptions) { | ||
export default function splitSentences(text, options = {}) { | ||
const matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
const whiteSpace = options.whiteSpaceRegExp || defaultOptions.whiteSpaceRegExp; | ||
const src = new StructureSource(text); | ||
@@ -32,3 +34,2 @@ let createNode = (type, start, end)=> { | ||
let isSplitPoint = false; | ||
let matchChar = options.charRegExp || defaultOptions.charRegExp; | ||
for (; currentIndex < text.length; currentIndex++) { | ||
@@ -35,0 +36,0 @@ let char = text[currentIndex]; |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
18026
167
268