Comparing version 0.0.2 to 0.0.3
@@ -0,0 +0,0 @@ exports.abbreviations = [ |
@@ -24,2 +24,3 @@ | ||
// Starting a new sentence if beginning with capital letter | ||
// Exception: The word is enclosed in brackets | ||
exports.is_concatenated = function(word) { | ||
@@ -31,3 +32,4 @@ var i = 0; | ||
if (c === c.toUpperCase()) { | ||
// Check if the next word starts with a letter | ||
if (c.match(/[a-zA-Z].*/)) { | ||
return [word.slice(0, i), word.slice(i+1)]; | ||
@@ -34,0 +36,0 @@ } |
@@ -36,3 +36,6 @@ /*jshint node:true, laxcomma:true */ | ||
if (Match.is_boundary_char(words[i]) || String.ends_with_char(words[i], "?!") || words[i] === newline_placeholder_t) { | ||
if (Match.is_boundary_char(words[i]) || | ||
String.ends_with_char(words[i], "?!") || | ||
words[i] === newline_placeholder_t) | ||
{ | ||
if (newline_boundary) { | ||
@@ -117,3 +120,5 @@ current.pop(); | ||
// Single words, could be "enumeration lists" | ||
if (sentences[i].length === 1 && sentences[i][0].length < 4 && sentences[i][0].indexOf('.') > -1) { | ||
if (sentences[i].length === 1 && sentences[i][0].length < 4 && | ||
sentences[i][0].indexOf('.') > -1) | ||
{ | ||
// Check if there is a next sentence | ||
@@ -131,2 +136,2 @@ // It should not be another list item | ||
return result; | ||
}; | ||
}; |
{ | ||
"name": "sbd", | ||
"version": "0.0.2", | ||
"version": "0.0.3", | ||
"description": "Split text into sentences", | ||
@@ -5,0 +5,0 @@ "main": "lib/tokenizer.js", |
@@ -0,0 +0,0 @@ # Sentence Boundary Detection (SBD) |
@@ -54,2 +54,11 @@ /*jshint node:true, laxcomma:true */ | ||
describe('Questionmark is skipped inside brackets', function () { | ||
var entry = "A sentence [example?] that should not (Though sometimes...) be two or more (but one!) sentences."; | ||
var sentences = tokenizer.sentences(entry); | ||
it("should get 1 sentence", function () { | ||
assert.equal(sentences.length, 1); | ||
}); | ||
}); | ||
describe('Skip abbreviations', function () { | ||
@@ -56,0 +65,0 @@ var entry = "In I.C.T we have multiple challenges! There should only be two sentences."; |
26722
1447