@ckirby/sbd
Advanced tools
Comparing version 2.1.1 to 2.1.2
@@ -172,19 +172,16 @@ var abbreviations; | ||
exports.isConcatenated = function(word) { | ||
var i = 0; | ||
let result = /[.!?](?=[A-Z])/.exec(word); | ||
if ((i = word.indexOf(".")) > -1 || | ||
(i = word.indexOf("!")) > -1 || | ||
(i = word.indexOf("?")) > -1) | ||
{ | ||
var c = word.charAt(i + 1); | ||
// If a punctuation mark is found | ||
if (result) { | ||
// Get the index of the punctuation mark in the word | ||
let index = result.index; | ||
// Check if the next word starts with a letter | ||
if (c.match(/[a-zA-Z].*/)) { | ||
return [word.slice(0, i), word.slice(i+1)]; | ||
} | ||
// Split the word at the punctuation mark and return the two parts | ||
return [word.slice(0, index), word.slice(index + 1)]; | ||
} | ||
// If no valid punctuation mark followed by a letter is found, return false | ||
return false; | ||
}; | ||
exports.isBoundaryChar = function(word) { | ||
@@ -191,0 +188,0 @@ return word === "." || |
@@ -32,3 +32,3 @@ /*jshint node:true, laxcomma:true */ | ||
var tokens; | ||
tokens = text.split(/([^-–—\s,;:]+|\n+)/); | ||
tokens = text.split(/(\S+|\n+)/); | ||
// every other token is a word | ||
@@ -35,0 +35,0 @@ words = tokens.filter(function (token, ii) { |
{ | ||
"name": "@ckirby/sbd", | ||
"version": "2.1.1", | ||
"version": "2.1.2", | ||
"description": "Split text into sentences with Sentence Boundary Detection (SBD).", | ||
@@ -5,0 +5,0 @@ "main": "lib/tokenizer.js", |
AI-detected possible typosquat
Supply chain risk: AI has identified this package as a potential typosquat of a more popular package. This suggests that the package may be intentionally mimicking another package's name, description, or other metadata.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. The new version may be unstable, and the refactor may indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
14027
335