@ckirby/sbd - npm Package Compare versions

Comparing version 2.1.1 to 2.1.2

lib/Match.js

		@@ -172,19 +172,16 @@ var abbreviations;
		exports.isConcatenated = function(word) {
		var i = 0;
		let result = /[.!?](?=[A-Z])/.exec(word);

		if ((i = word.indexOf(".")) > -1 ||
		(i = word.indexOf("!")) > -1 ||
		(i = word.indexOf("?")) > -1)
		{
		var c = word.charAt(i + 1);
		// If a punctuation mark is found
		if (result) {
		// Get the index of the punctuation mark in the word
		let index = result.index;

		// Check if the next word starts with a letter
		if (c.match(/[a-zA-Z].*/)) {
		return [word.slice(0, i), word.slice(i+1)];
		}
		// Split the word at the punctuation mark and return the two parts
		return [word.slice(0, index), word.slice(index + 1)];
		}

		// If no valid punctuation mark followed by a letter is found, return false
		return false;
		};

		exports.isBoundaryChar = function(word) {
		@@ -191,0 +188,0 @@ return word === "." ||

lib/tokenizer.js

		@@ -32,3 +32,3 @@ /*jshint node:true, laxcomma:true */
		var tokens;
		tokens = text.split(/([^-–—\s,;:]+|\n+)/);
		tokens = text.split(/(\S+|\n+)/);
		// every other token is a word
		@@ -35,0 +35,0 @@ words = tokens.filter(function (token, ii) {

package.json

		{
		"name": "@ckirby/sbd",
		"version": "2.1.1",
		"version": "2.1.2",
		"description": "Split text into sentences with Sentence Boundary Detection (SBD).",
		@@ -5,0 +5,0 @@ "main": "lib/tokenizer.js",

New alerts