@ckirby/sbd
Advanced tools
Comparing version 2.1.2 to 2.2.0
@@ -0,2 +1,5 @@ | ||
// @ts-check | ||
/** @type {string[]} */ | ||
var abbreviations; | ||
/** @type {string[]} */ | ||
var englishAbbreviations = [ | ||
@@ -64,2 +67,3 @@ "al", | ||
/** @type {(abbr: string[]) => void} */ | ||
exports.setAbbreviations = function(abbr) { | ||
@@ -73,2 +77,3 @@ if(abbr){ | ||
/** @type {(str: string) => boolean} */ | ||
exports.isCapitalized = function(str) { | ||
@@ -78,3 +83,6 @@ return /^[A-Z][a-z].*/.test(str) || this.isNumber(str); | ||
// Start with opening quotes or capitalized letter | ||
/** | ||
* Start with opening quotes or capitalized letter | ||
* @type {(str: string) => boolean} | ||
*/ | ||
exports.isSentenceStarter = function(str) { | ||
@@ -84,20 +92,23 @@ return this.isCapitalized(str) || /``|"|'/.test(str.substring(0,2)); | ||
/** @type {(str: string) => boolean} */ | ||
exports.isCommonAbbreviation = function(str) { | ||
str = str.replace(/\W+/g, '') | ||
if (abbreviations.includes(str)) { | ||
return true; | ||
} | ||
// if str is all-caps, try lower-case and sentence-case | ||
if (!/[a-z]/.test(str)) { | ||
str = str.toLowerCase() | ||
if (abbreviations.includes(str)) { | ||
// if the word has a / in it, consider whether the last part is an | ||
// abbreviation to allow for things like "Mexico/U.S. border" | ||
var last = str.replace(/[^\w/]+/g, '').split('/').pop() || ''; | ||
if (abbreviations.includes(last)) { | ||
return true; | ||
} | ||
str = str.replace(/(\w)(.+)/, function (_, first, rest) { | ||
return first.toUpperCase() + rest | ||
}) | ||
if (abbreviations.includes(str)) { | ||
return true; | ||
} | ||
} | ||
// if last is all-caps, try lower-case and sentence-case | ||
if (!/[a-z]/.test(last)) { | ||
last = last.toLowerCase() | ||
if (abbreviations.includes(last)) { | ||
return true; | ||
} | ||
last = last.replace(/(\w)(.+)/, function (_, first, rest) { | ||
return first.toUpperCase() + rest | ||
}) | ||
if (abbreviations.includes(last)) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
@@ -104,0 +115,0 @@ } |
@@ -6,3 +6,6 @@ /*jshint node:true, laxcomma:true */ | ||
// Split the entry into sentences. | ||
/** | ||
* Split the entry into sentences. | ||
*/ | ||
exports.sentences = function(text, user_options) { | ||
@@ -9,0 +12,0 @@ if (!text || typeof text !== "string" || !text.length) { |
{ | ||
"name": "@ckirby/sbd", | ||
"version": "2.1.2", | ||
"version": "2.2.0", | ||
"description": "Split text into sentences with Sentence Boundary Detection (SBD).", | ||
"main": "lib/tokenizer.js", | ||
"types": "lib/tokenizer.d.ts", | ||
"files": [ | ||
@@ -15,3 +16,3 @@ "lib" | ||
"type": "git", | ||
"url": "https://github.com/chadkirby/sbd.git" | ||
"url": "git+https://github.com/chadkirby/sbd.git" | ||
}, | ||
@@ -18,0 +19,0 @@ "keywords": [ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
14942
6
364