Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@ckirby/sbd

Package Overview
Dependencies
Maintainers
1
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@ckirby/sbd - npm Package Compare versions

Comparing version 2.1.2 to 2.2.0

lib/tokenizer.d.ts

43

lib/Match.js

@@ -0,2 +1,5 @@

// @ts-check
/** @type {string[]} */
var abbreviations;
/** @type {string[]} */
var englishAbbreviations = [

@@ -64,2 +67,3 @@ "al",

/** @type {(abbr: string[]) => void} */
exports.setAbbreviations = function(abbr) {

@@ -73,2 +77,3 @@ if(abbr){

/** @type {(str: string) => boolean} */
exports.isCapitalized = function(str) {

@@ -78,3 +83,6 @@ return /^[A-Z][a-z].*/.test(str) || this.isNumber(str);

// Start with opening quotes or capitalized letter
/**
* Start with opening quotes or capitalized letter
* @type {(str: string) => boolean}
*/
exports.isSentenceStarter = function(str) {

@@ -84,20 +92,23 @@ return this.isCapitalized(str) || /``|"|'/.test(str.substring(0,2));

/** @type {(str: string) => boolean} */
exports.isCommonAbbreviation = function(str) {
str = str.replace(/\W+/g, '')
if (abbreviations.includes(str)) {
return true;
}
// if str is all-caps, try lower-case and sentence-case
if (!/[a-z]/.test(str)) {
str = str.toLowerCase()
if (abbreviations.includes(str)) {
// if the word has a / in it, consider whether the last part is an
// abbreviation to allow for things like "Mexico/U.S. border"
var last = str.replace(/[^\w/]+/g, '').split('/').pop() || '';
if (abbreviations.includes(last)) {
return true;
}
str = str.replace(/(\w)(.+)/, function (_, first, rest) {
return first.toUpperCase() + rest
})
if (abbreviations.includes(str)) {
return true;
}
}
// if last is all-caps, try lower-case and sentence-case
if (!/[a-z]/.test(last)) {
last = last.toLowerCase()
if (abbreviations.includes(last)) {
return true;
}
last = last.replace(/(\w)(.+)/, function (_, first, rest) {
return first.toUpperCase() + rest
})
if (abbreviations.includes(last)) {
return true;
}
}
return false;

@@ -104,0 +115,0 @@ }

@@ -6,3 +6,6 @@ /*jshint node:true, laxcomma:true */

// Split the entry into sentences.
/**
* Split the entry into sentences.
*/
exports.sentences = function(text, user_options) {

@@ -9,0 +12,0 @@ if (!text || typeof text !== "string" || !text.length) {

{
"name": "@ckirby/sbd",
"version": "2.1.2",
"version": "2.2.0",
"description": "Split text into sentences with Sentence Boundary Detection (SBD).",
"main": "lib/tokenizer.js",
"types": "lib/tokenizer.d.ts",
"files": [

@@ -15,3 +16,3 @@ "lib"

"type": "git",
"url": "https://github.com/chadkirby/sbd.git"
"url": "git+https://github.com/chadkirby/sbd.git"
},

@@ -18,0 +19,0 @@ "keywords": [

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc