New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign inDemoInstall
Socket

sbd

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sbd - npm Package Compare versions

Comparing version 1.0.6 to 1.0.8

101

dist/sbd.js
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tokenizer = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
var abbreviations = [
"ie",
var abbreviations;
var englishAbbreviations = [
"al",
"adj",
"assn",
"Ave",
"BSc", "MSc",
"Cell",
"Ch",
"Co",
"cc",
"Corp",
"Dem",
"Dept",
"ed",
"eg",
"Eq",
"Eqs",
"est",
"est",
"etc",
"Ex",
"ext", // + number?

@@ -11,47 +29,47 @@ "Fig",

"figs",
"et al",
"Co",
"Corp",
"Ave",
"i.e",
"ie",
"Inc",
"Ex",
"Viz",
"vs",
"Vs",
"repr",
"Rep",
"Dem",
"trans",
"Vol",
"inc",
"Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec",
"jr",
"mi",
"Miss", "Mrs", "Mr", "Ms",
"Mol",
"mt",
"mts",
"no",
"Nos",
"PhD", "MD", "BA", "MA", "MM",
"pl",
"pop",
"pp",
"rev",
"est",
"Prof", "Dr",
"pt",
"Ref",
"Refs",
"Eq",
"Eqs",
"Ch",
"Rep",
"repr",
"rev",
"Sec",
"Secs",
"mi",
"Dept",
"Sgt", "Col", "Gen", "Rep", "Sen",'Gov', "Lt", "Maj", "Capt","St",
"Sr", "sr", "Jr", "jr", "Rev",
"Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat",
"trans",
"Univ",
"Nos",
"No",
"Mol",
"Cell",
"Viz",
"Vol",
"vs",
"v",
];
"Miss", "Mrs", "Mr", "Ms",
"Prof", "Dr",
"Sgt", "Col", "Gen", "Rep", "Sen",'Gov', "Lt", "Maj", "Capt","St",
exports.setAbbreviations = function(abbr) {
if(abbr){
abbreviations = abbr;
} else {
abbreviations = englishAbbreviations;
}
}
"Sr", "Jr", "jr", "Rev",
"PhD", "MD", "BA", "MA", "MM",
"BSc", "MSc",
"Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec",
"Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat"
];
exports.isCapitalized = function(str) {

@@ -216,3 +234,4 @@ return /^[A-Z][a-z].*/.test(str) || this.isNumber(str);

"sanitize" : false,
"allowed_tags" : false
"allowed_tags" : false,
"abbreviations" : null
};

@@ -231,2 +250,4 @@

Match.setAbbreviations(options.abbreviations);
if (options.newline_boundaries) {

@@ -233,0 +254,0 @@ text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);

@@ -1,1 +0,1 @@

(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tokenizer=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){var abbreviations=["ie","eg","ext","Fig","fig","Figs","figs","et al","Co","Corp","Ave","Inc","Ex","Viz","vs","Vs","repr","Rep","Dem","trans","Vol","pp","rev","est","Ref","Refs","Eq","Eqs","Ch","Sec","Secs","mi","Dept","Univ","Nos","No","Mol","Cell","Miss","Mrs","Mr","Ms","Prof","Dr","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","Jr","jr","Rev","PhD","MD","BA","MA","MM","BSc","MSc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat"];exports.isCapitalized=function(str){return/^[A-Z][a-z].*/.test(str)||this.isNumber(str)};exports.isSentenceStarter=function(str){return this.isCapitalized(str)||/``|"|'/.test(str.substring(0,2))};exports.isCommonAbbreviation=function(str){return~abbreviations.indexOf(str.replace(/\W+/g,""))};exports.isTimeAbbreviation=function(word,next){if(word==="a.m."||word==="p.m."){var tmp=next.replace(/\W+/g,"").slice(-3).toLowerCase();if(tmp==="day"){return true}}return false};exports.isDottedAbbreviation=function(word){var matches=word.replace(/[\(\)\[\]\{\}]/g,"").match(/(.\.)*/);return 
matches&&matches[0].length>0};exports.isCustomAbbreviation=function(str){if(str.length<=3){return true}return this.isCapitalized(str)};exports.isNameAbbreviation=function(wordCount,words){if(words.length>0){if(wordCount<5&&words[0].length<6&&this.isCapitalized(words[0])){return true}var capitalized=words.filter(function(str){return/[A-Z]/.test(str.charAt(0))});return capitalized.length>=3}return false};exports.isNumber=function(str,dotPos){if(dotPos){str=str.slice(dotPos-1,dotPos+2)}return!isNaN(str)};exports.isPhoneNr=function(str){return str.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/)};exports.isURL=function(str){return str.match(/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/\/=]*)/)};exports.isConcatenated=function(word){var i=0;if((i=word.indexOf("."))>-1||(i=word.indexOf("!"))>-1||(i=word.indexOf("?"))>-1){var c=word.charAt(i+1);if(c.match(/[a-zA-Z].*/)){return[word.slice(0,i),word.slice(i+1)]}}return false};exports.isBoundaryChar=function(word){return word==="."||word==="!"||word==="?"}},{}],2:[function(require,module,exports){exports.endsWithChar=function ends_with_char(word,c){if(c.length>1){return c.indexOf(word.slice(-1))>-1}return word.slice(-1)===c};exports.endsWith=function ends_with(word,end){return word.slice(word.length-end.length)===end}},{}],3:[function(require,module,exports){module.exports=function sanitizeHtml(text,opts){if(typeof text=="string"||text instanceof String){var $div=document.createElement("DIV");$div.innerHTML=text;text=($div.textContent||"").trim()}else if(typeof text==="object"&&text.textContent){text=(text.textContent||"").trim()}return text}},{}],4:[function(require,module,exports){"use strict";var sanitizeHtml=require("sanitize-html");var String=require("./String");var 
Match=require("./Match");var newline_placeholder=" @~@ ";var newline_placeholder_t=newline_placeholder.trim();exports.sentences=function(text,user_options){if(!text||typeof text==="undefined"||text.length===0){return[]}var options={newline_boundaries:false,html_boundaries:false,sanitize:false,allowed_tags:false};if(typeof user_options==="boolean"){options.newline_boundaries=true}else{for(var k in user_options){options[k]=user_options[k]}}if(options.newline_boundaries){text=text.replace(/\n+|[-#=_+*]{4,}/g,newline_placeholder)}if(options.html_boundaries){text=text.replace(/(<br\s*\/?>|<\/[p|div|ul|ol]>)/g,"$1"+newline_placeholder)}if(options.sanitize||options.allowed_tags){if(!options.allowed_tags){options.allowed_tags=[""]}text=sanitizeHtml(text,{allowedTags:options.allowed_tags})}var words=text.match(/\S+/g);var wordCount=0;var index=0;var temp=[];var sentences=[];var current=[];for(var i=0,L=words.length;i<L;i++){wordCount++;current.push(words[i]);if(~words[i].indexOf(",")){wordCount=0}if(Match.isBoundaryChar(words[i])||String.endsWithChar(words[i],"?!")||words[i]===newline_placeholder_t){if((options.newline_boundaries||options.html_boundaries)&&words[i]===newline_placeholder_t){current.pop()}sentences.push(current);wordCount=0;current=[];continue}if(String.endsWithChar(words[i],'"')||String.endsWithChar(words[i],"”")){words[i]=words[i].slice(0,-1)}if(String.endsWithChar(words[i],".")){if(i+1<L){if(words[i].length===2&&isNaN(words[i].charAt(0))){continue}if(Match.isCommonAbbreviation(words[i])){continue}if(Match.isSentenceStarter(words[i+1])){if(Match.isTimeAbbreviation(words[i],words[i+1])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,6))){continue}if(Match.isNumber(words[i+1])){if(Match.isCustomAbbreviation(words[i])){continue}}}else{if(String.endsWith(words[i],"..")){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,5))){continue}}}sentences.push(current);current=[];wordCount=0;continu
e}if((index=words[i].indexOf("."))>-1){if(Match.isNumber(words[i],index)){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isURL(words[i])||Match.isPhoneNr(words[i])){continue}}if(temp=Match.isConcatenated(words[i])){current.pop();current.push(temp[0]);sentences.push(current);current=[];wordCount=0;current.push(temp[1])}}if(current.length){sentences.push(current)}var result=[];var sentence="";sentences=sentences.filter(function(s){return s.length>0});for(var i=0;i<sentences.length;i++){sentence=sentences[i].join(" ");if(sentences[i].length===1&&sentences[i][0].length<4&&sentences[i][0].indexOf(".")>-1){if(sentences[i+1]&&sentences[i+1][0].indexOf(".")<0){sentence+=" "+sentences[i+1].join(" ");i++}}result.push(sentence)}return result}},{"./Match":1,"./String":2,"sanitize-html":3}]},{},[4])(4)});
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tokenizer=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){var abbreviations;var englishAbbreviations=["al","adj","assn","Ave","BSc","MSc","Cell","Ch","Co","cc","Corp","Dem","Dept","ed","eg","Eq","Eqs","est","est","etc","Ex","ext","Fig","fig","Figs","figs","i.e","ie","Inc","inc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","jr","mi","Miss","Mrs","Mr","Ms","Mol","mt","mts","no","Nos","PhD","MD","BA","MA","MM","pl","pop","pp","Prof","Dr","pt","Ref","Refs","Rep","repr","rev","Sec","Secs","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","sr","Jr","jr","Rev","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat","trans","Univ","Viz","Vol","vs","v"];exports.setAbbreviations=function(abbr){if(abbr){abbreviations=abbr}else{abbreviations=englishAbbreviations}};exports.isCapitalized=function(str){return/^[A-Z][a-z].*/.test(str)||this.isNumber(str)};exports.isSentenceStarter=function(str){return this.isCapitalized(str)||/``|"|'/.test(str.substring(0,2))};exports.isCommonAbbreviation=function(str){return~abbreviations.indexOf(str.replace(/\W+/g,""))};exports.isTimeAbbreviation=function(word,next){if(word==="a.m."||word==="p.m."){var 
tmp=next.replace(/\W+/g,"").slice(-3).toLowerCase();if(tmp==="day"){return true}}return false};exports.isDottedAbbreviation=function(word){var matches=word.replace(/[\(\)\[\]\{\}]/g,"").match(/(.\.)*/);return matches&&matches[0].length>0};exports.isCustomAbbreviation=function(str){if(str.length<=3){return true}return this.isCapitalized(str)};exports.isNameAbbreviation=function(wordCount,words){if(words.length>0){if(wordCount<5&&words[0].length<6&&this.isCapitalized(words[0])){return true}var capitalized=words.filter(function(str){return/[A-Z]/.test(str.charAt(0))});return capitalized.length>=3}return false};exports.isNumber=function(str,dotPos){if(dotPos){str=str.slice(dotPos-1,dotPos+2)}return!isNaN(str)};exports.isPhoneNr=function(str){return str.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/)};exports.isURL=function(str){return str.match(/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/\/=]*)/)};exports.isConcatenated=function(word){var i=0;if((i=word.indexOf("."))>-1||(i=word.indexOf("!"))>-1||(i=word.indexOf("?"))>-1){var c=word.charAt(i+1);if(c.match(/[a-zA-Z].*/)){return[word.slice(0,i),word.slice(i+1)]}}return false};exports.isBoundaryChar=function(word){return word==="."||word==="!"||word==="?"}},{}],2:[function(require,module,exports){exports.endsWithChar=function ends_with_char(word,c){if(c.length>1){return c.indexOf(word.slice(-1))>-1}return word.slice(-1)===c};exports.endsWith=function ends_with(word,end){return word.slice(word.length-end.length)===end}},{}],3:[function(require,module,exports){module.exports=function sanitizeHtml(text,opts){if(typeof text=="string"||text instanceof String){var $div=document.createElement("DIV");$div.innerHTML=text;text=($div.textContent||"").trim()}else if(typeof 
text==="object"&&text.textContent){text=(text.textContent||"").trim()}return text}},{}],4:[function(require,module,exports){"use strict";var sanitizeHtml=require("sanitize-html");var String=require("./String");var Match=require("./Match");var newline_placeholder=" @~@ ";var newline_placeholder_t=newline_placeholder.trim();exports.sentences=function(text,user_options){if(!text||typeof text==="undefined"||text.length===0){return[]}var options={newline_boundaries:false,html_boundaries:false,sanitize:false,allowed_tags:false,abbreviations:null};if(typeof user_options==="boolean"){options.newline_boundaries=true}else{for(var k in user_options){options[k]=user_options[k]}}Match.setAbbreviations(options.abbreviations);if(options.newline_boundaries){text=text.replace(/\n+|[-#=_+*]{4,}/g,newline_placeholder)}if(options.html_boundaries){text=text.replace(/(<br\s*\/?>|<\/[p|div|ul|ol]>)/g,"$1"+newline_placeholder)}if(options.sanitize||options.allowed_tags){if(!options.allowed_tags){options.allowed_tags=[""]}text=sanitizeHtml(text,{allowedTags:options.allowed_tags})}var words=text.match(/\S+/g);var wordCount=0;var index=0;var temp=[];var sentences=[];var current=[];for(var 
i=0,L=words.length;i<L;i++){wordCount++;current.push(words[i]);if(~words[i].indexOf(",")){wordCount=0}if(Match.isBoundaryChar(words[i])||String.endsWithChar(words[i],"?!")||words[i]===newline_placeholder_t){if((options.newline_boundaries||options.html_boundaries)&&words[i]===newline_placeholder_t){current.pop()}sentences.push(current);wordCount=0;current=[];continue}if(String.endsWithChar(words[i],'"')||String.endsWithChar(words[i],"”")){words[i]=words[i].slice(0,-1)}if(String.endsWithChar(words[i],".")){if(i+1<L){if(words[i].length===2&&isNaN(words[i].charAt(0))){continue}if(Match.isCommonAbbreviation(words[i])){continue}if(Match.isSentenceStarter(words[i+1])){if(Match.isTimeAbbreviation(words[i],words[i+1])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,6))){continue}if(Match.isNumber(words[i+1])){if(Match.isCustomAbbreviation(words[i])){continue}}}else{if(String.endsWith(words[i],"..")){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,5))){continue}}}sentences.push(current);current=[];wordCount=0;continue}if((index=words[i].indexOf("."))>-1){if(Match.isNumber(words[i],index)){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isURL(words[i])||Match.isPhoneNr(words[i])){continue}}if(temp=Match.isConcatenated(words[i])){current.pop();current.push(temp[0]);sentences.push(current);current=[];wordCount=0;current.push(temp[1])}}if(current.length){sentences.push(current)}var result=[];var sentence="";sentences=sentences.filter(function(s){return s.length>0});for(var i=0;i<sentences.length;i++){sentence=sentences[i].join(" ");if(sentences[i].length===1&&sentences[i][0].length<4&&sentences[i][0].indexOf(".")>-1){if(sentences[i+1]&&sentences[i+1][0].indexOf(".")<0){sentence+=" "+sentences[i+1].join(" ");i++}}result.push(sentence)}return result}},{"./Match":1,"./String":2,"sanitize-html":3}]},{},[4])(4)});

@@ -1,5 +0,23 @@

var abbreviations = [
"ie",
var abbreviations;
var englishAbbreviations = [
"al",
"adj",
"assn",
"Ave",
"BSc", "MSc",
"Cell",
"Ch",
"Co",
"cc",
"Corp",
"Dem",
"Dept",
"ed",
"eg",
"Eq",
"Eqs",
"est",
"est",
"etc",
"Ex",
"ext", // + number?

@@ -10,47 +28,47 @@ "Fig",

"figs",
"et al",
"Co",
"Corp",
"Ave",
"i.e",
"ie",
"Inc",
"Ex",
"Viz",
"vs",
"Vs",
"repr",
"Rep",
"Dem",
"trans",
"Vol",
"inc",
"Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec",
"jr",
"mi",
"Miss", "Mrs", "Mr", "Ms",
"Mol",
"mt",
"mts",
"no",
"Nos",
"PhD", "MD", "BA", "MA", "MM",
"pl",
"pop",
"pp",
"rev",
"est",
"Prof", "Dr",
"pt",
"Ref",
"Refs",
"Eq",
"Eqs",
"Ch",
"Rep",
"repr",
"rev",
"Sec",
"Secs",
"mi",
"Dept",
"Sgt", "Col", "Gen", "Rep", "Sen",'Gov', "Lt", "Maj", "Capt","St",
"Sr", "sr", "Jr", "jr", "Rev",
"Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat",
"trans",
"Univ",
"Nos",
"No",
"Mol",
"Cell",
"Viz",
"Vol",
"vs",
"v",
];
"Miss", "Mrs", "Mr", "Ms",
"Prof", "Dr",
"Sgt", "Col", "Gen", "Rep", "Sen",'Gov', "Lt", "Maj", "Capt","St",
exports.setAbbreviations = function(abbr) {
if(abbr){
abbreviations = abbr;
} else {
abbreviations = englishAbbreviations;
}
}
"Sr", "Jr", "jr", "Rev",
"PhD", "MD", "BA", "MA", "MM",
"BSc", "MSc",
"Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec",
"Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat"
];
exports.isCapitalized = function(str) {

@@ -57,0 +75,0 @@ return /^[A-Z][a-z].*/.test(str) || this.isNumber(str);

@@ -23,3 +23,4 @@ /*jshint node:true, laxcomma:true */

"sanitize" : false,
"allowed_tags" : false
"allowed_tags" : false,
"abbreviations" : null
};

@@ -38,2 +39,4 @@

Match.setAbbreviations(options.abbreviations);
if (options.newline_boundaries) {

@@ -40,0 +43,0 @@ text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);

{
"name": "sbd",
"version": "1.0.6",
"version": "1.0.8",
"description": "Split text into sentences with Sentence Boundary Detection (SBD).",

@@ -28,3 +28,3 @@ "main": "lib/tokenizer.js",

"devDependencies": {
"mocha": "1.7.x"
"mocha": "3.0.x"
},

@@ -31,0 +31,0 @@ "dependencies": {

@@ -41,3 +41,4 @@ Sentence Boundary Detection (SBD)

"sanitize" : false,
"allowed_tags" : false
"allowed_tags" : false,
"abbreviations" : null
};

@@ -50,2 +51,3 @@ ```

* `allowed_tags`: HTML is sanitized with the [sanitize-html](https://github.com/punkave/sanitize-html) library. Pass the list of allowed tags here; it is forwarded as that library's `allowedTags` option.
* `abbreviations`: A list of abbreviations that replaces the built-in English list, for use with other languages. Write each abbreviation without dots (e.g. `"np"`, not `"np."`).

@@ -59,1 +61,10 @@

If you feel something is missing, you can open an issue stating the problematic sentence and the desired result. If the code is unclear, give me a @mention. Pull requests are welcome.
## Building the (minified) scripts
```
npm install -g browserify
npm run-script build
```

@@ -45,2 +45,42 @@ /*jshint node:true, laxcomma:true */

});
});
// "et al." in the middle of a citation must not end the sentence.
describe('Skip two worded abbreviations', function () {
    var text = "Claims 1–6 and 15–26 are rejected under pre-AIA 35 USC § 103(a) as being unpatentable over Chalana et al. (US 2012/0179503) in view of Oh (US 2013/0013993).";
    var result = tokenizer.sentences(text);

    it("should get 1 sentence", function () {
        assert.equal(result.length, 1);
    });
});
// "Et al." at the start of a sentence and "etc."/"et al." inside one:
// only the full stop after "others" separates the two sentences.
describe('Skip two worded abbreviations', function () {
    var entry = "Et al. is an abbreviation of the Latin loanphrase et alii, meaning and others. It is similar to etc. (short for et cetera, meaning and the rest), but whereas etc. applies to things, et al. applies to people.";
    var sentences = tokenizer.sentences(entry);

    it("should get 2 sentences", function () {
        assert.equal(sentences.length, 2);
    });
});
// The `abbreviations` option swaps the abbreviation list for a single call.
// The Polish sample contains "np." and "pkt.", which the English list does
// not know, so the default tokenizer splits after each of them.
describe('Use other languages', function () {
    var entry = "Trzeba tu coś napisać, np. fragment odnoszący się do pkt. 3 wcześniejszego tekstu.";
    var sentencesEN = tokenizer.sentences(entry);
    var sentencesPL = tokenizer.sentences(entry, {abbreviations: ["np", "pkt"]});

    it("should get 3 sentences with the default abbreviations", function () {
        assert.equal(sentencesEN.length, 3);
    });

    it("should get 1 sentence", function () {
        assert.equal(sentencesPL.length, 1);
    });

    it("should not permanently override abbreviations", function () {
        // Runs after the custom-list call above; defaults must be back.
        var sentences = tokenizer.sentences(entry);
        assert.equal(sentences.length, 3);
    });
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc