New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

sbd

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sbd - npm Package Compare versions

Comparing version 1.0.4 to 1.0.5

58

dist/sbd.js

@@ -1,2 +0,2 @@

!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.tokenizer=e()}}(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tokenizer = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){

@@ -203,29 +203,51 @@ var abbreviations = [

// Split the entry into sentences.
exports.sentences = function(text, newline_boundary) {
if (text.length === 0)
exports.sentences = function(text, user_options) {
if (!text || typeof text === "undefined" || text.length === 0) {
return [];
}
text = sanitizeHtml(text, { "allowedTags" : [''] });
var options = {
"newline_boundaries" : false,
"html_boundaries" : false,
"sanitize" : false,
"allowed_tags" : false
};
/** Preprocessing */
if (typeof newline_boundary === 'undefined') {
newline_boundary = false;
if (typeof user_options === "boolean") {
// Deprecated quick option
options.newline_boundaries = true;
}
else {
// Extend options
for (var k in user_options) {
options[k] = user_options[k];
}
}
if (newline_boundary) {
if (options.newline_boundaries) {
text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);
}
var index = 0;
var temp = [];
if (options.html_boundaries) {
text = text.replace(/(<br \/>)/g, "$1" + newline_placeholder);
}
if (options.sanitize || options.allowed_tags) {
if (! options.allowed_tags) {
options.allowed_tags = [""];
}
text = sanitizeHtml(text, { "allowedTags" : options.allowed_tags });
}
// Split the text into words
var words = text.match(/\S+/g); // see http://blog.tompawlak.org/split-string-into-tokens-javascript
var wordCount = 0;
var index = 0;
var temp = [];
var sentences = [];
var current = [];
var wordCount = 0;
for (var i=0, L=words.length; i < L; i++) {

@@ -237,3 +259,3 @@ wordCount++;

// Sub-sentences (Bijzin?), reset counter
// Sub-sentences, reset counter
if (~words[i].indexOf(',')) {

@@ -247,3 +269,3 @@ wordCount = 0;

{
if (newline_boundary && words[i] === newline_placeholder_t) {
if ((options.newline_boundaries || options.html_boundaries) && words[i] === newline_placeholder_t) {
current.pop();

@@ -260,2 +282,8 @@ }

if (String.endsWithChar(words[i], "\"") || String.endsWithChar(words[i], "”")) {
// endQuote = words[i].slice(-1);
words[i] = words[i].slice(0, -1);
}
// A dot might indicate the end sentences

@@ -262,0 +290,0 @@ // Exception: The next sentence starts with a word (non abbreviation)

@@ -1,1 +0,1 @@

!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.tokenizer=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){var abbreviations=["ie","eg","ext","Fig","fig","Figs","figs","et al","Co","Corp","Ave","Inc","Ex","Viz","vs","Vs","repr","Rep","Dem","trans","Vol","pp","rev","est","Ref","Refs","Eq","Eqs","Ch","Sec","Secs","mi","Dept","Univ","Nos","No","Mol","Cell","Miss","Mrs","Mr","Ms","Prof","Dr","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","Jr","jr","Rev","PhD","MD","BA","MA","MM","BSc","MSc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat"];exports.isCapitalized=function(str){return/^[A-Z][a-z].*/.test(str)||this.isNumber(str)};exports.isSentenceStarter=function(str){return this.isCapitalized(str)||/``|"|'/.test(str.substring(0,2))};exports.isCommonAbbreviation=function(str){return~abbreviations.indexOf(str.replace(/\W+/g,""))};exports.isTimeAbbreviation=function(word,next){if(word==="a.m."||word==="p.m."){var tmp=next.replace(/\W+/g,"").slice(-3).toLowerCase();if(tmp==="day"){return true}}return false};exports.isDottedAbbreviation=function(word){var matches=word.replace(/[\(\)\[\]\{\}]/g,"").match(/(.\.)*/);return 
matches&&matches[0].length>0};exports.isCustomAbbreviation=function(str){if(str.length<=3)return true;return this.isCapitalized(str)};exports.isNameAbbreviation=function(wordCount,words){if(words.length>0){if(wordCount<5&&words[0].length<6&&this.isCapitalized(words[0])){return true}var capitalized=words.filter(function(str){return/[A-Z]/.test(str.charAt(0))});return capitalized.length>=3}return false};exports.isNumber=function(str,dotPos){if(dotPos){str=str.slice(dotPos-1,dotPos+2)}return!isNaN(str)};exports.isPhoneNr=function(str){return str.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/)};exports.isURL=function(str){return str.match(/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/)};exports.isConcatenated=function(word){var i=0;if((i=word.indexOf("."))>-1||(i=word.indexOf("!"))>-1||(i=word.indexOf("?"))>-1){var c=word.charAt(i+1);if(c.match(/[a-zA-Z].*/)){return[word.slice(0,i),word.slice(i+1)]}}return false};exports.isBoundaryChar=function(word){return word==="."||word==="!"||word==="?"}},{}],2:[function(require,module,exports){exports.endsWithChar=function ends_with_char(word,c){if(c.length>1){return c.indexOf(word.slice(-1))>-1}return word.slice(-1)===c};exports.endsWith=function ends_with(word,end){return word.slice(word.length-end.length)===end}},{}],3:[function(require,module,exports){module.exports=function sanitizeHtml(text,opts){if(typeof text=="string"||text instanceof String){var $div=document.createElement("DIV");$div.innerHTML=text;text=($div.textContent||"").trim()}else if(typeof text==="object"&&text.textContent){text=(text.textContent||"").trim()}return text}},{}],4:[function(require,module,exports){"use strict";var sanitizeHtml=require("sanitize-html");var String=require("./String");var 
Match=require("./Match");var newline_placeholder=" @~@ ";var newline_placeholder_t=newline_placeholder.trim();exports.sentences=function(text,newline_boundary){if(text.length===0)return[];text=sanitizeHtml(text,{allowedTags:[""]});if(typeof newline_boundary==="undefined"){newline_boundary=false}if(newline_boundary){text=text.replace(/\n+|[-#=_+*]{4,}/g,newline_placeholder)}var index=0;var temp=[];var words=text.match(/\S+/g);var sentences=[];var current=[];var wordCount=0;for(var i=0,L=words.length;i<L;i++){wordCount++;current.push(words[i]);if(~words[i].indexOf(",")){wordCount=0}if(Match.isBoundaryChar(words[i])||String.endsWithChar(words[i],"?!")||words[i]===newline_placeholder_t){if(newline_boundary&&words[i]===newline_placeholder_t){current.pop()}sentences.push(current);wordCount=0;current=[];continue}if(String.endsWithChar(words[i],".")){if(i+1<L){if(words[i].length===2&&isNaN(words[i].charAt(0))){continue}if(Match.isCommonAbbreviation(words[i])){continue}if(Match.isSentenceStarter(words[i+1])){if(Match.isTimeAbbreviation(words[i],words[i+1])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,6))){continue}if(Match.isNumber(words[i+1])&&Match.isCustomAbbreviation(words[i])){continue}}else{if(String.endsWith(words[i],"..")){continue}if(Match.isDottedAbbreviation(words[i])||Match.isCustomAbbreviation(words[i])){continue}}}sentences.push(current);current=[];wordCount=0;continue}if((index=words[i].indexOf("."))>-1){if(Match.isNumber(words[i],index)){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isURL(words[i])||Match.isPhoneNr(words[i])){continue}}if(temp=Match.isConcatenated(words[i])){current.pop();current.push(temp[0]);sentences.push(current);current=[];wordCount=0;current.push(temp[1])}}if(current.length)sentences.push(current);var result=[];var sentence="";sentences=sentences.filter(function(s){return s.length>0});for(i=0;i<sentences.length;i++){sentence=sentences[i].join(" 
");if(sentences[i].length===1&&sentences[i][0].length<4&&sentences[i][0].indexOf(".")>-1){if(sentences[i+1]&&sentences[i+1][0].indexOf(".")<0){sentence+=" "+sentences[i+1].join(" ");i++}}result.push(sentence)}return result}},{"./Match":1,"./String":2,"sanitize-html":3}]},{},[4])(4)});
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tokenizer=f()}})(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){var abbreviations=["ie","eg","ext","Fig","fig","Figs","figs","et al","Co","Corp","Ave","Inc","Ex","Viz","vs","Vs","repr","Rep","Dem","trans","Vol","pp","rev","est","Ref","Refs","Eq","Eqs","Ch","Sec","Secs","mi","Dept","Univ","Nos","No","Mol","Cell","Miss","Mrs","Mr","Ms","Prof","Dr","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","Jr","jr","Rev","PhD","MD","BA","MA","MM","BSc","MSc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat"];exports.isCapitalized=function(str){return/^[A-Z][a-z].*/.test(str)||this.isNumber(str)};exports.isSentenceStarter=function(str){return this.isCapitalized(str)||/``|"|'/.test(str.substring(0,2))};exports.isCommonAbbreviation=function(str){return~abbreviations.indexOf(str.replace(/\W+/g,""))};exports.isTimeAbbreviation=function(word,next){if(word==="a.m."||word==="p.m."){var tmp=next.replace(/\W+/g,"").slice(-3).toLowerCase();if(tmp==="day"){return true}}return false};exports.isDottedAbbreviation=function(word){var matches=word.replace(/[\(\)\[\]\{\}]/g,"").match(/(.\.)*/);return 
matches&&matches[0].length>0};exports.isCustomAbbreviation=function(str){if(str.length<=3)return true;return this.isCapitalized(str)};exports.isNameAbbreviation=function(wordCount,words){if(words.length>0){if(wordCount<5&&words[0].length<6&&this.isCapitalized(words[0])){return true}var capitalized=words.filter(function(str){return/[A-Z]/.test(str.charAt(0))});return capitalized.length>=3}return false};exports.isNumber=function(str,dotPos){if(dotPos){str=str.slice(dotPos-1,dotPos+2)}return!isNaN(str)};exports.isPhoneNr=function(str){return str.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/)};exports.isURL=function(str){return str.match(/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/\/=]*)/)};exports.isConcatenated=function(word){var i=0;if((i=word.indexOf("."))>-1||(i=word.indexOf("!"))>-1||(i=word.indexOf("?"))>-1){var c=word.charAt(i+1);if(c.match(/[a-zA-Z].*/)){return[word.slice(0,i),word.slice(i+1)]}}return false};exports.isBoundaryChar=function(word){return word==="."||word==="!"||word==="?"}},{}],2:[function(require,module,exports){exports.endsWithChar=function ends_with_char(word,c){if(c.length>1){return c.indexOf(word.slice(-1))>-1}return word.slice(-1)===c};exports.endsWith=function ends_with(word,end){return word.slice(word.length-end.length)===end}},{}],3:[function(require,module,exports){module.exports=function sanitizeHtml(text,opts){if(typeof text=="string"||text instanceof String){var $div=document.createElement("DIV");$div.innerHTML=text;text=($div.textContent||"").trim()}else if(typeof text==="object"&&text.textContent){text=(text.textContent||"").trim()}return text}},{}],4:[function(require,module,exports){"use strict";var sanitizeHtml=require("sanitize-html");var String=require("./String");var 
Match=require("./Match");var newline_placeholder=" @~@ ";var newline_placeholder_t=newline_placeholder.trim();exports.sentences=function(text,user_options){if(!text||typeof text==="undefined"||text.length===0){return[]}var options={newline_boundaries:false,html_boundaries:false,sanitize:false,allowed_tags:false};if(typeof user_options==="boolean"){options.newline_boundaries=true}else{for(var k in user_options){options[k]=user_options[k]}}if(options.newline_boundaries){text=text.replace(/\n+|[-#=_+*]{4,}/g,newline_placeholder)}if(options.html_boundaries){text=text.replace(/(<br \/>)/g,"$1"+newline_placeholder)}if(options.sanitize||options.allowed_tags){if(!options.allowed_tags){options.allowed_tags=[""]}text=sanitizeHtml(text,{allowedTags:options.allowed_tags})}var words=text.match(/\S+/g);var wordCount=0;var index=0;var temp=[];var sentences=[];var current=[];for(var i=0,L=words.length;i<L;i++){wordCount++;current.push(words[i]);if(~words[i].indexOf(",")){wordCount=0}if(Match.isBoundaryChar(words[i])||String.endsWithChar(words[i],"?!")||words[i]===newline_placeholder_t){if((options.newline_boundaries||options.html_boundaries)&&words[i]===newline_placeholder_t){current.pop()}sentences.push(current);wordCount=0;current=[];continue}if(String.endsWithChar(words[i],'"')||String.endsWithChar(words[i],"”")){words[i]=words[i].slice(0,-1)}if(String.endsWithChar(words[i],".")){if(i+1<L){if(words[i].length===2&&isNaN(words[i].charAt(0))){continue}if(Match.isCommonAbbreviation(words[i])){continue}if(Match.isSentenceStarter(words[i+1])){if(Match.isTimeAbbreviation(words[i],words[i+1])){continue}if(Match.isNameAbbreviation(wordCount,words.slice(i,6))){continue}if(Match.isNumber(words[i+1])&&Match.isCustomAbbreviation(words[i])){continue}}else{if(String.endsWith(words[i],"..")){continue}if(Match.isDottedAbbreviation(words[i])||Match.isCustomAbbreviation(words[i])){continue}}}sentences.push(current);current=[];wordCount=0;continue}if((index=words[i].indexOf("."))>-1){if(Match.isNum
ber(words[i],index)){continue}if(Match.isDottedAbbreviation(words[i])){continue}if(Match.isURL(words[i])||Match.isPhoneNr(words[i])){continue}}if(temp=Match.isConcatenated(words[i])){current.pop();current.push(temp[0]);sentences.push(current);current=[];wordCount=0;current.push(temp[1])}}if(current.length)sentences.push(current);var result=[];var sentence="";sentences=sentences.filter(function(s){return s.length>0});for(i=0;i<sentences.length;i++){sentence=sentences[i].join(" ");if(sentences[i].length===1&&sentences[i][0].length<4&&sentences[i][0].indexOf(".")>-1){if(sentences[i+1]&&sentences[i+1][0].indexOf(".")<0){sentence+=" "+sentences[i+1].join(" ");i++}}result.push(sentence)}return result}},{"./Match":1,"./String":2,"sanitize-html":3}]},{},[4])(4)});

@@ -12,38 +12,51 @@ /*jshint node:true, laxcomma:true */

// Split the entry into sentences.
exports.sentences = function(text, options) {
if (!text || typeof text === "undefined" || text.length === 0)
exports.sentences = function(text, user_options) {
if (!text || typeof text === "undefined" || text.length === 0) {
return [];
}
/** Options processing */
var newline_boundary;
var do_sanitize = true;
if (typeof options === 'undefined') {
newline_boundary = false;
var options = {
"newline_boundaries" : false,
"html_boundaries" : false,
"sanitize" : false,
"allowed_tags" : false
};
if (typeof user_options === "boolean") {
// Deprecated quick option
options.newline_boundaries = true;
}
else if (typeof options === 'object') {
newline_boundary = options.newline_boundary || false;
do_sanitize = typeof options.sanitize === 'undefined' ? true : options.sanitize;
}
else {
newline_boundary = options;
// Extend options
for (var k in user_options) {
options[k] = user_options[k];
}
}
text = do_sanitize ? sanitizeHtml(text, { "allowedTags" : [''] }) : text;
if (newline_boundary) {
if (options.newline_boundaries) {
text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);
}
var index = 0;
var temp = [];
if (options.html_boundaries) {
text = text.replace(/(<br \/>)/g, "$1" + newline_placeholder);
}
if (options.sanitize || options.allowed_tags) {
if (! options.allowed_tags) {
options.allowed_tags = [""];
}
text = sanitizeHtml(text, { "allowedTags" : options.allowed_tags });
}
// Split the text into words
var words = text.match(/\S+/g); // see http://blog.tompawlak.org/split-string-into-tokens-javascript
var wordCount = 0;
var index = 0;
var temp = [];
var sentences = [];
var current = [];
var wordCount = 0;
for (var i=0, L=words.length; i < L; i++) {

@@ -64,3 +77,3 @@ wordCount++;

{
if (newline_boundary && words[i] === newline_placeholder_t) {
if ((options.newline_boundaries || options.html_boundaries) && words[i] === newline_placeholder_t) {
current.pop();

@@ -67,0 +80,0 @@ }

{
"name": "sbd",
"version": "1.0.4",
"version": "1.0.5",
"description": "Split text into sentences with Sentence Boundary Detection (SBD).",
"main": "lib/tokenizer.js",
"scripts": {
"test": "mocha -R spec"
"test": "mocha -R spec",
"build:js": "browserify lib/tokenizer.js --standalone tokenizer > dist/sbd.js",
"build:minify": "uglifyjs dist/sbd.js > dist/sbd.min.js",
"build": "npm run build:js && npm run build:minify"
},

@@ -9,0 +12,0 @@ "homepage": "http://github.com/Tessmore/sbd",

@@ -25,3 +25,3 @@ Sentence Boundary Detection (SBD)

var text = "On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration.";
var sentences = tokenizer.sentences(text);
var sentences = tokenizer.sentences(text, optional_options);

@@ -35,41 +35,24 @@ // [

The second argument can also be a configuration object, that can support the following values:
#### Optional options
* `newline_boundary`: the same as specifying the second argument as a boolean.
* `sanitize`: set this to `false` in order to disable automatic HTML sanitization. While automatic
sanitization has to remain the default for backwards compatibility purposes, unless you are
specifically providing `sbd` with content you know to contain HTML it is recommended to switch
this off as it can mangle your content.
```javascript
var options = {
"newline_boundary": true,
"sanitize": true
```
var options = {
"newline_boundaries" : false,
"html_boundaries" : false,
"sanitize" : false,
"allowed_tags" : false
};
var sentences = tokenizer.sentences(textFromFile, options);
```
textFromFile = "Title of project: Hello World
Author: Kenny
* `newline_boundaries`: force a sentence split at newlines (and at runs of four or more `-`, `#`, `=`, `_`, `+` or `*` characters)
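* `html_boundaries`: force a sentence split at specific HTML tags (br, and closing p, div, ul, ol). Note: the implementation shown in this diff only inserts a boundary after a literal `<br />`.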
* `sanitize`: strip HTML from the text before splitting; enable this if you neither expect nor want HTML in your input.
* `allowed_tags`: To sanitize HTML, the library [sanitize-html](https://github.com/punkave/sanitize-html) is used. You can pass its allowed tags option here.
May, 2012
Lorem ipsum dolor sit amet. Consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco (laboris nisi?) ut aliquip ex ea commodo consequat.
";
// Gives
// [
// 'Title of project: Hello World',
// 'Author: Kenny',
// 'May, 2012',
// 'Lorem ipsum dolor sit amet.',
// 'Consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.',
// 'Ut enim ad minim veniam, quis nostrud exercitation ullamco (laboris nisi?) ut aliquip ex ea commodo consequat.'
// ]
```
## Contributing
You can run unit tests with `npm test`.
You can run unit tests with `npm test`.
If you feel something is missing, you can open an issue stating the problem sentence and the desired result. If the code is unclear, give me an @mention. Pull requests are welcome.

@@ -12,3 +12,3 @@ /*jshint node:true, laxcomma:true */

var entry = "<p>Hello this is my first sentence.</p> <br><br>There is also a second down the page.";
var sentences = tokenizer.sentences(entry);
var sentences = tokenizer.sentences(entry, { "sanitize": true });

@@ -22,3 +22,3 @@ it("should get 2 sentences", function () {

var entry = "We find that a < b works. But in turn, c > x.";
var sentences = tokenizer.sentences(entry, { sanitize: false });
var sentences = tokenizer.sentences(entry, { "sanitize": false });

@@ -34,2 +34,12 @@ it("should get 2 sentences", function () {

describe('Closing html boundaries (br, p, div) split sentences.', function () {
var entry = "What the Experts Say <br />In certain circumstances, “working for a manager who’s task-oriented and has a high need for achievement can be motivating,” says Linda Hill";
var sentences = tokenizer.sentences(entry, { sanitize: false, "html_boundaries": true });
it("should get 2 sentences", function () {
assert.equal(sentences.length, 2);
});
});
});

@@ -12,3 +12,3 @@ /*jshint node:true, laxcomma:true */

var entry = "1. The item\n2. Another item";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -22,3 +22,3 @@ it("should get 2 sentences", function () {

var entry = "a. The item\nab. Another item\n(1.) Third item";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -32,3 +32,3 @@ it("should get 3 sentences", function () {

var entry = "a. The item\nzz.\nab.\ncd. Hello";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -35,0 +35,0 @@ it("should get 4 sentences", function () {

@@ -120,3 +120,3 @@ /*jshint node:true, laxcomma:true */

var entry = "Search on http://google.com\n\nThen send me an email: gg@gggg.kk";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -131,3 +131,3 @@ it("should get 2 sentences", function () {

var entry = "“If there’s no balance and your boss doesn’t provide support and work that’s meaningful, your chances of burning out are great.” What bothers most people in situations like these is “the lack of boundaries,” says Nancy Rothbard, the David Pottruck Professor of Management at the University of Pennsylvania’s Wharton School.";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -141,3 +141,3 @@ it("should get 2 sentences", function () {

var entry = "“If there’s no balance! And your boss doesn’t provide support and work that’s meaningful, your chances of burning out are great.” What bothers most people in situations like these is “the lack of boundaries,” says Nancy Rothbard, the David Pottruck Professor of Management at the University of Pennsylvania’s Wharton School.";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -151,3 +151,3 @@ it("should get 3 sentences", function () {

var entry = "FAMILIY HISTORY ========================================== Nothing interesting";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -154,0 +154,0 @@ it("should get 2 sentences", function () {

@@ -57,3 +57,3 @@ /*jshint node:true, laxcomma:true */

var entry = "The humble bundle sale\r\nDate: Monday-Fri starting 2015-01-01\nSales starting at ¤2,50";
var sentences = tokenizer.sentences(entry, true);
var sentences = tokenizer.sentences(entry, { "newline_boundaries": true });

@@ -60,0 +60,0 @@ it("should get 3 sentences", function () {

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc