New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign inDemoInstall
Socket

sbd

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sbd - npm Package Compare versions

Comparing version 1.0.1 to 1.0.2

19

lib/tokenizer.js

@@ -13,13 +13,22 @@ /*jshint node:true, laxcomma:true */

// Split the entry into sentences.
exports.sentences = function(text, newline_boundary) {
exports.sentences = function(text, options) {
if (text.length === 0)
return [];
text = sanitizeHtml(text, { "allowedTags" : [''] });
/** Preprocessing */
if (typeof newline_boundary === 'undefined') {
/** Options processing */
var newline_boundary;
var do_sanitize = true;
if (typeof options === 'undefined') {
newline_boundary = false;
}
else if (typeof options === 'object') {
newline_boundary = options.newline_boundary || false;
do_sanitize = typeof options.sanitize === 'undefined' ? true : options.sanitize;
}
else {
newline_boundary = options;
}
text = do_sanitize ? sanitizeHtml(text, { "allowedTags" : [''] }) : text;
if (newline_boundary) {

@@ -26,0 +35,0 @@ text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);

{
"name": "sbd",
"version": "1.0.1",
"version": "1.0.2",
"description": "Split text into sentences with Sentence Boundary Detection (SBD).",

@@ -5,0 +5,0 @@ "main": "lib/tokenizer.js",

@@ -66,2 +66,10 @@ Sentence Boundary Detection (SBD)

The second argument can also be a configuration object that supports the following options:
* `newline_boundary`: the same as specifying the second argument as a boolean.
* `sanitize`: set this to `false` to disable automatic HTML sanitization. Automatic sanitization
remains the default for backwards compatibility, but unless you are specifically providing `sbd`
with content you know to contain HTML, it is recommended to switch it off, as it can mangle
your content (for example, by escaping a bare `<` as `&lt;`).
## Future work

@@ -68,0 +76,0 @@

@@ -19,2 +19,15 @@ /*jshint node:true, laxcomma:true */

});
// With sanitization switched off, bare comparison operators in plain text
// must come through as-is instead of being turned into HTML entities.
describe('Non-markup is not interfered with', function () {
    var input = "We find that a < b works. But in turn, c > x.";
    var opts = { sanitize: false };
    var result = tokenizer.sentences(input, opts);

    it("should get 2 sentences", function () {
        assert.equal(result.length, 2);
    });

    it("should not be escaped", function () {
        // Patterns for the entity forms of '<' and '>'.
        var escapedLt = /&lt;/;
        var escapedGt = /&gt;/;
        assert(!escapedLt.test(result[0]));
        assert(!escapedGt.test(result[1]));
    });
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc