sbd - npm Package Compare versions

Comparing version 1.0.1 to 1.0.2

lib/tokenizer.js

		@@ -13,13 +13,22 @@ /*jshint node:true, laxcomma:true */
		// Split the entry into sentences.
		exports.sentences = function(text, newline_boundary) {
		exports.sentences = function(text, options) {
		if (text.length === 0)
		return [];

		text = sanitizeHtml(text, { "allowedTags" : [''] });

		/** Preprocessing */
		if (typeof newline_boundary === 'undefined') {
		/** Options processing */
		var newline_boundary;
		var do_sanitize = true;
		if (typeof options === 'undefined') {
		newline_boundary = false;
		}
		else if (typeof options === 'object') {
		newline_boundary = options.newline_boundary || false;
		do_sanitize = typeof options.sanitize === 'undefined' ? true : options.sanitize;
		}
		else {
		newline_boundary = options;
		}

		text = do_sanitize ? sanitizeHtml(text, { "allowedTags" : [''] }) : text;

		if (newline_boundary) {
		@@ -26,0 +35,0 @@ text = text.replace(/\n+|[-#=_+*]{4,}/g, newline_placeholder);

package.json

		{
		"name": "sbd",
		"version": "1.0.1",
		"version": "1.0.2",
		"description": "Split text into sentences with Sentence Boundary Detection (SBD).",
		@@ -5,0 +5,0 @@ "main": "lib/tokenizer.js",

README.md

		@@ -66,2 +66,10 @@ Sentence Boundary Detection (SBD)

		The second argument can also be a configuration object that supports the following values:

		* `newline_boundary`: the same as specifying the second argument as a boolean.
		* `sanitize`: set this to `false` in order to disable automatic HTML sanitization. While automatic
		sanitization has to remain the default for backwards compatibility purposes, unless you are
		specifically providing `sbd` with content you know to contain HTML, it is recommended to switch
		this off, as it can mangle your content.

		## Future work
		@@ -68,0 +76,0 @@

test/html.js

		@@ -19,2 +19,15 @@ /*jshint node:true, laxcomma:true */

		});
		describe('Non-markup is not interfered with', function () {
		var entry = "We find that a < b works. But in turn, c > x.";
		var sentences = tokenizer.sentences(entry, { sanitize: false });

		it("should get 2 sentences", function () {
		assert.equal(sentences.length, 2);
		});
		it("should not be escaped", function () {
		assert(!/&lt;/.test(sentences[0]));
		assert(!/&gt;/.test(sentences[1]));
		});
		});

		});

Improved metrics