Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

html-janitor

Package Overview
Dependencies
Maintainers
2
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html-janitor - npm Package Compare versions

Comparing version 2.0.2 to 2.0.4

examples/example.html

17

package.json
{
"name": "html-janitor",
"version": "2.0.2",
"version": "2.0.4",
"main": "src/html-janitor.js",
"scripts": {
"test": "karma start --single-run"
"test": "jest"
},

@@ -13,15 +13,4 @@ "repository": {

"devDependencies": {
"karma": "~0.10.2",
"karma-chrome-launcher": "~0.1.0",
"karma-jasmine": "~0.1.3",
"karma-phantomjs-launcher": "~0.1.0",
"karma-firefox-launcher": "~0.1",
"karma-requirejs": "~0.2.0",
"plumber": "~0.4.0",
"plumber-all": "~0.4.0",
"plumber-glob": "~0.4.0",
"plumber-requirejs": "~0.4.0",
"plumber-uglifyjs": "~0.4.0",
"plumber-write": "~0.4.0"
"jest": "^23.6.0"
}
}

@@ -7,4 +7,11 @@ # html-janitor

![](https://circleci.com/gh/guardian/html-janitor.png?circle-token=bd24300ee650966837a73bfe03386828f0192c06)
![](https://circleci.com/gh/guardian/html-janitor.png?circle-token=bd24300ee650966837a73bfe03386828f0192c06) <img src="https://david-dm.org/guardian/html-janitor.svg">
## XSS Note
This library has not been extensively tested. In particular, versions prior to 2.0.3 are vulnerable to XSS attacks.
See [here](https://hackerone.com/reports/308155) and [here](https://hackerone.com/reports/308158).
Please upgrade to 2.0.4 or above and consider building your own additional checks on user input.
## Usage

@@ -85,4 +92,2 @@

```
bower install html-janitor
# or
npm install html-janitor

@@ -89,0 +94,0 @@ ```

@@ -43,21 +43,18 @@ (function (root, factory) {

HTMLJanitor.prototype.clean = function (html) {
var sandbox = document.createElement('div');
sandbox.innerHTML = html;
const sandbox = document.implementation.createHTMLDocument();
const root = sandbox.createElement("div");
root.innerHTML = html;
this._sanitize(sandbox);
this._sanitize(sandbox, root);
return sandbox.innerHTML;
return root.innerHTML;
};
HTMLJanitor.prototype._sanitize = function (parentNode) {
var treeWalker = createTreeWalker(parentNode);
HTMLJanitor.prototype._sanitize = function (document, parentNode) {
var treeWalker = createTreeWalker(document, parentNode);
var node = treeWalker.firstChild();
if (!node) { return; }
do {
// Ignore nodes that have already been sanitized
if (node._sanitized) {
continue;
}
if (node.nodeType === Node.TEXT_NODE) {

@@ -73,3 +70,3 @@ // If this text node is just whitespace and the previous or next element

parentNode.removeChild(node);
this._sanitize(parentNode);
this._sanitize(document, parentNode);
break;

@@ -84,3 +81,3 @@ } else {

parentNode.removeChild(node);
this._sanitize(parentNode);
this._sanitize(document, parentNode);
break;

@@ -121,3 +118,3 @@ }

this._sanitize(parentNode);
this._sanitize(document, parentNode);
break;

@@ -138,10 +135,8 @@ }

// Sanitize children
this._sanitize(node);
this._sanitize(document, node);
// Mark node as sanitized so it's ignored in future runs
node._sanitized = true;
} while ((node = treeWalker.nextSibling()));
};
function createTreeWalker(node) {
function createTreeWalker(document, node) {
return document.createTreeWalker(node,

@@ -148,0 +143,0 @@ NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT,

@@ -1,269 +0,248 @@

define([ 'html-janitor' ], function (HTMLJanitor) {
const HTMLJanitor = require("../src/html-janitor");
describe('janitor', function () {
var janitor;
var config = {
tags: {
a: {
href: true
},
b: {},
i: {},
strong: {},
em: {},
sub: {},
sup: {},
strike: {},
const config = {
tags: {
a: {
href: true
},
b: {},
i: {},
strong: {},
em: {},
sub: {},
sup: {},
strike: {},
p: { foo: true, bar: 'baz' },
ul: {},
li: {},
small: true,
div: {},
figure: false,
u: function(el){
// Remove empty underline tags.
var shouldKeepEl = el.innerHTML !== '';
return shouldKeepEl;
},
img: {
height: function(value){
// Only allow if height is less than 10.
return parseInt(value) < 10;
},
width: function(value, el){
// Only allow if height also specified.
return el.hasAttribute('height');
}
},
blockquote: function(el) {
// If blockquote has class 'indent', also allow style.
if (el.classList.contains('indent')){
return { 'class': true, 'style': true };
} else {
return {};
}
}
p: { foo: true, bar: 'baz' },
ul: {},
li: {},
small: true,
div: {},
figure: false,
u: function(el){
// Remove empty underline tags.
var shouldKeepEl = el.innerHTML !== '';
return shouldKeepEl;
},
img: {
height: function(value){
// Only allow if height is less than 10.
return parseInt(value) < 10;
},
width: function(value, el){
// Only allow if height also specified.
return el.hasAttribute('height');
}
},
blockquote: function(el) {
// If blockquote has class 'indent', also allow style.
if (el.classList.contains('indent')){
return { 'class': true, 'style': true };
} else {
return {};
}
}
}
};
const janitor = new HTMLJanitor(config);
};
test('should clean attributes not in the whitelist', function () {
var p = document.createElement('p');
p.setAttribute('style', 'font-size: 16px;');
p.setAttribute('bar', 'not baz');
expect(janitor.clean(p.outerHTML)).toBe('<p></p>');
});
beforeEach(function () {
janitor = new HTMLJanitor(config);
});
test('should not clean attributes in the whitelist', function () {
var p = document.createElement('p');
p.setAttribute('foo', 'true');
p.setAttribute('bar', 'baz');
var cleanP = janitor.clean(p.outerHTML);
expect(cleanP).toMatch(/foo="true"/);
expect(cleanP).toMatch(/bar="baz"/);
});
it('should clean attributes not in the whitelist', function () {
var p = document.createElement('p');
p.setAttribute('style', 'font-size: 16px;');
p.setAttribute('bar', 'not baz');
expect(janitor.clean(p.outerHTML)).toBe('<p></p>');
});
test('should remove elements not in the whitelist', function () {
var aside = document.createElement('aside');
var p = document.createElement('p');
aside.appendChild(p);
expect(janitor.clean(aside.outerHTML)).toBe('<p></p>');
});
it('should not clean attributes in the whitelist', function () {
var p = document.createElement('p');
p.setAttribute('foo', 'true');
p.setAttribute('bar', 'baz');
var cleanP = janitor.clean(p.outerHTML);
expect(cleanP).toMatch(/foo="true"/);
expect(cleanP).toMatch(/bar="baz"/);
});
test('should not keep the inner text of a script element', function () {
var script = document.createElement('script');
script.innerText = 'window.alert(\'foo\');';
expect(janitor.clean(script.outerHTML)).toBe('');
});
it('should remove elements not in the whitelist', function () {
var aside = document.createElement('aside');
var p = document.createElement('p');
aside.appendChild(p);
expect(janitor.clean(aside.outerHTML)).toBe('<p></p>');
});
test('should not keep the inner text of a style element', function () {
var style = document.createElement('style');
style.innerText = '.foo {}';
expect(janitor.clean(style.outerHTML)).toBe('');
});
it('should not keep the inner text of a script element', function () {
var script = document.createElement('script');
script.innerText = 'window.alert(\'foo\');';
expect(janitor.clean(script.outerHTML)).toBe('');
});
test('should clean invalid markup', function () {
var b = document.createElement('b');
var p = document.createElement('p');
b.appendChild(p);
expect(janitor.clean(b.outerHTML)).toBe('<p></p>');
});
it('should not keep the inner text of a style element', function () {
var style = document.createElement('style');
style.innerText = '.foo {}';
expect(janitor.clean(style.outerHTML)).toBe('');
});
test('should clean paragraphs in lists', function () {
var ul = document.createElement('ul');
ul.innerHTML = '<li><p>Some text</p></li>';
expect(janitor.clean(ul.outerHTML)).toBe('<ul><li>Some text</li></ul>');
});
it('should clean invalid markup', function () {
var b = document.createElement('b');
var p = document.createElement('p');
b.appendChild(p);
expect(janitor.clean(b.outerHTML)).toBe('<p></p>');
});
test('should remove comments', function () {
var p = document.createElement('p');
p.innerHTML = 'Hello <b>world</b> <!-- a salutation -->!';
expect(janitor.clean(p.outerHTML)).toBe('<p>Hello <b>world</b> !</p>');
});
it('should clean paragraphs in lists', function () {
var ul = document.createElement('ul');
ul.innerHTML = '<li><p>Some text</p></li>';
expect(janitor.clean(ul.outerHTML)).toBe('<ul><li>Some text</li></ul>');
});
test('should remove text nodes in-between block elements', function () {
var html = '<p></p>\n<p></p>';
expect(janitor.clean(html)).toBe('<p></p><p></p>');
});
it('should remove comments', function () {
var p = document.createElement('p');
p.innerHTML = 'Hello <b>world</b> <!-- a salutation -->!';
expect(janitor.clean(p.outerHTML)).toBe('<p>Hello <b>world</b> !</p>');
});
test('should remove text nodes before block elements', function () {
var html = '\n<p></p>';
expect(janitor.clean(html)).toBe('<p></p>');
});
it('should remove text nodes in-between block elements', function () {
var html = '<p></p>\n<p></p>';
expect(janitor.clean(html)).toBe('<p></p><p></p>');
});
test('should remove text nodes after block elements', function () {
var html = '<p></p>\n';
expect(janitor.clean(html)).toBe('<p></p>');
});
it('should remove text nodes before block elements', function () {
var html = '\n<p></p>';
expect(janitor.clean(html)).toBe('<p></p>');
});
test('should remove nested span elements', function() {
var html ='<p><span>Hello <span>world</span></span></p>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
});
it('should remove text nodes after block elements', function () {
var html = '<p></p>\n';
expect(janitor.clean(html)).toBe('<p></p>');
});
it('should remove nested span elements', function() {
var html ='<p><span>Hello <span>world</span></span></p>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
});
test('should not allow nested block elements by default', function() {
var html = '<div>Hello <div>world</div></div>';
expect(janitor.clean(html)).toBe('<div>Hello world</div>');
});
it('should not allow nested block elements by default', function() {
var html = '<div>Hello <div>world</div></div>';
expect(janitor.clean(html)).toBe('<div>Hello world</div>');
});
test('should not allow nested block elements inside inline elements', function() {
var html = '<strong><p>Hello world</p></strong>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
it('should not allow nested block elements inside inline elements', function() {
var html = '<strong><p>Hello world</p></strong>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<b><p>Hello world</p></b>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<b><p>Hello world</p></b>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<em><p>Hello world</p></em>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<em><p>Hello world</p></em>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<i><p>Hello world</p></i>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<i><p>Hello world</p></i>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<sub><p>Hello world</p></sub>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<sub><p>Hello world</p></sub>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<sup><p>Hello world</p></sup>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<sup><p>Hello world</p></sup>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<u><p>Hello world</p></u>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<u><p>Hello world</p></u>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<strike><p>Hello world</p></strike>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<strike><p>Hello world</p></strike>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
html = '<a href="test"><p>Hello world</p></a>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
});
html = '<a href="test"><p>Hello world</p></a>';
expect(janitor.clean(html)).toBe('<p>Hello world</p>');
});
test('should allow inline elements inside block elements', function() {
var html = '<p>Hello <strong>world</strong></p>';
expect(janitor.clean(html)).toBe('<p>Hello <strong>world</strong></p>');
});
it('should allow inline elements inside block elements', function() {
var html = '<p>Hello <strong>world</strong></p>';
expect(janitor.clean(html)).toBe('<p>Hello <strong>world</strong></p>');
});
test('should allow all attributes for elements with catch-all whitelist', function () {
var el = document.createElement('small');
el.setAttribute('data-test', 'true');
el.setAttribute('title', 'test');
var outputEl = document.createElement('div');
outputEl.innerHTML = janitor.clean(el.outerHTML);
it('should allow all attributes for elements with catch-all whitelist', function () {
var el = document.createElement('small');
el.setAttribute('data-test', 'true');
el.setAttribute('title', 'test');
var output = outputEl.children[0];
var outputEl = document.createElement('div');
outputEl.innerHTML = janitor.clean(el.outerHTML);
expect(output.tagName).toBe('SMALL');
var output = outputEl.children[0];
var attributes = output.attributes;
expect(output.tagName).toBe('SMALL');
expect(attributes.getNamedItem('data-test').name).toBe('data-test');
expect(attributes.getNamedItem('data-test').value).toBe('true');
var attributes = output.attributes;
expect(attributes.getNamedItem('title').name).toBe('title');
expect(attributes.getNamedItem('title').value).toBe('test');
expect(attributes.getNamedItem('data-test').name).toBe('data-test');
expect(attributes.getNamedItem('data-test').value).toBe('true');
});
expect(attributes.getNamedItem('title').name).toBe('title');
expect(attributes.getNamedItem('title').value).toBe('test');
test('should remove an element if blacklisted', function() {
var el = document.createElement('figure');
el.setAttribute('class', 'test');
});
var output = janitor.clean(el.outerHTML);
it('should remove an element if blacklisted', function() {
var el = document.createElement('figure');
el.setAttribute('class', 'test');
expect(output).toBe('');
});
var output = janitor.clean(el.outerHTML);
test('should handle functions as options', function () {
var html = '<div><u>content</u></div>';
expect(janitor.clean(html)).toBe('<div><u>content</u></div>');
expect(output).toBe('');
});
html = '<div><u></u></div>';
expect(janitor.clean(html)).toBe('<div></div>');
});
it('should handle functions as options', function () {
var html = '<div><u>content</u></div>';
expect(janitor.clean(html)).toBe('<div><u>content</u></div>');
test('should handle functions as options for attributes', function () {
var html = '<img height="11">';
expect(janitor.clean(html)).toBe('<img>');
html = '<div><u></u></div>';
expect(janitor.clean(html)).toBe('<div></div>');
});
html = '<img height="9">';
expect(janitor.clean(html)).toBe('<img height="9">');
});
it('should handle functions as options for attributes', function () {
var html = '<img height="11">';
expect(janitor.clean(html)).toBe('<img>');
test('should also handle functions for attributes that take an element', function () {
var html = '<img width="1">';
expect(janitor.clean(html)).toBe('<img>');
html = '<img height="9">';
expect(janitor.clean(html)).toBe('<img height="9">');
});
html = '<img height="9" width="1">';
expect(janitor.clean(html)).toBe('<img height="9" width="1">');
});
it('should also handle functions for attributes that take an element', function () {
var html = '<img width="1">';
expect(janitor.clean(html)).toBe('<img>');
test('should allow certain attributes', function() {
var html = '<blockquote class="indent" style="display:inline" notallowedattr="1"></blockquote>';
expect(janitor.clean(html)).toBe('<blockquote class="indent" style="display:inline"></blockquote>');
html = '<img height="9" width="1">';
expect(janitor.clean(html)).toBe('<img height="9" width="1">');
});
html = '<blockquote style="display:inline"></blockquote>';
expect(janitor.clean(html)).toBe('<blockquote></blockquote>');
});
it('should allow certain attributes', function() {
var html = '<blockquote class="indent" style="display:inline" notallowedattr="1"></blockquote>';
expect(janitor.clean(html)).toBe('<blockquote class="indent" style="display:inline"></blockquote>');
test('janitor that allows nested block elements', function() {
const config = {
tags: {
div: {}
},
keepNestedBlockElements: true
};
const janitor = new HTMLJanitor(config);
html = '<blockquote style="display:inline"></blockquote>';
expect(janitor.clean(html)).toBe('<blockquote></blockquote>');
});
const html = '<div>Hello <div>world</div></div>';
expect(janitor.clean(html)).toBe('<div>Hello <div>world</div></div>');
});
});
describe('janitor that allows nested block elements', function () {
var janitor;
var config = {
tags: {
div: {}
},
keepNestedBlockElements: true
};
beforeEach(function () {
janitor = new HTMLJanitor(config);
});
it('should allow nested block elements', function() {
var html = '<div>Hello <div>world</div></div>';
expect(janitor.clean(html)).toBe('<div>Hello <div>world</div></div>');
});
});
describe('janitor with invalid configuration', function() {
var config = {
tags: {
strong: 53
}
};
it('should throw an Error on invalid configuration', function() {
expect(function() {new HTMLJanitor(config)}).toThrow(new Error('The configuration was invalid'));
});
});
test('janitor with invalid configuration', function() {
const config = {
tags: {
strong: 53
}
};
expect(function() {new HTMLJanitor(config)}).toThrow(new Error('The configuration was invalid'));
});
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc