html-janitor
Advanced tools
Comparing version 2.0.2 to 2.0.4
{ | ||
"name": "html-janitor", | ||
"version": "2.0.2", | ||
"version": "2.0.4", | ||
"main": "src/html-janitor.js", | ||
"scripts": { | ||
"test": "karma start --single-run" | ||
"test": "jest" | ||
}, | ||
@@ -13,15 +13,4 @@ "repository": { | ||
"devDependencies": { | ||
"karma": "~0.10.2", | ||
"karma-chrome-launcher": "~0.1.0", | ||
"karma-jasmine": "~0.1.3", | ||
"karma-phantomjs-launcher": "~0.1.0", | ||
"karma-firefox-launcher": "~0.1", | ||
"karma-requirejs": "~0.2.0", | ||
"plumber": "~0.4.0", | ||
"plumber-all": "~0.4.0", | ||
"plumber-glob": "~0.4.0", | ||
"plumber-requirejs": "~0.4.0", | ||
"plumber-uglifyjs": "~0.4.0", | ||
"plumber-write": "~0.4.0" | ||
"jest": "^23.6.0" | ||
} | ||
} |
@@ -7,4 +7,11 @@ # html-janitor | ||
![](https://circleci.com/gh/guardian/html-janitor.png?circle-token=bd24300ee650966837a73bfe03386828f0192c06) | ||
![](https://circleci.com/gh/guardian/html-janitor.png?circle-token=bd24300ee650966837a73bfe03386828f0192c06) <img src="https://david-dm.org/guardian/html-janitor.svg"> | ||
## XSS Note | ||
This library has not been extensively tested. In particular versions prior to 2.0.3 are vulnerable to XSS attacks. | ||
See [here](https://hackerone.com/reports/308155) and [here](https://hackerone.com/reports/308158). | ||
Please upgrade to 2.0.4 or above and consider building your own additional checks on user input. | ||
## Usage | ||
@@ -85,4 +92,2 @@ | ||
``` | ||
bower install html-janitor | ||
# or | ||
npm install html-janitor | ||
@@ -89,0 +94,0 @@ ``` |
@@ -43,21 +43,18 @@ (function (root, factory) { | ||
HTMLJanitor.prototype.clean = function (html) { | ||
var sandbox = document.createElement('div'); | ||
sandbox.innerHTML = html; | ||
const sandbox = document.implementation.createHTMLDocument(); | ||
const root = sandbox.createElement("div"); | ||
root.innerHTML = html; | ||
this._sanitize(sandbox); | ||
this._sanitize(sandbox, root); | ||
return sandbox.innerHTML; | ||
return root.innerHTML; | ||
}; | ||
HTMLJanitor.prototype._sanitize = function (parentNode) { | ||
var treeWalker = createTreeWalker(parentNode); | ||
HTMLJanitor.prototype._sanitize = function (document, parentNode) { | ||
var treeWalker = createTreeWalker(document, parentNode); | ||
var node = treeWalker.firstChild(); | ||
if (!node) { return; } | ||
do { | ||
// Ignore nodes that have already been sanitized | ||
if (node._sanitized) { | ||
continue; | ||
} | ||
if (node.nodeType === Node.TEXT_NODE) { | ||
@@ -73,3 +70,3 @@ // If this text node is just whitespace and the previous or next element | ||
parentNode.removeChild(node); | ||
this._sanitize(parentNode); | ||
this._sanitize(document, parentNode); | ||
break; | ||
@@ -84,3 +81,3 @@ } else { | ||
parentNode.removeChild(node); | ||
this._sanitize(parentNode); | ||
this._sanitize(document, parentNode); | ||
break; | ||
@@ -121,3 +118,3 @@ } | ||
this._sanitize(parentNode); | ||
this._sanitize(document, parentNode); | ||
break; | ||
@@ -138,10 +135,8 @@ } | ||
// Sanitize children | ||
this._sanitize(node); | ||
this._sanitize(document, node); | ||
// Mark node as sanitized so it's ignored in future runs | ||
node._sanitized = true; | ||
} while ((node = treeWalker.nextSibling())); | ||
}; | ||
function createTreeWalker(node) { | ||
function createTreeWalker(document, node) { | ||
return document.createTreeWalker(node, | ||
@@ -148,0 +143,0 @@ NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT, |
@@ -1,269 +0,248 @@ | ||
define([ 'html-janitor' ], function (HTMLJanitor) { | ||
const HTMLJanitor = require("../src/html-janitor"); | ||
describe('janitor', function () { | ||
var janitor; | ||
var config = { | ||
tags: { | ||
a: { | ||
href: true | ||
}, | ||
b: {}, | ||
i: {}, | ||
strong: {}, | ||
em: {}, | ||
sub: {}, | ||
sup: {}, | ||
strike: {}, | ||
const config = { | ||
tags: { | ||
a: { | ||
href: true | ||
}, | ||
b: {}, | ||
i: {}, | ||
strong: {}, | ||
em: {}, | ||
sub: {}, | ||
sup: {}, | ||
strike: {}, | ||
p: { foo: true, bar: 'baz' }, | ||
ul: {}, | ||
li: {}, | ||
small: true, | ||
div: {}, | ||
figure: false, | ||
u: function(el){ | ||
// Remove empty underline tags. | ||
var shouldKeepEl = el.innerHTML !== ''; | ||
return shouldKeepEl; | ||
}, | ||
img: { | ||
height: function(value){ | ||
// Only allow if height is less than 10. | ||
return parseInt(value) < 10; | ||
}, | ||
width: function(value, el){ | ||
// Only allow if height also specified. | ||
return el.hasAttribute('height'); | ||
} | ||
}, | ||
blockquote: function(el) { | ||
// If blockquote has class 'indent', also allow style. | ||
if (el.classList.contains('indent')){ | ||
return { 'class': true, 'style': true }; | ||
} else { | ||
return {}; | ||
} | ||
} | ||
p: { foo: true, bar: 'baz' }, | ||
ul: {}, | ||
li: {}, | ||
small: true, | ||
div: {}, | ||
figure: false, | ||
u: function(el){ | ||
// Remove empty underline tags. | ||
var shouldKeepEl = el.innerHTML !== ''; | ||
return shouldKeepEl; | ||
}, | ||
img: { | ||
height: function(value){ | ||
// Only allow if height is less than 10. | ||
return parseInt(value) < 10; | ||
}, | ||
width: function(value, el){ | ||
// Only allow if height also specified. | ||
return el.hasAttribute('height'); | ||
} | ||
}, | ||
blockquote: function(el) { | ||
// If blockquote has class 'indent', also allow style. | ||
if (el.classList.contains('indent')){ | ||
return { 'class': true, 'style': true }; | ||
} else { | ||
return {}; | ||
} | ||
} | ||
} | ||
}; | ||
const janitor = new HTMLJanitor(config); | ||
}; | ||
test('should clean attributes not in the whitelist', function () { | ||
var p = document.createElement('p'); | ||
p.setAttribute('style', 'font-size: 16px;'); | ||
p.setAttribute('bar', 'not baz'); | ||
expect(janitor.clean(p.outerHTML)).toBe('<p></p>'); | ||
}); | ||
beforeEach(function () { | ||
janitor = new HTMLJanitor(config); | ||
}); | ||
test('should not clean attributes in the whitelist', function () { | ||
var p = document.createElement('p'); | ||
p.setAttribute('foo', 'true'); | ||
p.setAttribute('bar', 'baz'); | ||
var cleanP = janitor.clean(p.outerHTML); | ||
expect(cleanP).toMatch(/foo="true"/); | ||
expect(cleanP).toMatch(/bar="baz"/); | ||
}); | ||
it('should clean attributes not in the whitelist', function () { | ||
var p = document.createElement('p'); | ||
p.setAttribute('style', 'font-size: 16px;'); | ||
p.setAttribute('bar', 'not baz'); | ||
expect(janitor.clean(p.outerHTML)).toBe('<p></p>'); | ||
}); | ||
test('should remove elements not in the whitelist', function () { | ||
var aside = document.createElement('aside'); | ||
var p = document.createElement('p'); | ||
aside.appendChild(p); | ||
expect(janitor.clean(aside.outerHTML)).toBe('<p></p>'); | ||
}); | ||
it('should not clean attributes in the whitelist', function () { | ||
var p = document.createElement('p'); | ||
p.setAttribute('foo', 'true'); | ||
p.setAttribute('bar', 'baz'); | ||
var cleanP = janitor.clean(p.outerHTML); | ||
expect(cleanP).toMatch(/foo="true"/); | ||
expect(cleanP).toMatch(/bar="baz"/); | ||
}); | ||
test('should not keep the inner text of a script element', function () { | ||
var script = document.createElement('script'); | ||
script.innerText = 'window.alert(\'foo\');'; | ||
expect(janitor.clean(script.outerHTML)).toBe(''); | ||
}); | ||
it('should remove elements not in the whitelist', function () { | ||
var aside = document.createElement('aside'); | ||
var p = document.createElement('p'); | ||
aside.appendChild(p); | ||
expect(janitor.clean(aside.outerHTML)).toBe('<p></p>'); | ||
}); | ||
test('should not keep the inner text of a style element', function () { | ||
var style = document.createElement('style'); | ||
style.innerText = '.foo {}'; | ||
expect(janitor.clean(style.outerHTML)).toBe(''); | ||
}); | ||
it('should not keep the inner text of a script element', function () { | ||
var script = document.createElement('script'); | ||
script.innerText = 'window.alert(\'foo\');'; | ||
expect(janitor.clean(script.outerHTML)).toBe(''); | ||
}); | ||
test('should clean invalid markup', function () { | ||
var b = document.createElement('b'); | ||
var p = document.createElement('p'); | ||
b.appendChild(p); | ||
expect(janitor.clean(b.outerHTML)).toBe('<p></p>'); | ||
}); | ||
it('should not keep the inner text of a style element', function () { | ||
var style = document.createElement('style'); | ||
style.innerText = '.foo {}'; | ||
expect(janitor.clean(style.outerHTML)).toBe(''); | ||
}); | ||
test('should clean paragraphs in lists', function () { | ||
var ul = document.createElement('ul'); | ||
ul.innerHTML = '<li><p>Some text</p></li>'; | ||
expect(janitor.clean(ul.outerHTML)).toBe('<ul><li>Some text</li></ul>'); | ||
}); | ||
it('should clean invalid markup', function () { | ||
var b = document.createElement('b'); | ||
var p = document.createElement('p'); | ||
b.appendChild(p); | ||
expect(janitor.clean(b.outerHTML)).toBe('<p></p>'); | ||
}); | ||
test('should remove comments', function () { | ||
var p = document.createElement('p'); | ||
p.innerHTML = 'Hello <b>world</b> <!-- a salutation -->!'; | ||
expect(janitor.clean(p.outerHTML)).toBe('<p>Hello <b>world</b> !</p>'); | ||
}); | ||
it('should clean paragraphs in lists', function () { | ||
var ul = document.createElement('ul'); | ||
ul.innerHTML = '<li><p>Some text</p></li>'; | ||
expect(janitor.clean(ul.outerHTML)).toBe('<ul><li>Some text</li></ul>'); | ||
}); | ||
test('should remove text nodes in-between block elements', function () { | ||
var html = '<p></p>\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p><p></p>'); | ||
}); | ||
it('should remove comments', function () { | ||
var p = document.createElement('p'); | ||
p.innerHTML = 'Hello <b>world</b> <!-- a salutation -->!'; | ||
expect(janitor.clean(p.outerHTML)).toBe('<p>Hello <b>world</b> !</p>'); | ||
}); | ||
test('should remove text nodes before block elements', function () { | ||
var html = '\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
it('should remove text nodes in-between block elements', function () { | ||
var html = '<p></p>\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p><p></p>'); | ||
}); | ||
test('should remove text nodes after block elements', function () { | ||
var html = '<p></p>\n'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
it('should remove text nodes before block elements', function () { | ||
var html = '\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
test('should remove nested span elements', function() { | ||
var html ='<p><span>Hello <span>world</span></span></p>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
}); | ||
it('should remove text nodes after block elements', function () { | ||
var html = '<p></p>\n'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
it('should remove nested span elements', function() { | ||
var html ='<p><span>Hello <span>world</span></span></p>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
}); | ||
test('should not allow nested block elements by default', function() { | ||
var html = '<div>Hello <div>world</div></div>'; | ||
expect(janitor.clean(html)).toBe('<div>Hello world</div>'); | ||
}); | ||
it('should not allow nested block elements by default', function() { | ||
var html = '<div>Hello <div>world</div></div>'; | ||
expect(janitor.clean(html)).toBe('<div>Hello world</div>'); | ||
}); | ||
test('should not allow nested block elements inside inline elements', function() { | ||
var html = '<strong><p>Hello world</p></strong>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
it('should not allow nested block elements inside inline elements', function() { | ||
var html = '<strong><p>Hello world</p></strong>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<b><p>Hello world</p></b>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<b><p>Hello world</p></b>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<em><p>Hello world</p></em>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<em><p>Hello world</p></em>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<i><p>Hello world</p></i>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<i><p>Hello world</p></i>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<sub><p>Hello world</p></sub>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<sub><p>Hello world</p></sub>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<sup><p>Hello world</p></sup>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<sup><p>Hello world</p></sup>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<u><p>Hello world</p></u>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<u><p>Hello world</p></u>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<strike><p>Hello world</p></strike>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<strike><p>Hello world</p></strike>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
html = '<a href="test"><p>Hello world</p></a>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
}); | ||
html = '<a href="test"><p>Hello world</p></a>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello world</p>'); | ||
}); | ||
test('should allow inline elements inside block elements', function() { | ||
var html = '<p>Hello <strong>world</strong></p>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello <strong>world</strong></p>'); | ||
}); | ||
it('should allow inline elements inside block elements', function() { | ||
var html = '<p>Hello <strong>world</strong></p>'; | ||
expect(janitor.clean(html)).toBe('<p>Hello <strong>world</strong></p>'); | ||
}); | ||
test('should allow all attributes for elements with catch-all whitelist', function () { | ||
var el = document.createElement('small'); | ||
el.setAttribute('data-test', 'true'); | ||
el.setAttribute('title', 'test'); | ||
var outputEl = document.createElement('div'); | ||
outputEl.innerHTML = janitor.clean(el.outerHTML); | ||
it('should allow all attributes for elements with catch-all whitelist', function () { | ||
var el = document.createElement('small'); | ||
el.setAttribute('data-test', 'true'); | ||
el.setAttribute('title', 'test'); | ||
var output = outputEl.children[0]; | ||
var outputEl = document.createElement('div'); | ||
outputEl.innerHTML = janitor.clean(el.outerHTML); | ||
expect(output.tagName).toBe('SMALL'); | ||
var output = outputEl.children[0]; | ||
var attributes = output.attributes; | ||
expect(output.tagName).toBe('SMALL'); | ||
expect(attributes.getNamedItem('data-test').name).toBe('data-test'); | ||
expect(attributes.getNamedItem('data-test').value).toBe('true'); | ||
var attributes = output.attributes; | ||
expect(attributes.getNamedItem('title').name).toBe('title'); | ||
expect(attributes.getNamedItem('title').value).toBe('test'); | ||
expect(attributes.getNamedItem('data-test').name).toBe('data-test'); | ||
expect(attributes.getNamedItem('data-test').value).toBe('true'); | ||
}); | ||
expect(attributes.getNamedItem('title').name).toBe('title'); | ||
expect(attributes.getNamedItem('title').value).toBe('test'); | ||
test('should remove an element if blacklisted', function() { | ||
var el = document.createElement('figure'); | ||
el.setAttribute('class', 'test'); | ||
}); | ||
var output = janitor.clean(el.outerHTML); | ||
it('should remove an element if blacklisted', function() { | ||
var el = document.createElement('figure'); | ||
el.setAttribute('class', 'test'); | ||
expect(output).toBe(''); | ||
}); | ||
var output = janitor.clean(el.outerHTML); | ||
test('should handle functions as options', function () { | ||
var html = '<div><u>content</u></div>'; | ||
expect(janitor.clean(html)).toBe('<div><u>content</u></div>'); | ||
expect(output).toBe(''); | ||
}); | ||
html = '<div><u></u></div>'; | ||
expect(janitor.clean(html)).toBe('<div></div>'); | ||
}); | ||
it('should handle functions as options', function () { | ||
var html = '<div><u>content</u></div>'; | ||
expect(janitor.clean(html)).toBe('<div><u>content</u></div>'); | ||
test('should handle functions as options for attributes', function () { | ||
var html = '<img height="11">'; | ||
expect(janitor.clean(html)).toBe('<img>'); | ||
html = '<div><u></u></div>'; | ||
expect(janitor.clean(html)).toBe('<div></div>'); | ||
}); | ||
html = '<img height="9">'; | ||
expect(janitor.clean(html)).toBe('<img height="9">'); | ||
}); | ||
it('should handle functions as options for attributes', function () { | ||
var html = '<img height="11">'; | ||
expect(janitor.clean(html)).toBe('<img>'); | ||
test('should also handle functions for attributes that take an element', function () { | ||
var html = '<img width="1">'; | ||
expect(janitor.clean(html)).toBe('<img>'); | ||
html = '<img height="9">'; | ||
expect(janitor.clean(html)).toBe('<img height="9">'); | ||
}); | ||
html = '<img height="9" width="1">'; | ||
expect(janitor.clean(html)).toBe('<img height="9" width="1">'); | ||
}); | ||
it('should also handle functions for attributes that take an element', function () { | ||
var html = '<img width="1">'; | ||
expect(janitor.clean(html)).toBe('<img>'); | ||
test('should allow certain attributes', function() { | ||
var html = '<blockquote class="indent" style="display:inline" notallowedattr="1"></blockquote>'; | ||
expect(janitor.clean(html)).toBe('<blockquote class="indent" style="display:inline"></blockquote>'); | ||
html = '<img height="9" width="1">'; | ||
expect(janitor.clean(html)).toBe('<img height="9" width="1">'); | ||
}); | ||
html = '<blockquote style="display:inline"></blockquote>'; | ||
expect(janitor.clean(html)).toBe('<blockquote></blockquote>'); | ||
}); | ||
it('should allow certain attributes', function() { | ||
var html = '<blockquote class="indent" style="display:inline" notallowedattr="1"></blockquote>'; | ||
expect(janitor.clean(html)).toBe('<blockquote class="indent" style="display:inline"></blockquote>'); | ||
test('janitor that allows nested block elements', function() { | ||
const config = { | ||
tags: { | ||
div: {} | ||
}, | ||
keepNestedBlockElements: true | ||
}; | ||
const janitor = new HTMLJanitor(config); | ||
html = '<blockquote style="display:inline"></blockquote>'; | ||
expect(janitor.clean(html)).toBe('<blockquote></blockquote>'); | ||
}); | ||
const html = '<div>Hello <div>world</div></div>'; | ||
expect(janitor.clean(html)).toBe('<div>Hello <div>world</div></div>'); | ||
}); | ||
}); | ||
describe('janitor that allows nested block elements', function () { | ||
var janitor; | ||
var config = { | ||
tags: { | ||
div: {} | ||
}, | ||
keepNestedBlockElements: true | ||
}; | ||
beforeEach(function () { | ||
janitor = new HTMLJanitor(config); | ||
}); | ||
it('should allow nested block elements', function() { | ||
var html = '<div>Hello <div>world</div></div>'; | ||
expect(janitor.clean(html)).toBe('<div>Hello <div>world</div></div>'); | ||
}); | ||
}); | ||
describe('janitor with invalid configuration', function() { | ||
var config = { | ||
tags: { | ||
strong: 53 | ||
} | ||
}; | ||
it('should throw an Error on invalid configuration', function() { | ||
expect(function() {new HTMLJanitor(config)}).toThrow(new Error('The configuration was invalid')); | ||
}); | ||
}); | ||
test('janitor with invalid configuration', function() { | ||
const config = { | ||
tags: { | ||
strong: 53 | ||
} | ||
}; | ||
expect(function() {new HTMLJanitor(config)}).toThrow(new Error('The configuration was invalid')); | ||
}); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
1
102
0
29987
7
344
1