html-janitor
Advanced tools
Comparing version 0.2.0 to 0.3.1
{ | ||
"name": "html-janitor", | ||
"version": "0.2.0", | ||
"homepage": "https://github.com/guardian/html-janitor", | ||
"main": "src/html-janitor.js", | ||
"authors": [ | ||
"Oliver Joseph Ash <oliverjash@icloud.com>" | ||
], | ||
"license": "MIT", | ||
"ignore": [ | ||
"**/.*", | ||
"node_modules", | ||
"bower_components", | ||
"test", | ||
"tests" | ||
], | ||
"devDependencies": { | ||
"requirejs": "~2.1.9" | ||
"requirejs": "2.1.9" | ||
} | ||
} |
{ | ||
"name": "html-janitor", | ||
"version": "0.2.0", | ||
"description": "", | ||
"version": "0.3.1", | ||
"main": "src/html-janitor.js", | ||
@@ -9,22 +8,15 @@ "scripts": { | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/guardian/html-janitor.git" | ||
}, | ||
"author": "Oliver Joseph Ash <oliverjash@icloud.com>", | ||
"license": "MIT", | ||
"bugs": { | ||
"url": "https://github.com/guardian/html-janitor/issues" | ||
}, | ||
"devDependencies": { | ||
"karma-script-launcher": "~0.1.0", | ||
"karma-firefox-launcher": "~0.1.0", | ||
"karma": "~0.10.2", | ||
"karma-chrome-launcher": "~0.1.0", | ||
"karma-html2js-preprocessor": "~0.1.0", | ||
"karma-jasmine": "~0.1.3", | ||
"karma-requirejs": "~0.1.0", | ||
"karma-coffee-preprocessor": "~0.1.0", | ||
"karma-phantomjs-launcher": "~0.1.0", | ||
"karma": "~0.10.2" | ||
"karma-requirejs": "~0.2.0", | ||
"plumber": "~0.4.0", | ||
"plumber-all": "~0.4.0", | ||
"plumber-glob": "~0.4.0", | ||
"plumber-requirejs": "~0.4.0", | ||
"plumber-uglifyjs": "~0.4.0", | ||
"plumber-write": "~0.4.0" | ||
} | ||
} |
@@ -1,7 +0,8 @@ | ||
// UMD | ||
(function (root, factory) { | ||
if (typeof define === 'function' && define.amd) { | ||
define(factory); | ||
define('html-janitor', factory); | ||
} else if (typeof exports === 'object') { | ||
module.exports = factory(); | ||
} else { | ||
root.amdWeb = factory(); | ||
root.HTMLJanitor = factory(); | ||
} | ||
@@ -14,2 +15,8 @@ }(this, function () { | ||
// TODO: not exhaustive? | ||
var blockElementNames = ['P', 'LI', 'DIV']; | ||
function isBlockElement(node) { | ||
return blockElementNames.indexOf(node.nodeName) !== -1; | ||
} | ||
HTMLJanitor.prototype.clean = function (html) { | ||
@@ -33,14 +40,35 @@ var sandbox = document.createElement('div'); | ||
// Ignore text nodes and nodes that have already been sanitized | ||
if (node.nodeType === 3 || node._sanitized) { | ||
// Ignore nodes that have already been sanitized | ||
if (node._sanitized) { | ||
continue; | ||
} | ||
if (node.nodeType === Node.TEXT_NODE) { | ||
// If this text node is just whitespace and the previous or next element | ||
// sibling is a block element, remove it | ||
// N.B.: This heuristic could change. Very specific to a bug with | ||
// `contenteditable` in Firefox: http://jsbin.com/EyuKase/1/edit?js,output | ||
// FIXME: make this an option? | ||
if (node.data.trim() === '' | ||
&& ((node.previousElementSibling && isBlockElement(node.previousElementSibling)) | ||
|| (node.nextElementSibling && isBlockElement(node.nextElementSibling)))) { | ||
parentNode.removeChild(node); | ||
this._sanitize(parentNode); | ||
break; | ||
} else { | ||
continue; | ||
} | ||
} | ||
// Remove all comments | ||
if (node.nodeType === Node.COMMENT_NODE) { | ||
parentNode.removeChild(node); | ||
this._sanitize(parentNode); | ||
break; | ||
} | ||
var isInlineElement = nodeName === 'b'; | ||
var containsBlockElement; | ||
if (isInlineElement) { | ||
containsBlockElement = Array.prototype.some.call(node.childNodes, function (childNode) { | ||
// TODO: test other block elements | ||
return childNode.nodeName === 'P'; | ||
}); | ||
containsBlockElement = Array.prototype.some.call(node.childNodes, isBlockElement); | ||
} | ||
@@ -50,5 +78,15 @@ | ||
// Block elements should not be nested (e.g. <li><p>...); if | ||
// they are, we want to unwrap the inner block element. | ||
var isNotTopContainer = !! parentNode.parentNode; | ||
// TODO: Don't hardcode this — this is not invalid markup. Should be | ||
// configurable. | ||
var isNestedBlockElement = | ||
isBlockElement(parentNode) && | ||
isBlockElement(node) && | ||
isNotTopContainer; | ||
// Drop tag entirely according to the whitelist *and* if the markup | ||
// is invalid. | ||
if (!this.config.tags[nodeName] || isInvalid) { | ||
if (!this.config.tags[nodeName] || isInvalid || isNestedBlockElement) { | ||
// Do not keep the inner text of SCRIPT/STYLE elements. | ||
@@ -87,7 +125,8 @@ if (! (node.nodeName === 'SCRIPT' || node.nodeName === 'STYLE')) { | ||
node._sanitized = true; | ||
} while (node = treeWalker.nextSibling()); | ||
} while ((node = treeWalker.nextSibling())); | ||
}; | ||
function createTreeWalker(node) { | ||
return document.createTreeWalker(node, NodeFilter.SHOW_ELEMENT); | ||
return document.createTreeWalker(node, | ||
NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT); | ||
} | ||
@@ -94,0 +133,0 @@ |
@@ -8,3 +8,5 @@ define([ 'html-janitor' ], function (HTMLJanitor) { | ||
b: {}, | ||
p: { foo: true, bar: 'baz' } | ||
p: { foo: true, bar: 'baz' }, | ||
ul: {}, | ||
li: {} | ||
} | ||
@@ -58,4 +60,31 @@ | ||
}); | ||
it('should clean paragraphs in lists', function () { | ||
var ul = document.createElement('ul'); | ||
ul.innerHTML = '<li><p>Some text</p></li>'; | ||
expect(janitor.clean(ul.outerHTML)).toBe('<ul><li>Some text</li></ul>'); | ||
}); | ||
it('should remove comments', function () { | ||
var p = document.createElement('p'); | ||
p.innerHTML = 'Hello <b>world</b> <!-- a salutation -->!'; | ||
expect(janitor.clean(p.outerHTML)).toBe('<p>Hello <b>world</b> !</p>'); | ||
}); | ||
it('should remove text nodes in-between block elements', function () { | ||
var html = '<p></p>\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p><p></p>'); | ||
}); | ||
it('should remove text nodes before block elements', function () { | ||
var html = '\n<p></p>'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
it('should remove text nodes after block elements', function () { | ||
var html = '<p></p>\n'; | ||
expect(janitor.clean(html)).toBe('<p></p>'); | ||
}); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but they do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
No contributors or author data
Maintenance: Package does not specify a list of contributors or an author in package.json.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
No README
Quality: Package does not have a README. This may indicate a failed publish or a low-quality package.
Found 1 instance in 1 package
22779
15
279
0
15
11
3
3
1