clean-html
Advanced tools
Comparing version 1.5.0 to 2.0.0
92
cmd.js
#!/usr/bin/env node | ||
var cleaner = require('./index.js'), | ||
fs = require('fs'), | ||
parseArgs = require('minimist'), | ||
argv = parseArgs(process.argv.slice(2)), | ||
filename = argv['_'][0], | ||
inPlace = getOptAsBool(argv['in-place']), | ||
options = { | ||
'break-around-comments': getOptAsBool(argv['break-around-comments']), | ||
'break-around-tags': getOptAsArray(argv['break-around-tags']), | ||
'indent': argv['indent'], | ||
'remove-attributes': getOptAsArray(argv['remove-attributes']), | ||
'remove-comments': getOptAsBool(argv['remove-comments']), | ||
'remove-empty-tags': getOptAsArray(argv['remove-empty-tags']), | ||
'remove-tags': getOptAsArray(argv['remove-tags']), | ||
'replace-nbsp': getOptAsBool(argv['replace-nbsp']), | ||
'wrap': getOptAsInt(argv['wrap']), | ||
'add-break-around-tags': getOptAsArray(argv['add-break-around-tags']), | ||
'add-remove-attributes': getOptAsArray(argv['add-remove-attributes']), | ||
'add-remove-tags': getOptAsArray(argv['add-remove-tags']) | ||
}; | ||
const fs = require('node:fs'); | ||
const parseArgs = require('minimist'); | ||
const cleaner = require('./index.js'); | ||
const argv = parseArgs(process.argv.slice(2)); | ||
const filename = argv['_'][0]; | ||
const inPlace = getOptAsBool(argv['in-place']); | ||
const options = { | ||
'allow-attributes-without-values': getOptAsBool(argv['allow-attributes-without-values']), | ||
'break-around-comments': getOptAsBool(argv['break-around-comments']), | ||
'break-around-tags': getOptAsArray(argv['break-around-tags']), | ||
'decode-entities': getOptAsBool(argv['decode-entities']), | ||
'indent': argv['indent'], | ||
'lower-case-tags': getOptAsBool(argv['lower-case-tags']), | ||
'lower-case-attribute-names': getOptAsBool(argv['lower-case-attribute-names']), | ||
'preserve-tags': getOptAsArray(argv['preserve-tags']), | ||
'remove-attributes': getOptAsArray(argv['remove-attributes']), | ||
'remove-comments': getOptAsBool(argv['remove-comments']), | ||
'remove-empty-tags': getOptAsArray(argv['remove-empty-tags']), | ||
'remove-tags': getOptAsArray(argv['remove-tags']), | ||
'wrap': getOptAsInt(argv['wrap']), | ||
'add-break-around-tags': getOptAsArray(argv['add-break-around-tags']), | ||
'add-remove-attributes': getOptAsArray(argv['add-remove-attributes']), | ||
'add-remove-tags': getOptAsArray(argv['add-remove-tags']) | ||
}; | ||
function getOptAsArray(opt) { | ||
@@ -30,7 +38,5 @@ if (opt === undefined) { | ||
if (Array.isArray(opt)) { | ||
return opt.map(function (o) { | ||
return o.split(','); | ||
}).reduce(function (prev, curr) { | ||
return prev.concat(curr); | ||
}); | ||
return opt | ||
.map(o => o.split(',')) | ||
.reduce((prev, curr) => prev.concat(curr)); | ||
} | ||
@@ -54,3 +60,3 @@ | ||
var val = parseInt(opt); | ||
const val = parseInt(opt); | ||
@@ -61,13 +67,7 @@ return isNaN(val) ? undefined : val; | ||
function read(filename, callback) { | ||
if (filename) { | ||
return fs.readFile(filename, function (err, data) { | ||
if (err) { | ||
throw err; | ||
} | ||
return fs.readFile(filename, 'utf8', (err, data) => { | ||
if (err) { | ||
throw err; | ||
} | ||
callback(data); | ||
}); | ||
} | ||
process.stdin.on('data', function (data) { | ||
callback(data); | ||
@@ -78,15 +78,11 @@ }); | ||
function write(html, filename) { | ||
if (filename) { | ||
return fs.writeFile(filename, html, function (err) { | ||
if (err) { | ||
throw err; | ||
} | ||
}); | ||
} | ||
process.stdout.write(html + '\n'); | ||
return fs.writeFile(filename, html + '\n', err => { | ||
if (err) { | ||
throw err; | ||
} | ||
}); | ||
} | ||
read(filename, function (data) { | ||
cleaner.clean(data, options, function (html) { | ||
read(filename || process.stdin.fd, data => { | ||
cleaner.clean(data, options, html => { | ||
if (filename && inPlace) { | ||
@@ -96,4 +92,4 @@ return write(html, filename); | ||
write(html); | ||
write(html, process.stdout.fd); | ||
}); | ||
}); |
238
index.js
@@ -1,42 +0,41 @@ | ||
var htmlparser = require('htmlparser2'), | ||
unsupportedTags = [ | ||
'script', | ||
'style' | ||
], | ||
voidElements = [ | ||
'area', | ||
'base', | ||
'basefont', | ||
'br', | ||
'col', | ||
'command', | ||
'embed', | ||
'frame', | ||
'hr', | ||
'img', | ||
'input', | ||
'isindex', | ||
'keygen', | ||
'link', | ||
'meta', | ||
'param', | ||
'source', | ||
'track', | ||
'wbr', | ||
const htmlparser = require('htmlparser2'); | ||
// common self closing svg elements | ||
'circle', | ||
'ellipse', | ||
'line', | ||
'path', | ||
'polygon', | ||
'polyline', | ||
'rect', | ||
'stop', | ||
'use' | ||
], | ||
options = {}; | ||
const voidElements = [ | ||
'area', | ||
'base', | ||
'basefont', | ||
'br', | ||
'col', | ||
'command', | ||
'embed', | ||
'frame', | ||
'hr', | ||
'img', | ||
'input', | ||
'isindex', | ||
'keygen', | ||
'link', | ||
'meta', | ||
'param', | ||
'source', | ||
'track', | ||
'wbr', | ||
// common self closing svg elements | ||
'circle', | ||
'ellipse', | ||
'line', | ||
'path', | ||
'polygon', | ||
'polyline', | ||
'rect', | ||
'stop', | ||
'use' | ||
]; | ||
let options = {}; | ||
function setup(opt) { | ||
options = { | ||
'allow-attributes-without-values': false, | ||
'break-around-comments': true, | ||
@@ -64,3 +63,10 @@ 'break-around-tags': [ | ||
], | ||
'decode-entities': false, | ||
'indent': ' ', | ||
'lower-case-tags': true, | ||
'lower-case-attribute-names': true, | ||
'preserve-tags': [ | ||
'script', | ||
'style' | ||
], | ||
'remove-attributes': [ | ||
@@ -84,3 +90,2 @@ 'align', | ||
], | ||
'replace-nbsp': false, | ||
'wrap': 120 | ||
@@ -93,5 +98,10 @@ }; | ||
options['allow-attributes-without-values'] = opt['allow-attributes-without-values'] === true ? true : false; | ||
options['break-around-comments'] = opt['break-around-comments'] === false ? false : true; | ||
options['break-around-tags'] = opt['break-around-tags'] || options['break-around-tags']; | ||
options['decode-entities'] = opt['decode-entities'] === true ? true : false; | ||
options['indent'] = opt['indent'] || options['indent']; | ||
options['lower-case-tags'] = opt['lower-case-tags'] === false ? false : true; | ||
options['lower-case-attribute-names'] = opt['lower-case-attribute-names'] === false ? false : true; | ||
options['preserve-tags'] = opt['preserve-tags'] || options['preserve-tags']; | ||
options['remove-attributes'] = opt['remove-attributes'] || options['remove-attributes']; | ||
@@ -101,3 +111,2 @@ options['remove-comments'] = opt['remove-comments'] === true ? true : false; | ||
options['remove-tags'] = opt['remove-tags'] || options['remove-tags']; | ||
options['replace-nbsp'] = opt['replace-nbsp'] === true ? true : false; | ||
options['wrap'] = opt['wrap'] >= 0 ? opt['wrap'] : options['wrap']; | ||
@@ -131,3 +140,3 @@ | ||
if (options['break-around-tags'].indexOf(node.name) != -1) { | ||
if (options['break-around-tags'].includes(node.name)) { | ||
return true; | ||
@@ -153,6 +162,2 @@ } | ||
if (node.type == 'text') { | ||
if (options['replace-nbsp']) { | ||
!node.data.replace(/ /g, ' ').trim(); | ||
} | ||
return !node.data.trim(); | ||
@@ -189,7 +194,5 @@ } | ||
function isListedInOptions(optionsArrayName, name) { | ||
var matches = options[optionsArrayName].filter(function(option) { | ||
return options[optionsArrayName].some(option => { | ||
return option instanceof RegExp && option.test(name) || option === name; | ||
}); | ||
return !!matches.length; | ||
} | ||
@@ -202,8 +205,4 @@ | ||
var text = removeExtraSpace(node.data); | ||
let text = removeExtraSpace(node.data); | ||
if (options['replace-nbsp']) { | ||
text = text.replace(/ /g, ' '); | ||
} | ||
if (!node.prev || breakAround(node.prev)) { | ||
@@ -225,3 +224,3 @@ text = text.trimLeft(); | ||
var comment = '<!--' + removeExtraSpace(node.data) + '-->'; | ||
const comment = '<!--' + removeExtraSpace(node.data) + '-->'; | ||
@@ -236,6 +235,2 @@ if (breakAround(node)) { | ||
function renderTag(node) { | ||
if (unsupportedTags.indexOf(node.name) != -1) { | ||
return ''; | ||
} | ||
if (shouldRemove(node)) { | ||
@@ -249,7 +244,11 @@ if (isEmpty(node)) { | ||
var openTag = '<' + node.name; | ||
let openTag = '<' + node.name; | ||
for (var attrib in node.attribs) { | ||
for (let attrib in node.attribs) { | ||
if (!isListedInOptions('remove-attributes', attrib)) { | ||
openTag += ' ' + attrib + '="' + removeExtraSpace(node.attribs[attrib]) + '"'; | ||
if (!node.attribs[attrib] && options['allow-attributes-without-values']) { | ||
openTag += ' ' + attrib; | ||
} else { | ||
openTag += ` ${attrib}="${removeExtraSpace(node.attribs[attrib])}"`; | ||
} | ||
} | ||
@@ -260,3 +259,3 @@ } | ||
if (voidElements.indexOf(node.name) != -1) { | ||
if (voidElements.includes(node.name)) { | ||
if (breakAround(node)) { | ||
@@ -269,3 +268,3 @@ return '\n' + openTag + '\n'; | ||
var closeTag = '</' + node.name + '>'; | ||
let closeTag = '</' + node.name + '>'; | ||
@@ -290,5 +289,5 @@ if (breakAround(node)) { | ||
function render(nodes) { | ||
var html = ''; | ||
let html = ''; | ||
nodes.forEach(function (node) { | ||
nodes.forEach(node => { | ||
if (node.type == 'root') { | ||
@@ -321,15 +320,5 @@ html += render(node.children); | ||
/**
 * Builds the leading whitespace for a given nesting depth.
 *
 * @param {number} indentLevel - Number of times the configured indent unit
 *     (`options['indent']`) is repeated.
 * @returns {string} The indent unit repeated `indentLevel` times, or an empty
 *     string when `indentLevel` is not greater than zero.
 */
function getIndent(indentLevel) {
    // `repeat` throws a RangeError for negative counts, so anything that is
    // not a positive number yields no indentation at all.
    if (!(indentLevel > 0)) {
        return '';
    }

    // String() keeps a non-string indent unit (for example a number parsed
    // from the command line) working, as plain concatenation would.
    return String(options['indent']).repeat(Math.ceil(indentLevel));
}
function wrap(line, indent) { | ||
// find the last space before the column limit | ||
var bound = line.lastIndexOf(' ', options['wrap']); | ||
let bound = line.lastIndexOf(' ', options['wrap']); | ||
@@ -348,4 +337,4 @@ if (bound == -1) { | ||
var line1 = line.substr(0, bound), | ||
line2 = indent + options['indent'].repeat(2) + line.substr(bound + 1); | ||
const line1 = line.substr(0, bound); | ||
let line2 = indent + options['indent'].repeat(2) + line.substr(bound + 1); | ||
@@ -366,46 +355,50 @@ if (line1.trim().length == 0) { | ||
function indent(html) { | ||
var indentLevel = 0; | ||
let indentLevel = 0; | ||
const openTagRe = /^<(\w+)[^>]*>$/; | ||
const closeTagRe = /^<\/(\w+)>$/; | ||
return html.replace(/.*\n/g, function (line) { | ||
var openTags = [], | ||
result, | ||
tagRegEx = /<\/?(\w+).*?>/g, | ||
tag, | ||
tagName; | ||
return html.split('\n').map(line => { | ||
const closeTagMatch = line.match(closeTagRe); | ||
while (result = tagRegEx.exec(line)) { | ||
// don't increase indent if tag is inside a comment | ||
if (line.lastIndexOf('<!--', result.index) < result.index | ||
&& line.indexOf('-->', result.index) > result.index) { | ||
continue; | ||
} | ||
if (closeTagMatch) { | ||
indentLevel--; | ||
} | ||
tag = result[0]; | ||
tagName = result[1]; | ||
const indent = options['indent'].repeat(indentLevel); | ||
const indented = indent + line; | ||
if (voidElements.indexOf(tagName) != -1) { | ||
continue; | ||
} | ||
const openTagMatch = line.match(openTagRe); | ||
if (tag.indexOf('</') == -1) { | ||
openTags.push(tag); | ||
indentLevel++; | ||
} else { | ||
openTags.pop(); | ||
indentLevel--; | ||
} | ||
if (openTagMatch && !voidElements.includes(openTagMatch[1])) { | ||
indentLevel++; | ||
} | ||
var indent = getIndent(indentLevel - openTags.length); | ||
if (options['wrap'] && indented.length > options['wrap']) { | ||
return wrap(indented, indent); | ||
} | ||
line = indent + line; | ||
return indented; | ||
}).join('\n'); | ||
} | ||
if (options['wrap'] && line.length > options['wrap']) { | ||
line = wrap(line, indent); | ||
} | ||
const preserveTagReplacements = {}; | ||
return line; | ||
/**
 * Swaps every element whose tag name is listed in `options['preserve-tags']`
 * for a placeholder `<meta>` tag, so its markup can be restored verbatim later.
 *
 * The original markup is stored in `preserveTagReplacements`, keyed by the
 * offset of the match within `html`; the same offset is written into the
 * placeholder's `offset` attribute.
 *
 * @param {string} html - Markup to scan.
 * @returns {string} `html` with each preserved element replaced by
 *     `<meta name="clean-html-replacement" offset="N">`.
 */
function preserveTags(html) {
    const tagNames = options['preserve-tags'];

    // With no names the alternation would be empty and would no longer
    // constrain which tags are matched.
    if (tagNames.length === 0) {
        return html;
    }

    // Escape regex metacharacters so a tag name is always matched literally.
    const tagPattern = tagNames
        .map(name => String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('|');

    // - The lookahead stops `style` from matching `<styles>`.
    // - The backreference makes the closing tag agree with the opening tag.
    // - `i` because HTML tag names are case-insensitive; `s` lets `.` span
    //   the newlines inside the preserved content.
    const re = new RegExp(`<(${tagPattern})(?=[\\s/>])[^>]*>.*?</\\1\\s*>`, 'gis');

    // The capture group shifts the offset to the third callback argument.
    return html.replace(re, (match, _tagName, offset) => {
        preserveTagReplacements[offset] = match;

        return `<meta name="clean-html-replacement" offset="${offset}">`;
    });
}
/**
 * Restores markup that was swapped out for placeholder `<meta>` tags.
 *
 * Each `<meta name="clean-html-replacement" offset="N">` is replaced with the
 * string stored under `N` in `preserveTagReplacements`.
 *
 * @param {string} html - Markup that may contain placeholder tags.
 * @returns {string} `html` with every known placeholder replaced by its
 *     stored markup; placeholders with no stored entry are left unchanged.
 */
function undoPreserveTags(html) {
    const re = /<meta name="clean-html-replacement" offset="(\d+)">/g;

    return html.replace(re, (placeholder, offset) => {
        // Returning undefined here would write the literal text "undefined"
        // into the output, so an unknown offset keeps the placeholder as is.
        if (!Object.prototype.hasOwnProperty.call(preserveTagReplacements, offset)) {
            return placeholder;
        }

        return preserveTagReplacements[offset];
    });
}
function clean(html, opt, callback) { | ||
@@ -419,3 +412,3 @@ if (typeof opt == 'function') { | ||
var handler = new htmlparser.DomHandler(function (err, dom) { | ||
const handler = new htmlparser.DomHandler((err, dom) => { | ||
if (err) { | ||
@@ -425,15 +418,24 @@ throw err; | ||
var html = render(dom); | ||
html = indent(html).trim(); | ||
callback( | ||
undoPreserveTags( | ||
indent( | ||
render(dom) | ||
).trim() | ||
) | ||
); | ||
}); | ||
callback(html); | ||
const parser = new htmlparser.Parser(handler, { | ||
decodeEntities: options['decode-entities'], | ||
lowerCaseTags: options['lower-case-tags'], | ||
lowerCaseAttributeNames: options['lower-case-attribute-names'], | ||
}); | ||
var parser = new htmlparser.Parser(handler); | ||
parser.write(html); | ||
parser.done(); | ||
parser.write( | ||
preserveTags(html) | ||
); | ||
parser.end(); | ||
} | ||
module.exports = { | ||
clean: clean | ||
}; | ||
module.exports = {clean}; |
{ | ||
"name": "clean-html", | ||
"version": "1.5.0", | ||
"version": "2.0.0", | ||
"description": "HTML cleaner and beautifier", | ||
@@ -8,6 +8,13 @@ "main": "index.js", | ||
"dependencies": { | ||
"htmlparser2": "^3.8.2", | ||
"minimist": "^1.1.1" | ||
"htmlparser2": "^8.0.2", | ||
"minimist": "^1.2.8" | ||
}, | ||
"devDependencies": {}, | ||
"files": [ | ||
"cmd.js", | ||
"index.js", | ||
"package.json", | ||
"README.md", | ||
"release-notes.md", | ||
"UNLICENSE" | ||
], | ||
"scripts": { | ||
@@ -14,0 +21,0 @@ "test": "node test.js" |
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system and could potentially read sensitive data.
Found 1 instance in 1 package
0
18631
5
412
213
1
+ Added dom-serializer@2.0.0 (transitive)
+ Added domhandler@5.0.3 (transitive)
+ Added domutils@3.1.0 (transitive)
+ Added entities@4.5.0 (transitive)
+ Added htmlparser2@8.0.2 (transitive)
- Removed dom-serializer@0.2.2 (transitive)
- Removed domelementtype@1.3.1 (transitive)
- Removed domhandler@2.4.2 (transitive)
- Removed domutils@1.7.0 (transitive)
- Removed entities@1.1.2, 2.2.0 (transitive)
- Removed htmlparser2@3.10.1 (transitive)
- Removed inherits@2.0.4 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed safe-buffer@5.2.1 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
Updated htmlparser2@^8.0.2
Updated minimist@^1.2.8