document-write-html
Advanced tools
Comparing version 0.1.3 to 0.2.0
@@ -7,2 +7,3 @@ /* jshint quotmark:false*/ | ||
.replace(/\\"|"/g, '\\"') | ||
.replace(/"/g, '\\"') | ||
.replace(/\\&/g, '\\&') | ||
@@ -9,0 +10,0 @@ .replace(/\\r|\r/g, '\\r') |
329
lib/index.js
@@ -0,1 +1,3 @@ | ||
var escape = require('./escape.js'); | ||
var urlLib = require('url'); | ||
//var log = require('./logger.js'); | ||
@@ -5,19 +7,22 @@ var cssParser = require('css-parse'); | ||
var CleanCSS = require('clean-css'); | ||
var htmlparser = require('htmlparser2'); | ||
var toHtml = require('htmlparser-to-html'); | ||
var urlLib = require('url'); | ||
var escape = require('./escape.js'); | ||
// var toHtml = require('htmlparser-to-html'); | ||
var toHtml = require('./toHtml.js'); | ||
var select = require('CSSselect'); | ||
function toDocumentWrite(str, tag) { | ||
function toDocumentWrite(str, wrapInTag) { | ||
var output = ['document.write(\'']; | ||
if (tag){ | ||
if (tag === 'script'){ | ||
tag = 'scr\'+\'ipt'; | ||
if (wrapInTag){ | ||
if (wrapInTag === 'script'){ | ||
wrapInTag = 'scr\'+\'ipt'; | ||
} | ||
output.push('<'+tag+'>'); | ||
output.push('<'+wrapInTag+'>'); | ||
} | ||
output.push(escape(str)); | ||
if (tag){ | ||
output.push('</'+tag+'>'); | ||
// __MANGLE__ hack ;P | ||
output.push(escape(str).replace(/__MANGLE__/mig, '\'+\'')); | ||
if (wrapInTag){ | ||
output.push('</'+wrapInTag+'>'); | ||
} | ||
@@ -59,3 +64,3 @@ output.push('\');'); | ||
function parseAndResolve(base, html) { | ||
function resolve(base, html) { | ||
var REG_EXP_ABS_URL = /^(\/\/|http)/i; | ||
@@ -79,166 +84,210 @@ | ||
function parse(str, resourceUrlHandler) { | ||
var output = { | ||
pre: '', | ||
content: '', | ||
style: '', | ||
script: '', | ||
full: '' | ||
var TAGS_WITH_RESOURCE_REFS = [ | ||
{ | ||
select: 'base', | ||
'tagHandler': createAttributeHandler('href') | ||
}, | ||
{ | ||
select: 'script', | ||
'tagHandler': createAttributeHandler('src') | ||
}, | ||
{ | ||
select: 'style', | ||
'tagHandler': createStyleTagResourceHandler | ||
}, | ||
{ | ||
select: 'link', | ||
'tagHandler': createAttributeHandler('href') | ||
}, | ||
{ | ||
select: 'img', | ||
'tagHandler': createAttributeHandler('src') | ||
}, | ||
{ | ||
select: '[style]', | ||
'tagHandler': createStyleAttributeResourceHandler | ||
} | ||
]; | ||
function resolveResources(dom, _resourceUrlHandler){ | ||
TAGS_WITH_RESOURCE_REFS.forEach(function(rule){ | ||
var handler = rule.tagHandler(_resourceUrlHandler); | ||
select(rule.select, dom).forEach(function(tag){ | ||
handler(tag); | ||
}); | ||
}); | ||
} | ||
function createAttributeHandler(key){ | ||
return function(fn){ | ||
return function(tag){ | ||
if (fn && tag.attribs && tag.attribs[key] ) { | ||
tag.attribs[key] = fn(tag.attribs[key]); | ||
} | ||
}; | ||
}; | ||
} | ||
var handler = new htmlparser.DomHandler(function (err, dom) { | ||
if (err) { | ||
throw err; | ||
} | ||
function createStyleAttributeResourceHandler(resourceUrlHandler){ | ||
return function (domTag) { | ||
var style = domTag.attribs.style; | ||
var baseTagFound = false; | ||
select('base', dom).forEach(function (baseTag) { | ||
baseTagFound = true; | ||
if ( resourceUrlHandler && baseTag.attribs && baseTag.attribs.href ) { | ||
baseTag.attribs.href = resourceUrlHandler(baseTag.attribs.href); | ||
//output.content += toHtml(baseTag); | ||
//baseTag.remove() | ||
function replaceHandler(url) { | ||
if (!url || typeof url !== 'string'){ | ||
return; | ||
} | ||
}); | ||
if (resourceUrlHandler && baseTagFound === false){ | ||
output.pre += '<base href="'+resourceUrlHandler('./')+'" />'; | ||
var newUrl = resourceUrlHandler(url); | ||
if (url !== newUrl) { | ||
domTag.attribs.style = domTag.attribs.style.replace(url, newUrl); | ||
} | ||
} | ||
select('script', dom).forEach(function (scriptTag) { | ||
if (resourceUrlHandler && scriptTag.attribs && scriptTag.attribs.src) { | ||
scriptTag.attribs.src = resourceUrlHandler(scriptTag.attribs.src); | ||
if (resourceUrlHandler && style) { | ||
var result; | ||
try { | ||
result = cssParser('#DUMMY{' + style + '}'); | ||
replaceCSSUrlHandler(result.stylesheet.rules[0].declarations, replaceHandler); | ||
} catch (e) { | ||
//console.log(e); | ||
} | ||
}); | ||
} | ||
}; | ||
} | ||
select('style', dom).forEach(function (styleTag) { | ||
function replaceHandler(url, entry) { | ||
if (!url || typeof url !== 'string'){ | ||
return; | ||
} | ||
var newUrl = resourceUrlHandler(url); | ||
if (url !== newUrl) { | ||
entry.value = entry.value.replace(new RegExp('url\\([\"|\'|\\\']?'+url+'[\"|\'|\\\']?\\)', 'm'), 'url(\"'+newUrl+'\")'); | ||
} | ||
function createStyleTagResourceHandler(resourceUrlHandler){ | ||
return function (tag) { | ||
function CSSURLReplaceHandler(url, entry) { | ||
if (!url || typeof url !== 'string'){ | ||
return; | ||
} | ||
var newUrl = resourceUrlHandler(url); | ||
if (url !== newUrl) { | ||
entry.value = entry.value.replace(new RegExp('url\\([\"|\'|\\\']?'+url+'[\"|\'|\\\']?\\)', 'm'), 'url(\"'+newUrl+'\")'); | ||
} | ||
} | ||
function rulesHandler(rule){ | ||
if (rule.declarations){ | ||
replaceCSSUrlHandler(rule.declarations, replaceHandler); | ||
} | ||
if (rule.rules && rule.rules.length > 0){ | ||
rule.rules.forEach(rulesHandler); | ||
} | ||
function rulesHandler(rule){ | ||
if (rule.declarations){ | ||
replaceCSSUrlHandler(rule.declarations, CSSURLReplaceHandler); | ||
} | ||
if (rule.rules && rule.rules.length > 0){ | ||
rule.rules.forEach(rulesHandler); | ||
} | ||
} | ||
if (resourceUrlHandler && styleTag) { | ||
var result; | ||
if (resourceUrlHandler && tag) { | ||
var result; | ||
try { | ||
result = cssParser(toHtml(styleTag.children)); | ||
result.stylesheet.rules.forEach(rulesHandler); | ||
} catch (e) { | ||
} | ||
if (result){ | ||
styleTag.children = cssStringify(result); | ||
} | ||
try { | ||
result = cssParser(toHtml(tag.children)); | ||
result.stylesheet.rules.forEach(rulesHandler); | ||
} catch (e) { | ||
console.log('\nERROR:', e); | ||
} | ||
}); | ||
if (result){ | ||
tag.children[0].data = cssStringify(result); | ||
} | ||
} | ||
}; | ||
} | ||
var TAGS_TO_DELETE = ['title', 'meta']; | ||
var TAGS_TO_REMOVE = ['html', 'head', 'body']; | ||
function filterOutResources(dom){ | ||
var filtered = []; | ||
var after = []; | ||
// filter out top level | ||
dom.forEach(filterHandler); | ||
var head = select('head', dom)[0]; | ||
if (head) { | ||
select('link', head).forEach(function (linkTag) { | ||
if ( resourceUrlHandler && linkTag.attribs && linkTag.attribs.href ) { | ||
linkTag.attribs.href = resourceUrlHandler(linkTag.attribs.href); | ||
} | ||
output.content += toHtml(linkTag); | ||
function filterHandler(entry){ | ||
if (entry.type === 'comment'){ | ||
return; | ||
} | ||
if (entry.type === 'tag' && entry.name === 'body' && !!entry.attribs.onload){ | ||
// | ||
var newEntry = { | ||
type: 'script', | ||
name: 'script', | ||
attribs: { type: 'text/javascript' }, | ||
children: [], | ||
parent: null | ||
}; | ||
newEntry.children.push({ | ||
data: entry.attribs.onload.replace(/^javascript\:/, ''), | ||
type: 'text', | ||
parent: newEntry | ||
}); | ||
after.push(newEntry); | ||
} | ||
select('style', head).forEach(function (styleTag) { | ||
output.style += toHtml(styleTag.children); | ||
}); | ||
if (entry.type === 'tag' && TAGS_TO_DELETE.indexOf(entry.name) >-1){ | ||
// lets ignore | ||
return; | ||
} | ||
if (entry.type === 'tag' && TAGS_TO_REMOVE.indexOf(entry.name) >-1){ | ||
// lets copy children | ||
entry.children.forEach(filterHandler); | ||
return; | ||
} | ||
select('script', head).forEach(function (scriptTag) { | ||
if (scriptTag.attribs && scriptTag.attribs.src) { | ||
output.content += toHtml(scriptTag); | ||
} else { | ||
output.script += toHtml(scriptTag.children); | ||
} | ||
}); | ||
if (entry.type === 'directive' && entry.name === '!doctype'){ | ||
// ignore | ||
return; | ||
} | ||
select('img', dom).forEach(function (imgTag) { | ||
if (resourceUrlHandler) { | ||
imgTag.attribs.src = resourceUrlHandler(imgTag.attribs.src); | ||
} | ||
}); | ||
filtered.push(entry); | ||
} | ||
select('[style]', dom).forEach(function (tag) { | ||
var style = tag.attribs.style; | ||
function replaceHandler(url) { | ||
if (!url || typeof url !== 'string'){ | ||
return; | ||
} | ||
var newUrl = resourceUrlHandler(url); | ||
if (url !== newUrl) { | ||
tag.attribs.style = tag.attribs.style.replace(url, newUrl); | ||
} | ||
} | ||
filtered = filtered.filter(filterEmptyContent); | ||
if (resourceUrlHandler && style) { | ||
var result; | ||
try { | ||
result = cssParser('#DUMMY{' + style + '}'); | ||
replaceCSSUrlHandler(result.stylesheet.rules[0].declarations, replaceHandler); | ||
} catch (e) { | ||
//console.log(e); | ||
} | ||
} | ||
}); | ||
function filterEmptyContent(entry){ | ||
if (entry.type === 'text' && entry.data.trim() === ''){ | ||
// remove newlines and tabs | ||
return false; | ||
} | ||
return true; | ||
} | ||
var body = select('body', dom)[0]; | ||
if (body) { | ||
output.content += toHtml(body.children); | ||
} else { | ||
output.content += toHtml(dom); | ||
return filtered.concat(after); | ||
} | ||
function parse(str, resourceUrlHandler) { | ||
var res; | ||
runParser(str, function (err, dom) { | ||
if (err) { | ||
throw err; | ||
} | ||
var pre = ''; | ||
var baseTagFound = select('base', dom).length >= 1; | ||
// output document with rewritten urls | ||
output.full = toHtml(dom); | ||
}); | ||
if (resourceUrlHandler && baseTagFound === false){ | ||
pre += '<base href="'+resourceUrlHandler('./')+'" />'; | ||
} | ||
new htmlparser.Parser(handler).parseComplete(str); | ||
resolveResources(dom, resourceUrlHandler); | ||
return { | ||
'pre': output.pre, | ||
'content': output.content, | ||
'style': output.style, | ||
'script': output.script, | ||
'full': output.full | ||
}; | ||
dom = filterOutResources(dom); | ||
// console.log('--------------------> DOM::', dom); | ||
// console.log('--------------------> OUTPUT::', toHtml(dom)); | ||
res = pre + toHtml(dom); | ||
}); | ||
return res; | ||
} | ||
function toScriptTag(base, html, str){ | ||
str = ''; | ||
var trimmed = parseAndResolve(base, html); | ||
if (trimmed.pre){ | ||
str += toDocumentWrite(trimmed.pre); | ||
} | ||
if (trimmed.style) { | ||
str += toDocumentWrite('<style>' + minifyCSS(trimmed.style) + '</style>'); | ||
} | ||
str += toDocumentWrite(trimmed.content); | ||
if (trimmed.script){ | ||
str += toDocumentWrite('<scr\'+\'ipt>' + trimmed.script + '</scr\'+\'ipt>'); | ||
} | ||
return str; | ||
function runParser(str, domHandler){ | ||
new htmlparser.Parser(new htmlparser.DomHandler(domHandler)).parseComplete(str); | ||
} | ||
function toScriptTag(base, html){ | ||
return '<script>'+toDocumentWrite(resolve(base, html))+'</script>'; | ||
} | ||
module.exports = { | ||
@@ -248,4 +297,4 @@ 'escape': escape, | ||
'toDocumentWrite': toDocumentWrite, | ||
'parseAndResolve': parseAndResolve, | ||
'parseAndResolve': resolve, | ||
'toScriptTag': toScriptTag | ||
}; |
{ | ||
"name": "document-write-html", | ||
"version": "0.1.3", | ||
"version": "0.2.0", | ||
"description": "Convert a index.html page into document.writes. Why? Because most ad delivery systems do this - so nice to emulate *bad behaviour* for testing.", | ||
@@ -26,18 +26,18 @@ "main": "lib/index.js", | ||
"dependencies": { | ||
"htmlparser-to-html": "0.0.5", | ||
"css-parse": "^1.7.0", | ||
"clean-css": "^2.1.6", | ||
"htmlparser2": "^3.5.1", | ||
"css-parse": "^2.0.0", | ||
"clean-css": "^2.2.8", | ||
"htmlparser2": "^3.7.3", | ||
"soupselect": "^0.2.0", | ||
"css-stringify": "^1.4.1", | ||
"css-stringify": "^2.0.0", | ||
"CSSselect": "^0.7.0" | ||
}, | ||
"devDependencies": { | ||
"referee": "^1.0.2", | ||
"mocha": "^1.17.1", | ||
"coveralls": "^2.8.0", | ||
"istanbul": "^0.2.6", | ||
"referee": "^1.0.3", | ||
"mocha": "^1.20.1", | ||
"coveralls": "^2.11.1", | ||
"istanbul": "^0.3.0", | ||
"mocha-lcov-reporter": "0.0.1", | ||
"jshint": "^2.4.4" | ||
"jshint": "^2.5.2", | ||
"phantom": "^0.6.5" | ||
} | ||
} |
138
test/test.js
@@ -0,1 +1,2 @@ | ||
/* jshint evil: true */ | ||
var referee = require('referee'); | ||
@@ -39,3 +40,12 @@ var assert = referee.assert; | ||
var files = ['bad_escape_1.html', 'bad_escape_2.js', 'bad_escape_3.css']; | ||
it('should replace quote html entities', function(){ | ||
var content = 'document.write(\"fn("url")\");'; | ||
var expected = 'document.write(\'<scr\'+\'ipt>document.write(\\\"fn(\\\"url\\\")\\\");</scr\'+\'ipt>\');'; | ||
var result = html.toDocumentWrite(content, 'script'); | ||
assert.equals(result, expected); | ||
}); | ||
var files = ['bad_escape_1.html', 'bad_escape_2.js', 'bad_escape_3.css', 'order123.html']; | ||
files.forEach(function(fileName){ | ||
@@ -73,2 +83,3 @@ it(fileName + ' should produce valid javascript', function(done){ | ||
describe('parseAndResolve', function(){ | ||
@@ -84,3 +95,4 @@ | ||
assert.equals(output.full.match(RE_FIND_ENTRIES).length, 14); | ||
var match = output.match(RE_FIND_ENTRIES); | ||
assert.equals(match && match.length, 15); | ||
}); | ||
@@ -96,3 +108,3 @@ | ||
assert.equals(output.full.match(RE_FIND_ENTRIES).length, 4); | ||
assert.equals(output.match(RE_FIND_ENTRIES).length, 5); | ||
}); | ||
@@ -109,6 +121,6 @@ }); | ||
assert.equals('document.write(\''+content+'\');', output.replace(/\\"/gmi, '"')); | ||
assert.equals('<script>document.write(\''+content+'\');</script>', output.replace(/\\"/gmi, '"')); | ||
}); | ||
it('should output html-page page as document write', function(){ | ||
it('should output html-page page as document write, and add missing base', function(){ | ||
var expected = '<div></div>'; | ||
@@ -119,5 +131,119 @@ var content = '<head></head><body>'+expected; | ||
assert.equals('document.write(\'<base href=\\"'+base+'\\" />\');document.write(\''+expected+'\');', output); | ||
var i = '<script>document.write(\'<base href=\\"'+base+'\\" />'+expected+'\');</script>'; | ||
assert.equals(i, output); | ||
}); | ||
describe('via phantom', function(){ | ||
var phantom = require('phantom'); | ||
var http = require('http'); | ||
before(function(done){ | ||
var PORT = process.env.PORT||7070; | ||
var self = this; | ||
phantom.create(function(ph){ | ||
self.phantom = ph; | ||
ph.createPage(function(page){ | ||
self.page = page; | ||
done(); | ||
}); | ||
}); | ||
self.base = 'http://127.0.0.1:'+PORT+'/'; | ||
self.content = ''; | ||
self.server = http.createServer(function(req, res){ | ||
if (req.url === '/?i=pass1' || req.url === '/?i=pass2'){ | ||
res.writeHead(200, {'Content-Type': 'text/html'}); | ||
res.end(self.content); | ||
// console.log('-------------------> Serve -------------->', req.url, self.active,':\n', self.content, '\nEND:\n'); | ||
} else { | ||
res.writeHead(200, {'Content-Type': 'application/javascript'}); | ||
if (req.url.indexOf('/?output') === 0){ | ||
var c = 'output("'+req.url.substring(9)+'");'; | ||
// console.log('-------------------> Serve -------------->', req.url, self.active, '\n', c); | ||
return res.end(c); | ||
} | ||
// console.log(' ---- MISSING :'+req.url); | ||
res.end('console.log("'+req.url+'");'); | ||
} | ||
}).listen(PORT); | ||
var script = [ | ||
'<script type="text/javascript">', | ||
'window.__output = window.__output||[];', | ||
'window.output = function(a){window.__output.push(a);}', | ||
'</script>' | ||
].join(''); | ||
function pass1(){ | ||
var str = html.toScriptTag(self.base, (getFixture(self.active)).toString()).toString(); | ||
// console.log('---------------------> pass1(WRITE):\n'+ str); | ||
self.content = [ | ||
'<!DOCTYPE html>', '<html>', '<head>', script, | ||
'</head>','<body>', | ||
str, | ||
'</body></html>' | ||
].join(''); | ||
} | ||
function pass2(){ | ||
self.content = getFixture(self.active); | ||
} | ||
self.open = function(page, cb){ | ||
function get(){ return window.__output; } | ||
var url = self.base; | ||
pass1(); | ||
page.open(url + '?i=pass1', function(){ | ||
page.evaluate(get, function(result1){ | ||
pass2(); | ||
page.open(url + '?i=pass2', function(){ | ||
page.evaluate(get, function(result2){ | ||
cb(result1, result2); | ||
}); | ||
}); | ||
}); | ||
}); | ||
}; | ||
}); | ||
// generate test per input | ||
[{ name: 'order123.html', expected: 7 }].forEach(function(data){ | ||
it('should output '+data.name+' html page elements in correct order', function(done){ | ||
this.timeout(5000); | ||
this.active = data.name; | ||
this.open(this.page, assertResult); | ||
function assertResult(result1, result2){ | ||
setTimeout(function(){ | ||
// console.log('------------------------> RESULT:', result1,'\n', result2); | ||
assert(result1, 'expected a result from wrapped result'); | ||
assert.equals(result1.length, data.expected); | ||
assert(result2, 'expected a result from demo'); | ||
assert.equals(result2.length, data.expected); | ||
assert.equals(result1.length, result2.length); | ||
done(); | ||
}, 0); | ||
} | ||
}); | ||
}); | ||
after(function(done){ | ||
this.phantom.exit(); | ||
this.server.close(); | ||
done(); | ||
}); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
28352
6
19
564
7
2
4
+ Added atob@2.1.2 (transitive)
+ Added css@2.2.4 (transitive)
+ Added css-parse@2.0.0 (transitive)
+ Added css-stringify@2.0.0 (transitive)
+ Added decode-uri-component@0.2.2 (transitive)
+ Added resolve-url@0.2.1 (transitive)
+ Added source-map-resolve@0.5.3 (transitive)
+ Added source-map-url@0.4.1 (transitive)
+ Added urix@0.1.0 (transitive)
- Removed htmlparser-to-html@0.0.5
- Removed amdefine@1.0.1 (transitive)
- Removed css-parse@1.7.0 (transitive)
- Removed css-stringify@1.4.1 (transitive)
- Removed htmlparser-to-html@0.0.5 (transitive)
- Removed source-map@0.1.43 (transitive)
Updated clean-css@^2.2.8
Updated css-parse@^2.0.0
Updated css-stringify@^2.0.0
Updated htmlparser2@^3.7.3