htmlparser2
Advanced tools
Comparing version 2.1.2 to 2.1.3
@@ -30,5 +30,6 @@ var ElementType = require("./ElementType.js"); | ||
DefaultHandler.prototype._handleCallback = | ||
DefaultHandler.prototype.onerror = function(error){ | ||
if(typeof this._callback === "function"){ | ||
return this._callback(error, this.dom); | ||
this._callback(error, this.dom); | ||
} else { | ||
@@ -39,6 +40,4 @@ if(error) throw error; | ||
DefaultHandler.prototype._handleCallback = DefaultHandler.prototype.onerror; | ||
DefaultHandler.prototype.onclosetag = function(name){ | ||
this._tagStack.pop(); | ||
if(this._tagStack.pop().name !== name) this._handleCallback(Error("tagname didn't match!")); | ||
}; | ||
@@ -102,3 +101,3 @@ | ||
data: data, | ||
type: ElementType.Comment | ||
type: ElementType.Comment | ||
}; | ||
@@ -105,0 +104,0 @@ |
@@ -29,3 +29,3 @@ var ElementType = require("./ElementType.js"); | ||
if(recurse !== false) recurse = true; | ||
if(isNaN(limit)) limit = Infinity; | ||
if(isNaN(limit)) limit = 1/0; | ||
if(!Array.isArray(element)){ | ||
@@ -41,14 +41,14 @@ element = [element]; | ||
for(var key in options){ | ||
if(key === "tag_name"){ | ||
if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false; | ||
if(!options.tag_name(element.name)) return false; | ||
} else if(key === "tag_type") { | ||
if(!options.tag_type(type)) return false; | ||
} else if(key === "tag_contains") { | ||
if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false; | ||
if(!options.tag_contains(element.data)) return false; | ||
} else if(!element.attribs || !options[key](element.attribs[key])) | ||
return false; | ||
} | ||
for(var key in options){ | ||
if(key === "tag_name"){ | ||
if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false; | ||
if(!options.tag_name(element.name)) return false; | ||
} else if(key === "tag_type") { | ||
if(!options.tag_type(type)) return false; | ||
} else if(key === "tag_contains") { | ||
if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false; | ||
if(!options.tag_contains(element.data)) return false; | ||
} else if(!element.attribs || !options[key](element.attribs[key])) | ||
return false; | ||
} | ||
@@ -90,3 +90,3 @@ return true; | ||
else return filter(function(elem){ | ||
return filter(function(elem){ | ||
var type = elem.type; | ||
@@ -142,2 +142,2 @@ if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false; | ||
} | ||
}; | ||
}; |
@@ -8,3 +8,4 @@ //Types of elements found in the DOM | ||
Style: "style", //Special tag <style>...</style> | ||
Tag: "tag" //Any tag that isn't special | ||
Tag: "tag", //Any tag that isn't special | ||
CDATA: "cdata" | ||
}; |
@@ -16,3 +16,3 @@ module.exports = { | ||
Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")}); | ||
return ElementType; | ||
return this.ElementType; | ||
}, | ||
@@ -19,0 +19,0 @@ get DomUtils(){ |
@@ -16,4 +16,4 @@ var ElementType = require("./ElementType.js"); | ||
//Regular expressions used for cleaning up and parsing (stateless) | ||
var _reTagName = /[^\s\/]+/; //matches tag names | ||
var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g; | ||
var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g, | ||
_reTail = /\s|\/|$/; | ||
@@ -27,7 +27,12 @@ Parser.prototype._options = { | ||
/* | ||
onopentag, | ||
onclosetag, | ||
ontext, | ||
onprocessinginstruction, | ||
oncomment | ||
oncdataend, | ||
oncdatastart, | ||
onclosetag, | ||
oncomment, | ||
oncommentend, | ||
onerror, | ||
onopentag, | ||
onprocessinginstruction, | ||
onreset, | ||
ontext | ||
*/ | ||
@@ -76,12 +81,6 @@ }; | ||
var parseAttributes = function(data){ | ||
var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace | ||
if(pos === 0) return attrs; | ||
var attribRaw = data.substr(pos); | ||
_reAttrib.lastIndex = 0; | ||
var match; | ||
while(match = _reAttrib.exec(attribRaw)){ | ||
if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4]; | ||
else attrs[match[5]] = match[5]; | ||
var attrs = {}, match; | ||
while(match = _reAttrib.exec(data)){ | ||
attrs[match[1]] = match[2] || match[3] || match[4] || match[1]; | ||
} | ||
@@ -94,8 +93,5 @@ | ||
Parser.prototype._parseTagName = function(data){ | ||
var match = data.match(_reTagName); | ||
if(match === null) return ""; | ||
if(this._options.lowerCaseTags){ | ||
return match[0].toLowerCase(); | ||
} | ||
else return match[0]; | ||
var match = data.substr(0, data.search(_reTail)); | ||
if(!this._options.lowerCaseTags) return match; | ||
return match.toLowerCase(); | ||
}; | ||
@@ -108,3 +104,4 @@ | ||
SpecialTags[ElementType.Script] = 2; //2^1 | ||
SpecialTags[ElementType.Comment] = 4; //2^3 | ||
SpecialTags[ElementType.Comment] = 4; //2^2 | ||
SpecialTags[ElementType.CDATA] = 8; //2^3 | ||
@@ -115,3 +112,3 @@ //Parses through HTML text and returns an array of found elements | ||
var next, rawData, elementType, elementData, lastTagSep; | ||
var next, rawData, elementData, lastTagSep; | ||
@@ -121,3 +118,3 @@ var opening = buffer.indexOf("<"), closing = buffer.indexOf(">"); | ||
//if force is true, parse everything | ||
if(force) opening = Infinity; | ||
if(force) opening = 1/0; | ||
@@ -142,9 +139,17 @@ while(opening !== closing){ //just false if both are -1 | ||
if(this._contentFlags >= SpecialTags[ElementType.Comment]){ | ||
if(this._contentFlags >= SpecialTags[ElementType.CDATA]){ | ||
if(this._tagSep === ">" && rawData.substr(-2) === "]]"){ | ||
if(rawData.length !== 2 && this._cbs.ontext){ | ||
this._cbs.ontext(rawData.slice(0,-2)); | ||
} | ||
this._contentFlags -= SpecialTags[ElementType.CDATA]; | ||
if(this._cbs.oncdataend) this._cbs.oncdataend(); | ||
} | ||
else if(this._cbs.ontext) this._cbs.ontext(rawData + this._tagSep); | ||
} | ||
else if(this._contentFlags >= SpecialTags[ElementType.Comment]){ | ||
//We're currently in a comment tag | ||
this._processComment(rawData); | ||
continue; | ||
} | ||
if(lastTagSep === "<"){ | ||
else if(lastTagSep === "<"){ | ||
elementData = rawData.trimLeft(); | ||
@@ -171,4 +176,4 @@ if(elementData.charAt(0) === "/"){ | ||
} | ||
else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){ | ||
if(elementData.substr(0, 3) === "!--"){ | ||
else if(elementData.charAt(0) === "!"){ | ||
if(elementData.substr(1, 2) === "--"){ | ||
//This tag is a comment | ||
@@ -178,8 +183,18 @@ this._contentFlags += SpecialTags[ElementType.Comment]; | ||
} | ||
else if(elementData.substr(1, 7) === "[CDATA["){ | ||
if(this._cbs.oncdatastart) this._cbs.oncdatastart(); | ||
if(this._tagSep === ">" && elementData.substr(-2) === "]]"){ | ||
if(this._cbs.oncdataend) this._cbs.oncdataend(); | ||
if(this._cbs.ontext) this._cbs.ontext(elementData.slice(8, -2)); | ||
} | ||
else{ | ||
if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8)); | ||
this._contentFlags += SpecialTags[ElementType.CDATA]; | ||
} | ||
} | ||
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep); | ||
//This tag is a directive | ||
//TODO: what about CDATA? | ||
else if(this._cbs.onprocessinginstruction){ | ||
this._cbs.onprocessinginstruction( | ||
elementData.charAt(0) + this._parseTagName(elementData.substr(1)), | ||
"!" + this._parseTagName(elementData.substr(1)), | ||
elementData | ||
@@ -190,2 +205,10 @@ ); | ||
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep); | ||
else if(elementData.charAt(0) === "?"){ | ||
if(this._cbs.onprocessinginstruction){ | ||
this._cbs.onprocessinginstruction( | ||
"?" + this._parseTagName(elementData.substr(1)), | ||
elementData | ||
); | ||
} | ||
} | ||
else this._processOpenTag(this._parseTagName(elementData), elementData); | ||
@@ -247,10 +270,9 @@ } | ||
if(this._stack && (!emptyTags[name] || this._options.xmlMode)){ | ||
var i = this._stack.length; | ||
while(i !== 0 && this._stack[--i] !== name){} | ||
if(i !== 0 || this._stack[0] === name) | ||
var pos = this._stack.lastIndexOf(name); | ||
if(pos !== -1) | ||
if(this._cbs.onclosetag){ | ||
while(i < this._stack.length) | ||
this._cbs.onclosetag(this._stack.pop()); | ||
pos = this._stack.length - pos; | ||
while(pos--) this._cbs.onclosetag(this._stack.pop()); | ||
} | ||
else this._stack.splice(i); | ||
else this._stack.splice(pos); | ||
} | ||
@@ -257,0 +279,0 @@ //many browsers (eg. Safari, Chrome) convert </br> to <br> |
{ | ||
"name": "htmlparser2", | ||
"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.", | ||
"version": "2.1.2", | ||
"version": "2.1.3", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
"keywords": ["html", "parser", "xml", "dom", "rss", "feed", "atom"], | ||
"contributors": ["Chris Winberry <chris@winberry.net>"], | ||
@@ -20,3 +21,3 @@ "repository": { | ||
"scripts": { | ||
"test": "cd tests && node 00-runtests.js" | ||
"test": "node --harmony_proxies tests/00-runtests.js" | ||
}, | ||
@@ -23,0 +24,0 @@ "engines": "node >= 0.3.0", |
@@ -10,6 +10,6 @@ var fs = require("fs"); | ||
//read files, load them, run them | ||
fs.readdirSync(test.dir | ||
fs.readdirSync(__dirname + test.dir | ||
).map(function(file){ | ||
if(file[0] === ".") return false; | ||
return require(test.dir + file); | ||
return require(__dirname + test.dir + file); | ||
}).forEach(function(file){ | ||
@@ -19,3 +19,3 @@ if(file === false) return; | ||
failed = false, | ||
start = Date.now() | ||
start = Date.now(), | ||
took = 0; | ||
@@ -22,0 +22,0 @@ |
@@ -6,3 +6,3 @@ //Runs tests for HTML | ||
exports.dir = "./HTML/"; | ||
exports.dir = "/HTML/"; | ||
@@ -9,0 +9,0 @@ /* |
@@ -6,3 +6,3 @@ //Runs tests for feeds | ||
exports.dir = "./Feeds/"; | ||
exports.dir = "/Feeds/"; | ||
@@ -15,2 +15,2 @@ exports.test = function(test, cb){ | ||
helper.writeToParser(handler, test.options.parser, test.html); | ||
} | ||
}; |
@@ -1,29 +0,41 @@ | ||
var helper = require("./test-helper.js"); | ||
var helper = require("./test-helper.js"), | ||
sliceArr = Array.prototype.slice; | ||
exports.dir = "./Events/"; | ||
exports.dir = "/Events/"; | ||
exports.test = function(test, cb){ | ||
var tokens = []; | ||
var cbs = { | ||
onopentag: function(name, attributes){ | ||
tokens.push({event:"open", name: name, attributes: attributes}); | ||
}, | ||
onclosetag: function(name){ | ||
tokens.push({event:"close", name: name}); | ||
}, | ||
ontext: function(text){ | ||
tokens.push({event:"text", text: text}); | ||
}, | ||
oncomment: function(data){ | ||
tokens.push({event:"comment", data:data}); | ||
}, | ||
onprocessinginstruction: function(name, data){ | ||
tokens.push({event:"processing", name:name, data:data}); | ||
}, | ||
onend: function(){ | ||
//deletes all tokens | ||
cb(null, tokens.splice(0)); | ||
} | ||
}; | ||
var tokens = [], cbs; | ||
if(typeof Proxy !== "undefined"){ | ||
cbs = Proxy.create({ get: function(a, name){ | ||
if(name === "onend"){ | ||
return function(){ | ||
cb(null, tokens.splice(0)); | ||
} | ||
} | ||
if(name === "onreset") return function(){}; | ||
return function(){ | ||
tokens.push({ | ||
event: name.substr(2), | ||
data: sliceArr.apply(arguments) | ||
}); | ||
} | ||
}}); | ||
} | ||
else{ | ||
cbs = { | ||
onerror: cb, | ||
onend: function(){ | ||
cb(null, tokens.splice(0)); | ||
} | ||
}; | ||
helper.EVENTS.forEach(function(name){ | ||
cbs["on" + name] = function(){ | ||
tokens.push({ | ||
event: name, | ||
data: sliceArr.apply(arguments) | ||
}); | ||
} | ||
}); | ||
} | ||
helper.writeToParser(cbs, test.options.parser, test.html); | ||
} | ||
}; |
@@ -1,3 +0,1 @@ | ||
var DomUtils = require("../lib/DomUtils.js"); | ||
//generate a dom | ||
@@ -12,3 +10,3 @@ var handler = new (require("../lib/DefaultHandler.js"))(); | ||
exports.dir = "./DomUtils/"; | ||
exports.dir = "/DomUtils/"; | ||
@@ -15,0 +13,0 @@ exports.test = function(test, cb){ |
@@ -1,7 +0,26 @@ | ||
var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"), | ||
parser = new (require("../lib/Parser.js"))({}), | ||
var multiply = function(text){ | ||
return Array(5e3+1).join(text); | ||
}, | ||
tests = { | ||
self_closing: multiply("<br/>"), | ||
tag: multiply("<tag foo=bar foobar> Text </tag>"), | ||
comment: multiply("<!-- this is <<a> comment -->"), | ||
directive: multiply("<?foo bar?>"), | ||
special: multiply("<script> THIS IS <SPECIAL> </script>"), | ||
xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>") | ||
} | ||
empty = function(){}, | ||
cbs = {}; | ||
require("./test-helper.js").EVENTS.forEach(function(name){ | ||
cbs["on" + name] = empty; | ||
}); | ||
var parser = new (require("../lib/Parser.js"))(cbs), | ||
ben = require("ben"); | ||
console.log("Test took (ms)", ben(1e2, function(){ | ||
parser.parseComplete(xml); | ||
})); | ||
Object.keys(tests).forEach(function(name){ | ||
console.log("Test", name, "took", ben(150, function(){ | ||
parser.parseComplete(tests[name]); | ||
})); | ||
}); |
exports.name = "simple"; | ||
exports.options = {handler: {}, parser: {}}; | ||
exports.html = "<h1 class=test>adsf</h1>"; | ||
exports.expected = [ { event: 'open', | ||
name: 'h1', | ||
attributes: { class: 'test' } }, | ||
{ event: 'text', text: 'adsf' }, | ||
{ event: 'close', name: 'h1' } ]; | ||
exports.expected = [ | ||
{ | ||
"event": "opentag", | ||
"data": [ | ||
"h1", | ||
{ | ||
"class": "test" | ||
}, | ||
"tag" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
"data": [ | ||
"adsf" | ||
] | ||
}, | ||
{ | ||
"event": "closetag", | ||
"data": [ | ||
"h1" | ||
] | ||
} | ||
]; |
@@ -6,28 +6,41 @@ exports.name = "Template script tags"; | ||
{ | ||
"event": "open", | ||
"name": "script", | ||
"attributes": { | ||
"type": "text/template" | ||
} | ||
"event": "opentag", | ||
"data": [ | ||
"script", | ||
{ | ||
"type": "text/template" | ||
}, | ||
"script" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
"text": "<h1" | ||
"data": [ | ||
"<h1" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
"text": ">Heading1" | ||
"data": [ | ||
">Heading1" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
"text": "</h1" | ||
"data": [ | ||
"</h1" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
"text": ">" | ||
"data": [ | ||
">" | ||
] | ||
}, | ||
{ | ||
"event": "close", | ||
"name": "script" | ||
"event": "closetag", | ||
"data": [ | ||
"script" | ||
] | ||
} | ||
]; |
exports.name = "RDF test"; | ||
exports.type = "rss"; | ||
exports.options = { | ||
@@ -13,15 +12,18 @@ handler: {}, | ||
exports.expected = { | ||
type: 'rdf:RDF', | ||
id: '', | ||
title: 'craigslist | all community in SF bay area', | ||
link: 'http://sfbay.craigslist.org/ccc/', | ||
items: [{ | ||
title: '![CDATA[ Music Equipment Repair and Consignment ]]', | ||
link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n', | ||
description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065' | ||
}, { | ||
title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]', | ||
link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n', | ||
description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.' | ||
}] | ||
"type": "rdf:RDF", | ||
"id": "", | ||
"title": "craigslist | all community in SF bay area", | ||
"link": "http://sfbay.craigslist.org/ccc/", | ||
"items": [ | ||
{ | ||
"title": " Music Equipment Repair and Consignment ", | ||
"link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n", | ||
"description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065" | ||
}, | ||
{ | ||
"title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n", | ||
"link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n", | ||
"description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101." | ||
} | ||
] | ||
}; |
@@ -13,2 +13,4 @@ var Parser = require("../lib/Parser.js"), | ||
parser.parseComplete(data); | ||
} | ||
} | ||
exports.EVENTS = ["cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag"/*, "error", "end"*/]; |
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
55910
51
1650