htmlparser2
Advanced tools
Comparing version 2.2.0 to 2.2.2
@@ -6,3 +6,2 @@ var ElementType = require("./ElementType.js"); | ||
this._done = false; | ||
this._inSpecialTag = false; | ||
this._tagStack = []; | ||
@@ -54,3 +53,3 @@ if(typeof callback === "object") this._options = callback; | ||
lastChild = lastTag.children[lastTag.children.length - 1]; | ||
if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){ | ||
if(element.type === ElementType.Text && lastChild.type === ElementType.Text){ | ||
lastChild.data += element.data; | ||
@@ -67,5 +66,2 @@ } else { | ||
DomHandler.prototype.onopentag = function(name, attribs, type){ | ||
if(type === ElementType.Script || type === ElementType.Style){ | ||
this._inSpecialTag = true; | ||
} | ||
var element = { | ||
@@ -72,0 +68,0 @@ type: type, |
@@ -13,3 +13,3 @@ var ElementType = require("./ElementType.js"); | ||
this._done = false; | ||
this._paused = false; | ||
this._running = true; //false if paused | ||
} | ||
@@ -49,11 +49,12 @@ | ||
//Parses a piece of an HTML document | ||
Parser.prototype.write = | ||
Parser.prototype.parseChunk = function(data){ | ||
Parser.prototype.parseChunk = | ||
Parser.prototype.write = function(data){ | ||
if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done")); | ||
this._buffer += data; //FIXME: this can be a bottleneck | ||
if(!this._paused) this._parseTags(); | ||
if(this._running) this._parseTags(); | ||
}; | ||
//Tells the parser that the HTML being parsed is complete | ||
Parser.prototype.end = Parser.prototype.done = function(chunk){ | ||
Parser.prototype.done = | ||
Parser.prototype.end = function(chunk){ | ||
if(this._done) return; | ||
@@ -64,3 +65,3 @@ | ||
if(!this._paused) this._finishParsing(); | ||
if(this._running) this._finishParsing(); | ||
}; | ||
@@ -80,7 +81,8 @@ | ||
Parser.prototype.pause = function(){ | ||
if(!this._done) this._paused = true; | ||
if(!this._done) this._running = false; | ||
}; | ||
Parser.prototype.resume = function(){ | ||
this._paused = false; | ||
if(this._running) return; | ||
this._running = true; | ||
this._parseTags(); | ||
@@ -139,3 +141,3 @@ if(this._done) this._finishParsing(); | ||
//opening !== closing is just false if both are -1 | ||
while(opening !== closing && !this._paused){ | ||
while(opening !== closing && this._running){ | ||
lastTagSep = this._tagSep; | ||
@@ -203,3 +205,3 @@ | ||
else{ | ||
if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8)); | ||
if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8) + this._tagSep); | ||
this._contentFlags += SpecialTags[ElementType.CDATA]; | ||
@@ -230,5 +232,6 @@ } | ||
if(this._contentFlags !== 0){ | ||
this._writeSpecial(rawData, lastTagSep); | ||
this._writeSpecial(rawData, ">"); | ||
} | ||
else if(rawData !== "" && this._cbs.ontext){ | ||
if(this._tagSep === ">") rawData += ">"; //it's the second > in a row | ||
this._cbs.ontext(rawData); | ||
@@ -282,3 +285,3 @@ } | ||
Parser.prototype._processCloseTag = function(name){ | ||
if(this._stack && (!emptyTags[name] || this._options.xmlMode)){ | ||
if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){ | ||
var pos = this._stack.lastIndexOf(name); | ||
@@ -320,3 +323,3 @@ if(pos !== -1) | ||
//If tag self-terminates, add an explicit, separate closing tag | ||
if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){ | ||
if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){ | ||
if(this._cbs.onclosetag) this._cbs.onclosetag(name); | ||
@@ -323,0 +326,0 @@ } else { |
{ | ||
"name": "htmlparser2", | ||
"description": "Performance-optimized forgiving HTML/XML/RSS parser", | ||
"version": "2.2.0", | ||
"version": "2.2.2", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"], |
@@ -40,3 +40,3 @@ #htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser) | ||
Output: | ||
Output (simplified): | ||
@@ -43,0 +43,0 @@ ```javascript |
@@ -26,3 +26,3 @@ exports.name = "CDATA"; | ||
"data": [ | ||
" asdf " | ||
" asdf >" | ||
] | ||
@@ -29,0 +29,0 @@ }, |
@@ -20,3 +20,3 @@ exports.name = "RDF test"; | ||
"link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n", | ||
"description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065" | ||
"description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->" | ||
}, | ||
@@ -26,5 +26,5 @@ { | ||
"link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n", | ||
"description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101." | ||
"description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->" | ||
} | ||
] | ||
}; |
2964
73683
57