htmlparser2
Advanced tools
Comparing version 1.0.0 to 1.1.0
@@ -31,5 +31,4 @@ var ElementType = require("./ElementType.js"); | ||
//Regular expressions used for cleaning up and parsing (stateless) | ||
var _reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace | ||
var _reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents | ||
var _reWhitespace = /\s/g; //Used to find any whitespace to split on | ||
var _reWhitespace = /\s/; //Used to find any whitespace to split on | ||
var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element | ||
@@ -66,4 +65,3 @@ | ||
Parser.prototype.done = function(){ | ||
if(this._done) | ||
return; | ||
if(this._done) return; | ||
this._done = true; | ||
@@ -77,3 +75,3 @@ | ||
raw: rawData | ||
, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "") | ||
, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim() | ||
, type: this._parseState | ||
@@ -94,4 +92,4 @@ }; | ||
this._buffer = ""; | ||
this._prevTagSep = ""; | ||
this._done = false; | ||
this._elements = []; | ||
this._elementsCurrent = 0; | ||
@@ -107,4 +105,4 @@ this._current = 0; | ||
this._parseState = ElementType.Text; | ||
this._prevTagSep = ''; | ||
this._tagStack = []; | ||
this._elements = []; | ||
this._handler.reset(); | ||
@@ -174,3 +172,3 @@ }; | ||
raw: rawData | ||
, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "") | ||
, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim() | ||
, type: this._parseState | ||
@@ -184,7 +182,8 @@ }; | ||
if(this._tagStack.length){ //We're parsing inside a script/comment/style tag | ||
if(this._tagStack[this._tagStack.length - 1] === ElementType.Script){ //We're currently in a script tag | ||
var type = this._tagStack[this._tagStack.length - 1]; | ||
if(type === ElementType.Script){ //We're currently in a script tag | ||
if(elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack | ||
this._tagStack.pop(); | ||
else { //Not a closing script tag | ||
if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment | ||
if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment | ||
//All data from here to script close is now a text element | ||
@@ -195,3 +194,3 @@ element.type = ElementType.Text; | ||
prevElement = this._elements[this._elements.length - 1]; | ||
prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw; | ||
prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData; | ||
element.raw = element.data = ""; //This causes the current element to not be added to the element list | ||
@@ -202,7 +201,7 @@ } | ||
} | ||
else if(this._tagStack[this._tagStack.length - 1] === ElementType.Style){ //We're currently in a style tag | ||
else if(type === ElementType.Style){ //We're currently in a style tag | ||
if(elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack | ||
this._tagStack.pop(); | ||
else { | ||
if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment | ||
if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment | ||
//All data from here to style close is now a text element | ||
@@ -213,4 +212,4 @@ element.type = ElementType.Text; | ||
prevElement = this._elements[this._elements.length - 1]; | ||
if(element.raw !== ""){ | ||
prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw; | ||
if(rawData !== ""){ | ||
prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData; | ||
element.raw = element.data = ""; //This causes the current element to not be added to the element list | ||
@@ -220,6 +219,4 @@ } else { //Element is empty, so just append the last tag marker found | ||
} | ||
} else { //The previous element was not text | ||
if(element.raw !== ""){ | ||
element.raw = element.data = element.raw; | ||
} | ||
} else {//The previous element was not text | ||
if(rawData !== "") element.data = rawData; | ||
} | ||
@@ -229,5 +226,4 @@ } | ||
} | ||
else if(this._tagStack[this._tagStack.length - 1] === ElementType.Comment){ //We're currently in a comment tag | ||
rawLen = element.raw.length; | ||
if(element.raw.charAt(rawLen - 2) === "-" && element.raw.charAt(rawLen - 1) === "-" && tagSep === ">"){ | ||
else if(type === ElementType.Comment){ //We're currently in a comment tag | ||
if(rawData.substr(-2) === "--" && tagSep === ">"){ | ||
//Actually, we're no longer in a comment tag, so pop it off the stack | ||
@@ -262,10 +258,7 @@ this._tagStack.pop(); | ||
if(element.type === ElementType.Tag){ | ||
element.name = elementName; | ||
if(element.raw.indexOf("!--") === 0){ //This tag is really comment | ||
if(element.raw.substring(0, 3) === "!--"){ //This tag is really comment | ||
element.type = ElementType.Comment; | ||
delete element.name; | ||
rawLen = element.raw.length; | ||
//Check if the comment is terminated in the current element | ||
if(element.raw.charAt(rawLen - 1) === "-" && element.raw.charAt(rawLen - 2) === "-" && tagSep === ">") | ||
if(element.raw.substr(-2) === "--" && tagSep === ">") | ||
element.raw = element.data = element.raw.replace(_reTrimComment, ""); | ||
@@ -277,24 +270,25 @@ else { //It's not so push the comment onto the tag stack | ||
} | ||
else if(element.raw.indexOf("!") === 0 || element.raw.indexOf("?") === 0){ | ||
element.type = ElementType.Directive; | ||
//TODO: what about CDATA? | ||
else { | ||
element.name = elementName; | ||
if(element.raw[0] === "!" || element.raw[0] === "?"){ | ||
element.type = ElementType.Directive; | ||
//TODO: what about CDATA? | ||
} | ||
else if(elementName[0] === "/"){ | ||
element.data = element.name; | ||
if(elementName === "/script") element.type = ElementType.Script; | ||
else if(elementName === "/style") element.type = ElementType.Style; | ||
} | ||
else if(elementName === "script"){ | ||
element.type = ElementType.Script; | ||
//Special tag, push onto the tag stack if not terminated | ||
if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Script); | ||
} | ||
else if(elementName === "style"){ | ||
element.type = ElementType.Style; | ||
//Special tag, push onto the tag stack if not terminated | ||
if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Style); | ||
} | ||
} | ||
else if(element.name === "script"){ | ||
element.type = ElementType.Script; | ||
//Special tag, push onto the tag stack if not terminated | ||
if(element.data.charAt(element.data.length - 1) !== "/") | ||
this._tagStack.push(ElementType.Script); | ||
} | ||
else if(element.name === "/script") | ||
element.type = ElementType.Script; | ||
else if(element.name === "style"){ | ||
element.type = ElementType.Style; | ||
//Special tag, push onto the tag stack if not terminated | ||
if(element.data.charAt(element.data.length - 1) !== "/") | ||
this._tagStack.push(ElementType.Style); | ||
} | ||
else if(element.name === "/style") | ||
element.type = ElementType.Style; | ||
if(element.name && element.name.charAt(0) === "/") | ||
element.data = element.name; | ||
} | ||
@@ -317,3 +311,3 @@ | ||
&& | ||
element.data.charAt(element.data.length - 1) === "/" | ||
element.data.substr(-1) === "/" | ||
) | ||
@@ -347,13 +341,14 @@ this._elements.push({ | ||
l = this._location, | ||
end = this._current - (startTag ? 1 : 0), | ||
chunk = startTag && l.charOffset === 0 && this._current === 0; | ||
end = this._current, | ||
chunk = startTag && l.charOffset === 0 && end === 0; | ||
if(startTag) end--; | ||
for (; l.charOffset < end; l.charOffset++){ | ||
c = this._buffer.charAt(l.charOffset); | ||
c = this._buffer[l.charOffset]; | ||
if(c === '\n'){ | ||
l.inBuffer++; | ||
l.col = 0; | ||
} else if(c !== '\r'){ | ||
} else if(c !== '\r') | ||
l.col++; | ||
} | ||
} | ||
@@ -383,14 +378,9 @@ return { | ||
switch (element.type){ | ||
case ElementType.Comment: | ||
this._handler.writeComment(element); | ||
case ElementType.Comment: this._handler.writeComment(element); | ||
break; | ||
case ElementType.Directive: | ||
this._handler.writeDirective(element); | ||
case ElementType.Directive: this._handler.writeDirective(element); | ||
break; | ||
case ElementType.Text: | ||
this._handler.writeText(element); | ||
case ElementType.Text: this._handler.writeText(element); | ||
break; | ||
default: | ||
this._handler.writeTag(element); | ||
break; | ||
default: this._handler.writeTag(element); | ||
} | ||
@@ -397,0 +387,0 @@ } |
{ | ||
"name": "htmlparser2" | ||
, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)." | ||
, "version": "1.0.0" | ||
, "version": "1.1.0" | ||
, "author": "Felix Boehm <me@feedic.com>" | ||
@@ -6,0 +6,0 @@ , "contributors": [ "Chris Winberry <chris@winberry.net>" ] |
64042
1915