htmlparser2
Advanced tools
Comparing version 3.1.5 to 3.1.6
@@ -177,3 +177,8 @@ var Tokenizer = require("./Tokenizer.js"); | ||
if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, value); | ||
if(this._attribs) this._attribs[this._attribname] = value; | ||
if( | ||
this._attribs && | ||
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname) | ||
){ | ||
this._attribs[this._attribname] = value; | ||
} | ||
this._attribname = ""; | ||
@@ -180,0 +185,0 @@ }; |
@@ -67,9 +67,40 @@ module.exports = Tokenizer; | ||
AFTER_STYLE_3 = i++, //L | ||
AFTER_STYLE_4 = i++; //E | ||
AFTER_STYLE_4 = i++, //E | ||
SPECIAL_NONE = 0, | ||
SPECIAL_SCRIPT = 1, | ||
SPECIAL_STYLE = 2; | ||
/**
 * Tests whether a single character counts as whitespace for the tokenizer.
 *
 * Accepts space, line feed, tab, form feed and carriage return. Carriage
 * return must stay in the list so that "\r\n" line endings inside tags are
 * skipped the same way as "\n".
 *
 * @param {string} c - The character to test.
 * @returns {boolean} `true` if `c` is one of the five whitespace characters.
 */
function whitespace(c){
    return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
}
/**
 * Builds a state handler that chooses between two follow-up states based on
 * a single expected character.
 *
 * The comparison ignores case: both `upper` and `upper.toLowerCase()` are
 * accepted. When the two are identical (e.g. "-" or "]") a handler with a
 * single comparison is returned instead.
 *
 * @param {string} upper - Expected character, given in upper case.
 * @param {*} SUCCESS - Value stored in `this._state` when the character matches.
 * @param {*} FAILURE - Value stored in `this._state` otherwise.
 * @returns {function(string): void} Handler that writes `this._state`; it
 *     must be called with the tokenizer as `this`.
 */
function ifElseState(upper, SUCCESS, FAILURE){
    var lower = upper.toLowerCase();

    if(upper === lower){
        // no distinct lower-case form, one comparison is enough
        return function(c){
            this._state = c === lower ? SUCCESS : FAILURE;
        };
    }

    return function(c){
        this._state = (c === lower || c === upper) ? SUCCESS : FAILURE;
    };
}
/**
 * Builds a state handler for one letter of a special tag name.
 *
 * If the character matches `upper` (in either case) the tokenizer advances to
 * `NEXT_STATE`. Otherwise the name is treated as an ordinary tag name: the
 * state becomes IN_TAG_NAME and `_index` is decremented so that the same
 * character is processed again in that state.
 *
 * @param {string} upper - Expected letter, given in upper case.
 * @param {*} NEXT_STATE - Value stored in `this._state` on a match.
 * @returns {function(string): void} Handler; it must be called with the
 *     tokenizer as `this`.
 */
function consumeSpecialNameChar(upper, NEXT_STATE){
    var lower = upper.toLowerCase();

    return function(c){
        if(c === lower || c === upper){
            this._state = NEXT_STATE;
        } else {
            this._state = IN_TAG_NAME;
            this._index--; //consume the token again
        }
    };
}
function Tokenizer(options, cbs){ | ||
@@ -80,8 +111,331 @@ this._state = TEXT; | ||
this._index = 0; | ||
this._options = options; | ||
this._special = 0; // 1 for script, 2 for style | ||
this._special = SPECIAL_NONE; | ||
this._cbs = cbs; | ||
this._running = true; | ||
this._xmlMode = !!(options && options.xmlMode); | ||
} | ||
/**
 * Handles one character in the TEXT state.
 *
 * On "<", any text collected since `_sectionStart` is passed to the `ontext`
 * callback, the state becomes BEFORE_TAG_NAME and the new section starts at
 * the "<" itself. Every other character is left for a later flush.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateText = function (c) {
    if(c === "<"){
        if(this._index > this._sectionStart){
            this._cbs.ontext(this._getSection());
        }
        this._state = BEFORE_TAG_NAME;
        this._sectionStart = this._index;
    }
};
/**
 * Handles the character directly after "<".
 *
 * - "/" starts a closing tag.
 * - ">" or whitespace, or any character while `_special` is set, sends the
 *   tokenizer back to TEXT without touching `_sectionStart`.
 * - "!" and "?" start a declaration or processing instruction whose section
 *   begins after the current character.
 * - Anything else starts a tag name at the current character. Outside XML
 *   mode a leading "s"/"S" goes through BEFORE_SPECIAL first.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeTagName = function (c) {
    if(c === "/"){
        this._state = BEFORE_CLOSING_TAG_NAME;
    } else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
        this._state = TEXT;
    } else if(c === "!"){
        this._state = BEFORE_DECLARATION;
        this._sectionStart = this._index + 1;
    } else if(c === "?"){
        this._state = IN_PROCESSING_INSTRUCTION;
        this._sectionStart = this._index + 1;
    } else {
        this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
            BEFORE_SPECIAL : IN_TAG_NAME;
        this._sectionStart = this._index;
    }
};
/**
 * Handles one character inside an opening tag name.
 *
 * "/", ">" or whitespace ends the name: the collected section is emitted as
 * `onopentagname`, the state becomes BEFORE_ATTRIBUTE_NAME and `_index` is
 * decremented so that state sees the terminating character too.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInTagName = function (c) {
    if(c === "/" || c === ">" || whitespace(c)){
        this._emitToken("onopentagname");
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--; //consume the token again
    }
};
/**
 * Handles the character after "</".
 *
 * Whitespace is skipped and ">" returns to TEXT. While `_special` is set,
 * only "s"/"S" continues (towards BEFORE_SPECIAL_END); any other character
 * returns to TEXT and is processed again there. Otherwise a closing tag name
 * starts at the current character.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
    if(whitespace(c)){
        /* noop */
    } else if(c === ">"){
        this._state = TEXT;
    } else if(this._special !== SPECIAL_NONE){
        if(c === "s" || c === "S"){
            this._state = BEFORE_SPECIAL_END;
        } else {
            this._state = TEXT;
            this._index--; //consume the token again
        }
    } else {
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index;
    }
};
/**
 * Handles one character inside a closing tag name.
 *
 * ">" or whitespace ends the name: the section is emitted as `onclosetag`,
 * `_special` is cleared and the state becomes AFTER_CLOSING_TAG_NAME. The
 * character is processed again there, so a ">" ends the tag right away.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInCloseingTagName = function (c) {
    if(c === ">" || whitespace(c)){
        this._emitToken("onclosetag");
        this._state = AFTER_CLOSING_TAG_NAME;
        this._special = SPECIAL_NONE;
        this._index--; //consume the token again
    }
};
/**
 * Skips everything after a closing tag name until ">".
 *
 * On ">" the state returns to TEXT and the next section starts after it.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
    if(c === ">"){
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
/**
 * Handles one character between a tag name (or attribute) and the next
 * attribute.
 *
 * Whitespace is skipped. ">" ends the opening tag (`onopentagend`) and
 * returns to TEXT. "/" reports a self-closing tag (`onselfclosingtag`) and
 * skips ahead to the closing ">". Any other character starts an attribute
 * name at the current index.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeAttributeName = function (c) {
    if(whitespace(c)){
        /* noop */
    } else if(c === ">"){
        this._state = TEXT;
        this._cbs.onopentagend();
        this._sectionStart = this._index + 1;
    } else if(c === "/"){
        this._cbs.onselfclosingtag();
        this._state = AFTER_CLOSING_TAG_NAME;
    } else {
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};
/**
 * Handles one character inside an attribute name.
 *
 * "=", "/", ">" or whitespace ends the name. A non-empty name is reported
 * through `onattribname`. The section is then closed (`_sectionStart = -1`)
 * and the terminating character is processed again in AFTER_ATTRIBUTE_NAME.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInAttributeName = function (c) {
    if(c === "=" || c === "/" || c === ">" || whitespace(c)){
        if(this._index > this._sectionStart){
            this._cbs.onattribname(this._getSection());
        }
        this._sectionStart = -1;
        this._state = AFTER_ATTRIBUTE_NAME;
        this._index--; //consume the token again
    }
};
/**
 * Handles one character after an attribute name.
 *
 * "=" introduces a value. "/" or ">" is handed back to
 * BEFORE_ATTRIBUTE_NAME and processed again there. Whitespace is skipped.
 * Any other character starts the next attribute name.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterAttributeName = function (c) {
    if(c === "="){
        this._state = BEFORE_ATTRIBUTE_VALUE;
    } else if(c === "/" || c === ">"){
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--; //consume the token again
    } else if(!whitespace(c)){
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};
/**
 * Handles one character after "=" and before the attribute value.
 *
 * A double or single quote opens a quoted value whose section starts after
 * the quote. Whitespace is skipped. Any other character is the first
 * character of an unquoted value.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
    if(c === "\""){
        this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
        this._sectionStart = this._index + 1;
    } else if(c === "'"){
        this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
        this._sectionStart = this._index + 1;
    } else if(!whitespace(c)){
        this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
        this._sectionStart = this._index;
    }
};
/**
 * Handles one character inside a double-quoted attribute value.
 *
 * The closing `"` emits the section as `onattribvalue` and returns to
 * BEFORE_ATTRIBUTE_NAME.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
    if(c === "\""){
        this._emitToken("onattribvalue");
        this._state = BEFORE_ATTRIBUTE_NAME;
    }
};
/**
 * Handles one character inside a single-quoted attribute value.
 *
 * The closing `'` emits the section as `onattribvalue` and returns to
 * BEFORE_ATTRIBUTE_NAME.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
    if(c === "'"){
        this._emitToken("onattribvalue");
        this._state = BEFORE_ATTRIBUTE_NAME;
    }
};
/**
 * Handles one character inside an unquoted attribute value.
 *
 * Whitespace or ">" ends the value: the section is emitted as
 * `onattribvalue` and the terminating character is processed again in
 * BEFORE_ATTRIBUTE_NAME.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
    if(whitespace(c) || c === ">"){
        this._emitToken("onattribvalue");
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--; //consume the token again
    }
};
/**
 * Handles the character after "<!".
 *
 * "[" may start a CDATA section and "-" may start a comment. Everything else
 * is treated as a declaration.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeDeclaration = function (c) {
    if(c === "["){
        this._state = BEFORE_CDATA_1;
    } else if(c === "-"){
        this._state = BEFORE_COMMENT;
    } else {
        this._state = IN_DECLARATION;
    }
};
/**
 * Handles one character inside a declaration.
 *
 * ">" ends it: the section is passed to `ondeclaration`, the state returns
 * to TEXT and the next section starts after the ">".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInDeclaration = function (c) {
    if(c === ">"){
        this._cbs.ondeclaration(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
/**
 * Handles one character inside a processing instruction.
 *
 * ">" ends it: the section is passed to `onprocessinginstruction`, the state
 * returns to TEXT and the next section starts after the ">".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInProcessingInstruction = function (c) {
    if(c === ">"){
        this._cbs.onprocessinginstruction(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
/**
 * Handles the character after "<!-".
 *
 * A second "-" opens a comment whose section starts after it. Anything else
 * turns the construct into a declaration.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeComment = function (c) {
    if(c === "-"){
        this._state = IN_COMMENT;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
    }
};
/**
 * Handles one character inside a comment. A "-" may begin the closing "-->".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInComment = function (c) {
    if(c === "-"){
        this._state = AFTER_COMMENT_1;
    }
};
// After one "-" inside a comment: a second "-" moves on to AFTER_COMMENT_2,
// anything else falls back to IN_COMMENT.
Tokenizer.prototype._stateAfterComment1 = ifElseState("-", AFTER_COMMENT_2, IN_COMMENT);
/**
 * Handles the character after "--" inside a comment.
 *
 * ">" ends the comment: the text without the two trailing dashes is passed
 * to `oncomment`, the state returns to TEXT and the next section starts
 * after the ">". A further "-" keeps this state (covers `--->`). Any other
 * character returns to IN_COMMENT.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterComment2 = function (c) {
    if(c === ">"){
        //remove 2 trailing chars
        this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if(c !== "-"){
        this._state = IN_COMMENT;
    }
    // else: stay in AFTER_COMMENT_2 (`--->`)
};
// Match the letters "CDATA" after "<![", one state per letter (either case is
// accepted by ifElseState). Any mismatch turns the construct into a
// declaration.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
/**
 * Handles the character after "<![CDATA".
 *
 * "[" opens the CDATA section, whose content starts after it. Anything else
 * turns the construct into a declaration.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeCdata6 = function (c) {
    if(c === "["){
        this._state = IN_CDATA;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
    }
};
/**
 * Handles one character inside a CDATA section. A "]" may begin the closing
 * "]]>".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateInCdata = function (c) {
    if(c === "]"){
        this._state = AFTER_CDATA_1;
    }
};
// After one "]" inside CDATA: a second "]" moves on to AFTER_CDATA_2,
// anything else falls back to IN_CDATA.
Tokenizer.prototype._stateAfterCdata1 = ifElseState("]", AFTER_CDATA_2, IN_CDATA);
/**
 * Handles the character after "]]" inside a CDATA section.
 *
 * ">" ends the section: the content without the two trailing brackets is
 * passed to `oncdata`, the state returns to TEXT and the next section starts
 * after the ">". A further "]" keeps this state (covers `]]]>`). Any other
 * character returns to IN_CDATA.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterCdata2 = function (c) {
    if(c === ">"){
        //remove 2 trailing chars
        this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if(c !== "]"){
        this._state = IN_CDATA;
    }
    //else: stay in AFTER_CDATA_2 (`]]]>`)
};
/**
 * Handles the second character of a tag name that began with "s"/"S".
 *
 * "c"/"C" continues towards "script" and "t"/"T" towards "style". Any other
 * character means an ordinary tag name, so it is processed again in
 * IN_TAG_NAME.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeSpecial = function (c) {
    if(c === "c" || c === "C"){
        this._state = BEFORE_SCRIPT_1;
    } else if(c === "t" || c === "T"){
        this._state = BEFORE_STYLE_1;
    } else {
        this._state = IN_TAG_NAME;
        this._index--; //consume the token again
    }
};
/**
 * Handles the character after "</s" while `_special` is set.
 *
 * Only the closing tag that belongs to the current special section is
 * followed: "c"/"C" when `_special` is SPECIAL_SCRIPT, "t"/"T" when it is
 * SPECIAL_STYLE. Everything else returns to TEXT.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
    if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
        this._state = AFTER_SCRIPT_1;
    } else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
        this._state = AFTER_STYLE_1;
    } else {
        this._state = TEXT;
    }
};
// Match the remaining letters "ript" of an opening "script" tag name, one
// state per letter. A mismatch falls back to IN_TAG_NAME and reprocesses the
// character (see consumeSpecialNameChar).
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
/**
 * Handles the character after the letters "script" in an opening tag name.
 *
 * If the name ends here ("/", ">" or whitespace), `_special` is set to
 * SPECIAL_SCRIPT. In every case the character is processed again in
 * IN_TAG_NAME, which emits the tag name or keeps reading a longer name.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeScript5 = function (c) {
    if(c === "/" || c === ">" || whitespace(c)){
        this._special = SPECIAL_SCRIPT;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};
// Match the remaining letters "ript" of a closing "</script", one state per
// letter. A mismatch returns to TEXT.
Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT);
Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
/**
 * Handles the character after the letters "script" in a closing tag.
 *
 * If the name ends here (">" or whitespace), the section is moved back six
 * characters to the start of "script" and the character is processed again
 * in IN_CLOSING_TAG_NAME. Otherwise the state returns to TEXT.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterScript5 = function (c) {
    if(c === ">" || whitespace(c)){
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 6; // length of "script"
        this._index--; //reconsume the token
    } else {
        this._state = TEXT;
    }
};
// Match the remaining letters "yle" of an opening "style" tag name, one
// state per letter. A mismatch falls back to IN_TAG_NAME and reprocesses the
// character (see consumeSpecialNameChar).
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
/**
 * Handles the character after the letters "style" in an opening tag name.
 *
 * If the name ends here ("/", ">" or whitespace), `_special` is set to
 * SPECIAL_STYLE. In every case the character is processed again in
 * IN_TAG_NAME, which emits the tag name or keeps reading a longer name.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateBeforeStyle4 = function (c) {
    if(c === "/" || c === ">" || whitespace(c)){
        this._special = SPECIAL_STYLE;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};
// Match the remaining letters "yle" of a closing "</style", one state per
// letter. A mismatch returns to TEXT.
Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
/**
 * Handles the character after the letters "style" in a closing tag.
 *
 * If the name ends here (">" or whitespace), the section is moved back five
 * characters to the start of "style" and the character is processed again
 * in IN_CLOSING_TAG_NAME. Otherwise the state returns to TEXT.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype._stateAfterStyle4 = function (c) {
    if(c === ">" || whitespace(c)){
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 5; // length of "style"
        this._index--; //reconsume the token
    } else {
        this._state = TEXT;
    }
};
/**
 * Trims the internal buffer so that only unfinished input is kept.
 *
 * - No open section (`_sectionStart < 0`): the buffer is dropped and
 *   `_sectionStart` is left as it is.
 * - TEXT state: pending text from `_sectionStart` to the end of the buffer
 *   is flushed through `ontext`, then the buffer is dropped.
 * - Section started exactly at `_index`: nothing is pending, so the buffer
 *   is dropped.
 * - Otherwise: everything before `_sectionStart` is discarded and `_index`
 *   is shifted by the same amount.
 *
 * In all but the first case `_sectionStart` ends up at 0.
 */
Tokenizer.prototype._cleanup = function () {
    if(this._sectionStart < 0){
        this._buffer = "";
        this._index = 0;
        return;
    }

    if(this._state === TEXT){
        if(this._sectionStart !== this._index){
            // _sectionStart is >= 0 here, so substring(start) equals the
            // deprecated substr(start)
            this._cbs.ontext(this._buffer.substring(this._sectionStart));
        }
        this._buffer = "";
        this._index = 0;
    } else if(this._sectionStart === this._index){
        //the section just started
        this._buffer = "";
        this._index = 0;
    } else {
        //remove everything unnecessary
        this._buffer = this._buffer.substring(this._sectionStart);
        this._index -= this._sectionStart;
    }

    this._sectionStart = 0;
};
//TODO make events conditional | ||
@@ -93,78 +447,14 @@ Tokenizer.prototype.write = function(chunk){ | ||
var c = this._buffer.charAt(this._index); | ||
if(this._state === TEXT){ | ||
if(c === "<"){ | ||
this._emitIfToken("ontext"); | ||
this._state = BEFORE_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
if(this._state === TEXT) { | ||
this._stateText(c); | ||
} else if(this._state === BEFORE_TAG_NAME){ | ||
if(c === "/"){ | ||
this._state = BEFORE_CLOSING_TAG_NAME; | ||
} else if(c === ">" || this._special > 0 || whitespace(c)) { | ||
this._state = TEXT; | ||
} else { | ||
if(whitespace(c)); | ||
else if(c === "!"){ | ||
this._state = BEFORE_DECLARATION; | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "?"){ | ||
this._state = IN_PROCESSING_INSTRUCTION; | ||
this._sectionStart = this._index + 1; | ||
} else if( | ||
!(this._options && this._options.xmlMode) && | ||
(c === "s" || c === "S") | ||
){ | ||
this._state = BEFORE_SPECIAL; | ||
this._sectionStart = this._index; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
} | ||
} else if(this._state === IN_TAG_NAME){ | ||
if(c === "/"){ | ||
this._emitToken("onopentagname"); | ||
this._cbs.onselfclosingtag(); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
} else if(c === ">"){ | ||
this._emitToken("onopentagname"); | ||
this._cbs.onopentagend(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if(whitespace(c)){ | ||
this._emitToken("onopentagname"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} | ||
this._stateBeforeTagName(c); | ||
} else if(this._state === IN_TAG_NAME) { | ||
this._stateInTagName(c); | ||
} else if(this._state === BEFORE_CLOSING_TAG_NAME){ | ||
if(whitespace(c)); | ||
else if(c === ">"){ | ||
this._state = TEXT; | ||
} else if(this._special > 0){ | ||
if(c === "s" || c === "S"){ | ||
this._state = BEFORE_SPECIAL_END; | ||
} else { | ||
this._state = TEXT; | ||
continue; | ||
} | ||
} else { | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
this._stateBeforeCloseingTagName(c); | ||
} else if(this._state === IN_CLOSING_TAG_NAME){ | ||
if(c === ">"){ | ||
this._emitToken("onclosetag"); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
this._special = 0; | ||
} else if(whitespace(c)){ | ||
this._emitToken("onclosetag"); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
this._special = 0; | ||
} | ||
this._stateInCloseingTagName(c); | ||
} else if(this._state === AFTER_CLOSING_TAG_NAME){ | ||
//skip everything until ">" | ||
if(c === ">"){ | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
this._stateAfterCloseingTagName(c); | ||
} | ||
@@ -176,66 +466,15 @@ | ||
else if(this._state === BEFORE_ATTRIBUTE_NAME){ | ||
if(c === ">"){ | ||
this._state = TEXT; | ||
this._cbs.onopentagend(); | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "/"){ | ||
this._cbs.onselfclosingtag(); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
} else if(!whitespace(c)){ | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
this._stateBeforeAttributeName(c); | ||
} else if(this._state === IN_ATTRIBUTE_NAME){ | ||
if(c === "="){ | ||
this._emitIfToken("onattribname"); | ||
this._state = BEFORE_ATTRIBUTE_VALUE; | ||
} else if(whitespace(c)){ | ||
this._emitIfToken("onattribname"); | ||
this._state = AFTER_ATTRIBUTE_NAME; | ||
} else if(c === "/" || c === ">"){ | ||
this._emitIfToken("onattribname"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
continue; | ||
} | ||
this._stateInAttributeName(c); | ||
} else if(this._state === AFTER_ATTRIBUTE_NAME){ | ||
if(c === "="){ | ||
this._state = BEFORE_ATTRIBUTE_VALUE; | ||
} else if(c === "/" || c === ">"){ | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
continue; | ||
} else if(!whitespace(c)){ | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
this._stateAfterAttributeName(c); | ||
} else if(this._state === BEFORE_ATTRIBUTE_VALUE){ | ||
if(c === "\""){ | ||
this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES; | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "'"){ | ||
this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES; | ||
this._sectionStart = this._index + 1; | ||
} else if(!whitespace(c)){ | ||
this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES; | ||
this._sectionStart = this._index; | ||
} | ||
this._stateBeforeAttributeValue(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){ | ||
if(c === "\""){ | ||
this._emitToken("onattribvalue"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} | ||
this._stateInAttributeValueDoubleQuotes(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){ | ||
if(c === "'"){ | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._emitToken("onattribvalue"); | ||
} | ||
this._stateInAttributeValueSingleQuotes(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){ | ||
if(c === ">"){ | ||
this._emitToken("onattribvalue"); | ||
this._state = TEXT; | ||
this._cbs.onopentagend(); | ||
this._sectionStart = this._index + 1; | ||
} else if(whitespace(c)){ | ||
this._emitToken("onattribvalue"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} | ||
this._stateInAttributeValueNoQuotes(c); | ||
} | ||
@@ -247,11 +486,5 @@ | ||
else if(this._state === BEFORE_DECLARATION){ | ||
if(c === "[") this._state = BEFORE_CDATA_1; | ||
else if(c === "-") this._state = BEFORE_COMMENT; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeDeclaration(c); | ||
} else if(this._state === IN_DECLARATION){ | ||
if(c === ">"){ | ||
this._emitToken("ondeclaration"); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
this._stateInDeclaration(c); | ||
} | ||
@@ -263,7 +496,3 @@ | ||
else if(this._state === IN_PROCESSING_INSTRUCTION){ | ||
if(c === ">"){ | ||
this._emitToken("onprocessinginstruction"); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
this._stateInProcessingInstruction(c); | ||
} | ||
@@ -275,23 +504,9 @@ | ||
else if(this._state === BEFORE_COMMENT){ | ||
if(c === "-"){ | ||
this._state = IN_COMMENT; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
} | ||
this._stateBeforeComment(c); | ||
} else if(this._state === IN_COMMENT){ | ||
if(c === "-") this._state = AFTER_COMMENT_1; | ||
this._stateInComment(c); | ||
} else if(this._state === AFTER_COMMENT_1){ | ||
if(c === "-") this._state = AFTER_COMMENT_2; | ||
else this._state = IN_COMMENT; | ||
this._stateAfterComment1(c); | ||
} else if(this._state === AFTER_COMMENT_2){ | ||
if(c === ">"){ | ||
//remove 2 trailing chars | ||
this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2)); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "-") { | ||
this._state = IN_COMMENT; | ||
} | ||
// else: stay in AFTER_COMMENT_2 (`--->`) | ||
this._stateAfterComment2(c); | ||
} | ||
@@ -303,38 +518,19 @@ | ||
else if(this._state === BEFORE_CDATA_1){ | ||
if(c === "C") this._state = BEFORE_CDATA_2; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeCdata1(c); | ||
} else if(this._state === BEFORE_CDATA_2){ | ||
if(c === "D") this._state = BEFORE_CDATA_3; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeCdata2(c); | ||
} else if(this._state === BEFORE_CDATA_3){ | ||
if(c === "A") this._state = BEFORE_CDATA_4; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeCdata3(c); | ||
} else if(this._state === BEFORE_CDATA_4){ | ||
if(c === "T") this._state = BEFORE_CDATA_5; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeCdata4(c); | ||
} else if(this._state === BEFORE_CDATA_5){ | ||
if(c === "A") this._state = BEFORE_CDATA_6; | ||
else this._state = IN_DECLARATION; | ||
this._stateBeforeCdata5(c); | ||
} else if(this._state === BEFORE_CDATA_6){ | ||
if(c === "["){ | ||
this._state = IN_CDATA; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
} | ||
this._stateBeforeCdata6(c); | ||
} else if(this._state === IN_CDATA){ | ||
if(c === "]") this._state = AFTER_CDATA_1; | ||
this._stateInCdata(c); | ||
} else if(this._state === AFTER_CDATA_1){ | ||
if(c === "]") this._state = AFTER_CDATA_2; | ||
else this._state = IN_CDATA; | ||
this._stateAfterCdata1(c); | ||
} else if(this._state === AFTER_CDATA_2){ | ||
if(c === ">"){ | ||
//remove 2 trailing chars | ||
this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2)); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "]") { | ||
this._state = IN_CDATA; | ||
} | ||
//else: stay in AFTER_CDATA_2 (`]]]>`) | ||
this._stateAfterCdata2(c); | ||
} | ||
@@ -346,17 +542,5 @@ | ||
else if(this._state === BEFORE_SPECIAL){ | ||
if(c === "c" || c === "C"){ | ||
this._state = BEFORE_SCRIPT_1; | ||
} else if(c === "t" || c === "T"){ | ||
this._state = BEFORE_STYLE_1; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeSpecial(c); | ||
} else if(this._state === BEFORE_SPECIAL_END){ | ||
if(this._special === 1 && (c === "c" || c === "C")){ | ||
this._state = AFTER_SCRIPT_1; | ||
} else if(this._special === 2 && (c === "t" || c === "T")){ | ||
this._state = AFTER_STYLE_1; | ||
} | ||
else this._state = TEXT; | ||
this._stateBeforeSpecialEnd(c); | ||
} | ||
@@ -368,64 +552,23 @@ | ||
else if(this._state === BEFORE_SCRIPT_1){ | ||
if(c === "r" || c === "R"){ | ||
this._state = BEFORE_SCRIPT_2; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeScript1(c); | ||
} else if(this._state === BEFORE_SCRIPT_2){ | ||
if(c === "i" || c === "I"){ | ||
this._state = BEFORE_SCRIPT_3; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeScript2(c); | ||
} else if(this._state === BEFORE_SCRIPT_3){ | ||
if(c === "p" || c === "P"){ | ||
this._state = BEFORE_SCRIPT_4; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeScript3(c); | ||
} else if(this._state === BEFORE_SCRIPT_4){ | ||
if(c === "t" || c === "T"){ | ||
this._state = BEFORE_SCRIPT_5; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeScript4(c); | ||
} else if(this._state === BEFORE_SCRIPT_5){ | ||
if(c === "/" || c === ">" || whitespace(c)){ | ||
this._special = 1; | ||
} | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
this._stateBeforeScript5(c); | ||
} | ||
else if(this._state === AFTER_SCRIPT_1){ | ||
if(c === "r" || c === "R"){ | ||
this._state = AFTER_SCRIPT_2; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterScript1(c); | ||
} else if(this._state === AFTER_SCRIPT_2){ | ||
if(c === "i" || c === "I"){ | ||
this._state = AFTER_SCRIPT_3; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterScript2(c); | ||
} else if(this._state === AFTER_SCRIPT_3){ | ||
if(c === "p" || c === "P"){ | ||
this._state = AFTER_SCRIPT_4; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterScript3(c); | ||
} else if(this._state === AFTER_SCRIPT_4){ | ||
if(c === "t" || c === "T"){ | ||
this._state = AFTER_SCRIPT_5; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterScript4(c); | ||
} else if(this._state === AFTER_SCRIPT_5){ | ||
if(c === ">" || whitespace(c)){ | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 6; | ||
continue; //reconsume the token | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterScript5(c); | ||
} | ||
@@ -437,57 +580,23 @@ | ||
else if(this._state === BEFORE_STYLE_1){ | ||
if(c === "y" || c === "Y"){ | ||
this._state = BEFORE_STYLE_2; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeStyle1(c); | ||
} else if(this._state === BEFORE_STYLE_2){ | ||
if(c === "l" || c === "L"){ | ||
this._state = BEFORE_STYLE_3; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeStyle2(c); | ||
} else if(this._state === BEFORE_STYLE_3){ | ||
if(c === "e" || c === "E"){ | ||
this._state = BEFORE_STYLE_4; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
} | ||
this._stateBeforeStyle3(c); | ||
} else if(this._state === BEFORE_STYLE_4){ | ||
if(c === "/" || c === ">" || whitespace(c)){ | ||
this._special = 2; | ||
} | ||
this._state = IN_TAG_NAME; | ||
continue; //consume the token again | ||
this._stateBeforeStyle4(c); | ||
} | ||
else if(this._state === AFTER_STYLE_1){ | ||
if(c === "y" || c === "Y"){ | ||
this._state = AFTER_STYLE_2; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterStyle1(c); | ||
} else if(this._state === AFTER_STYLE_2){ | ||
if(c === "l" || c === "L"){ | ||
this._state = AFTER_STYLE_3; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterStyle2(c); | ||
} else if(this._state === AFTER_STYLE_3){ | ||
if(c === "e" || c === "E"){ | ||
this._state = AFTER_STYLE_4; | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterStyle3(c); | ||
} else if(this._state === AFTER_STYLE_4){ | ||
if(c === ">" || whitespace(c)){ | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 5; | ||
continue; //reconsume the token | ||
} | ||
else this._state = TEXT; | ||
this._stateAfterStyle4(c); | ||
} | ||
else { | ||
this._cbs.onerror(Error("unknown state"), this._state); | ||
this._cbs.onerror(Error("unknown _state"), this._state); | ||
} | ||
@@ -498,25 +607,3 @@ | ||
//cleanup | ||
if(this._sectionStart === -1){ | ||
this._buffer = ""; | ||
this._index = 0; | ||
} else { | ||
if(this._state === TEXT){ | ||
if(this._sectionStart !== this._index){ | ||
this._cbs.ontext(this._buffer.substr(this._sectionStart)); | ||
} | ||
this._buffer = ""; | ||
this._index = 0; | ||
} else if(this._sectionStart === this._index){ | ||
//the section just started | ||
this._buffer = ""; | ||
this._index = 0; | ||
} else if(this._sectionStart > 0){ | ||
//remove everything unnecessary | ||
this._buffer = this._buffer.substr(this._sectionStart); | ||
this._index -= this._sectionStart; | ||
} | ||
this._sectionStart = 0; | ||
} | ||
this._cleanup(); | ||
}; | ||
@@ -555,15 +642,12 @@ | ||
/**
 * Resets the tokenizer by re-running the constructor on this instance.
 *
 * The current XML mode and the callbacks object are carried over. The
 * options passed to the constructor are a fresh `{xmlMode: ...}` object.
 */
Tokenizer.prototype.reset = function(){
    Tokenizer.call(this, {xmlMode: this._xmlMode}, this._cbs);
};
/**
 * Returns the text of the current section.
 *
 * @returns {string} Buffer content from `_sectionStart` up to, but not
 *     including, `_index`.
 */
Tokenizer.prototype._getSection = function(){
    return this._buffer.substring(this._sectionStart, this._index);
};

/**
 * Passes the current section to the named callback and closes the section.
 *
 * @param {string} name - Name of the callback on `_cbs` to invoke.
 */
Tokenizer.prototype._emitToken = function(name){
    this._cbs[name](this._getSection());
    this._sectionStart = -1;
};

/**
 * Like `_emitToken`, but invokes the callback only when the section is not
 * empty. The section is closed either way.
 *
 * @param {string} name - Name of the callback on `_cbs` to invoke.
 */
Tokenizer.prototype._emitIfToken = function(name){
    if(this._index > this._sectionStart){
        this._cbs[name](this._getSection());
    }
    this._sectionStart = -1;
};
{ | ||
"name": "htmlparser2", | ||
"description": "Fast & forgiving HTML/XML/RSS parser", | ||
"version": "3.1.5", | ||
"version": "3.1.6", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"], |
@@ -29,3 +29,3 @@ #htmlparser2 [![NPM version](https://badge.fury.io/js/htmlparser2.png)](https://npmjs.org/package/htmlparser2) [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2) [![Dependency Status](https://david-dm.org/fb55/htmlparser2.png)](https://david-dm.org/fb55/htmlparser2) | ||
}); | ||
parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';< / script>"); | ||
parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>"); | ||
parser.end(); | ||
@@ -32,0 +32,0 @@ ``` |
@@ -0,0 +0,0 @@ var multiply = function(text){ |
121970
49
5544