Socket
Socket
Sign inDemoInstall

htmlparser2

Package Overview
Dependencies
10
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 3.10.0 to 3.10.1

88

lib/CollectingHandler.js
module.exports = CollectingHandler;
function CollectingHandler(cbs){
this._cbs = cbs || {};
this.events = [];
function CollectingHandler(cbs) {
this._cbs = cbs || {};
this.events = [];
}
var EVENTS = require("./").EVENTS;
Object.keys(EVENTS).forEach(function(name){
if(EVENTS[name] === 0){
name = "on" + name;
CollectingHandler.prototype[name] = function(){
this.events.push([name]);
if(this._cbs[name]) this._cbs[name]();
};
} else if(EVENTS[name] === 1){
name = "on" + name;
CollectingHandler.prototype[name] = function(a){
this.events.push([name, a]);
if(this._cbs[name]) this._cbs[name](a);
};
} else if(EVENTS[name] === 2){
name = "on" + name;
CollectingHandler.prototype[name] = function(a, b){
this.events.push([name, a, b]);
if(this._cbs[name]) this._cbs[name](a, b);
};
} else {
throw Error("wrong number of arguments");
}
Object.keys(EVENTS).forEach(function(name) {
if (EVENTS[name] === 0) {
name = "on" + name;
CollectingHandler.prototype[name] = function() {
this.events.push([name]);
if (this._cbs[name]) this._cbs[name]();
};
} else if (EVENTS[name] === 1) {
name = "on" + name;
CollectingHandler.prototype[name] = function(a) {
this.events.push([name, a]);
if (this._cbs[name]) this._cbs[name](a);
};
} else if (EVENTS[name] === 2) {
name = "on" + name;
CollectingHandler.prototype[name] = function(a, b) {
this.events.push([name, a, b]);
if (this._cbs[name]) this._cbs[name](a, b);
};
} else {
throw Error("wrong number of arguments");
}
});
CollectingHandler.prototype.onreset = function(){
this.events = [];
if(this._cbs.onreset) this._cbs.onreset();
CollectingHandler.prototype.onreset = function() {
this.events = [];
if (this._cbs.onreset) this._cbs.onreset();
};
CollectingHandler.prototype.restart = function(){
if(this._cbs.onreset) this._cbs.onreset();
CollectingHandler.prototype.restart = function() {
if (this._cbs.onreset) this._cbs.onreset();
for(var i = 0, len = this.events.length; i < len; i++){
if(this._cbs[this.events[i][0]]){
for (var i = 0, len = this.events.length; i < len; i++) {
if (this._cbs[this.events[i][0]]) {
var num = this.events[i].length;
var num = this.events[i].length;
if(num === 1){
this._cbs[this.events[i][0]]();
} else if(num === 2){
this._cbs[this.events[i][0]](this.events[i][1]);
} else {
this._cbs[this.events[i][0]](this.events[i][1], this.events[i][2]);
}
}
}
if (num === 1) {
this._cbs[this.events[i][0]]();
} else if (num === 2) {
this._cbs[this.events[i][0]](this.events[i][1]);
} else {
this._cbs[this.events[i][0]](
this.events[i][1],
this.events[i][2]
);
}
}
}
};

@@ -1,8 +0,7 @@

var index = require("./index.js");
var DomHandler = index.DomHandler;
var DomUtils = index.DomUtils;
var DomHandler = require("domhandler");
var DomUtils = require("domutils");
//TODO: make this a streamable handler
function FeedHandler(callback, options){
this.init(callback, options);
function FeedHandler(callback, options) {
this.init(callback, options);
}

@@ -14,83 +13,103 @@

function getElements(what, where){
return DomUtils.getElementsByTagName(what, where, true);
function getElements(what, where) {
return DomUtils.getElementsByTagName(what, where, true);
}
function getOneElement(what, where){
return DomUtils.getElementsByTagName(what, where, true, 1)[0];
function getOneElement(what, where) {
return DomUtils.getElementsByTagName(what, where, true, 1)[0];
}
function fetch(what, where, recurse){
return DomUtils.getText(
DomUtils.getElementsByTagName(what, where, recurse, 1)
).trim();
function fetch(what, where, recurse) {
return DomUtils.getText(
DomUtils.getElementsByTagName(what, where, recurse, 1)
).trim();
}
function addConditionally(obj, prop, what, where, recurse){
var tmp = fetch(what, where, recurse);
if(tmp) obj[prop] = tmp;
function addConditionally(obj, prop, what, where, recurse) {
var tmp = fetch(what, where, recurse);
if (tmp) obj[prop] = tmp;
}
var isValidFeed = function(value){
return value === "rss" || value === "feed" || value === "rdf:RDF";
var isValidFeed = function(value) {
return value === "rss" || value === "feed" || value === "rdf:RDF";
};
FeedHandler.prototype.onend = function(){
var feed = {},
feedRoot = getOneElement(isValidFeed, this.dom),
tmp, childs;
FeedHandler.prototype.onend = function() {
var feed = {},
feedRoot = getOneElement(isValidFeed, this.dom),
tmp,
childs;
if(feedRoot){
if(feedRoot.name === "feed"){
childs = feedRoot.children;
if (feedRoot) {
if (feedRoot.name === "feed") {
childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
addConditionally(feed, "description", "subtitle", childs);
if((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
addConditionally(feed, "author", "email", childs, true);
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
if (
(tmp = getOneElement("link", childs)) &&
(tmp = tmp.attribs) &&
(tmp = tmp.href)
)
feed.link = tmp;
addConditionally(feed, "description", "subtitle", childs);
if ((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function(item){
var entry = {}, tmp;
feed.items = getElements("entry", childs).map(function(item) {
var entry = {},
tmp;
item = item.children;
item = item.children;
addConditionally(entry, "id", "id", item);
addConditionally(entry, "title", "title", item);
if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
if((tmp = fetch("summary", item) || fetch("content", item))) entry.description = tmp;
if((tmp = fetch("updated", item))) entry.pubDate = new Date(tmp);
return entry;
});
} else {
childs = getOneElement("channel", feedRoot.children).children;
addConditionally(entry, "id", "id", item);
addConditionally(entry, "title", "title", item);
if (
(tmp = getOneElement("link", item)) &&
(tmp = tmp.attribs) &&
(tmp = tmp.href)
)
entry.link = tmp;
if ((tmp = fetch("summary", item) || fetch("content", item)))
entry.description = tmp;
if ((tmp = fetch("updated", item)))
entry.pubDate = new Date(tmp);
return entry;
});
} else {
childs = getOneElement("channel", feedRoot.children).children;
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
if((tmp = fetch("lastBuildDate", childs))) feed.updated = new Date(tmp);
addConditionally(feed, "author", "managingEditor", childs, true);
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
if ((tmp = fetch("lastBuildDate", childs)))
feed.updated = new Date(tmp);
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function(item){
var entry = {}, tmp;
feed.items = getElements("item", feedRoot.children).map(function(
item
) {
var entry = {},
tmp;
item = item.children;
item = item.children;
addConditionally(entry, "id", "guid", item);
addConditionally(entry, "title", "title", item);
addConditionally(entry, "link", "link", item);
addConditionally(entry, "description", "description", item);
if((tmp = fetch("pubDate", item))) entry.pubDate = new Date(tmp);
return entry;
});
}
}
this.dom = feed;
DomHandler.prototype._handleCallback.call(
this, feedRoot ? null : Error("couldn't find root of feed")
);
addConditionally(entry, "id", "guid", item);
addConditionally(entry, "title", "title", item);
addConditionally(entry, "link", "link", item);
addConditionally(entry, "description", "description", item);
if ((tmp = fetch("pubDate", item)))
entry.pubDate = new Date(tmp);
return entry;
});
}
}
this.dom = feed;
DomHandler.prototype._handleCallback.call(
this,
feedRoot ? null : Error("couldn't find root of feed")
);
};
module.exports = FeedHandler;
var Parser = require("./Parser.js");
var DomHandler = require("domhandler");
function defineProp(name, value){
delete module.exports[name];
module.exports[name] = value;
return value;
function defineProp(name, value) {
delete module.exports[name];
module.exports[name] = value;
return value;
}
module.exports = {
Parser: Parser,
Tokenizer: require("./Tokenizer.js"),
ElementType: require("domelementtype"),
DomHandler: DomHandler,
get FeedHandler(){
return defineProp("FeedHandler", require("./FeedHandler.js"));
},
get Stream(){
return defineProp("Stream", require("./Stream.js"));
},
get WritableStream(){
return defineProp("WritableStream", require("./WritableStream.js"));
},
get ProxyHandler(){
return defineProp("ProxyHandler", require("./ProxyHandler.js"));
},
get DomUtils(){
return defineProp("DomUtils", require("domutils"));
},
get CollectingHandler(){
return defineProp("CollectingHandler", require("./CollectingHandler.js"));
},
// For legacy support
DefaultHandler: DomHandler,
get RssHandler(){
return defineProp("RssHandler", this.FeedHandler);
},
//helper methods
parseDOM: function(data, options){
var handler = new DomHandler(options);
new Parser(handler, options).end(data);
return handler.dom;
},
parseFeed: function(feed, options){
var handler = new module.exports.FeedHandler(options);
new Parser(handler, options).end(feed);
return handler.dom;
},
createDomStream: function(cb, options, elementCb){
var handler = new DomHandler(cb, options, elementCb);
return new Parser(handler, options);
},
// List of all events that the parser emits
EVENTS: { /* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
Parser: Parser,
Tokenizer: require("./Tokenizer.js"),
ElementType: require("domelementtype"),
DomHandler: DomHandler,
get FeedHandler() {
return defineProp("FeedHandler", require("./FeedHandler.js"));
},
get Stream() {
return defineProp("Stream", require("./Stream.js"));
},
get WritableStream() {
return defineProp("WritableStream", require("./WritableStream.js"));
},
get ProxyHandler() {
return defineProp("ProxyHandler", require("./ProxyHandler.js"));
},
get DomUtils() {
return defineProp("DomUtils", require("domutils"));
},
get CollectingHandler() {
return defineProp(
"CollectingHandler",
require("./CollectingHandler.js")
);
},
// For legacy support
DefaultHandler: DomHandler,
get RssHandler() {
return defineProp("RssHandler", this.FeedHandler);
},
//helper methods
parseDOM: function(data, options) {
var handler = new DomHandler(options);
new Parser(handler, options).end(data);
return handler.dom;
},
parseFeed: function(feed, options) {
var handler = new module.exports.FeedHandler(options);
new Parser(handler, options).end(feed);
return handler.dom;
},
createDomStream: function(cb, options, elementCb) {
var handler = new DomHandler(cb, options, elementCb);
return new Parser(handler, options);
},
// List of all events that the parser emits
EVENTS: {
/* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
};

@@ -27,104 +27,106 @@ var Tokenizer = require("./Tokenizer.js");

var formTags = {
input: true,
option: true,
optgroup: true,
select: true,
button: true,
datalist: true,
textarea: true
input: true,
option: true,
optgroup: true,
select: true,
button: true,
datalist: true,
textarea: true
};
var openImpliesClose = {
tr : { tr:true, th:true, td:true },
th : { th:true },
td : { thead:true, th:true, td:true },
body : { head:true, link:true, script:true },
li : { li:true },
p : { p:true },
h1 : { p:true },
h2 : { p:true },
h3 : { p:true },
h4 : { p:true },
h5 : { p:true },
h6 : { p:true },
select : formTags,
input : formTags,
output : formTags,
button : formTags,
datalist: formTags,
textarea: formTags,
option : { option:true },
optgroup: { optgroup:true }
tr: { tr: true, th: true, td: true },
th: { th: true },
td: { thead: true, th: true, td: true },
body: { head: true, link: true, script: true },
li: { li: true },
p: { p: true },
h1: { p: true },
h2: { p: true },
h3: { p: true },
h4: { p: true },
h5: { p: true },
h6: { p: true },
select: formTags,
input: formTags,
output: formTags,
button: formTags,
datalist: formTags,
textarea: formTags,
option: { option: true },
optgroup: { optgroup: true }
};
var voidElements = {
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
command: true,
embed: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
keygen: true,
link: true,
meta: true,
param: true,
source: true,
track: true,
wbr: true,
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
command: true,
embed: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
keygen: true,
link: true,
meta: true,
param: true,
source: true,
track: true,
wbr: true
};
var foreignContextElements = {
__proto__: null,
math: true,
svg: true
}
__proto__: null,
math: true,
svg: true
};
var htmlIntegrationElements = {
__proto__: null,
mi: true,
mo: true,
mn: true,
ms: true,
mtext: true,
"annotation-xml": true,
foreignObject: true,
desc: true,
title: true
}
__proto__: null,
mi: true,
mo: true,
mn: true,
ms: true,
mtext: true,
"annotation-xml": true,
foreignObject: true,
desc: true,
title: true
};
var re_nameEnd = /\s|\//;
function Parser(cbs, options){
this._options = options || {};
this._cbs = cbs || {};
function Parser(cbs, options) {
this._options = options || {};
this._cbs = cbs || {};
this._tagname = "";
this._attribname = "";
this._attribvalue = "";
this._attribs = null;
this._stack = [];
this._foreignContext = [];
this._tagname = "";
this._attribname = "";
this._attribvalue = "";
this._attribs = null;
this._stack = [];
this._foreignContext = [];
this.startIndex = 0;
this.endIndex = null;
this.startIndex = 0;
this.endIndex = null;
this._lowerCaseTagNames = "lowerCaseTags" in this._options ?
!!this._options.lowerCaseTags :
!this._options.xmlMode;
this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ?
!!this._options.lowerCaseAttributeNames :
!this._options.xmlMode;
this._lowerCaseTagNames =
"lowerCaseTags" in this._options
? !!this._options.lowerCaseTags
: !this._options.xmlMode;
this._lowerCaseAttributeNames =
"lowerCaseAttributeNames" in this._options
? !!this._options.lowerCaseAttributeNames
: !this._options.xmlMode;
if(this._options.Tokenizer) {
Tokenizer = this._options.Tokenizer;
}
this._tokenizer = new Tokenizer(this._options, this);
if (this._options.Tokenizer) {
Tokenizer = this._options.Tokenizer;
}
this._tokenizer = new Tokenizer(this._options, this);
if(this._cbs.onparserinit) this._cbs.onparserinit(this);
if (this._cbs.onparserinit) this._cbs.onparserinit(this);
}

@@ -134,230 +136,243 @@

Parser.prototype._updatePosition = function(initialOffset){
if(this.endIndex === null){
if(this._tokenizer._sectionStart <= initialOffset){
this.startIndex = 0;
} else {
this.startIndex = this._tokenizer._sectionStart - initialOffset;
}
}
else this.startIndex = this.endIndex + 1;
this.endIndex = this._tokenizer.getAbsoluteIndex();
Parser.prototype._updatePosition = function(initialOffset) {
if (this.endIndex === null) {
if (this._tokenizer._sectionStart <= initialOffset) {
this.startIndex = 0;
} else {
this.startIndex = this._tokenizer._sectionStart - initialOffset;
}
} else this.startIndex = this.endIndex + 1;
this.endIndex = this._tokenizer.getAbsoluteIndex();
};
//Tokenizer event handlers
Parser.prototype.ontext = function(data){
this._updatePosition(1);
this.endIndex--;
Parser.prototype.ontext = function(data) {
this._updatePosition(1);
this.endIndex--;
if(this._cbs.ontext) this._cbs.ontext(data);
if (this._cbs.ontext) this._cbs.ontext(data);
};
Parser.prototype.onopentagname = function(name){
if(this._lowerCaseTagNames){
name = name.toLowerCase();
}
Parser.prototype.onopentagname = function(name) {
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
this._tagname = name;
this._tagname = name;
if(!this._options.xmlMode && name in openImpliesClose) {
for(
var el;
(el = this._stack[this._stack.length - 1]) in openImpliesClose[name];
this.onclosetag(el)
);
}
if (!this._options.xmlMode && name in openImpliesClose) {
for (
var el;
(el = this._stack[this._stack.length - 1]) in
openImpliesClose[name];
this.onclosetag(el)
);
}
if(this._options.xmlMode || !(name in voidElements)){
this._stack.push(name);
if(name in foreignContextElements) this._foreignContext.push(true);
else if(name in htmlIntegrationElements) this._foreignContext.push(false);
}
if (this._options.xmlMode || !(name in voidElements)) {
this._stack.push(name);
if (name in foreignContextElements) this._foreignContext.push(true);
else if (name in htmlIntegrationElements)
this._foreignContext.push(false);
}
if(this._cbs.onopentagname) this._cbs.onopentagname(name);
if(this._cbs.onopentag) this._attribs = {};
if (this._cbs.onopentagname) this._cbs.onopentagname(name);
if (this._cbs.onopentag) this._attribs = {};
};
Parser.prototype.onopentagend = function(){
this._updatePosition(1);
Parser.prototype.onopentagend = function() {
this._updatePosition(1);
if(this._attribs){
if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
this._attribs = null;
}
if (this._attribs) {
if (this._cbs.onopentag)
this._cbs.onopentag(this._tagname, this._attribs);
this._attribs = null;
}
if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
this._cbs.onclosetag(this._tagname);
}
if (
!this._options.xmlMode &&
this._cbs.onclosetag &&
this._tagname in voidElements
) {
this._cbs.onclosetag(this._tagname);
}
this._tagname = "";
this._tagname = "";
};
Parser.prototype.onclosetag = function(name){
this._updatePosition(1);
Parser.prototype.onclosetag = function(name) {
this._updatePosition(1);
if(this._lowerCaseTagNames){
name = name.toLowerCase();
}
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
if (name in foreignContextElements || name in htmlIntegrationElements) {
this._foreignContext.pop();
}
if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
var pos = this._stack.lastIndexOf(name);
if(pos !== -1){
if(this._cbs.onclosetag){
pos = this._stack.length - pos;
while(pos--) this._cbs.onclosetag(this._stack.pop());
}
else this._stack.length = pos;
} else if(name === "p" && !this._options.xmlMode){
this.onopentagname(name);
this._closeCurrentTag();
}
} else if(!this._options.xmlMode && (name === "br" || name === "p")){
this.onopentagname(name);
this._closeCurrentTag();
}
if (
this._stack.length &&
(!(name in voidElements) || this._options.xmlMode)
) {
var pos = this._stack.lastIndexOf(name);
if (pos !== -1) {
if (this._cbs.onclosetag) {
pos = this._stack.length - pos;
while (pos--) this._cbs.onclosetag(this._stack.pop());
} else this._stack.length = pos;
} else if (name === "p" && !this._options.xmlMode) {
this.onopentagname(name);
this._closeCurrentTag();
}
} else if (!this._options.xmlMode && (name === "br" || name === "p")) {
this.onopentagname(name);
this._closeCurrentTag();
}
};
Parser.prototype.onselfclosingtag = function(){
if(this._options.xmlMode || this._options.recognizeSelfClosing
|| this._foreignContext[this._foreignContext.length - 1]){
this._closeCurrentTag();
} else {
this.onopentagend();
}
Parser.prototype.onselfclosingtag = function() {
if (
this._options.xmlMode ||
this._options.recognizeSelfClosing ||
this._foreignContext[this._foreignContext.length - 1]
) {
this._closeCurrentTag();
} else {
this.onopentagend();
}
};
Parser.prototype._closeCurrentTag = function(){
var name = this._tagname;
Parser.prototype._closeCurrentTag = function() {
var name = this._tagname;
this.onopentagend();
this.onopentagend();
//self-closing tags will be on the top of the stack
//(cheaper check than in onclosetag)
if(this._stack[this._stack.length - 1] === name){
if(this._cbs.onclosetag){
this._cbs.onclosetag(name);
}
this._stack.pop();
if((name in foreignContextElements) || (name in htmlIntegrationElements)){
this._foreignContext.pop();
}
}
//self-closing tags will be on the top of the stack
//(cheaper check than in onclosetag)
if (this._stack[this._stack.length - 1] === name) {
if (this._cbs.onclosetag) {
this._cbs.onclosetag(name);
}
this._stack.pop();
}
};
Parser.prototype.onattribname = function(name){
if(this._lowerCaseAttributeNames){
name = name.toLowerCase();
}
this._attribname = name;
Parser.prototype.onattribname = function(name) {
if (this._lowerCaseAttributeNames) {
name = name.toLowerCase();
}
this._attribname = name;
};
Parser.prototype.onattribdata = function(value){
this._attribvalue += value;
Parser.prototype.onattribdata = function(value) {
this._attribvalue += value;
};
Parser.prototype.onattribend = function(){
if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
if(
this._attribs &&
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
){
this._attribs[this._attribname] = this._attribvalue;
}
this._attribname = "";
this._attribvalue = "";
Parser.prototype.onattribend = function() {
if (this._cbs.onattribute)
this._cbs.onattribute(this._attribname, this._attribvalue);
if (
this._attribs &&
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
) {
this._attribs[this._attribname] = this._attribvalue;
}
this._attribname = "";
this._attribvalue = "";
};
Parser.prototype._getInstructionName = function(value){
var idx = value.search(re_nameEnd),
name = idx < 0 ? value : value.substr(0, idx);
Parser.prototype._getInstructionName = function(value) {
var idx = value.search(re_nameEnd),
name = idx < 0 ? value : value.substr(0, idx);
if(this._lowerCaseTagNames){
name = name.toLowerCase();
}
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
return name;
return name;
};
Parser.prototype.ondeclaration = function(value){
if(this._cbs.onprocessinginstruction){
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("!" + name, "!" + value);
}
Parser.prototype.ondeclaration = function(value) {
if (this._cbs.onprocessinginstruction) {
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("!" + name, "!" + value);
}
};
Parser.prototype.onprocessinginstruction = function(value){
if(this._cbs.onprocessinginstruction){
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("?" + name, "?" + value);
}
Parser.prototype.onprocessinginstruction = function(value) {
if (this._cbs.onprocessinginstruction) {
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("?" + name, "?" + value);
}
};
Parser.prototype.oncomment = function(value){
this._updatePosition(4);
Parser.prototype.oncomment = function(value) {
this._updatePosition(4);
if(this._cbs.oncomment) this._cbs.oncomment(value);
if(this._cbs.oncommentend) this._cbs.oncommentend();
if (this._cbs.oncomment) this._cbs.oncomment(value);
if (this._cbs.oncommentend) this._cbs.oncommentend();
};
Parser.prototype.oncdata = function(value){
this._updatePosition(1);
Parser.prototype.oncdata = function(value) {
this._updatePosition(1);
if(this._options.xmlMode || this._options.recognizeCDATA){
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
if(this._cbs.ontext) this._cbs.ontext(value);
if(this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
}
if (this._options.xmlMode || this._options.recognizeCDATA) {
if (this._cbs.oncdatastart) this._cbs.oncdatastart();
if (this._cbs.ontext) this._cbs.ontext(value);
if (this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
}
};
Parser.prototype.onerror = function(err){
if(this._cbs.onerror) this._cbs.onerror(err);
Parser.prototype.onerror = function(err) {
if (this._cbs.onerror) this._cbs.onerror(err);
};
Parser.prototype.onend = function(){
if(this._cbs.onclosetag){
for(
var i = this._stack.length;
i > 0;
this._cbs.onclosetag(this._stack[--i])
);
}
if(this._cbs.onend) this._cbs.onend();
Parser.prototype.onend = function() {
if (this._cbs.onclosetag) {
for (
var i = this._stack.length;
i > 0;
this._cbs.onclosetag(this._stack[--i])
);
}
if (this._cbs.onend) this._cbs.onend();
};
//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function(){
if(this._cbs.onreset) this._cbs.onreset();
this._tokenizer.reset();
Parser.prototype.reset = function() {
if (this._cbs.onreset) this._cbs.onreset();
this._tokenizer.reset();
this._tagname = "";
this._attribname = "";
this._attribs = null;
this._stack = [];
this._tagname = "";
this._attribname = "";
this._attribs = null;
this._stack = [];
if(this._cbs.onparserinit) this._cbs.onparserinit(this);
if (this._cbs.onparserinit) this._cbs.onparserinit(this);
};
//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data){
this.reset();
this.end(data);
Parser.prototype.parseComplete = function(data) {
this.reset();
this.end(data);
};
Parser.prototype.write = function(chunk){
this._tokenizer.write(chunk);
Parser.prototype.write = function(chunk) {
this._tokenizer.write(chunk);
};
Parser.prototype.end = function(chunk){
this._tokenizer.end(chunk);
Parser.prototype.end = function(chunk) {
this._tokenizer.end(chunk);
};
Parser.prototype.pause = function(){
this._tokenizer.pause();
Parser.prototype.pause = function() {
this._tokenizer.pause();
};
Parser.prototype.resume = function(){
this._tokenizer.resume();
Parser.prototype.resume = function() {
this._tokenizer.resume();
};

@@ -364,0 +379,0 @@

module.exports = ProxyHandler;
function ProxyHandler(cbs){
this._cbs = cbs || {};
function ProxyHandler(cbs) {
this._cbs = cbs || {};
}
var EVENTS = require("./").EVENTS;
Object.keys(EVENTS).forEach(function(name){
if(EVENTS[name] === 0){
name = "on" + name;
ProxyHandler.prototype[name] = function(){
if(this._cbs[name]) this._cbs[name]();
};
} else if(EVENTS[name] === 1){
name = "on" + name;
ProxyHandler.prototype[name] = function(a){
if(this._cbs[name]) this._cbs[name](a);
};
} else if(EVENTS[name] === 2){
name = "on" + name;
ProxyHandler.prototype[name] = function(a, b){
if(this._cbs[name]) this._cbs[name](a, b);
};
} else {
throw Error("wrong number of arguments");
}
});
Object.keys(EVENTS).forEach(function(name) {
if (EVENTS[name] === 0) {
name = "on" + name;
ProxyHandler.prototype[name] = function() {
if (this._cbs[name]) this._cbs[name]();
};
} else if (EVENTS[name] === 1) {
name = "on" + name;
ProxyHandler.prototype[name] = function(a) {
if (this._cbs[name]) this._cbs[name](a);
};
} else if (EVENTS[name] === 2) {
name = "on" + name;
ProxyHandler.prototype[name] = function(a, b) {
if (this._cbs[name]) this._cbs[name](a, b);
};
} else {
throw Error("wrong number of arguments");
}
});

@@ -5,4 +5,4 @@ module.exports = Stream;

function Stream(options){
Parser.call(this, new Cbs(this), options);
function Stream(options) {
Parser.call(this, new Cbs(this), options);
}

@@ -14,4 +14,4 @@

function Cbs(scope){
this.scope = scope;
function Cbs(scope) {
this.scope = scope;
}

@@ -21,18 +21,18 @@

Object.keys(EVENTS).forEach(function(name){
if(EVENTS[name] === 0){
Cbs.prototype["on" + name] = function(){
this.scope.emit(name);
};
} else if(EVENTS[name] === 1){
Cbs.prototype["on" + name] = function(a){
this.scope.emit(name, a);
};
} else if(EVENTS[name] === 2){
Cbs.prototype["on" + name] = function(a, b){
this.scope.emit(name, a, b);
};
} else {
throw Error("wrong number of arguments!");
}
});
Object.keys(EVENTS).forEach(function(name) {
if (EVENTS[name] === 0) {
Cbs.prototype["on" + name] = function() {
this.scope.emit(name);
};
} else if (EVENTS[name] === 1) {
Cbs.prototype["on" + name] = function(a) {
this.scope.emit(name, a);
};
} else if (EVENTS[name] === 2) {
Cbs.prototype["on" + name] = function(a, b) {
this.scope.emit(name, a, b);
};
} else {
throw Error("wrong number of arguments!");
}
});

@@ -6,26 +6,26 @@ module.exports = Tokenizer;

var legacyMap = require("entities/maps/legacy.json");
var xmlMap = require("entities/maps/xml.json");
var xmlMap = require("entities/maps/xml.json");
var i = 0;
var TEXT = i++;
var BEFORE_TAG_NAME = i++; //after <
var IN_TAG_NAME = i++;
var IN_SELF_CLOSING_TAG = i++;
var BEFORE_CLOSING_TAG_NAME = i++;
var IN_CLOSING_TAG_NAME = i++;
var AFTER_CLOSING_TAG_NAME = i++;
var TEXT = i++;
var BEFORE_TAG_NAME = i++; //after <
var IN_TAG_NAME = i++;
var IN_SELF_CLOSING_TAG = i++;
var BEFORE_CLOSING_TAG_NAME = i++;
var IN_CLOSING_TAG_NAME = i++;
var AFTER_CLOSING_TAG_NAME = i++;
//attributes
var BEFORE_ATTRIBUTE_NAME = i++;
var IN_ATTRIBUTE_NAME = i++;
var AFTER_ATTRIBUTE_NAME = i++;
var BEFORE_ATTRIBUTE_VALUE = i++;
var IN_ATTRIBUTE_VALUE_DQ = i++; // "
var IN_ATTRIBUTE_VALUE_SQ = i++; // '
var IN_ATTRIBUTE_VALUE_NQ = i++;
var BEFORE_ATTRIBUTE_NAME = i++;
var IN_ATTRIBUTE_NAME = i++;
var AFTER_ATTRIBUTE_NAME = i++;
var BEFORE_ATTRIBUTE_VALUE = i++;
var IN_ATTRIBUTE_VALUE_DQ = i++; // "
var IN_ATTRIBUTE_VALUE_SQ = i++; // '
var IN_ATTRIBUTE_VALUE_NQ = i++;
//declarations
var BEFORE_DECLARATION = i++; // !
var IN_DECLARATION = i++;
var BEFORE_DECLARATION = i++; // !
var IN_DECLARATION = i++;

@@ -36,413 +36,457 @@ //processing instructions

//comments
var BEFORE_COMMENT = i++;
var IN_COMMENT = i++;
var AFTER_COMMENT_1 = i++;
var AFTER_COMMENT_2 = i++;
var BEFORE_COMMENT = i++;
var IN_COMMENT = i++;
var AFTER_COMMENT_1 = i++;
var AFTER_COMMENT_2 = i++;
//cdata
var BEFORE_CDATA_1 = i++; // [
var BEFORE_CDATA_2 = i++; // C
var BEFORE_CDATA_3 = i++; // D
var BEFORE_CDATA_4 = i++; // A
var BEFORE_CDATA_5 = i++; // T
var BEFORE_CDATA_6 = i++; // A
var IN_CDATA = i++; // [
var AFTER_CDATA_1 = i++; // ]
var AFTER_CDATA_2 = i++; // ]
var BEFORE_CDATA_1 = i++; // [
var BEFORE_CDATA_2 = i++; // C
var BEFORE_CDATA_3 = i++; // D
var BEFORE_CDATA_4 = i++; // A
var BEFORE_CDATA_5 = i++; // T
var BEFORE_CDATA_6 = i++; // A
var IN_CDATA = i++; // [
var AFTER_CDATA_1 = i++; // ]
var AFTER_CDATA_2 = i++; // ]
//special tags
var BEFORE_SPECIAL = i++; //S
var BEFORE_SPECIAL_END = i++; //S
var BEFORE_SPECIAL = i++; //S
var BEFORE_SPECIAL_END = i++; //S
var BEFORE_SCRIPT_1 = i++; //C
var BEFORE_SCRIPT_2 = i++; //R
var BEFORE_SCRIPT_3 = i++; //I
var BEFORE_SCRIPT_4 = i++; //P
var BEFORE_SCRIPT_5 = i++; //T
var AFTER_SCRIPT_1 = i++; //C
var AFTER_SCRIPT_2 = i++; //R
var AFTER_SCRIPT_3 = i++; //I
var AFTER_SCRIPT_4 = i++; //P
var AFTER_SCRIPT_5 = i++; //T
var BEFORE_SCRIPT_1 = i++; //C
var BEFORE_SCRIPT_2 = i++; //R
var BEFORE_SCRIPT_3 = i++; //I
var BEFORE_SCRIPT_4 = i++; //P
var BEFORE_SCRIPT_5 = i++; //T
var AFTER_SCRIPT_1 = i++; //C
var AFTER_SCRIPT_2 = i++; //R
var AFTER_SCRIPT_3 = i++; //I
var AFTER_SCRIPT_4 = i++; //P
var AFTER_SCRIPT_5 = i++; //T
var BEFORE_STYLE_1 = i++; //T
var BEFORE_STYLE_2 = i++; //Y
var BEFORE_STYLE_3 = i++; //L
var BEFORE_STYLE_4 = i++; //E
var AFTER_STYLE_1 = i++; //T
var AFTER_STYLE_2 = i++; //Y
var AFTER_STYLE_3 = i++; //L
var AFTER_STYLE_4 = i++; //E
var BEFORE_STYLE_1 = i++; //T
var BEFORE_STYLE_2 = i++; //Y
var BEFORE_STYLE_3 = i++; //L
var BEFORE_STYLE_4 = i++; //E
var AFTER_STYLE_1 = i++; //T
var AFTER_STYLE_2 = i++; //Y
var AFTER_STYLE_3 = i++; //L
var AFTER_STYLE_4 = i++; //E
var BEFORE_ENTITY = i++; //&
var BEFORE_NUMERIC_ENTITY = i++; //#
var IN_NAMED_ENTITY = i++;
var IN_NUMERIC_ENTITY = i++;
var IN_HEX_ENTITY = i++; //X
var BEFORE_ENTITY = i++; //&
var BEFORE_NUMERIC_ENTITY = i++; //#
var IN_NAMED_ENTITY = i++;
var IN_NUMERIC_ENTITY = i++;
var IN_HEX_ENTITY = i++; //X
// Ids for the kind of "special" tag the tokenizer is currently inside.
// Numbered from 0 with a dedicated counter `j`; each id is declared once so
// SPECIAL_NONE is 0, SPECIAL_SCRIPT is 1 and SPECIAL_STYLE is 2.
var j = 0;
var SPECIAL_NONE = j++;
var SPECIAL_SCRIPT = j++;
var SPECIAL_STYLE = j++;
// Returns true when `c` is one of the characters the tokenizer treats as
// whitespace: space, line feed, tab, form feed or carriage return.
// `c` is compared with strict equality, so only those exact one-character
// strings match.
function whitespace(c) {
    return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
}
// Builds a state handler that tests the current character against one
// expected character.
//   upper   - the expected character; its lower-case form is accepted as well
//   SUCCESS - state assigned to this._state when the character matches
//   FAILURE - state assigned to this._state otherwise
// On a mismatch the handler also decrements this._index, i.e. it steps back
// one character (elsewhere in this file that is described as consuming the
// token again).
// Returns a function(c) meant to be called with the tokenizer as `this`.
function ifElseState(upper, SUCCESS, FAILURE) {
    var lower = upper.toLowerCase();

    if (upper === lower) {
        // No distinct lower-case form (e.g. "#"): a single comparison suffices.
        return function(c) {
            if (c === lower) {
                this._state = SUCCESS;
            } else {
                this._state = FAILURE;
                this._index--;
            }
        };
    } else {
        return function(c) {
            if (c === lower || c === upper) {
                this._state = SUCCESS;
            } else {
                this._state = FAILURE;
                this._index--;
            }
        };
    }
}
// Builds a state handler for one letter of a special tag name.
//   upper      - the expected letter; its lower-case form is accepted as well
//   NEXT_STATE - state assigned to this._state when the letter matches
// If the character does not match, the handler falls back to IN_TAG_NAME and
// decrements this._index so the character is consumed again in that state.
// Returns a function(c) meant to be called with the tokenizer as `this`.
function consumeSpecialNameChar(upper, NEXT_STATE) {
    var lower = upper.toLowerCase();

    return function(c) {
        if (c === lower || c === upper) {
            this._state = NEXT_STATE;
        } else {
            this._state = IN_TAG_NAME;
            this._index--; //consume the token again
        }
    };
}
// Tokenizer constructor.
//   options - optional object; `xmlMode` and `decodeEntities` are read and
//             coerced to booleans (both are false when options is missing)
//   cbs     - callback object stored as this._cbs; the state handlers invoke
//             its on* methods
// The tokenizer starts in the TEXT state with an empty buffer.
function Tokenizer(options, cbs) {
    this._state = TEXT;
    this._buffer = "";
    this._sectionStart = 0;
    this._index = 0;
    this._bufferOffset = 0; //chars removed from _buffer
    this._baseState = TEXT;
    this._special = SPECIAL_NONE;
    this._cbs = cbs;
    this._running = true;
    this._ended = false;
    this._xmlMode = !!(options && options.xmlMode);
    this._decodeEntities = !!(options && options.decodeEntities);
}
// TEXT state handler for character `c`.
// "<" : reports any pending section through ontext, then enters
//       BEFORE_TAG_NAME with the section starting at the current index.
// "&" : only when entity decoding is enabled and no special tag is active;
//       reports pending text, remembers TEXT as the base state and enters
//       BEFORE_ENTITY.
// Any other character leaves the state untouched.
Tokenizer.prototype._stateText = function(c) {
    if (c === "<") {
        if (this._index > this._sectionStart) {
            this._cbs.ontext(this._getSection());
        }
        this._state = BEFORE_TAG_NAME;
        this._sectionStart = this._index;
    } else if (
        this._decodeEntities &&
        this._special === SPECIAL_NONE &&
        c === "&"
    ) {
        if (this._index > this._sectionStart) {
            this._cbs.ontext(this._getSection());
        }
        this._baseState = TEXT;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
// Handler for the character right after "<".
// "/"  : a closing tag follows.
// "<"  : the previous "<" was plain text; report it and restart the section.
// ">", whitespace, or any character while inside a special tag: back to TEXT.
// "!"  : declaration / comment / CDATA; "?" : processing instruction. In both
//        cases the section starts after the current character.
// Otherwise a tag name starts here; outside XML mode an initial "s"/"S" goes
// through BEFORE_SPECIAL instead of IN_TAG_NAME.
Tokenizer.prototype._stateBeforeTagName = function(c) {
    if (c === "/") {
        this._state = BEFORE_CLOSING_TAG_NAME;
    } else if (c === "<") {
        this._cbs.ontext(this._getSection());
        this._sectionStart = this._index;
    } else if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
        this._state = TEXT;
    } else if (c === "!") {
        this._state = BEFORE_DECLARATION;
        this._sectionStart = this._index + 1;
    } else if (c === "?") {
        this._state = IN_PROCESSING_INSTRUCTION;
        this._sectionStart = this._index + 1;
    } else {
        this._state =
            !this._xmlMode && (c === "s" || c === "S")
                ? BEFORE_SPECIAL
                : IN_TAG_NAME;
        this._sectionStart = this._index;
    }
};
// Handler while reading an opening tag name.
// The name ends at "/", ">" or whitespace: the token is emitted as
// "onopentagname", the state becomes BEFORE_ATTRIBUTE_NAME and the index is
// stepped back so the terminating character is handled in that state.
Tokenizer.prototype._stateInTagName = function(c) {
    if (c === "/" || c === ">" || whitespace(c)) {
        this._emitToken("onopentagname");
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    }
};
// Handler for the characters following "</".
// Whitespace is skipped and ">" returns to TEXT.
// Inside a special tag only "s"/"S" continues (BEFORE_SPECIAL_END); any other
// character returns to TEXT and is consumed again there.
// Otherwise a closing tag name starts at the current index.
Tokenizer.prototype._stateBeforeCloseingTagName = function(c) {
    if (whitespace(c)) {
        //ignore
    } else if (c === ">") {
        this._state = TEXT;
    } else if (this._special !== SPECIAL_NONE) {
        if (c === "s" || c === "S") {
            this._state = BEFORE_SPECIAL_END;
        } else {
            this._state = TEXT;
            this._index--;
        }
    } else {
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index;
    }
};
// Handler while reading a closing tag name.
// At ">" or whitespace the token is emitted as "onclosetag", the state becomes
// AFTER_CLOSING_TAG_NAME and the index is stepped back so the terminating
// character is handled in that state.
Tokenizer.prototype._stateInCloseingTagName = function(c) {
    if (c === ">" || whitespace(c)) {
        this._emitToken("onclosetag");
        this._state = AFTER_CLOSING_TAG_NAME;
        this._index--;
    }
};
// Handler after a closing tag name: everything up to ">" is skipped, then the
// tokenizer returns to TEXT with the next section starting after the ">".
Tokenizer.prototype._stateAfterCloseingTagName = function(c) {
    //skip everything until ">"
    if (c === ">") {
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
// Handler inside an opening tag, between attributes.
// ">" : calls onopentagend and returns to TEXT, section starting after ">".
// "/" : possible self-closing tag.
// Any other non-whitespace character starts an attribute name at the current
// index; whitespace is skipped.
Tokenizer.prototype._stateBeforeAttributeName = function(c) {
    if (c === ">") {
        this._cbs.onopentagend();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if (c === "/") {
        this._state = IN_SELF_CLOSING_TAG;
    } else if (!whitespace(c)) {
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};
// Handler after "/" inside an opening tag.
// ">" : calls onselfclosingtag and returns to TEXT, section starting after ">".
// Any other non-whitespace character means the "/" did not close the tag: go
// back to BEFORE_ATTRIBUTE_NAME and consume the character again there.
Tokenizer.prototype._stateInSelfClosingTag = function(c) {
    if (c === ">") {
        this._cbs.onselfclosingtag();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if (!whitespace(c)) {
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    }
};
// Handler while reading an attribute name.
// The name ends at "=", "/", ">" or whitespace: it is reported through
// onattribname, the section start is set to -1, and the terminating character
// is consumed again in AFTER_ATTRIBUTE_NAME.
Tokenizer.prototype._stateInAttributeName = function(c) {
    if (c === "=" || c === "/" || c === ">" || whitespace(c)) {
        this._cbs.onattribname(this._getSection());
        this._sectionStart = -1;
        this._state = AFTER_ATTRIBUTE_NAME;
        this._index--;
    }
};
// Handler after an attribute name.
// "="        : a value follows.
// "/" or ">" : the attribute has no value; call onattribend and consume the
//              character again in BEFORE_ATTRIBUTE_NAME.
// Any other non-whitespace character: the attribute has no value and a new
// attribute name starts at the current index. Whitespace is skipped.
Tokenizer.prototype._stateAfterAttributeName = function(c) {
    if (c === "=") {
        this._state = BEFORE_ATTRIBUTE_VALUE;
    } else if (c === "/" || c === ">") {
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else if (!whitespace(c)) {
        this._cbs.onattribend();
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};
// Handler after "=" in an attribute.
// A double or single quote opens a quoted value whose section starts after the
// quote. Any other non-whitespace character starts an unquoted value at the
// current index and is consumed again in that state. Whitespace is skipped.
Tokenizer.prototype._stateBeforeAttributeValue = function(c) {
    if (c === '"') {
        this._state = IN_ATTRIBUTE_VALUE_DQ;
        this._sectionStart = this._index + 1;
    } else if (c === "'") {
        this._state = IN_ATTRIBUTE_VALUE_SQ;
        this._sectionStart = this._index + 1;
    } else if (!whitespace(c)) {
        this._state = IN_ATTRIBUTE_VALUE_NQ;
        this._sectionStart = this._index;
        this._index--; //reconsume token
    }
};
// Handler inside a double-quoted attribute value.
// The closing quote emits the pending data ("onattribdata"), calls onattribend
// and returns to BEFORE_ATTRIBUTE_NAME.
// With entity decoding enabled, "&" emits the pending data, remembers the
// current state as the base state and enters BEFORE_ENTITY.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c) {
    if (c === '"') {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else if (this._decodeEntities && c === "&") {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
// Handler inside a single-quoted attribute value.
// The closing quote emits the pending data ("onattribdata"), calls onattribend
// and returns to BEFORE_ATTRIBUTE_NAME.
// With entity decoding enabled, "&" emits the pending data, remembers the
// current state as the base state and enters BEFORE_ENTITY.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c) {
    if (c === "'") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else if (this._decodeEntities && c === "&") {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
// Handler inside an unquoted attribute value.
// Whitespace or ">" ends the value: the pending data is emitted, onattribend
// is called and the terminating character is consumed again in
// BEFORE_ATTRIBUTE_NAME.
// With entity decoding enabled, "&" emits the pending data, remembers the
// current state as the base state and enters BEFORE_ENTITY.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c) {
    if (whitespace(c) || c === ">") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else if (this._decodeEntities && c === "&") {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
// Handler for the character after "<!".
// "[" leads towards a CDATA section, "-" towards a comment; anything else is
// treated as a declaration.
Tokenizer.prototype._stateBeforeDeclaration = function(c) {
    this._state =
        c === "["
            ? BEFORE_CDATA_1
            : c === "-"
                ? BEFORE_COMMENT
                : IN_DECLARATION;
};
// Handler inside a declaration: at ">" the section is reported through
// ondeclaration and the tokenizer returns to TEXT, with the next section
// starting after the ">".
Tokenizer.prototype._stateInDeclaration = function(c) {
    if (c === ">") {
        this._cbs.ondeclaration(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
// Handler inside a processing instruction: at ">" the section is reported
// through onprocessinginstruction and the tokenizer returns to TEXT, with the
// next section starting after the ">".
Tokenizer.prototype._stateInProcessingInstruction = function(c) {
    if (c === ">") {
        this._cbs.onprocessinginstruction(this._getSection());
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    }
};
// Handler after "<!-".
// A second "-" opens a comment whose section starts after it; any other
// character makes this a declaration instead.
Tokenizer.prototype._stateBeforeComment = function(c) {
    if (c === "-") {
        this._state = IN_COMMENT;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
    }
};
// Handler inside a comment: a "-" may begin the closing "-->" sequence.
Tokenizer.prototype._stateInComment = function(c) {
    if (c === "-") this._state = AFTER_COMMENT_1;
};
// Handler after one "-" inside a comment: a second "-" advances to
// AFTER_COMMENT_2, anything else goes back to IN_COMMENT.
Tokenizer.prototype._stateAfterComment1 = function(c) {
    if (c === "-") {
        this._state = AFTER_COMMENT_2;
    } else {
        this._state = IN_COMMENT;
    }
};
// Handler after "--" inside a comment.
// ">" ends the comment: its text (without the two trailing dashes) is reported
// through oncomment and the tokenizer returns to TEXT.
// A further "-" keeps this state; any other character resumes IN_COMMENT.
Tokenizer.prototype._stateAfterComment2 = function(c) {
    if (c === ">") {
        //remove 2 trailing chars
        this._cbs.oncomment(
            this._buffer.substring(this._sectionStart, this._index - 2)
        );
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if (c !== "-") {
        this._state = IN_COMMENT;
    }
    // else: stay in AFTER_COMMENT_2 (`--->`)
};
// Handlers matching the letters "CDATA" after "<![", case-insensitively.
// Each one advances to the next BEFORE_CDATA_* state on a match; on a mismatch
// it falls back to IN_DECLARATION and consumes the character again there.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState(
    "C",
    BEFORE_CDATA_2,
    IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState(
    "D",
    BEFORE_CDATA_3,
    IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState(
    "A",
    BEFORE_CDATA_4,
    IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState(
    "T",
    BEFORE_CDATA_5,
    IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState(
    "A",
    BEFORE_CDATA_6,
    IN_DECLARATION
);
// Handler after "<![CDATA".
// "[" opens the CDATA section, whose content starts after it. Any other
// character makes this a declaration and is consumed again in IN_DECLARATION.
Tokenizer.prototype._stateBeforeCdata6 = function(c) {
    if (c === "[") {
        this._state = IN_CDATA;
        this._sectionStart = this._index + 1;
    } else {
        this._state = IN_DECLARATION;
        this._index--;
    }
};
// Handler inside a CDATA section: a "]" may begin the closing "]]>" sequence.
Tokenizer.prototype._stateInCdata = function(c) {
    if (c === "]") this._state = AFTER_CDATA_1;
};
// Handler after one "]" inside CDATA: a second "]" advances to AFTER_CDATA_2,
// anything else goes back to IN_CDATA.
Tokenizer.prototype._stateAfterCdata1 = function(c) {
    if (c === "]") this._state = AFTER_CDATA_2;
    else this._state = IN_CDATA;
};
// Handler after "]]" inside CDATA.
// ">" ends the section: its content (without the two trailing brackets) is
// reported through oncdata and the tokenizer returns to TEXT.
// A further "]" keeps this state; any other character resumes IN_CDATA.
Tokenizer.prototype._stateAfterCdata2 = function(c) {
    if (c === ">") {
        //remove 2 trailing chars
        this._cbs.oncdata(
            this._buffer.substring(this._sectionStart, this._index - 2)
        );
        this._state = TEXT;
        this._sectionStart = this._index + 1;
    } else if (c !== "]") {
        this._state = IN_CDATA;
    }
    //else: stay in AFTER_CDATA_2 (`]]]>`)
};
// Handler for the second character of a tag name that began with "s"/"S".
// "c"/"C" continues towards "script", "t"/"T" towards "style"; any other
// character makes it an ordinary tag name and is consumed again in
// IN_TAG_NAME.
Tokenizer.prototype._stateBeforeSpecial = function(c) {
    if (c === "c" || c === "C") {
        this._state = BEFORE_SCRIPT_1;
    } else if (c === "t" || c === "T") {
        this._state = BEFORE_STYLE_1;
    } else {
        this._state = IN_TAG_NAME;
        this._index--; //consume the token again
    }
};
// Handler for the character after "</s" while inside a special tag.
// Only the letter matching the active special tag continues ("c"/"C" for
// script, "t"/"T" for style); anything else returns to TEXT.
Tokenizer.prototype._stateBeforeSpecialEnd = function(c) {
    if (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) {
        this._state = AFTER_SCRIPT_1;
    } else if (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) {
        this._state = AFTER_STYLE_1;
    } else this._state = TEXT;
};
// Handlers matching the letters "RIPT" of an opening "script" tag name,
// case-insensitively. A mismatch falls back to IN_TAG_NAME and consumes the
// character again there.
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar(
    "R",
    BEFORE_SCRIPT_2
);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar(
    "I",
    BEFORE_SCRIPT_3
);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar(
    "P",
    BEFORE_SCRIPT_4
);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar(
    "T",
    BEFORE_SCRIPT_5
);
// Handler for the character after the letters "script" in an opening tag.
// If the name ends here ("/", ">" or whitespace) the tokenizer is marked as
// being inside a script tag. In every case the character is consumed again in
// IN_TAG_NAME.
Tokenizer.prototype._stateBeforeScript5 = function(c) {
    if (c === "/" || c === ">" || whitespace(c)) {
        this._special = SPECIAL_SCRIPT;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};

@@ -455,22 +499,30 @@

// Handler for the character after "</script".
// If the name ends here (">" or whitespace) the special mode is cleared and
// the tag is handled as a closing tag: the section is moved back six
// characters (the length of "script") and the character is consumed again in
// IN_CLOSING_TAG_NAME. Otherwise the tokenizer returns to TEXT.
Tokenizer.prototype._stateAfterScript5 = function(c) {
    if (c === ">" || whitespace(c)) {
        this._special = SPECIAL_NONE;
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 6;
        this._index--; //reconsume the token
    } else this._state = TEXT;
};
// Handlers matching the letters "YLE" of an opening "style" tag name,
// case-insensitively. A mismatch falls back to IN_TAG_NAME and consumes the
// character again there.
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar(
    "Y",
    BEFORE_STYLE_2
);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar(
    "L",
    BEFORE_STYLE_3
);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar(
    "E",
    BEFORE_STYLE_4
);
// Handler for the character after the letters "style" in an opening tag.
// If the name ends here ("/", ">" or whitespace) the tokenizer is marked as
// being inside a style tag. In every case the character is consumed again in
// IN_TAG_NAME.
Tokenizer.prototype._stateBeforeStyle4 = function(c) {
    if (c === "/" || c === ">" || whitespace(c)) {
        this._special = SPECIAL_STYLE;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};

@@ -482,426 +534,441 @@

// Handler for the character after "</style".
// If the name ends here (">" or whitespace) the special mode is cleared and
// the tag is handled as a closing tag: the section is moved back five
// characters (the length of "style") and the character is consumed again in
// IN_CLOSING_TAG_NAME. Otherwise the tokenizer returns to TEXT.
Tokenizer.prototype._stateAfterStyle4 = function(c) {
    if (c === ">" || whitespace(c)) {
        this._special = SPECIAL_NONE;
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index - 5;
        this._index--; //reconsume the token
    } else this._state = TEXT;
};
// Handler for the character after "&": "#" starts a numeric entity, anything
// else is consumed again as part of a named entity.
Tokenizer.prototype._stateBeforeEntity = ifElseState(
    "#",
    BEFORE_NUMERIC_ENTITY,
    IN_NAMED_ENTITY
);
// Handler for the character after "&#": "x"/"X" starts a hexadecimal entity,
// anything else is consumed again as part of a decimal entity.
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState(
    "X",
    IN_HEX_ENTITY,
    IN_NUMERIC_ENTITY
);
//for entities terminated with a semicolon
// Looks up the text between the "&" (at _sectionStart) and the current index
// in the XML or HTML entity map, depending on _xmlMode. On a hit the mapped
// value is passed to _emitPartial and the section is moved past the current
// character. Nothing happens when the name is empty or unknown.
Tokenizer.prototype._parseNamedEntityStrict = function() {
    //offset = 1
    if (this._sectionStart + 1 < this._index) {
        var entity = this._buffer.substring(
                this._sectionStart + 1,
                this._index
            ),
            map = this._xmlMode ? xmlMap : entityMap;

        if (map.hasOwnProperty(entity)) {
            this._emitPartial(map[entity]);
            this._sectionStart = this._index + 1;
        }
    }
};
//parses legacy entities (without trailing semicolon)
// Tries the longest candidate first (at most 6 characters after the "&") and
// shortens it one character at a time down to 2. The first candidate found in
// legacyMap is passed to _emitPartial and the section start is advanced past
// the "&" and the matched name. Nothing happens when no candidate matches.
Tokenizer.prototype._parseLegacyEntity = function() {
    var start = this._sectionStart + 1,
        limit = this._index - start;

    if (limit > 6) limit = 6; //the max length of legacy entities is 6

    while (limit >= 2) {
        //the min length of legacy entities is 2
        var entity = this._buffer.substr(start, limit);

        if (legacyMap.hasOwnProperty(entity)) {
            this._emitPartial(legacyMap[entity]);
            this._sectionStart += limit + 1;
            return;
        } else {
            limit--;
        }
    }
};
// Handler while reading a named entity.
// ";" : strict lookup first; if that did not consume the section and the
//       tokenizer is not in XML mode, a legacy lookup is tried. The tokenizer
//       then returns to the base state.
// Any character outside a-z, A-Z and 0-9 ends the name without a semicolon:
//   - in XML mode, or when the name is empty, nothing is decoded;
//   - inside an attribute value (base state other than TEXT) a strict lookup
//     is done unless the terminator is "=";
//   - in text a legacy lookup is done.
//   The terminating character is then consumed again in the base state.
Tokenizer.prototype._stateInNamedEntity = function(c) {
    if (c === ";") {
        this._parseNamedEntityStrict();
        if (this._sectionStart + 1 < this._index && !this._xmlMode) {
            this._parseLegacyEntity();
        }
        this._state = this._baseState;
    } else if (
        (c < "a" || c > "z") &&
        (c < "A" || c > "Z") &&
        (c < "0" || c > "9")
    ) {
        if (this._xmlMode) {
            //no decoding without a semicolon in XML mode
        } else if (this._sectionStart + 1 === this._index) {
            //empty entity name
        } else if (this._baseState !== TEXT) {
            if (c !== "=") {
                this._parseNamedEntityStrict();
            }
        } else {
            this._parseLegacyEntity();
        }

        this._state = this._baseState;
        this._index--;
    }
};
// Decodes a numeric entity and returns to the base state.
//   offset - number of prefix characters to skip after _sectionStart
//            (callers in this file pass 2 for decimal and 3 for hexadecimal)
//   base   - radix passed to parseInt (10 or 16 in this file)
// When there are digits, the parsed code point is converted with
// decodeCodePoint, passed to _emitPartial, and the section restarts at the
// current index. When there are none, the section start is moved back by one.
Tokenizer.prototype._decodeNumericEntity = function(offset, base) {
    var sectionStart = this._sectionStart + offset;

    if (sectionStart !== this._index) {
        //parse entity
        var entity = this._buffer.substring(sectionStart, this._index);
        var parsed = parseInt(entity, base);

        this._emitPartial(decodeCodePoint(parsed));
        this._sectionStart = this._index;
    } else {
        this._sectionStart--;
    }

    this._state = this._baseState;
};
// Handler while reading a decimal numeric entity.
// ";" decodes the entity and moves the section start past the semicolon.
// Any non-digit ends the entity without a semicolon: outside XML mode it is
// still decoded, in XML mode the tokenizer just returns to the base state.
// In both cases the terminating character is consumed again.
Tokenizer.prototype._stateInNumericEntity = function(c) {
    if (c === ";") {
        this._decodeNumericEntity(2, 10);
        this._sectionStart++;
    } else if (c < "0" || c > "9") {
        if (!this._xmlMode) {
            this._decodeNumericEntity(2, 10);
        } else {
            this._state = this._baseState;
        }
        this._index--;
    }
};
// Handler while reading a hexadecimal numeric entity.
// ";" decodes the entity and moves the section start past the semicolon.
// Any character outside a-f, A-F and 0-9 ends the entity without a semicolon:
// outside XML mode it is still decoded, in XML mode the tokenizer just returns
// to the base state. In both cases the terminating character is consumed
// again.
Tokenizer.prototype._stateInHexEntity = function(c) {
    if (c === ";") {
        this._decodeNumericEntity(3, 16);
        this._sectionStart++;
    } else if (
        (c < "a" || c > "f") &&
        (c < "A" || c > "F") &&
        (c < "0" || c > "9")
    ) {
        if (!this._xmlMode) {
            this._decodeNumericEntity(3, 16);
        } else {
            this._state = this._baseState;
        }
        this._index--;
    }
};
// Drops the part of the buffer that is no longer needed and keeps
// _bufferOffset in step with the number of characters removed.
// - Negative section start: the whole buffer is dropped.
// - While running, in TEXT state: pending text is reported through ontext and
//   the whole buffer is dropped.
// - While running, section just started: the whole buffer is dropped.
// - While running, otherwise: only the characters before the current section
//   are removed and the index is shifted accordingly.
// In the three running cases the section start is reset to 0. When the section
// start is non-negative and the tokenizer is not running, nothing changes.
Tokenizer.prototype._cleanup = function() {
    if (this._sectionStart < 0) {
        this._buffer = "";
        this._bufferOffset += this._index;
        this._index = 0;
    } else if (this._running) {
        if (this._state === TEXT) {
            if (this._sectionStart !== this._index) {
                this._cbs.ontext(this._buffer.substr(this._sectionStart));
            }
            this._buffer = "";
            this._bufferOffset += this._index;
            this._index = 0;
        } else if (this._sectionStart === this._index) {
            //the section just started
            this._buffer = "";
            this._bufferOffset += this._index;
            this._index = 0;
        } else {
            //remove everything unnecessary
            this._buffer = this._buffer.substr(this._sectionStart);
            this._index -= this._sectionStart;
            this._bufferOffset += this._sectionStart;
        }

        this._sectionStart = 0;
    }
};
//TODO make events conditional
// Appends `chunk` to the buffer and runs the parser over it.
// If the tokenizer has already been ended, an Error is reported through
// onerror first; the chunk is still appended and parsed afterwards.
Tokenizer.prototype.write = function(chunk) {
    if (this._ended) this._cbs.onerror(Error(".write() after done!"));

    this._buffer += chunk;
    this._parse();
};
Tokenizer.prototype._parse = function(){
while(this._index < this._buffer.length && this._running){
var c = this._buffer.charAt(this._index);
if(this._state === TEXT) {
this._stateText(c);
} else if(this._state === BEFORE_TAG_NAME){
this._stateBeforeTagName(c);
} else if(this._state === IN_TAG_NAME) {
this._stateInTagName(c);
} else if(this._state === BEFORE_CLOSING_TAG_NAME){
this._stateBeforeCloseingTagName(c);
} else if(this._state === IN_CLOSING_TAG_NAME){
this._stateInCloseingTagName(c);
} else if(this._state === AFTER_CLOSING_TAG_NAME){
this._stateAfterCloseingTagName(c);
} else if(this._state === IN_SELF_CLOSING_TAG){
this._stateInSelfClosingTag(c);
}
Tokenizer.prototype._parse = function() {
while (this._index < this._buffer.length && this._running) {
var c = this._buffer.charAt(this._index);
if (this._state === TEXT) {
this._stateText(c);
} else if (this._state === BEFORE_TAG_NAME) {
this._stateBeforeTagName(c);
} else if (this._state === IN_TAG_NAME) {
this._stateInTagName(c);
} else if (this._state === BEFORE_CLOSING_TAG_NAME) {
this._stateBeforeCloseingTagName(c);
} else if (this._state === IN_CLOSING_TAG_NAME) {
this._stateInCloseingTagName(c);
} else if (this._state === AFTER_CLOSING_TAG_NAME) {
this._stateAfterCloseingTagName(c);
} else if (this._state === IN_SELF_CLOSING_TAG) {
this._stateInSelfClosingTag(c);
} else if (this._state === BEFORE_ATTRIBUTE_NAME) {
/*
/*
* attributes
*/
else if(this._state === BEFORE_ATTRIBUTE_NAME){
this._stateBeforeAttributeName(c);
} else if(this._state === IN_ATTRIBUTE_NAME){
this._stateInAttributeName(c);
} else if(this._state === AFTER_ATTRIBUTE_NAME){
this._stateAfterAttributeName(c);
} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
this._stateBeforeAttributeValue(c);
} else if(this._state === IN_ATTRIBUTE_VALUE_DQ){
this._stateInAttributeValueDoubleQuotes(c);
} else if(this._state === IN_ATTRIBUTE_VALUE_SQ){
this._stateInAttributeValueSingleQuotes(c);
} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
this._stateInAttributeValueNoQuotes(c);
}
this._stateBeforeAttributeName(c);
} else if (this._state === IN_ATTRIBUTE_NAME) {
this._stateInAttributeName(c);
} else if (this._state === AFTER_ATTRIBUTE_NAME) {
this._stateAfterAttributeName(c);
} else if (this._state === BEFORE_ATTRIBUTE_VALUE) {
this._stateBeforeAttributeValue(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_DQ) {
this._stateInAttributeValueDoubleQuotes(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_SQ) {
this._stateInAttributeValueSingleQuotes(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_NQ) {
this._stateInAttributeValueNoQuotes(c);
} else if (this._state === BEFORE_DECLARATION) {
/*
/*
* declarations
*/
else if(this._state === BEFORE_DECLARATION){
this._stateBeforeDeclaration(c);
} else if(this._state === IN_DECLARATION){
this._stateInDeclaration(c);
}
this._stateBeforeDeclaration(c);
} else if (this._state === IN_DECLARATION) {
this._stateInDeclaration(c);
} else if (this._state === IN_PROCESSING_INSTRUCTION) {
/*
/*
* processing instructions
*/
else if(this._state === IN_PROCESSING_INSTRUCTION){
this._stateInProcessingInstruction(c);
}
this._stateInProcessingInstruction(c);
} else if (this._state === BEFORE_COMMENT) {
/*
/*
* comments
*/
else if(this._state === BEFORE_COMMENT){
this._stateBeforeComment(c);
} else if(this._state === IN_COMMENT){
this._stateInComment(c);
} else if(this._state === AFTER_COMMENT_1){
this._stateAfterComment1(c);
} else if(this._state === AFTER_COMMENT_2){
this._stateAfterComment2(c);
}
this._stateBeforeComment(c);
} else if (this._state === IN_COMMENT) {
this._stateInComment(c);
} else if (this._state === AFTER_COMMENT_1) {
this._stateAfterComment1(c);
} else if (this._state === AFTER_COMMENT_2) {
this._stateAfterComment2(c);
} else if (this._state === BEFORE_CDATA_1) {
/*
/*
* cdata
*/
else if(this._state === BEFORE_CDATA_1){
this._stateBeforeCdata1(c);
} else if(this._state === BEFORE_CDATA_2){
this._stateBeforeCdata2(c);
} else if(this._state === BEFORE_CDATA_3){
this._stateBeforeCdata3(c);
} else if(this._state === BEFORE_CDATA_4){
this._stateBeforeCdata4(c);
} else if(this._state === BEFORE_CDATA_5){
this._stateBeforeCdata5(c);
} else if(this._state === BEFORE_CDATA_6){
this._stateBeforeCdata6(c);
} else if(this._state === IN_CDATA){
this._stateInCdata(c);
} else if(this._state === AFTER_CDATA_1){
this._stateAfterCdata1(c);
} else if(this._state === AFTER_CDATA_2){
this._stateAfterCdata2(c);
}
this._stateBeforeCdata1(c);
} else if (this._state === BEFORE_CDATA_2) {
this._stateBeforeCdata2(c);
} else if (this._state === BEFORE_CDATA_3) {
this._stateBeforeCdata3(c);
} else if (this._state === BEFORE_CDATA_4) {
this._stateBeforeCdata4(c);
} else if (this._state === BEFORE_CDATA_5) {
this._stateBeforeCdata5(c);
} else if (this._state === BEFORE_CDATA_6) {
this._stateBeforeCdata6(c);
} else if (this._state === IN_CDATA) {
this._stateInCdata(c);
} else if (this._state === AFTER_CDATA_1) {
this._stateAfterCdata1(c);
} else if (this._state === AFTER_CDATA_2) {
this._stateAfterCdata2(c);
} else if (this._state === BEFORE_SPECIAL) {
/*
/*
* special tags
*/
else if(this._state === BEFORE_SPECIAL){
this._stateBeforeSpecial(c);
} else if(this._state === BEFORE_SPECIAL_END){
this._stateBeforeSpecialEnd(c);
}
this._stateBeforeSpecial(c);
} else if (this._state === BEFORE_SPECIAL_END) {
this._stateBeforeSpecialEnd(c);
} else if (this._state === BEFORE_SCRIPT_1) {
/*
/*
* script
*/
else if(this._state === BEFORE_SCRIPT_1){
this._stateBeforeScript1(c);
} else if(this._state === BEFORE_SCRIPT_2){
this._stateBeforeScript2(c);
} else if(this._state === BEFORE_SCRIPT_3){
this._stateBeforeScript3(c);
} else if(this._state === BEFORE_SCRIPT_4){
this._stateBeforeScript4(c);
} else if(this._state === BEFORE_SCRIPT_5){
this._stateBeforeScript5(c);
}
this._stateBeforeScript1(c);
} else if (this._state === BEFORE_SCRIPT_2) {
this._stateBeforeScript2(c);
} else if (this._state === BEFORE_SCRIPT_3) {
this._stateBeforeScript3(c);
} else if (this._state === BEFORE_SCRIPT_4) {
this._stateBeforeScript4(c);
} else if (this._state === BEFORE_SCRIPT_5) {
this._stateBeforeScript5(c);
} else if (this._state === AFTER_SCRIPT_1) {
this._stateAfterScript1(c);
} else if (this._state === AFTER_SCRIPT_2) {
this._stateAfterScript2(c);
} else if (this._state === AFTER_SCRIPT_3) {
this._stateAfterScript3(c);
} else if (this._state === AFTER_SCRIPT_4) {
this._stateAfterScript4(c);
} else if (this._state === AFTER_SCRIPT_5) {
this._stateAfterScript5(c);
} else if (this._state === BEFORE_STYLE_1) {
else if(this._state === AFTER_SCRIPT_1){
this._stateAfterScript1(c);
} else if(this._state === AFTER_SCRIPT_2){
this._stateAfterScript2(c);
} else if(this._state === AFTER_SCRIPT_3){
this._stateAfterScript3(c);
} else if(this._state === AFTER_SCRIPT_4){
this._stateAfterScript4(c);
} else if(this._state === AFTER_SCRIPT_5){
this._stateAfterScript5(c);
}
/*
/*
* style
*/
else if(this._state === BEFORE_STYLE_1){
this._stateBeforeStyle1(c);
} else if(this._state === BEFORE_STYLE_2){
this._stateBeforeStyle2(c);
} else if(this._state === BEFORE_STYLE_3){
this._stateBeforeStyle3(c);
} else if(this._state === BEFORE_STYLE_4){
this._stateBeforeStyle4(c);
}
this._stateBeforeStyle1(c);
} else if (this._state === BEFORE_STYLE_2) {
this._stateBeforeStyle2(c);
} else if (this._state === BEFORE_STYLE_3) {
this._stateBeforeStyle3(c);
} else if (this._state === BEFORE_STYLE_4) {
this._stateBeforeStyle4(c);
} else if (this._state === AFTER_STYLE_1) {
this._stateAfterStyle1(c);
} else if (this._state === AFTER_STYLE_2) {
this._stateAfterStyle2(c);
} else if (this._state === AFTER_STYLE_3) {
this._stateAfterStyle3(c);
} else if (this._state === AFTER_STYLE_4) {
this._stateAfterStyle4(c);
} else if (this._state === BEFORE_ENTITY) {
else if(this._state === AFTER_STYLE_1){
this._stateAfterStyle1(c);
} else if(this._state === AFTER_STYLE_2){
this._stateAfterStyle2(c);
} else if(this._state === AFTER_STYLE_3){
this._stateAfterStyle3(c);
} else if(this._state === AFTER_STYLE_4){
this._stateAfterStyle4(c);
}
/*
/*
* entities
*/
else if(this._state === BEFORE_ENTITY){
this._stateBeforeEntity(c);
} else if(this._state === BEFORE_NUMERIC_ENTITY){
this._stateBeforeNumericEntity(c);
} else if(this._state === IN_NAMED_ENTITY){
this._stateInNamedEntity(c);
} else if(this._state === IN_NUMERIC_ENTITY){
this._stateInNumericEntity(c);
} else if(this._state === IN_HEX_ENTITY){
this._stateInHexEntity(c);
}
this._stateBeforeEntity(c);
} else if (this._state === BEFORE_NUMERIC_ENTITY) {
this._stateBeforeNumericEntity(c);
} else if (this._state === IN_NAMED_ENTITY) {
this._stateInNamedEntity(c);
} else if (this._state === IN_NUMERIC_ENTITY) {
this._stateInNumericEntity(c);
} else if (this._state === IN_HEX_ENTITY) {
this._stateInHexEntity(c);
} else {
this._cbs.onerror(Error("unknown _state"), this._state);
}
else {
this._cbs.onerror(Error("unknown _state"), this._state);
}
this._index++;
}
this._index++;
}
this._cleanup();
this._cleanup();
};
Tokenizer.prototype.pause = function(){
this._running = false;
//Pauses tokenizing by clearing the _running flag.
//The _parse loop only keeps consuming characters while _running is true, and
//end() only calls _finish() while it is true; resume() sets the flag again.
Tokenizer.prototype.pause = function() {
this._running = false;
};
Tokenizer.prototype.resume = function(){
this._running = true;
Tokenizer.prototype.resume = function() {
this._running = true;
if(this._index < this._buffer.length){
this._parse();
}
if(this._ended){
this._finish();
}
if (this._index < this._buffer.length) {
this._parse();
}
if (this._ended) {
this._finish();
}
};
Tokenizer.prototype.end = function(chunk){
if(this._ended) this._cbs.onerror(Error(".end() after done!"));
if(chunk) this.write(chunk);
Tokenizer.prototype.end = function(chunk) {
if (this._ended) this._cbs.onerror(Error(".end() after done!"));
if (chunk) this.write(chunk);
this._ended = true;
this._ended = true;
if(this._running) this._finish();
if (this._running) this._finish();
};
Tokenizer.prototype._finish = function(){
//if there is remaining data, emit it in a reasonable way
if(this._sectionStart < this._index){
this._handleTrailingData();
}
Tokenizer.prototype._finish = function() {
//if there is remaining data, emit it in a reasonable way
if (this._sectionStart < this._index) {
this._handleTrailingData();
}
this._cbs.onend();
this._cbs.onend();
};
Tokenizer.prototype._handleTrailingData = function(){
var data = this._buffer.substr(this._sectionStart);
Tokenizer.prototype._handleTrailingData = function() {
var data = this._buffer.substr(this._sectionStart);
if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
this._cbs.oncdata(data);
} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
this._cbs.oncomment(data);
} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
this._parseLegacyEntity();
if(this._sectionStart < this._index){
this._state = this._baseState;
this._handleTrailingData();
}
} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
this._decodeNumericEntity(2, 10);
if(this._sectionStart < this._index){
this._state = this._baseState;
this._handleTrailingData();
}
} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
this._decodeNumericEntity(3, 16);
if(this._sectionStart < this._index){
this._state = this._baseState;
this._handleTrailingData();
}
} else if(
this._state !== IN_TAG_NAME &&
this._state !== BEFORE_ATTRIBUTE_NAME &&
this._state !== BEFORE_ATTRIBUTE_VALUE &&
this._state !== AFTER_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_VALUE_SQ &&
this._state !== IN_ATTRIBUTE_VALUE_DQ &&
this._state !== IN_ATTRIBUTE_VALUE_NQ &&
this._state !== IN_CLOSING_TAG_NAME
){
this._cbs.ontext(data);
}
//else, ignore remaining data
//TODO add a way to remove current tag
if (
this._state === IN_CDATA ||
this._state === AFTER_CDATA_1 ||
this._state === AFTER_CDATA_2
) {
this._cbs.oncdata(data);
} else if (
this._state === IN_COMMENT ||
this._state === AFTER_COMMENT_1 ||
this._state === AFTER_COMMENT_2
) {
this._cbs.oncomment(data);
} else if (this._state === IN_NAMED_ENTITY && !this._xmlMode) {
this._parseLegacyEntity();
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
}
} else if (this._state === IN_NUMERIC_ENTITY && !this._xmlMode) {
this._decodeNumericEntity(2, 10);
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
}
} else if (this._state === IN_HEX_ENTITY && !this._xmlMode) {
this._decodeNumericEntity(3, 16);
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
}
} else if (
this._state !== IN_TAG_NAME &&
this._state !== BEFORE_ATTRIBUTE_NAME &&
this._state !== BEFORE_ATTRIBUTE_VALUE &&
this._state !== AFTER_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_VALUE_SQ &&
this._state !== IN_ATTRIBUTE_VALUE_DQ &&
this._state !== IN_ATTRIBUTE_VALUE_NQ &&
this._state !== IN_CLOSING_TAG_NAME
) {
this._cbs.ontext(data);
}
//else, ignore remaining data
//TODO add a way to remove current tag
};
Tokenizer.prototype.reset = function(){
Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs);
//Re-runs the Tokenizer constructor on this instance, handing it the current
//xmlMode / decodeEntities settings and the same callbacks object, so any
//state the constructor initialises is set up afresh.
Tokenizer.prototype.reset = function() {
    var options = {
        xmlMode: this._xmlMode,
        decodeEntities: this._decodeEntities
    };
    Tokenizer.call(this, options, this._cbs);
};
Tokenizer.prototype.getAbsoluteIndex = function(){
return this._bufferOffset + this._index;
//Returns _bufferOffset + _index: the position inside the current buffer,
//shifted by the amount _cleanup has added to _bufferOffset when it trimmed
//characters off the front of the buffer.
Tokenizer.prototype.getAbsoluteIndex = function() {
    var trimmedBefore = this._bufferOffset;
    var positionInBuffer = this._index;
    return trimmedBefore + positionInBuffer;
};
Tokenizer.prototype._getSection = function(){
return this._buffer.substring(this._sectionStart, this._index);
//Returns the part of the buffer between _sectionStart and _index.
Tokenizer.prototype._getSection = function() {
    var from = this._sectionStart;
    var to = this._index;
    //substring (not slice) on purpose: _emitToken leaves _sectionStart at -1,
    //and substring treats a negative bound as 0.
    return this._buffer.substring(from, to);
};
Tokenizer.prototype._emitToken = function(name){
this._cbs[name](this._getSection());
this._sectionStart = -1;
//Hands the current section (as returned by _getSection) to the callback
//stored under `name` on the callbacks object, then sets _sectionStart to -1.
Tokenizer.prototype._emitToken = function(name) {
    var handlers = this._cbs;
    var section = this._getSection();
    //invoke as a method of the callbacks object so `this` inside it is unchanged
    handlers[name](section);
    this._sectionStart = -1;
};
Tokenizer.prototype._emitPartial = function(value){
if(this._baseState !== TEXT){
this._cbs.onattribdata(value); //TODO implement the new event
} else {
this._cbs.ontext(value);
}
//Forwards `value` to one of two callbacks depending on _baseState:
//ontext when the base state is TEXT, onattribdata for any other base state.
Tokenizer.prototype._emitPartial = function(value) {
    if (this._baseState === TEXT) {
        this._cbs.ontext(value);
        return;
    }
    this._cbs.onattribdata(value); //TODO implement the new event
};

@@ -8,11 +8,11 @@ module.exports = Stream;

function Stream(cbs, options){
var parser = this._parser = new Parser(cbs, options);
var decoder = this._decoder = new StringDecoder();
function Stream(cbs, options) {
var parser = (this._parser = new Parser(cbs, options));
var decoder = (this._decoder = new StringDecoder());
WritableStream.call(this, {decodeStrings: false});
WritableStream.call(this, { decodeStrings: false });
this.once("finish", function(){
parser.end(decoder.end());
});
this.once("finish", function() {
parser.end(decoder.end());
});
}

@@ -22,6 +22,6 @@

WritableStream.prototype._write = function(chunk, encoding, cb){
if(chunk instanceof Buffer) chunk = this._decoder.write(chunk);
this._parser.write(chunk);
cb();
//Write hook for Stream: feeds each incoming chunk to the parser.
//Buffer chunks are first run through this._decoder (the StringDecoder created
//in the Stream constructor); anything else is passed on as-is. `encoding` is
//not used. `cb` is called once the chunk has been given to the parser.
Stream.prototype._write = function(chunk, encoding, cb) {
    var text = chunk instanceof Buffer ? this._decoder.write(chunk) : chunk;
    this._parser.write(text);
    cb();
};
{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML/RSS parser",
"version": "3.10.0",
"author": "Felix Boehm <me@feedic.com>",
"keywords": [
"html",
"parser",
"streams",
"xml",
"dom",
"rss",
"feed",
"atom"
],
"repository": {
"type": "git",
"url": "git://github.com/fb55/htmlparser2.git"
},
"bugs": {
"mail": "me@feedic.com",
"url": "http://github.com/fb55/htmlparser2/issues"
},
"directories": {
"lib": "lib/"
},
"main": "lib/index.js",
"files": [
"lib"
],
"scripts": {
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
"test": "mocha && npm run lint",
"lint": "eslint lib test"
},
"dependencies": {
"domelementtype": "^1.3.0",
"domhandler": "^2.3.0",
"domutils": "^1.5.1",
"entities": "^1.1.1",
"inherits": "^2.0.1",
"readable-stream": "^3.0.6"
},
"devDependencies": {
"coveralls": "^3.0.1",
"eslint": "^4.19.1",
"istanbul": "^0.4.3",
"mocha": "^5.2.0",
"mocha-lcov-reporter": "^1.2.0"
},
"browser": {
"readable-stream": false
},
"license": "MIT"
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML/RSS parser",
"version": "3.10.1",
"author": "Felix Boehm <me@feedic.com>",
"keywords": [
"html",
"parser",
"streams",
"xml",
"dom",
"rss",
"feed",
"atom"
],
"repository": {
"type": "git",
"url": "git://github.com/fb55/htmlparser2.git"
},
"bugs": {
"mail": "me@feedic.com",
"url": "http://github.com/fb55/htmlparser2/issues"
},
"directories": {
"lib": "lib/"
},
"main": "lib/index.js",
"files": [
"lib"
],
"scripts": {
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
"test": "mocha && npm run lint",
"lint": "eslint lib test"
},
"dependencies": {
"domelementtype": "^1.3.1",
"domhandler": "^2.3.0",
"domutils": "^1.5.1",
"entities": "^1.1.1",
"inherits": "^2.0.1",
"readable-stream": "^3.1.1"
},
"devDependencies": {
"coveralls": "^3.0.1",
"eslint": "^5.13.0",
"istanbul": "^0.4.3",
"mocha": "^5.2.0",
"mocha-lcov-reporter": "^1.2.0"
},
"browser": {
"readable-stream": false
},
"license": "MIT",
"prettier": {
"tabWidth": 4
}
}
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc