htmlparser2
Advanced tools
Comparing version 3.10.0 to 3.10.1
module.exports = CollectingHandler; | ||
function CollectingHandler(cbs){ | ||
this._cbs = cbs || {}; | ||
this.events = []; | ||
function CollectingHandler(cbs) { | ||
this._cbs = cbs || {}; | ||
this.events = []; | ||
} | ||
var EVENTS = require("./").EVENTS; | ||
Object.keys(EVENTS).forEach(function(name){ | ||
if(EVENTS[name] === 0){ | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(){ | ||
this.events.push([name]); | ||
if(this._cbs[name]) this._cbs[name](); | ||
}; | ||
} else if(EVENTS[name] === 1){ | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a){ | ||
this.events.push([name, a]); | ||
if(this._cbs[name]) this._cbs[name](a); | ||
}; | ||
} else if(EVENTS[name] === 2){ | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a, b){ | ||
this.events.push([name, a, b]); | ||
if(this._cbs[name]) this._cbs[name](a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments"); | ||
} | ||
Object.keys(EVENTS).forEach(function(name) { | ||
if (EVENTS[name] === 0) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function() { | ||
this.events.push([name]); | ||
if (this._cbs[name]) this._cbs[name](); | ||
}; | ||
} else if (EVENTS[name] === 1) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a) { | ||
this.events.push([name, a]); | ||
if (this._cbs[name]) this._cbs[name](a); | ||
}; | ||
} else if (EVENTS[name] === 2) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a, b) { | ||
this.events.push([name, a, b]); | ||
if (this._cbs[name]) this._cbs[name](a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments"); | ||
} | ||
}); | ||
CollectingHandler.prototype.onreset = function(){ | ||
this.events = []; | ||
if(this._cbs.onreset) this._cbs.onreset(); | ||
CollectingHandler.prototype.onreset = function() { | ||
this.events = []; | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
}; | ||
CollectingHandler.prototype.restart = function(){ | ||
if(this._cbs.onreset) this._cbs.onreset(); | ||
CollectingHandler.prototype.restart = function() { | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
for(var i = 0, len = this.events.length; i < len; i++){ | ||
if(this._cbs[this.events[i][0]]){ | ||
for (var i = 0, len = this.events.length; i < len; i++) { | ||
if (this._cbs[this.events[i][0]]) { | ||
var num = this.events[i].length; | ||
var num = this.events[i].length; | ||
if(num === 1){ | ||
this._cbs[this.events[i][0]](); | ||
} else if(num === 2){ | ||
this._cbs[this.events[i][0]](this.events[i][1]); | ||
} else { | ||
this._cbs[this.events[i][0]](this.events[i][1], this.events[i][2]); | ||
} | ||
} | ||
} | ||
if (num === 1) { | ||
this._cbs[this.events[i][0]](); | ||
} else if (num === 2) { | ||
this._cbs[this.events[i][0]](this.events[i][1]); | ||
} else { | ||
this._cbs[this.events[i][0]]( | ||
this.events[i][1], | ||
this.events[i][2] | ||
); | ||
} | ||
} | ||
} | ||
}; |
@@ -1,8 +0,7 @@ | ||
var index = require("./index.js"); | ||
var DomHandler = index.DomHandler; | ||
var DomUtils = index.DomUtils; | ||
var DomHandler = require("domhandler"); | ||
var DomUtils = require("domutils"); | ||
//TODO: make this a streamable handler | ||
function FeedHandler(callback, options){ | ||
this.init(callback, options); | ||
function FeedHandler(callback, options) { | ||
this.init(callback, options); | ||
} | ||
@@ -14,83 +13,103 @@ | ||
function getElements(what, where){ | ||
return DomUtils.getElementsByTagName(what, where, true); | ||
function getElements(what, where) { | ||
return DomUtils.getElementsByTagName(what, where, true); | ||
} | ||
function getOneElement(what, where){ | ||
return DomUtils.getElementsByTagName(what, where, true, 1)[0]; | ||
function getOneElement(what, where) { | ||
return DomUtils.getElementsByTagName(what, where, true, 1)[0]; | ||
} | ||
function fetch(what, where, recurse){ | ||
return DomUtils.getText( | ||
DomUtils.getElementsByTagName(what, where, recurse, 1) | ||
).trim(); | ||
function fetch(what, where, recurse) { | ||
return DomUtils.getText( | ||
DomUtils.getElementsByTagName(what, where, recurse, 1) | ||
).trim(); | ||
} | ||
function addConditionally(obj, prop, what, where, recurse){ | ||
var tmp = fetch(what, where, recurse); | ||
if(tmp) obj[prop] = tmp; | ||
function addConditionally(obj, prop, what, where, recurse) { | ||
var tmp = fetch(what, where, recurse); | ||
if (tmp) obj[prop] = tmp; | ||
} | ||
var isValidFeed = function(value){ | ||
return value === "rss" || value === "feed" || value === "rdf:RDF"; | ||
var isValidFeed = function(value) { | ||
return value === "rss" || value === "feed" || value === "rdf:RDF"; | ||
}; | ||
FeedHandler.prototype.onend = function(){ | ||
var feed = {}, | ||
feedRoot = getOneElement(isValidFeed, this.dom), | ||
tmp, childs; | ||
FeedHandler.prototype.onend = function() { | ||
var feed = {}, | ||
feedRoot = getOneElement(isValidFeed, this.dom), | ||
tmp, | ||
childs; | ||
if(feedRoot){ | ||
if(feedRoot.name === "feed"){ | ||
childs = feedRoot.children; | ||
if (feedRoot) { | ||
if (feedRoot.name === "feed") { | ||
childs = feedRoot.children; | ||
feed.type = "atom"; | ||
addConditionally(feed, "id", "id", childs); | ||
addConditionally(feed, "title", "title", childs); | ||
if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp; | ||
addConditionally(feed, "description", "subtitle", childs); | ||
if((tmp = fetch("updated", childs))) feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "email", childs, true); | ||
feed.type = "atom"; | ||
addConditionally(feed, "id", "id", childs); | ||
addConditionally(feed, "title", "title", childs); | ||
if ( | ||
(tmp = getOneElement("link", childs)) && | ||
(tmp = tmp.attribs) && | ||
(tmp = tmp.href) | ||
) | ||
feed.link = tmp; | ||
addConditionally(feed, "description", "subtitle", childs); | ||
if ((tmp = fetch("updated", childs))) feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "email", childs, true); | ||
feed.items = getElements("entry", childs).map(function(item){ | ||
var entry = {}, tmp; | ||
feed.items = getElements("entry", childs).map(function(item) { | ||
var entry = {}, | ||
tmp; | ||
item = item.children; | ||
item = item.children; | ||
addConditionally(entry, "id", "id", item); | ||
addConditionally(entry, "title", "title", item); | ||
if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp; | ||
if((tmp = fetch("summary", item) || fetch("content", item))) entry.description = tmp; | ||
if((tmp = fetch("updated", item))) entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} else { | ||
childs = getOneElement("channel", feedRoot.children).children; | ||
addConditionally(entry, "id", "id", item); | ||
addConditionally(entry, "title", "title", item); | ||
if ( | ||
(tmp = getOneElement("link", item)) && | ||
(tmp = tmp.attribs) && | ||
(tmp = tmp.href) | ||
) | ||
entry.link = tmp; | ||
if ((tmp = fetch("summary", item) || fetch("content", item))) | ||
entry.description = tmp; | ||
if ((tmp = fetch("updated", item))) | ||
entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} else { | ||
childs = getOneElement("channel", feedRoot.children).children; | ||
feed.type = feedRoot.name.substr(0, 3); | ||
feed.id = ""; | ||
addConditionally(feed, "title", "title", childs); | ||
addConditionally(feed, "link", "link", childs); | ||
addConditionally(feed, "description", "description", childs); | ||
if((tmp = fetch("lastBuildDate", childs))) feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "managingEditor", childs, true); | ||
feed.type = feedRoot.name.substr(0, 3); | ||
feed.id = ""; | ||
addConditionally(feed, "title", "title", childs); | ||
addConditionally(feed, "link", "link", childs); | ||
addConditionally(feed, "description", "description", childs); | ||
if ((tmp = fetch("lastBuildDate", childs))) | ||
feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "managingEditor", childs, true); | ||
feed.items = getElements("item", feedRoot.children).map(function(item){ | ||
var entry = {}, tmp; | ||
feed.items = getElements("item", feedRoot.children).map(function( | ||
item | ||
) { | ||
var entry = {}, | ||
tmp; | ||
item = item.children; | ||
item = item.children; | ||
addConditionally(entry, "id", "guid", item); | ||
addConditionally(entry, "title", "title", item); | ||
addConditionally(entry, "link", "link", item); | ||
addConditionally(entry, "description", "description", item); | ||
if((tmp = fetch("pubDate", item))) entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} | ||
} | ||
this.dom = feed; | ||
DomHandler.prototype._handleCallback.call( | ||
this, feedRoot ? null : Error("couldn't find root of feed") | ||
); | ||
addConditionally(entry, "id", "guid", item); | ||
addConditionally(entry, "title", "title", item); | ||
addConditionally(entry, "link", "link", item); | ||
addConditionally(entry, "description", "description", item); | ||
if ((tmp = fetch("pubDate", item))) | ||
entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} | ||
} | ||
this.dom = feed; | ||
DomHandler.prototype._handleCallback.call( | ||
this, | ||
feedRoot ? null : Error("couldn't find root of feed") | ||
); | ||
}; | ||
module.exports = FeedHandler; |
126
lib/index.js
var Parser = require("./Parser.js"); | ||
var DomHandler = require("domhandler"); | ||
function defineProp(name, value){ | ||
delete module.exports[name]; | ||
module.exports[name] = value; | ||
return value; | ||
function defineProp(name, value) { | ||
delete module.exports[name]; | ||
module.exports[name] = value; | ||
return value; | ||
} | ||
module.exports = { | ||
Parser: Parser, | ||
Tokenizer: require("./Tokenizer.js"), | ||
ElementType: require("domelementtype"), | ||
DomHandler: DomHandler, | ||
get FeedHandler(){ | ||
return defineProp("FeedHandler", require("./FeedHandler.js")); | ||
}, | ||
get Stream(){ | ||
return defineProp("Stream", require("./Stream.js")); | ||
}, | ||
get WritableStream(){ | ||
return defineProp("WritableStream", require("./WritableStream.js")); | ||
}, | ||
get ProxyHandler(){ | ||
return defineProp("ProxyHandler", require("./ProxyHandler.js")); | ||
}, | ||
get DomUtils(){ | ||
return defineProp("DomUtils", require("domutils")); | ||
}, | ||
get CollectingHandler(){ | ||
return defineProp("CollectingHandler", require("./CollectingHandler.js")); | ||
}, | ||
// For legacy support | ||
DefaultHandler: DomHandler, | ||
get RssHandler(){ | ||
return defineProp("RssHandler", this.FeedHandler); | ||
}, | ||
//helper methods | ||
parseDOM: function(data, options){ | ||
var handler = new DomHandler(options); | ||
new Parser(handler, options).end(data); | ||
return handler.dom; | ||
}, | ||
parseFeed: function(feed, options){ | ||
var handler = new module.exports.FeedHandler(options); | ||
new Parser(handler, options).end(feed); | ||
return handler.dom; | ||
}, | ||
createDomStream: function(cb, options, elementCb){ | ||
var handler = new DomHandler(cb, options, elementCb); | ||
return new Parser(handler, options); | ||
}, | ||
// List of all events that the parser emits | ||
EVENTS: { /* Format: eventname: number of arguments */ | ||
attribute: 2, | ||
cdatastart: 0, | ||
cdataend: 0, | ||
text: 1, | ||
processinginstruction: 2, | ||
comment: 1, | ||
commentend: 0, | ||
closetag: 1, | ||
opentag: 2, | ||
opentagname: 1, | ||
error: 1, | ||
end: 0 | ||
} | ||
Parser: Parser, | ||
Tokenizer: require("./Tokenizer.js"), | ||
ElementType: require("domelementtype"), | ||
DomHandler: DomHandler, | ||
get FeedHandler() { | ||
return defineProp("FeedHandler", require("./FeedHandler.js")); | ||
}, | ||
get Stream() { | ||
return defineProp("Stream", require("./Stream.js")); | ||
}, | ||
get WritableStream() { | ||
return defineProp("WritableStream", require("./WritableStream.js")); | ||
}, | ||
get ProxyHandler() { | ||
return defineProp("ProxyHandler", require("./ProxyHandler.js")); | ||
}, | ||
get DomUtils() { | ||
return defineProp("DomUtils", require("domutils")); | ||
}, | ||
get CollectingHandler() { | ||
return defineProp( | ||
"CollectingHandler", | ||
require("./CollectingHandler.js") | ||
); | ||
}, | ||
// For legacy support | ||
DefaultHandler: DomHandler, | ||
get RssHandler() { | ||
return defineProp("RssHandler", this.FeedHandler); | ||
}, | ||
//helper methods | ||
parseDOM: function(data, options) { | ||
var handler = new DomHandler(options); | ||
new Parser(handler, options).end(data); | ||
return handler.dom; | ||
}, | ||
parseFeed: function(feed, options) { | ||
var handler = new module.exports.FeedHandler(options); | ||
new Parser(handler, options).end(feed); | ||
return handler.dom; | ||
}, | ||
createDomStream: function(cb, options, elementCb) { | ||
var handler = new DomHandler(cb, options, elementCb); | ||
return new Parser(handler, options); | ||
}, | ||
// List of all events that the parser emits | ||
EVENTS: { | ||
/* Format: eventname: number of arguments */ | ||
attribute: 2, | ||
cdatastart: 0, | ||
cdataend: 0, | ||
text: 1, | ||
processinginstruction: 2, | ||
comment: 1, | ||
commentend: 0, | ||
closetag: 1, | ||
opentag: 2, | ||
opentagname: 1, | ||
error: 1, | ||
end: 0 | ||
} | ||
}; |
@@ -27,104 +27,106 @@ var Tokenizer = require("./Tokenizer.js"); | ||
var formTags = { | ||
input: true, | ||
option: true, | ||
optgroup: true, | ||
select: true, | ||
button: true, | ||
datalist: true, | ||
textarea: true | ||
input: true, | ||
option: true, | ||
optgroup: true, | ||
select: true, | ||
button: true, | ||
datalist: true, | ||
textarea: true | ||
}; | ||
var openImpliesClose = { | ||
tr : { tr:true, th:true, td:true }, | ||
th : { th:true }, | ||
td : { thead:true, th:true, td:true }, | ||
body : { head:true, link:true, script:true }, | ||
li : { li:true }, | ||
p : { p:true }, | ||
h1 : { p:true }, | ||
h2 : { p:true }, | ||
h3 : { p:true }, | ||
h4 : { p:true }, | ||
h5 : { p:true }, | ||
h6 : { p:true }, | ||
select : formTags, | ||
input : formTags, | ||
output : formTags, | ||
button : formTags, | ||
datalist: formTags, | ||
textarea: formTags, | ||
option : { option:true }, | ||
optgroup: { optgroup:true } | ||
tr: { tr: true, th: true, td: true }, | ||
th: { th: true }, | ||
td: { thead: true, th: true, td: true }, | ||
body: { head: true, link: true, script: true }, | ||
li: { li: true }, | ||
p: { p: true }, | ||
h1: { p: true }, | ||
h2: { p: true }, | ||
h3: { p: true }, | ||
h4: { p: true }, | ||
h5: { p: true }, | ||
h6: { p: true }, | ||
select: formTags, | ||
input: formTags, | ||
output: formTags, | ||
button: formTags, | ||
datalist: formTags, | ||
textarea: formTags, | ||
option: { option: true }, | ||
optgroup: { optgroup: true } | ||
}; | ||
var voidElements = { | ||
__proto__: null, | ||
area: true, | ||
base: true, | ||
basefont: true, | ||
br: true, | ||
col: true, | ||
command: true, | ||
embed: true, | ||
frame: true, | ||
hr: true, | ||
img: true, | ||
input: true, | ||
isindex: true, | ||
keygen: true, | ||
link: true, | ||
meta: true, | ||
param: true, | ||
source: true, | ||
track: true, | ||
wbr: true, | ||
__proto__: null, | ||
area: true, | ||
base: true, | ||
basefont: true, | ||
br: true, | ||
col: true, | ||
command: true, | ||
embed: true, | ||
frame: true, | ||
hr: true, | ||
img: true, | ||
input: true, | ||
isindex: true, | ||
keygen: true, | ||
link: true, | ||
meta: true, | ||
param: true, | ||
source: true, | ||
track: true, | ||
wbr: true | ||
}; | ||
var foreignContextElements = { | ||
__proto__: null, | ||
math: true, | ||
svg: true | ||
} | ||
__proto__: null, | ||
math: true, | ||
svg: true | ||
}; | ||
var htmlIntegrationElements = { | ||
__proto__: null, | ||
mi: true, | ||
mo: true, | ||
mn: true, | ||
ms: true, | ||
mtext: true, | ||
"annotation-xml": true, | ||
foreignObject: true, | ||
desc: true, | ||
title: true | ||
} | ||
__proto__: null, | ||
mi: true, | ||
mo: true, | ||
mn: true, | ||
ms: true, | ||
mtext: true, | ||
"annotation-xml": true, | ||
foreignObject: true, | ||
desc: true, | ||
title: true | ||
}; | ||
var re_nameEnd = /\s|\//; | ||
function Parser(cbs, options){ | ||
this._options = options || {}; | ||
this._cbs = cbs || {}; | ||
function Parser(cbs, options) { | ||
this._options = options || {}; | ||
this._cbs = cbs || {}; | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
this._foreignContext = []; | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
this._foreignContext = []; | ||
this.startIndex = 0; | ||
this.endIndex = null; | ||
this.startIndex = 0; | ||
this.endIndex = null; | ||
this._lowerCaseTagNames = "lowerCaseTags" in this._options ? | ||
!!this._options.lowerCaseTags : | ||
!this._options.xmlMode; | ||
this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ? | ||
!!this._options.lowerCaseAttributeNames : | ||
!this._options.xmlMode; | ||
this._lowerCaseTagNames = | ||
"lowerCaseTags" in this._options | ||
? !!this._options.lowerCaseTags | ||
: !this._options.xmlMode; | ||
this._lowerCaseAttributeNames = | ||
"lowerCaseAttributeNames" in this._options | ||
? !!this._options.lowerCaseAttributeNames | ||
: !this._options.xmlMode; | ||
if(this._options.Tokenizer) { | ||
Tokenizer = this._options.Tokenizer; | ||
} | ||
this._tokenizer = new Tokenizer(this._options, this); | ||
if (this._options.Tokenizer) { | ||
Tokenizer = this._options.Tokenizer; | ||
} | ||
this._tokenizer = new Tokenizer(this._options, this); | ||
if(this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
if (this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
} | ||
@@ -134,230 +136,243 @@ | ||
Parser.prototype._updatePosition = function(initialOffset){ | ||
if(this.endIndex === null){ | ||
if(this._tokenizer._sectionStart <= initialOffset){ | ||
this.startIndex = 0; | ||
} else { | ||
this.startIndex = this._tokenizer._sectionStart - initialOffset; | ||
} | ||
} | ||
else this.startIndex = this.endIndex + 1; | ||
this.endIndex = this._tokenizer.getAbsoluteIndex(); | ||
Parser.prototype._updatePosition = function(initialOffset) { | ||
if (this.endIndex === null) { | ||
if (this._tokenizer._sectionStart <= initialOffset) { | ||
this.startIndex = 0; | ||
} else { | ||
this.startIndex = this._tokenizer._sectionStart - initialOffset; | ||
} | ||
} else this.startIndex = this.endIndex + 1; | ||
this.endIndex = this._tokenizer.getAbsoluteIndex(); | ||
}; | ||
//Tokenizer event handlers | ||
Parser.prototype.ontext = function(data){ | ||
this._updatePosition(1); | ||
this.endIndex--; | ||
Parser.prototype.ontext = function(data) { | ||
this._updatePosition(1); | ||
this.endIndex--; | ||
if(this._cbs.ontext) this._cbs.ontext(data); | ||
if (this._cbs.ontext) this._cbs.ontext(data); | ||
}; | ||
Parser.prototype.onopentagname = function(name){ | ||
if(this._lowerCaseTagNames){ | ||
name = name.toLowerCase(); | ||
} | ||
Parser.prototype.onopentagname = function(name) { | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._tagname = name; | ||
this._tagname = name; | ||
if(!this._options.xmlMode && name in openImpliesClose) { | ||
for( | ||
var el; | ||
(el = this._stack[this._stack.length - 1]) in openImpliesClose[name]; | ||
this.onclosetag(el) | ||
); | ||
} | ||
if (!this._options.xmlMode && name in openImpliesClose) { | ||
for ( | ||
var el; | ||
(el = this._stack[this._stack.length - 1]) in | ||
openImpliesClose[name]; | ||
this.onclosetag(el) | ||
); | ||
} | ||
if(this._options.xmlMode || !(name in voidElements)){ | ||
this._stack.push(name); | ||
if(name in foreignContextElements) this._foreignContext.push(true); | ||
else if(name in htmlIntegrationElements) this._foreignContext.push(false); | ||
} | ||
if (this._options.xmlMode || !(name in voidElements)) { | ||
this._stack.push(name); | ||
if (name in foreignContextElements) this._foreignContext.push(true); | ||
else if (name in htmlIntegrationElements) | ||
this._foreignContext.push(false); | ||
} | ||
if(this._cbs.onopentagname) this._cbs.onopentagname(name); | ||
if(this._cbs.onopentag) this._attribs = {}; | ||
if (this._cbs.onopentagname) this._cbs.onopentagname(name); | ||
if (this._cbs.onopentag) this._attribs = {}; | ||
}; | ||
Parser.prototype.onopentagend = function(){ | ||
this._updatePosition(1); | ||
Parser.prototype.onopentagend = function() { | ||
this._updatePosition(1); | ||
if(this._attribs){ | ||
if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs); | ||
this._attribs = null; | ||
} | ||
if (this._attribs) { | ||
if (this._cbs.onopentag) | ||
this._cbs.onopentag(this._tagname, this._attribs); | ||
this._attribs = null; | ||
} | ||
if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){ | ||
this._cbs.onclosetag(this._tagname); | ||
} | ||
if ( | ||
!this._options.xmlMode && | ||
this._cbs.onclosetag && | ||
this._tagname in voidElements | ||
) { | ||
this._cbs.onclosetag(this._tagname); | ||
} | ||
this._tagname = ""; | ||
this._tagname = ""; | ||
}; | ||
Parser.prototype.onclosetag = function(name){ | ||
this._updatePosition(1); | ||
Parser.prototype.onclosetag = function(name) { | ||
this._updatePosition(1); | ||
if(this._lowerCaseTagNames){ | ||
name = name.toLowerCase(); | ||
} | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
if (name in foreignContextElements || name in htmlIntegrationElements) { | ||
this._foreignContext.pop(); | ||
} | ||
if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){ | ||
var pos = this._stack.lastIndexOf(name); | ||
if(pos !== -1){ | ||
if(this._cbs.onclosetag){ | ||
pos = this._stack.length - pos; | ||
while(pos--) this._cbs.onclosetag(this._stack.pop()); | ||
} | ||
else this._stack.length = pos; | ||
} else if(name === "p" && !this._options.xmlMode){ | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
} else if(!this._options.xmlMode && (name === "br" || name === "p")){ | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
if ( | ||
this._stack.length && | ||
(!(name in voidElements) || this._options.xmlMode) | ||
) { | ||
var pos = this._stack.lastIndexOf(name); | ||
if (pos !== -1) { | ||
if (this._cbs.onclosetag) { | ||
pos = this._stack.length - pos; | ||
while (pos--) this._cbs.onclosetag(this._stack.pop()); | ||
} else this._stack.length = pos; | ||
} else if (name === "p" && !this._options.xmlMode) { | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
} else if (!this._options.xmlMode && (name === "br" || name === "p")) { | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
}; | ||
Parser.prototype.onselfclosingtag = function(){ | ||
if(this._options.xmlMode || this._options.recognizeSelfClosing | ||
|| this._foreignContext[this._foreignContext.length - 1]){ | ||
this._closeCurrentTag(); | ||
} else { | ||
this.onopentagend(); | ||
} | ||
Parser.prototype.onselfclosingtag = function() { | ||
if ( | ||
this._options.xmlMode || | ||
this._options.recognizeSelfClosing || | ||
this._foreignContext[this._foreignContext.length - 1] | ||
) { | ||
this._closeCurrentTag(); | ||
} else { | ||
this.onopentagend(); | ||
} | ||
}; | ||
Parser.prototype._closeCurrentTag = function(){ | ||
var name = this._tagname; | ||
Parser.prototype._closeCurrentTag = function() { | ||
var name = this._tagname; | ||
this.onopentagend(); | ||
this.onopentagend(); | ||
//self-closing tags will be on the top of the stack | ||
//(cheaper check than in onclosetag) | ||
if(this._stack[this._stack.length - 1] === name){ | ||
if(this._cbs.onclosetag){ | ||
this._cbs.onclosetag(name); | ||
} | ||
this._stack.pop(); | ||
if((name in foreignContextElements) || (name in htmlIntegrationElements)){ | ||
this._foreignContext.pop(); | ||
} | ||
} | ||
//self-closing tags will be on the top of the stack | ||
//(cheaper check than in onclosetag) | ||
if (this._stack[this._stack.length - 1] === name) { | ||
if (this._cbs.onclosetag) { | ||
this._cbs.onclosetag(name); | ||
} | ||
this._stack.pop(); | ||
} | ||
}; | ||
Parser.prototype.onattribname = function(name){ | ||
if(this._lowerCaseAttributeNames){ | ||
name = name.toLowerCase(); | ||
} | ||
this._attribname = name; | ||
Parser.prototype.onattribname = function(name) { | ||
if (this._lowerCaseAttributeNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._attribname = name; | ||
}; | ||
Parser.prototype.onattribdata = function(value){ | ||
this._attribvalue += value; | ||
Parser.prototype.onattribdata = function(value) { | ||
this._attribvalue += value; | ||
}; | ||
Parser.prototype.onattribend = function(){ | ||
if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue); | ||
if( | ||
this._attribs && | ||
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname) | ||
){ | ||
this._attribs[this._attribname] = this._attribvalue; | ||
} | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
Parser.prototype.onattribend = function() { | ||
if (this._cbs.onattribute) | ||
this._cbs.onattribute(this._attribname, this._attribvalue); | ||
if ( | ||
this._attribs && | ||
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname) | ||
) { | ||
this._attribs[this._attribname] = this._attribvalue; | ||
} | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
}; | ||
Parser.prototype._getInstructionName = function(value){ | ||
var idx = value.search(re_nameEnd), | ||
name = idx < 0 ? value : value.substr(0, idx); | ||
Parser.prototype._getInstructionName = function(value) { | ||
var idx = value.search(re_nameEnd), | ||
name = idx < 0 ? value : value.substr(0, idx); | ||
if(this._lowerCaseTagNames){ | ||
name = name.toLowerCase(); | ||
} | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
return name; | ||
return name; | ||
}; | ||
Parser.prototype.ondeclaration = function(value){ | ||
if(this._cbs.onprocessinginstruction){ | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("!" + name, "!" + value); | ||
} | ||
Parser.prototype.ondeclaration = function(value) { | ||
if (this._cbs.onprocessinginstruction) { | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("!" + name, "!" + value); | ||
} | ||
}; | ||
Parser.prototype.onprocessinginstruction = function(value){ | ||
if(this._cbs.onprocessinginstruction){ | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("?" + name, "?" + value); | ||
} | ||
Parser.prototype.onprocessinginstruction = function(value) { | ||
if (this._cbs.onprocessinginstruction) { | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("?" + name, "?" + value); | ||
} | ||
}; | ||
Parser.prototype.oncomment = function(value){ | ||
this._updatePosition(4); | ||
Parser.prototype.oncomment = function(value) { | ||
this._updatePosition(4); | ||
if(this._cbs.oncomment) this._cbs.oncomment(value); | ||
if(this._cbs.oncommentend) this._cbs.oncommentend(); | ||
if (this._cbs.oncomment) this._cbs.oncomment(value); | ||
if (this._cbs.oncommentend) this._cbs.oncommentend(); | ||
}; | ||
Parser.prototype.oncdata = function(value){ | ||
this._updatePosition(1); | ||
Parser.prototype.oncdata = function(value) { | ||
this._updatePosition(1); | ||
if(this._options.xmlMode || this._options.recognizeCDATA){ | ||
if(this._cbs.oncdatastart) this._cbs.oncdatastart(); | ||
if(this._cbs.ontext) this._cbs.ontext(value); | ||
if(this._cbs.oncdataend) this._cbs.oncdataend(); | ||
} else { | ||
this.oncomment("[CDATA[" + value + "]]"); | ||
} | ||
if (this._options.xmlMode || this._options.recognizeCDATA) { | ||
if (this._cbs.oncdatastart) this._cbs.oncdatastart(); | ||
if (this._cbs.ontext) this._cbs.ontext(value); | ||
if (this._cbs.oncdataend) this._cbs.oncdataend(); | ||
} else { | ||
this.oncomment("[CDATA[" + value + "]]"); | ||
} | ||
}; | ||
Parser.prototype.onerror = function(err){ | ||
if(this._cbs.onerror) this._cbs.onerror(err); | ||
Parser.prototype.onerror = function(err) { | ||
if (this._cbs.onerror) this._cbs.onerror(err); | ||
}; | ||
Parser.prototype.onend = function(){ | ||
if(this._cbs.onclosetag){ | ||
for( | ||
var i = this._stack.length; | ||
i > 0; | ||
this._cbs.onclosetag(this._stack[--i]) | ||
); | ||
} | ||
if(this._cbs.onend) this._cbs.onend(); | ||
Parser.prototype.onend = function() { | ||
if (this._cbs.onclosetag) { | ||
for ( | ||
var i = this._stack.length; | ||
i > 0; | ||
this._cbs.onclosetag(this._stack[--i]) | ||
); | ||
} | ||
if (this._cbs.onend) this._cbs.onend(); | ||
}; | ||
//Resets the parser to a blank state, ready to parse a new HTML document | ||
Parser.prototype.reset = function(){ | ||
if(this._cbs.onreset) this._cbs.onreset(); | ||
this._tokenizer.reset(); | ||
Parser.prototype.reset = function() { | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
this._tokenizer.reset(); | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
if(this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
if (this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
}; | ||
//Parses a complete HTML document and pushes it to the handler | ||
Parser.prototype.parseComplete = function(data){ | ||
this.reset(); | ||
this.end(data); | ||
Parser.prototype.parseComplete = function(data) { | ||
this.reset(); | ||
this.end(data); | ||
}; | ||
Parser.prototype.write = function(chunk){ | ||
this._tokenizer.write(chunk); | ||
Parser.prototype.write = function(chunk) { | ||
this._tokenizer.write(chunk); | ||
}; | ||
Parser.prototype.end = function(chunk){ | ||
this._tokenizer.end(chunk); | ||
Parser.prototype.end = function(chunk) { | ||
this._tokenizer.end(chunk); | ||
}; | ||
Parser.prototype.pause = function(){ | ||
this._tokenizer.pause(); | ||
Parser.prototype.pause = function() { | ||
this._tokenizer.pause(); | ||
}; | ||
Parser.prototype.resume = function(){ | ||
this._tokenizer.resume(); | ||
Parser.prototype.resume = function() { | ||
this._tokenizer.resume(); | ||
}; | ||
@@ -364,0 +379,0 @@ |
module.exports = ProxyHandler; | ||
function ProxyHandler(cbs){ | ||
this._cbs = cbs || {}; | ||
function ProxyHandler(cbs) { | ||
this._cbs = cbs || {}; | ||
} | ||
var EVENTS = require("./").EVENTS; | ||
Object.keys(EVENTS).forEach(function(name){ | ||
if(EVENTS[name] === 0){ | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function(){ | ||
if(this._cbs[name]) this._cbs[name](); | ||
}; | ||
} else if(EVENTS[name] === 1){ | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function(a){ | ||
if(this._cbs[name]) this._cbs[name](a); | ||
}; | ||
} else if(EVENTS[name] === 2){ | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function(a, b){ | ||
if(this._cbs[name]) this._cbs[name](a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments"); | ||
} | ||
}); | ||
Object.keys(EVENTS).forEach(function(name) { | ||
if (EVENTS[name] === 0) { | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function() { | ||
if (this._cbs[name]) this._cbs[name](); | ||
}; | ||
} else if (EVENTS[name] === 1) { | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function(a) { | ||
if (this._cbs[name]) this._cbs[name](a); | ||
}; | ||
} else if (EVENTS[name] === 2) { | ||
name = "on" + name; | ||
ProxyHandler.prototype[name] = function(a, b) { | ||
if (this._cbs[name]) this._cbs[name](a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments"); | ||
} | ||
}); |
@@ -5,4 +5,4 @@ module.exports = Stream; | ||
function Stream(options){ | ||
Parser.call(this, new Cbs(this), options); | ||
function Stream(options) { | ||
Parser.call(this, new Cbs(this), options); | ||
} | ||
@@ -14,4 +14,4 @@ | ||
function Cbs(scope){ | ||
this.scope = scope; | ||
function Cbs(scope) { | ||
this.scope = scope; | ||
} | ||
@@ -21,18 +21,18 @@ | ||
Object.keys(EVENTS).forEach(function(name){ | ||
if(EVENTS[name] === 0){ | ||
Cbs.prototype["on" + name] = function(){ | ||
this.scope.emit(name); | ||
}; | ||
} else if(EVENTS[name] === 1){ | ||
Cbs.prototype["on" + name] = function(a){ | ||
this.scope.emit(name, a); | ||
}; | ||
} else if(EVENTS[name] === 2){ | ||
Cbs.prototype["on" + name] = function(a, b){ | ||
this.scope.emit(name, a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments!"); | ||
} | ||
}); | ||
Object.keys(EVENTS).forEach(function(name) { | ||
if (EVENTS[name] === 0) { | ||
Cbs.prototype["on" + name] = function() { | ||
this.scope.emit(name); | ||
}; | ||
} else if (EVENTS[name] === 1) { | ||
Cbs.prototype["on" + name] = function(a) { | ||
this.scope.emit(name, a); | ||
}; | ||
} else if (EVENTS[name] === 2) { | ||
Cbs.prototype["on" + name] = function(a, b) { | ||
this.scope.emit(name, a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments!"); | ||
} | ||
}); |
1481
lib/Tokenizer.js
@@ -6,26 +6,26 @@ module.exports = Tokenizer; | ||
var legacyMap = require("entities/maps/legacy.json"); | ||
var xmlMap = require("entities/maps/xml.json"); | ||
var xmlMap = require("entities/maps/xml.json"); | ||
var i = 0; | ||
var TEXT = i++; | ||
var BEFORE_TAG_NAME = i++; //after < | ||
var IN_TAG_NAME = i++; | ||
var IN_SELF_CLOSING_TAG = i++; | ||
var BEFORE_CLOSING_TAG_NAME = i++; | ||
var IN_CLOSING_TAG_NAME = i++; | ||
var AFTER_CLOSING_TAG_NAME = i++; | ||
var TEXT = i++; | ||
var BEFORE_TAG_NAME = i++; //after < | ||
var IN_TAG_NAME = i++; | ||
var IN_SELF_CLOSING_TAG = i++; | ||
var BEFORE_CLOSING_TAG_NAME = i++; | ||
var IN_CLOSING_TAG_NAME = i++; | ||
var AFTER_CLOSING_TAG_NAME = i++; | ||
//attributes | ||
var BEFORE_ATTRIBUTE_NAME = i++; | ||
var IN_ATTRIBUTE_NAME = i++; | ||
var AFTER_ATTRIBUTE_NAME = i++; | ||
var BEFORE_ATTRIBUTE_VALUE = i++; | ||
var IN_ATTRIBUTE_VALUE_DQ = i++; // " | ||
var IN_ATTRIBUTE_VALUE_SQ = i++; // ' | ||
var IN_ATTRIBUTE_VALUE_NQ = i++; | ||
var BEFORE_ATTRIBUTE_NAME = i++; | ||
var IN_ATTRIBUTE_NAME = i++; | ||
var AFTER_ATTRIBUTE_NAME = i++; | ||
var BEFORE_ATTRIBUTE_VALUE = i++; | ||
var IN_ATTRIBUTE_VALUE_DQ = i++; // " | ||
var IN_ATTRIBUTE_VALUE_SQ = i++; // ' | ||
var IN_ATTRIBUTE_VALUE_NQ = i++; | ||
//declarations | ||
var BEFORE_DECLARATION = i++; // ! | ||
var IN_DECLARATION = i++; | ||
var BEFORE_DECLARATION = i++; // ! | ||
var IN_DECLARATION = i++; | ||
@@ -36,413 +36,457 @@ //processing instructions | ||
//comments | ||
var BEFORE_COMMENT = i++; | ||
var IN_COMMENT = i++; | ||
var AFTER_COMMENT_1 = i++; | ||
var AFTER_COMMENT_2 = i++; | ||
var BEFORE_COMMENT = i++; | ||
var IN_COMMENT = i++; | ||
var AFTER_COMMENT_1 = i++; | ||
var AFTER_COMMENT_2 = i++; | ||
//cdata | ||
var BEFORE_CDATA_1 = i++; // [ | ||
var BEFORE_CDATA_2 = i++; // C | ||
var BEFORE_CDATA_3 = i++; // D | ||
var BEFORE_CDATA_4 = i++; // A | ||
var BEFORE_CDATA_5 = i++; // T | ||
var BEFORE_CDATA_6 = i++; // A | ||
var IN_CDATA = i++; // [ | ||
var AFTER_CDATA_1 = i++; // ] | ||
var AFTER_CDATA_2 = i++; // ] | ||
var BEFORE_CDATA_1 = i++; // [ | ||
var BEFORE_CDATA_2 = i++; // C | ||
var BEFORE_CDATA_3 = i++; // D | ||
var BEFORE_CDATA_4 = i++; // A | ||
var BEFORE_CDATA_5 = i++; // T | ||
var BEFORE_CDATA_6 = i++; // A | ||
var IN_CDATA = i++; // [ | ||
var AFTER_CDATA_1 = i++; // ] | ||
var AFTER_CDATA_2 = i++; // ] | ||
//special tags | ||
var BEFORE_SPECIAL = i++; //S | ||
var BEFORE_SPECIAL_END = i++; //S | ||
var BEFORE_SPECIAL = i++; //S | ||
var BEFORE_SPECIAL_END = i++; //S | ||
var BEFORE_SCRIPT_1 = i++; //C | ||
var BEFORE_SCRIPT_2 = i++; //R | ||
var BEFORE_SCRIPT_3 = i++; //I | ||
var BEFORE_SCRIPT_4 = i++; //P | ||
var BEFORE_SCRIPT_5 = i++; //T | ||
var AFTER_SCRIPT_1 = i++; //C | ||
var AFTER_SCRIPT_2 = i++; //R | ||
var AFTER_SCRIPT_3 = i++; //I | ||
var AFTER_SCRIPT_4 = i++; //P | ||
var AFTER_SCRIPT_5 = i++; //T | ||
var BEFORE_SCRIPT_1 = i++; //C | ||
var BEFORE_SCRIPT_2 = i++; //R | ||
var BEFORE_SCRIPT_3 = i++; //I | ||
var BEFORE_SCRIPT_4 = i++; //P | ||
var BEFORE_SCRIPT_5 = i++; //T | ||
var AFTER_SCRIPT_1 = i++; //C | ||
var AFTER_SCRIPT_2 = i++; //R | ||
var AFTER_SCRIPT_3 = i++; //I | ||
var AFTER_SCRIPT_4 = i++; //P | ||
var AFTER_SCRIPT_5 = i++; //T | ||
var BEFORE_STYLE_1 = i++; //T | ||
var BEFORE_STYLE_2 = i++; //Y | ||
var BEFORE_STYLE_3 = i++; //L | ||
var BEFORE_STYLE_4 = i++; //E | ||
var AFTER_STYLE_1 = i++; //T | ||
var AFTER_STYLE_2 = i++; //Y | ||
var AFTER_STYLE_3 = i++; //L | ||
var AFTER_STYLE_4 = i++; //E | ||
var BEFORE_STYLE_1 = i++; //T | ||
var BEFORE_STYLE_2 = i++; //Y | ||
var BEFORE_STYLE_3 = i++; //L | ||
var BEFORE_STYLE_4 = i++; //E | ||
var AFTER_STYLE_1 = i++; //T | ||
var AFTER_STYLE_2 = i++; //Y | ||
var AFTER_STYLE_3 = i++; //L | ||
var AFTER_STYLE_4 = i++; //E | ||
var BEFORE_ENTITY = i++; //& | ||
var BEFORE_NUMERIC_ENTITY = i++; //# | ||
var IN_NAMED_ENTITY = i++; | ||
var IN_NUMERIC_ENTITY = i++; | ||
var IN_HEX_ENTITY = i++; //X | ||
var BEFORE_ENTITY = i++; //& | ||
var BEFORE_NUMERIC_ENTITY = i++; //# | ||
var IN_NAMED_ENTITY = i++; | ||
var IN_NUMERIC_ENTITY = i++; | ||
var IN_HEX_ENTITY = i++; //X | ||
var j = 0; | ||
var SPECIAL_NONE = j++; | ||
var SPECIAL_SCRIPT = j++; | ||
var SPECIAL_STYLE = j++; | ||
var SPECIAL_NONE = j++; | ||
var SPECIAL_SCRIPT = j++; | ||
var SPECIAL_STYLE = j++; | ||
function whitespace(c){ | ||
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r"; | ||
function whitespace(c) { | ||
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r"; | ||
} | ||
function ifElseState(upper, SUCCESS, FAILURE){ | ||
var lower = upper.toLowerCase(); | ||
function ifElseState(upper, SUCCESS, FAILURE) { | ||
var lower = upper.toLowerCase(); | ||
if(upper === lower){ | ||
return function(c){ | ||
if(c === lower){ | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
} | ||
}; | ||
} else { | ||
return function(c){ | ||
if(c === lower || c === upper){ | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
} | ||
}; | ||
} | ||
if (upper === lower) { | ||
return function(c) { | ||
if (c === lower) { | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
} | ||
}; | ||
} else { | ||
return function(c) { | ||
if (c === lower || c === upper) { | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
} | ||
}; | ||
} | ||
} | ||
function consumeSpecialNameChar(upper, NEXT_STATE){ | ||
var lower = upper.toLowerCase(); | ||
function consumeSpecialNameChar(upper, NEXT_STATE) { | ||
var lower = upper.toLowerCase(); | ||
return function(c){ | ||
if(c === lower || c === upper){ | ||
this._state = NEXT_STATE; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
} | ||
}; | ||
return function(c) { | ||
if (c === lower || c === upper) { | ||
this._state = NEXT_STATE; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
} | ||
}; | ||
} | ||
function Tokenizer(options, cbs){ | ||
this._state = TEXT; | ||
this._buffer = ""; | ||
this._sectionStart = 0; | ||
this._index = 0; | ||
this._bufferOffset = 0; //chars removed from _buffer | ||
this._baseState = TEXT; | ||
this._special = SPECIAL_NONE; | ||
this._cbs = cbs; | ||
this._running = true; | ||
this._ended = false; | ||
this._xmlMode = !!(options && options.xmlMode); | ||
this._decodeEntities = !!(options && options.decodeEntities); | ||
function Tokenizer(options, cbs) { | ||
this._state = TEXT; | ||
this._buffer = ""; | ||
this._sectionStart = 0; | ||
this._index = 0; | ||
this._bufferOffset = 0; //chars removed from _buffer | ||
this._baseState = TEXT; | ||
this._special = SPECIAL_NONE; | ||
this._cbs = cbs; | ||
this._running = true; | ||
this._ended = false; | ||
this._xmlMode = !!(options && options.xmlMode); | ||
this._decodeEntities = !!(options && options.decodeEntities); | ||
} | ||
Tokenizer.prototype._stateText = function(c){ | ||
if(c === "<"){ | ||
if(this._index > this._sectionStart){ | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._state = BEFORE_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){ | ||
if(this._index > this._sectionStart){ | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._baseState = TEXT; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateText = function(c) { | ||
if (c === "<") { | ||
if (this._index > this._sectionStart) { | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._state = BEFORE_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} else if ( | ||
this._decodeEntities && | ||
this._special === SPECIAL_NONE && | ||
c === "&" | ||
) { | ||
if (this._index > this._sectionStart) { | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._baseState = TEXT; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeTagName = function(c){ | ||
if(c === "/"){ | ||
this._state = BEFORE_CLOSING_TAG_NAME; | ||
} else if(c === "<"){ | ||
this._cbs.ontext(this._getSection()); | ||
this._sectionStart = this._index; | ||
} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) { | ||
this._state = TEXT; | ||
} else if(c === "!"){ | ||
this._state = BEFORE_DECLARATION; | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "?"){ | ||
this._state = IN_PROCESSING_INSTRUCTION; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = (!this._xmlMode && (c === "s" || c === "S")) ? | ||
BEFORE_SPECIAL : IN_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateBeforeTagName = function(c) { | ||
if (c === "/") { | ||
this._state = BEFORE_CLOSING_TAG_NAME; | ||
} else if (c === "<") { | ||
this._cbs.ontext(this._getSection()); | ||
this._sectionStart = this._index; | ||
} else if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) { | ||
this._state = TEXT; | ||
} else if (c === "!") { | ||
this._state = BEFORE_DECLARATION; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "?") { | ||
this._state = IN_PROCESSING_INSTRUCTION; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = | ||
!this._xmlMode && (c === "s" || c === "S") | ||
? BEFORE_SPECIAL | ||
: IN_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInTagName = function(c){ | ||
if(c === "/" || c === ">" || whitespace(c)){ | ||
this._emitToken("onopentagname"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInTagName = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._emitToken("onopentagname"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeCloseingTagName = function(c){ | ||
if(whitespace(c)); | ||
else if(c === ">"){ | ||
this._state = TEXT; | ||
} else if(this._special !== SPECIAL_NONE){ | ||
if(c === "s" || c === "S"){ | ||
this._state = BEFORE_SPECIAL_END; | ||
} else { | ||
this._state = TEXT; | ||
this._index--; | ||
} | ||
} else { | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateBeforeCloseingTagName = function(c) { | ||
if (whitespace(c)); | ||
else if (c === ">") { | ||
this._state = TEXT; | ||
} else if (this._special !== SPECIAL_NONE) { | ||
if (c === "s" || c === "S") { | ||
this._state = BEFORE_SPECIAL_END; | ||
} else { | ||
this._state = TEXT; | ||
this._index--; | ||
} | ||
} else { | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInCloseingTagName = function(c){ | ||
if(c === ">" || whitespace(c)){ | ||
this._emitToken("onclosetag"); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInCloseingTagName = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._emitToken("onclosetag"); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterCloseingTagName = function(c){ | ||
//skip everything until ">" | ||
if(c === ">"){ | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
Tokenizer.prototype._stateAfterCloseingTagName = function(c) { | ||
//skip everything until ">" | ||
if (c === ">") { | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeAttributeName = function(c){ | ||
if(c === ">"){ | ||
this._cbs.onopentagend(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "/"){ | ||
this._state = IN_SELF_CLOSING_TAG; | ||
} else if(!whitespace(c)){ | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateBeforeAttributeName = function(c) { | ||
if (c === ">") { | ||
this._cbs.onopentagend(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "/") { | ||
this._state = IN_SELF_CLOSING_TAG; | ||
} else if (!whitespace(c)) { | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInSelfClosingTag = function(c){ | ||
if(c === ">"){ | ||
this._cbs.onselfclosingtag(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if(!whitespace(c)){ | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInSelfClosingTag = function(c) { | ||
if (c === ">") { | ||
this._cbs.onselfclosingtag(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (!whitespace(c)) { | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeName = function(c){ | ||
if(c === "=" || c === "/" || c === ">" || whitespace(c)){ | ||
this._cbs.onattribname(this._getSection()); | ||
this._sectionStart = -1; | ||
this._state = AFTER_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInAttributeName = function(c) { | ||
if (c === "=" || c === "/" || c === ">" || whitespace(c)) { | ||
this._cbs.onattribname(this._getSection()); | ||
this._sectionStart = -1; | ||
this._state = AFTER_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterAttributeName = function(c){ | ||
if(c === "="){ | ||
this._state = BEFORE_ATTRIBUTE_VALUE; | ||
} else if(c === "/" || c === ">"){ | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if(!whitespace(c)){ | ||
this._cbs.onattribend(); | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateAfterAttributeName = function(c) { | ||
if (c === "=") { | ||
this._state = BEFORE_ATTRIBUTE_VALUE; | ||
} else if (c === "/" || c === ">") { | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if (!whitespace(c)) { | ||
this._cbs.onattribend(); | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeAttributeValue = function(c){ | ||
if(c === "\""){ | ||
this._state = IN_ATTRIBUTE_VALUE_DQ; | ||
this._sectionStart = this._index + 1; | ||
} else if(c === "'"){ | ||
this._state = IN_ATTRIBUTE_VALUE_SQ; | ||
this._sectionStart = this._index + 1; | ||
} else if(!whitespace(c)){ | ||
this._state = IN_ATTRIBUTE_VALUE_NQ; | ||
this._sectionStart = this._index; | ||
this._index--; //reconsume token | ||
} | ||
Tokenizer.prototype._stateBeforeAttributeValue = function(c) { | ||
if (c === '"') { | ||
this._state = IN_ATTRIBUTE_VALUE_DQ; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "'") { | ||
this._state = IN_ATTRIBUTE_VALUE_SQ; | ||
this._sectionStart = this._index + 1; | ||
} else if (!whitespace(c)) { | ||
this._state = IN_ATTRIBUTE_VALUE_NQ; | ||
this._sectionStart = this._index; | ||
this._index--; //reconsume token | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c){ | ||
if(c === "\""){ | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if(this._decodeEntities && c === "&"){ | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c) { | ||
if (c === '"') { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c){ | ||
if(c === "'"){ | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if(this._decodeEntities && c === "&"){ | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c) { | ||
if (c === "'") { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c){ | ||
if(whitespace(c) || c === ">"){ | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if(this._decodeEntities && c === "&"){ | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c) { | ||
if (whitespace(c) || c === ">") { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeDeclaration = function(c){ | ||
this._state = c === "[" ? BEFORE_CDATA_1 : | ||
c === "-" ? BEFORE_COMMENT : | ||
IN_DECLARATION; | ||
Tokenizer.prototype._stateBeforeDeclaration = function(c) { | ||
this._state = | ||
c === "[" | ||
? BEFORE_CDATA_1 | ||
: c === "-" | ||
? BEFORE_COMMENT | ||
: IN_DECLARATION; | ||
}; | ||
Tokenizer.prototype._stateInDeclaration = function(c){ | ||
if(c === ">"){ | ||
this._cbs.ondeclaration(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
Tokenizer.prototype._stateInDeclaration = function(c) { | ||
if (c === ">") { | ||
this._cbs.ondeclaration(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInProcessingInstruction = function(c){ | ||
if(c === ">"){ | ||
this._cbs.onprocessinginstruction(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
Tokenizer.prototype._stateInProcessingInstruction = function(c) { | ||
if (c === ">") { | ||
this._cbs.onprocessinginstruction(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeComment = function(c){ | ||
if(c === "-"){ | ||
this._state = IN_COMMENT; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
} | ||
Tokenizer.prototype._stateBeforeComment = function(c) { | ||
if (c === "-") { | ||
this._state = IN_COMMENT; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInComment = function(c){ | ||
if(c === "-") this._state = AFTER_COMMENT_1; | ||
Tokenizer.prototype._stateInComment = function(c) { | ||
if (c === "-") this._state = AFTER_COMMENT_1; | ||
}; | ||
Tokenizer.prototype._stateAfterComment1 = function(c){ | ||
if(c === "-"){ | ||
this._state = AFTER_COMMENT_2; | ||
} else { | ||
this._state = IN_COMMENT; | ||
} | ||
Tokenizer.prototype._stateAfterComment1 = function(c) { | ||
if (c === "-") { | ||
this._state = AFTER_COMMENT_2; | ||
} else { | ||
this._state = IN_COMMENT; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterComment2 = function(c){ | ||
if(c === ">"){ | ||
//remove 2 trailing chars | ||
this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2)); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if(c !== "-"){ | ||
this._state = IN_COMMENT; | ||
} | ||
// else: stay in AFTER_COMMENT_2 (`--->`) | ||
Tokenizer.prototype._stateAfterComment2 = function(c) { | ||
if (c === ">") { | ||
//remove 2 trailing chars | ||
this._cbs.oncomment( | ||
this._buffer.substring(this._sectionStart, this._index - 2) | ||
); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "-") { | ||
this._state = IN_COMMENT; | ||
} | ||
// else: stay in AFTER_COMMENT_2 (`--->`) | ||
}; | ||
Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION); | ||
Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION); | ||
Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION); | ||
Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION); | ||
Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION); | ||
Tokenizer.prototype._stateBeforeCdata1 = ifElseState( | ||
"C", | ||
BEFORE_CDATA_2, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata2 = ifElseState( | ||
"D", | ||
BEFORE_CDATA_3, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata3 = ifElseState( | ||
"A", | ||
BEFORE_CDATA_4, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata4 = ifElseState( | ||
"T", | ||
BEFORE_CDATA_5, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata5 = ifElseState( | ||
"A", | ||
BEFORE_CDATA_6, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata6 = function(c){ | ||
if(c === "["){ | ||
this._state = IN_CDATA; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateBeforeCdata6 = function(c) { | ||
if (c === "[") { | ||
this._state = IN_CDATA; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInCdata = function(c){ | ||
if(c === "]") this._state = AFTER_CDATA_1; | ||
Tokenizer.prototype._stateInCdata = function(c) { | ||
if (c === "]") this._state = AFTER_CDATA_1; | ||
}; | ||
Tokenizer.prototype._stateAfterCdata1 = function(c){ | ||
if(c === "]") this._state = AFTER_CDATA_2; | ||
else this._state = IN_CDATA; | ||
Tokenizer.prototype._stateAfterCdata1 = function(c) { | ||
if (c === "]") this._state = AFTER_CDATA_2; | ||
else this._state = IN_CDATA; | ||
}; | ||
Tokenizer.prototype._stateAfterCdata2 = function(c){ | ||
if(c === ">"){ | ||
//remove 2 trailing chars | ||
this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2)); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if(c !== "]") { | ||
this._state = IN_CDATA; | ||
} | ||
//else: stay in AFTER_CDATA_2 (`]]]>`) | ||
Tokenizer.prototype._stateAfterCdata2 = function(c) { | ||
if (c === ">") { | ||
//remove 2 trailing chars | ||
this._cbs.oncdata( | ||
this._buffer.substring(this._sectionStart, this._index - 2) | ||
); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "]") { | ||
this._state = IN_CDATA; | ||
} | ||
//else: stay in AFTER_CDATA_2 (`]]]>`) | ||
}; | ||
Tokenizer.prototype._stateBeforeSpecial = function(c){ | ||
if(c === "c" || c === "C"){ | ||
this._state = BEFORE_SCRIPT_1; | ||
} else if(c === "t" || c === "T"){ | ||
this._state = BEFORE_STYLE_1; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
} | ||
Tokenizer.prototype._stateBeforeSpecial = function(c) { | ||
if (c === "c" || c === "C") { | ||
this._state = BEFORE_SCRIPT_1; | ||
} else if (c === "t" || c === "T") { | ||
this._state = BEFORE_STYLE_1; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeSpecialEnd = function(c){ | ||
if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){ | ||
this._state = AFTER_SCRIPT_1; | ||
} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){ | ||
this._state = AFTER_STYLE_1; | ||
} | ||
else this._state = TEXT; | ||
Tokenizer.prototype._stateBeforeSpecialEnd = function(c) { | ||
if (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) { | ||
this._state = AFTER_SCRIPT_1; | ||
} else if (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) { | ||
this._state = AFTER_STYLE_1; | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2); | ||
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3); | ||
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4); | ||
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5); | ||
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar( | ||
"R", | ||
BEFORE_SCRIPT_2 | ||
); | ||
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar( | ||
"I", | ||
BEFORE_SCRIPT_3 | ||
); | ||
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar( | ||
"P", | ||
BEFORE_SCRIPT_4 | ||
); | ||
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar( | ||
"T", | ||
BEFORE_SCRIPT_5 | ||
); | ||
Tokenizer.prototype._stateBeforeScript5 = function(c){ | ||
if(c === "/" || c === ">" || whitespace(c)){ | ||
this._special = SPECIAL_SCRIPT; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
Tokenizer.prototype._stateBeforeScript5 = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_SCRIPT; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
}; | ||
@@ -455,22 +499,30 @@ | ||
Tokenizer.prototype._stateAfterScript5 = function(c){ | ||
if(c === ">" || whitespace(c)){ | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 6; | ||
this._index--; //reconsume the token | ||
} | ||
else this._state = TEXT; | ||
Tokenizer.prototype._stateAfterScript5 = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 6; | ||
this._index--; //reconsume the token | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2); | ||
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3); | ||
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4); | ||
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar( | ||
"Y", | ||
BEFORE_STYLE_2 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar( | ||
"L", | ||
BEFORE_STYLE_3 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar( | ||
"E", | ||
BEFORE_STYLE_4 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle4 = function(c){ | ||
if(c === "/" || c === ">" || whitespace(c)){ | ||
this._special = SPECIAL_STYLE; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
Tokenizer.prototype._stateBeforeStyle4 = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_STYLE; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
}; | ||
@@ -482,426 +534,441 @@ | ||
Tokenizer.prototype._stateAfterStyle4 = function(c){ | ||
if(c === ">" || whitespace(c)){ | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 5; | ||
this._index--; //reconsume the token | ||
} | ||
else this._state = TEXT; | ||
Tokenizer.prototype._stateAfterStyle4 = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 5; | ||
this._index--; //reconsume the token | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY); | ||
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY); | ||
Tokenizer.prototype._stateBeforeEntity = ifElseState( | ||
"#", | ||
BEFORE_NUMERIC_ENTITY, | ||
IN_NAMED_ENTITY | ||
); | ||
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState( | ||
"X", | ||
IN_HEX_ENTITY, | ||
IN_NUMERIC_ENTITY | ||
); | ||
//for entities terminated with a semicolon | ||
Tokenizer.prototype._parseNamedEntityStrict = function(){ | ||
//offset = 1 | ||
if(this._sectionStart + 1 < this._index){ | ||
var entity = this._buffer.substring(this._sectionStart + 1, this._index), | ||
map = this._xmlMode ? xmlMap : entityMap; | ||
Tokenizer.prototype._parseNamedEntityStrict = function() { | ||
//offset = 1 | ||
if (this._sectionStart + 1 < this._index) { | ||
var entity = this._buffer.substring( | ||
this._sectionStart + 1, | ||
this._index | ||
), | ||
map = this._xmlMode ? xmlMap : entityMap; | ||
if(map.hasOwnProperty(entity)){ | ||
this._emitPartial(map[entity]); | ||
this._sectionStart = this._index + 1; | ||
} | ||
} | ||
if (map.hasOwnProperty(entity)) { | ||
this._emitPartial(map[entity]); | ||
this._sectionStart = this._index + 1; | ||
} | ||
} | ||
}; | ||
//parses legacy entities (without trailing semicolon) | ||
Tokenizer.prototype._parseLegacyEntity = function(){ | ||
var start = this._sectionStart + 1, | ||
limit = this._index - start; | ||
Tokenizer.prototype._parseLegacyEntity = function() { | ||
var start = this._sectionStart + 1, | ||
limit = this._index - start; | ||
if(limit > 6) limit = 6; //the max length of legacy entities is 6 | ||
if (limit > 6) limit = 6; //the max length of legacy entities is 6 | ||
while(limit >= 2){ //the min length of legacy entities is 2 | ||
var entity = this._buffer.substr(start, limit); | ||
while (limit >= 2) { | ||
//the min length of legacy entities is 2 | ||
var entity = this._buffer.substr(start, limit); | ||
if(legacyMap.hasOwnProperty(entity)){ | ||
this._emitPartial(legacyMap[entity]); | ||
this._sectionStart += limit + 1; | ||
return; | ||
} else { | ||
limit--; | ||
} | ||
} | ||
if (legacyMap.hasOwnProperty(entity)) { | ||
this._emitPartial(legacyMap[entity]); | ||
this._sectionStart += limit + 1; | ||
return; | ||
} else { | ||
limit--; | ||
} | ||
} | ||
}; | ||
Tokenizer.prototype._stateInNamedEntity = function(c){ | ||
if(c === ";"){ | ||
this._parseNamedEntityStrict(); | ||
if(this._sectionStart + 1 < this._index && !this._xmlMode){ | ||
this._parseLegacyEntity(); | ||
} | ||
this._state = this._baseState; | ||
} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){ | ||
if(this._xmlMode); | ||
else if(this._sectionStart + 1 === this._index); | ||
else if(this._baseState !== TEXT){ | ||
if(c !== "="){ | ||
this._parseNamedEntityStrict(); | ||
} | ||
} else { | ||
this._parseLegacyEntity(); | ||
} | ||
Tokenizer.prototype._stateInNamedEntity = function(c) { | ||
if (c === ";") { | ||
this._parseNamedEntityStrict(); | ||
if (this._sectionStart + 1 < this._index && !this._xmlMode) { | ||
this._parseLegacyEntity(); | ||
} | ||
this._state = this._baseState; | ||
} else if ( | ||
(c < "a" || c > "z") && | ||
(c < "A" || c > "Z") && | ||
(c < "0" || c > "9") | ||
) { | ||
if (this._xmlMode); | ||
else if (this._sectionStart + 1 === this._index); | ||
else if (this._baseState !== TEXT) { | ||
if (c !== "=") { | ||
this._parseNamedEntityStrict(); | ||
} | ||
} else { | ||
this._parseLegacyEntity(); | ||
} | ||
this._state = this._baseState; | ||
this._index--; | ||
} | ||
this._state = this._baseState; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._decodeNumericEntity = function(offset, base){ | ||
var sectionStart = this._sectionStart + offset; | ||
Tokenizer.prototype._decodeNumericEntity = function(offset, base) { | ||
var sectionStart = this._sectionStart + offset; | ||
if(sectionStart !== this._index){ | ||
//parse entity | ||
var entity = this._buffer.substring(sectionStart, this._index); | ||
var parsed = parseInt(entity, base); | ||
if (sectionStart !== this._index) { | ||
//parse entity | ||
var entity = this._buffer.substring(sectionStart, this._index); | ||
var parsed = parseInt(entity, base); | ||
this._emitPartial(decodeCodePoint(parsed)); | ||
this._sectionStart = this._index; | ||
} else { | ||
this._sectionStart--; | ||
} | ||
this._emitPartial(decodeCodePoint(parsed)); | ||
this._sectionStart = this._index; | ||
} else { | ||
this._sectionStart--; | ||
} | ||
this._state = this._baseState; | ||
this._state = this._baseState; | ||
}; | ||
Tokenizer.prototype._stateInNumericEntity = function(c){ | ||
if(c === ";"){ | ||
this._decodeNumericEntity(2, 10); | ||
this._sectionStart++; | ||
} else if(c < "0" || c > "9"){ | ||
if(!this._xmlMode){ | ||
this._decodeNumericEntity(2, 10); | ||
} else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInNumericEntity = function(c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(2, 10); | ||
this._sectionStart++; | ||
} else if (c < "0" || c > "9") { | ||
if (!this._xmlMode) { | ||
this._decodeNumericEntity(2, 10); | ||
} else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInHexEntity = function(c){ | ||
if(c === ";"){ | ||
this._decodeNumericEntity(3, 16); | ||
this._sectionStart++; | ||
} else if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){ | ||
if(!this._xmlMode){ | ||
this._decodeNumericEntity(3, 16); | ||
} else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
Tokenizer.prototype._stateInHexEntity = function(c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(3, 16); | ||
this._sectionStart++; | ||
} else if ( | ||
(c < "a" || c > "f") && | ||
(c < "A" || c > "F") && | ||
(c < "0" || c > "9") | ||
) { | ||
if (!this._xmlMode) { | ||
this._decodeNumericEntity(3, 16); | ||
} else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._cleanup = function (){ | ||
if(this._sectionStart < 0){ | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if(this._running){ | ||
if(this._state === TEXT){ | ||
if(this._sectionStart !== this._index){ | ||
this._cbs.ontext(this._buffer.substr(this._sectionStart)); | ||
} | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if(this._sectionStart === this._index){ | ||
//the section just started | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else { | ||
//remove everything unnecessary | ||
this._buffer = this._buffer.substr(this._sectionStart); | ||
this._index -= this._sectionStart; | ||
this._bufferOffset += this._sectionStart; | ||
} | ||
Tokenizer.prototype._cleanup = function() { | ||
if (this._sectionStart < 0) { | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if (this._running) { | ||
if (this._state === TEXT) { | ||
if (this._sectionStart !== this._index) { | ||
this._cbs.ontext(this._buffer.substr(this._sectionStart)); | ||
} | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if (this._sectionStart === this._index) { | ||
//the section just started | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else { | ||
//remove everything unnecessary | ||
this._buffer = this._buffer.substr(this._sectionStart); | ||
this._index -= this._sectionStart; | ||
this._bufferOffset += this._sectionStart; | ||
} | ||
this._sectionStart = 0; | ||
} | ||
this._sectionStart = 0; | ||
} | ||
}; | ||
//TODO make events conditional | ||
Tokenizer.prototype.write = function(chunk){ | ||
if(this._ended) this._cbs.onerror(Error(".write() after done!")); | ||
Tokenizer.prototype.write = function(chunk) { | ||
if (this._ended) this._cbs.onerror(Error(".write() after done!")); | ||
this._buffer += chunk; | ||
this._parse(); | ||
this._buffer += chunk; | ||
this._parse(); | ||
}; | ||
Tokenizer.prototype._parse = function(){ | ||
while(this._index < this._buffer.length && this._running){ | ||
var c = this._buffer.charAt(this._index); | ||
if(this._state === TEXT) { | ||
this._stateText(c); | ||
} else if(this._state === BEFORE_TAG_NAME){ | ||
this._stateBeforeTagName(c); | ||
} else if(this._state === IN_TAG_NAME) { | ||
this._stateInTagName(c); | ||
} else if(this._state === BEFORE_CLOSING_TAG_NAME){ | ||
this._stateBeforeCloseingTagName(c); | ||
} else if(this._state === IN_CLOSING_TAG_NAME){ | ||
this._stateInCloseingTagName(c); | ||
} else if(this._state === AFTER_CLOSING_TAG_NAME){ | ||
this._stateAfterCloseingTagName(c); | ||
} else if(this._state === IN_SELF_CLOSING_TAG){ | ||
this._stateInSelfClosingTag(c); | ||
} | ||
Tokenizer.prototype._parse = function() { | ||
while (this._index < this._buffer.length && this._running) { | ||
var c = this._buffer.charAt(this._index); | ||
if (this._state === TEXT) { | ||
this._stateText(c); | ||
} else if (this._state === BEFORE_TAG_NAME) { | ||
this._stateBeforeTagName(c); | ||
} else if (this._state === IN_TAG_NAME) { | ||
this._stateInTagName(c); | ||
} else if (this._state === BEFORE_CLOSING_TAG_NAME) { | ||
this._stateBeforeCloseingTagName(c); | ||
} else if (this._state === IN_CLOSING_TAG_NAME) { | ||
this._stateInCloseingTagName(c); | ||
} else if (this._state === AFTER_CLOSING_TAG_NAME) { | ||
this._stateAfterCloseingTagName(c); | ||
} else if (this._state === IN_SELF_CLOSING_TAG) { | ||
this._stateInSelfClosingTag(c); | ||
} else if (this._state === BEFORE_ATTRIBUTE_NAME) { | ||
/* | ||
/* | ||
* attributes | ||
*/ | ||
else if(this._state === BEFORE_ATTRIBUTE_NAME){ | ||
this._stateBeforeAttributeName(c); | ||
} else if(this._state === IN_ATTRIBUTE_NAME){ | ||
this._stateInAttributeName(c); | ||
} else if(this._state === AFTER_ATTRIBUTE_NAME){ | ||
this._stateAfterAttributeName(c); | ||
} else if(this._state === BEFORE_ATTRIBUTE_VALUE){ | ||
this._stateBeforeAttributeValue(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_DQ){ | ||
this._stateInAttributeValueDoubleQuotes(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_SQ){ | ||
this._stateInAttributeValueSingleQuotes(c); | ||
} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){ | ||
this._stateInAttributeValueNoQuotes(c); | ||
} | ||
this._stateBeforeAttributeName(c); | ||
} else if (this._state === IN_ATTRIBUTE_NAME) { | ||
this._stateInAttributeName(c); | ||
} else if (this._state === AFTER_ATTRIBUTE_NAME) { | ||
this._stateAfterAttributeName(c); | ||
} else if (this._state === BEFORE_ATTRIBUTE_VALUE) { | ||
this._stateBeforeAttributeValue(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_DQ) { | ||
this._stateInAttributeValueDoubleQuotes(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_SQ) { | ||
this._stateInAttributeValueSingleQuotes(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_NQ) { | ||
this._stateInAttributeValueNoQuotes(c); | ||
} else if (this._state === BEFORE_DECLARATION) { | ||
/* | ||
/* | ||
* declarations | ||
*/ | ||
else if(this._state === BEFORE_DECLARATION){ | ||
this._stateBeforeDeclaration(c); | ||
} else if(this._state === IN_DECLARATION){ | ||
this._stateInDeclaration(c); | ||
} | ||
this._stateBeforeDeclaration(c); | ||
} else if (this._state === IN_DECLARATION) { | ||
this._stateInDeclaration(c); | ||
} else if (this._state === IN_PROCESSING_INSTRUCTION) { | ||
/* | ||
/* | ||
* processing instructions | ||
*/ | ||
else if(this._state === IN_PROCESSING_INSTRUCTION){ | ||
this._stateInProcessingInstruction(c); | ||
} | ||
this._stateInProcessingInstruction(c); | ||
} else if (this._state === BEFORE_COMMENT) { | ||
/* | ||
/* | ||
* comments | ||
*/ | ||
else if(this._state === BEFORE_COMMENT){ | ||
this._stateBeforeComment(c); | ||
} else if(this._state === IN_COMMENT){ | ||
this._stateInComment(c); | ||
} else if(this._state === AFTER_COMMENT_1){ | ||
this._stateAfterComment1(c); | ||
} else if(this._state === AFTER_COMMENT_2){ | ||
this._stateAfterComment2(c); | ||
} | ||
this._stateBeforeComment(c); | ||
} else if (this._state === IN_COMMENT) { | ||
this._stateInComment(c); | ||
} else if (this._state === AFTER_COMMENT_1) { | ||
this._stateAfterComment1(c); | ||
} else if (this._state === AFTER_COMMENT_2) { | ||
this._stateAfterComment2(c); | ||
} else if (this._state === BEFORE_CDATA_1) { | ||
/* | ||
/* | ||
* cdata | ||
*/ | ||
else if(this._state === BEFORE_CDATA_1){ | ||
this._stateBeforeCdata1(c); | ||
} else if(this._state === BEFORE_CDATA_2){ | ||
this._stateBeforeCdata2(c); | ||
} else if(this._state === BEFORE_CDATA_3){ | ||
this._stateBeforeCdata3(c); | ||
} else if(this._state === BEFORE_CDATA_4){ | ||
this._stateBeforeCdata4(c); | ||
} else if(this._state === BEFORE_CDATA_5){ | ||
this._stateBeforeCdata5(c); | ||
} else if(this._state === BEFORE_CDATA_6){ | ||
this._stateBeforeCdata6(c); | ||
} else if(this._state === IN_CDATA){ | ||
this._stateInCdata(c); | ||
} else if(this._state === AFTER_CDATA_1){ | ||
this._stateAfterCdata1(c); | ||
} else if(this._state === AFTER_CDATA_2){ | ||
this._stateAfterCdata2(c); | ||
} | ||
this._stateBeforeCdata1(c); | ||
} else if (this._state === BEFORE_CDATA_2) { | ||
this._stateBeforeCdata2(c); | ||
} else if (this._state === BEFORE_CDATA_3) { | ||
this._stateBeforeCdata3(c); | ||
} else if (this._state === BEFORE_CDATA_4) { | ||
this._stateBeforeCdata4(c); | ||
} else if (this._state === BEFORE_CDATA_5) { | ||
this._stateBeforeCdata5(c); | ||
} else if (this._state === BEFORE_CDATA_6) { | ||
this._stateBeforeCdata6(c); | ||
} else if (this._state === IN_CDATA) { | ||
this._stateInCdata(c); | ||
} else if (this._state === AFTER_CDATA_1) { | ||
this._stateAfterCdata1(c); | ||
} else if (this._state === AFTER_CDATA_2) { | ||
this._stateAfterCdata2(c); | ||
} else if (this._state === BEFORE_SPECIAL) { | ||
/* | ||
/* | ||
* special tags | ||
*/ | ||
else if(this._state === BEFORE_SPECIAL){ | ||
this._stateBeforeSpecial(c); | ||
} else if(this._state === BEFORE_SPECIAL_END){ | ||
this._stateBeforeSpecialEnd(c); | ||
} | ||
this._stateBeforeSpecial(c); | ||
} else if (this._state === BEFORE_SPECIAL_END) { | ||
this._stateBeforeSpecialEnd(c); | ||
} else if (this._state === BEFORE_SCRIPT_1) { | ||
/* | ||
/* | ||
* script | ||
*/ | ||
else if(this._state === BEFORE_SCRIPT_1){ | ||
this._stateBeforeScript1(c); | ||
} else if(this._state === BEFORE_SCRIPT_2){ | ||
this._stateBeforeScript2(c); | ||
} else if(this._state === BEFORE_SCRIPT_3){ | ||
this._stateBeforeScript3(c); | ||
} else if(this._state === BEFORE_SCRIPT_4){ | ||
this._stateBeforeScript4(c); | ||
} else if(this._state === BEFORE_SCRIPT_5){ | ||
this._stateBeforeScript5(c); | ||
} | ||
this._stateBeforeScript1(c); | ||
} else if (this._state === BEFORE_SCRIPT_2) { | ||
this._stateBeforeScript2(c); | ||
} else if (this._state === BEFORE_SCRIPT_3) { | ||
this._stateBeforeScript3(c); | ||
} else if (this._state === BEFORE_SCRIPT_4) { | ||
this._stateBeforeScript4(c); | ||
} else if (this._state === BEFORE_SCRIPT_5) { | ||
this._stateBeforeScript5(c); | ||
} else if (this._state === AFTER_SCRIPT_1) { | ||
this._stateAfterScript1(c); | ||
} else if (this._state === AFTER_SCRIPT_2) { | ||
this._stateAfterScript2(c); | ||
} else if (this._state === AFTER_SCRIPT_3) { | ||
this._stateAfterScript3(c); | ||
} else if (this._state === AFTER_SCRIPT_4) { | ||
this._stateAfterScript4(c); | ||
} else if (this._state === AFTER_SCRIPT_5) { | ||
this._stateAfterScript5(c); | ||
} else if (this._state === BEFORE_STYLE_1) { | ||
else if(this._state === AFTER_SCRIPT_1){ | ||
this._stateAfterScript1(c); | ||
} else if(this._state === AFTER_SCRIPT_2){ | ||
this._stateAfterScript2(c); | ||
} else if(this._state === AFTER_SCRIPT_3){ | ||
this._stateAfterScript3(c); | ||
} else if(this._state === AFTER_SCRIPT_4){ | ||
this._stateAfterScript4(c); | ||
} else if(this._state === AFTER_SCRIPT_5){ | ||
this._stateAfterScript5(c); | ||
} | ||
/* | ||
/* | ||
* style | ||
*/ | ||
else if(this._state === BEFORE_STYLE_1){ | ||
this._stateBeforeStyle1(c); | ||
} else if(this._state === BEFORE_STYLE_2){ | ||
this._stateBeforeStyle2(c); | ||
} else if(this._state === BEFORE_STYLE_3){ | ||
this._stateBeforeStyle3(c); | ||
} else if(this._state === BEFORE_STYLE_4){ | ||
this._stateBeforeStyle4(c); | ||
} | ||
this._stateBeforeStyle1(c); | ||
} else if (this._state === BEFORE_STYLE_2) { | ||
this._stateBeforeStyle2(c); | ||
} else if (this._state === BEFORE_STYLE_3) { | ||
this._stateBeforeStyle3(c); | ||
} else if (this._state === BEFORE_STYLE_4) { | ||
this._stateBeforeStyle4(c); | ||
} else if (this._state === AFTER_STYLE_1) { | ||
this._stateAfterStyle1(c); | ||
} else if (this._state === AFTER_STYLE_2) { | ||
this._stateAfterStyle2(c); | ||
} else if (this._state === AFTER_STYLE_3) { | ||
this._stateAfterStyle3(c); | ||
} else if (this._state === AFTER_STYLE_4) { | ||
this._stateAfterStyle4(c); | ||
} else if (this._state === BEFORE_ENTITY) { | ||
else if(this._state === AFTER_STYLE_1){ | ||
this._stateAfterStyle1(c); | ||
} else if(this._state === AFTER_STYLE_2){ | ||
this._stateAfterStyle2(c); | ||
} else if(this._state === AFTER_STYLE_3){ | ||
this._stateAfterStyle3(c); | ||
} else if(this._state === AFTER_STYLE_4){ | ||
this._stateAfterStyle4(c); | ||
} | ||
/* | ||
/* | ||
* entities | ||
*/ | ||
else if(this._state === BEFORE_ENTITY){ | ||
this._stateBeforeEntity(c); | ||
} else if(this._state === BEFORE_NUMERIC_ENTITY){ | ||
this._stateBeforeNumericEntity(c); | ||
} else if(this._state === IN_NAMED_ENTITY){ | ||
this._stateInNamedEntity(c); | ||
} else if(this._state === IN_NUMERIC_ENTITY){ | ||
this._stateInNumericEntity(c); | ||
} else if(this._state === IN_HEX_ENTITY){ | ||
this._stateInHexEntity(c); | ||
} | ||
this._stateBeforeEntity(c); | ||
} else if (this._state === BEFORE_NUMERIC_ENTITY) { | ||
this._stateBeforeNumericEntity(c); | ||
} else if (this._state === IN_NAMED_ENTITY) { | ||
this._stateInNamedEntity(c); | ||
} else if (this._state === IN_NUMERIC_ENTITY) { | ||
this._stateInNumericEntity(c); | ||
} else if (this._state === IN_HEX_ENTITY) { | ||
this._stateInHexEntity(c); | ||
} else { | ||
this._cbs.onerror(Error("unknown _state"), this._state); | ||
} | ||
else { | ||
this._cbs.onerror(Error("unknown _state"), this._state); | ||
} | ||
this._index++; | ||
} | ||
this._index++; | ||
} | ||
this._cleanup(); | ||
this._cleanup(); | ||
}; | ||
Tokenizer.prototype.pause = function(){ | ||
this._running = false; | ||
Tokenizer.prototype.pause = function() { | ||
this._running = false; | ||
}; | ||
Tokenizer.prototype.resume = function(){ | ||
this._running = true; | ||
Tokenizer.prototype.resume = function() { | ||
this._running = true; | ||
if(this._index < this._buffer.length){ | ||
this._parse(); | ||
} | ||
if(this._ended){ | ||
this._finish(); | ||
} | ||
if (this._index < this._buffer.length) { | ||
this._parse(); | ||
} | ||
if (this._ended) { | ||
this._finish(); | ||
} | ||
}; | ||
Tokenizer.prototype.end = function(chunk){ | ||
if(this._ended) this._cbs.onerror(Error(".end() after done!")); | ||
if(chunk) this.write(chunk); | ||
Tokenizer.prototype.end = function(chunk) { | ||
if (this._ended) this._cbs.onerror(Error(".end() after done!")); | ||
if (chunk) this.write(chunk); | ||
this._ended = true; | ||
this._ended = true; | ||
if(this._running) this._finish(); | ||
if (this._running) this._finish(); | ||
}; | ||
Tokenizer.prototype._finish = function(){ | ||
//if there is remaining data, emit it in a reasonable way | ||
if(this._sectionStart < this._index){ | ||
this._handleTrailingData(); | ||
} | ||
Tokenizer.prototype._finish = function() { | ||
//if there is remaining data, emit it in a reasonable way | ||
if (this._sectionStart < this._index) { | ||
this._handleTrailingData(); | ||
} | ||
this._cbs.onend(); | ||
this._cbs.onend(); | ||
}; | ||
Tokenizer.prototype._handleTrailingData = function(){ | ||
var data = this._buffer.substr(this._sectionStart); | ||
Tokenizer.prototype._handleTrailingData = function() { | ||
var data = this._buffer.substr(this._sectionStart); | ||
if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){ | ||
this._cbs.oncdata(data); | ||
} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){ | ||
this._cbs.oncomment(data); | ||
} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){ | ||
this._parseLegacyEntity(); | ||
if(this._sectionStart < this._index){ | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){ | ||
this._decodeNumericEntity(2, 10); | ||
if(this._sectionStart < this._index){ | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){ | ||
this._decodeNumericEntity(3, 16); | ||
if(this._sectionStart < this._index){ | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if( | ||
this._state !== IN_TAG_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_VALUE && | ||
this._state !== AFTER_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_VALUE_SQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_DQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_NQ && | ||
this._state !== IN_CLOSING_TAG_NAME | ||
){ | ||
this._cbs.ontext(data); | ||
} | ||
//else, ignore remaining data | ||
//TODO add a way to remove current tag | ||
if ( | ||
this._state === IN_CDATA || | ||
this._state === AFTER_CDATA_1 || | ||
this._state === AFTER_CDATA_2 | ||
) { | ||
this._cbs.oncdata(data); | ||
} else if ( | ||
this._state === IN_COMMENT || | ||
this._state === AFTER_COMMENT_1 || | ||
this._state === AFTER_COMMENT_2 | ||
) { | ||
this._cbs.oncomment(data); | ||
} else if (this._state === IN_NAMED_ENTITY && !this._xmlMode) { | ||
this._parseLegacyEntity(); | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if (this._state === IN_NUMERIC_ENTITY && !this._xmlMode) { | ||
this._decodeNumericEntity(2, 10); | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if (this._state === IN_HEX_ENTITY && !this._xmlMode) { | ||
this._decodeNumericEntity(3, 16); | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if ( | ||
this._state !== IN_TAG_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_VALUE && | ||
this._state !== AFTER_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_VALUE_SQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_DQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_NQ && | ||
this._state !== IN_CLOSING_TAG_NAME | ||
) { | ||
this._cbs.ontext(data); | ||
} | ||
//else, ignore remaining data | ||
//TODO add a way to remove current tag | ||
}; | ||
Tokenizer.prototype.reset = function(){ | ||
Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs); | ||
Tokenizer.prototype.reset = function() { | ||
Tokenizer.call( | ||
this, | ||
{ xmlMode: this._xmlMode, decodeEntities: this._decodeEntities }, | ||
this._cbs | ||
); | ||
}; | ||
Tokenizer.prototype.getAbsoluteIndex = function(){ | ||
return this._bufferOffset + this._index; | ||
Tokenizer.prototype.getAbsoluteIndex = function() { | ||
return this._bufferOffset + this._index; | ||
}; | ||
Tokenizer.prototype._getSection = function(){ | ||
return this._buffer.substring(this._sectionStart, this._index); | ||
Tokenizer.prototype._getSection = function() { | ||
return this._buffer.substring(this._sectionStart, this._index); | ||
}; | ||
Tokenizer.prototype._emitToken = function(name){ | ||
this._cbs[name](this._getSection()); | ||
this._sectionStart = -1; | ||
Tokenizer.prototype._emitToken = function(name) { | ||
this._cbs[name](this._getSection()); | ||
this._sectionStart = -1; | ||
}; | ||
Tokenizer.prototype._emitPartial = function(value){ | ||
if(this._baseState !== TEXT){ | ||
this._cbs.onattribdata(value); //TODO implement the new event | ||
} else { | ||
this._cbs.ontext(value); | ||
} | ||
Tokenizer.prototype._emitPartial = function(value) { | ||
if (this._baseState !== TEXT) { | ||
this._cbs.onattribdata(value); //TODO implement the new event | ||
} else { | ||
this._cbs.ontext(value); | ||
} | ||
}; |
@@ -8,11 +8,11 @@ module.exports = Stream; | ||
function Stream(cbs, options){ | ||
var parser = this._parser = new Parser(cbs, options); | ||
var decoder = this._decoder = new StringDecoder(); | ||
function Stream(cbs, options) { | ||
var parser = (this._parser = new Parser(cbs, options)); | ||
var decoder = (this._decoder = new StringDecoder()); | ||
WritableStream.call(this, {decodeStrings: false}); | ||
WritableStream.call(this, { decodeStrings: false }); | ||
this.once("finish", function(){ | ||
parser.end(decoder.end()); | ||
}); | ||
this.once("finish", function() { | ||
parser.end(decoder.end()); | ||
}); | ||
} | ||
@@ -22,6 +22,6 @@ | ||
WritableStream.prototype._write = function(chunk, encoding, cb){ | ||
if(chunk instanceof Buffer) chunk = this._decoder.write(chunk); | ||
this._parser.write(chunk); | ||
cb(); | ||
Stream.prototype._write = function(chunk, encoding, cb) { | ||
if (chunk instanceof Buffer) chunk = this._decoder.write(chunk); | ||
this._parser.write(chunk); | ||
cb(); | ||
}; |
111
package.json
{ | ||
"name": "htmlparser2", | ||
"description": "Fast & forgiving HTML/XML/RSS parser", | ||
"version": "3.10.0", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
"keywords": [ | ||
"html", | ||
"parser", | ||
"streams", | ||
"xml", | ||
"dom", | ||
"rss", | ||
"feed", | ||
"atom" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/fb55/htmlparser2.git" | ||
}, | ||
"bugs": { | ||
"mail": "me@feedic.com", | ||
"url": "http://github.com/fb55/htmlparser2/issues" | ||
}, | ||
"directories": { | ||
"lib": "lib/" | ||
}, | ||
"main": "lib/index.js", | ||
"files": [ | ||
"lib" | ||
], | ||
"scripts": { | ||
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec", | ||
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)", | ||
"test": "mocha && npm run lint", | ||
"lint": "eslint lib test" | ||
}, | ||
"dependencies": { | ||
"domelementtype": "^1.3.0", | ||
"domhandler": "^2.3.0", | ||
"domutils": "^1.5.1", | ||
"entities": "^1.1.1", | ||
"inherits": "^2.0.1", | ||
"readable-stream": "^3.0.6" | ||
}, | ||
"devDependencies": { | ||
"coveralls": "^3.0.1", | ||
"eslint": "^4.19.1", | ||
"istanbul": "^0.4.3", | ||
"mocha": "^5.2.0", | ||
"mocha-lcov-reporter": "^1.2.0" | ||
}, | ||
"browser": { | ||
"readable-stream": false | ||
}, | ||
"license": "MIT" | ||
"name": "htmlparser2", | ||
"description": "Fast & forgiving HTML/XML/RSS parser", | ||
"version": "3.10.1", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
"keywords": [ | ||
"html", | ||
"parser", | ||
"streams", | ||
"xml", | ||
"dom", | ||
"rss", | ||
"feed", | ||
"atom" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/fb55/htmlparser2.git" | ||
}, | ||
"bugs": { | ||
"mail": "me@feedic.com", | ||
"url": "http://github.com/fb55/htmlparser2/issues" | ||
}, | ||
"directories": { | ||
"lib": "lib/" | ||
}, | ||
"main": "lib/index.js", | ||
"files": [ | ||
"lib" | ||
], | ||
"scripts": { | ||
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec", | ||
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)", | ||
"test": "mocha && npm run lint", | ||
"lint": "eslint lib test" | ||
}, | ||
"dependencies": { | ||
"domelementtype": "^1.3.1", | ||
"domhandler": "^2.3.0", | ||
"domutils": "^1.5.1", | ||
"entities": "^1.1.1", | ||
"inherits": "^2.0.1", | ||
"readable-stream": "^3.1.1" | ||
}, | ||
"devDependencies": { | ||
"coveralls": "^3.0.1", | ||
"eslint": "^5.13.0", | ||
"istanbul": "^0.4.3", | ||
"mocha": "^5.2.0", | ||
"mocha-lcov-reporter": "^1.2.0" | ||
}, | ||
"browser": { | ||
"readable-stream": false | ||
}, | ||
"license": "MIT", | ||
"prettier": { | ||
"tabWidth": 4 | ||
} | ||
} |
55027
1480
Updateddomelementtype@^1.3.1
Updatedreadable-stream@^3.1.1