htmlparser2
Advanced tools
Comparing version 3.10.1 to 4.0.0
@@ -1,57 +0,59 @@ | ||
module.exports = CollectingHandler; | ||
function CollectingHandler(cbs) { | ||
this._cbs = cbs || {}; | ||
this.events = []; | ||
} | ||
var EVENTS = require("./").EVENTS; | ||
Object.keys(EVENTS).forEach(function(name) { | ||
if (EVENTS[name] === 0) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function() { | ||
this.events.push([name]); | ||
if (this._cbs[name]) this._cbs[name](); | ||
}; | ||
} else if (EVENTS[name] === 1) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a) { | ||
this.events.push([name, a]); | ||
if (this._cbs[name]) this._cbs[name](a); | ||
}; | ||
} else if (EVENTS[name] === 2) { | ||
name = "on" + name; | ||
CollectingHandler.prototype[name] = function(a, b) { | ||
this.events.push([name, a, b]); | ||
if (this._cbs[name]) this._cbs[name](a, b); | ||
}; | ||
} else { | ||
throw Error("wrong number of arguments"); | ||
} | ||
}); | ||
CollectingHandler.prototype.onreset = function() { | ||
this.events = []; | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
"use strict"; | ||
// Inheritance helper; an already-present `this.__extends` is reused instead of redefining it.
// `extendStatics` chooses a strategy on its first call (Object.setPrototypeOf, else a
// `__proto__` assignment, else copying b's own properties) and overwrites itself with it.
// The returned function applies extendStatics(d, b), then replaces d.prototype:
// Object.create(null) when b is null, otherwise an object that delegates to
// b.prototype and whose `constructor` is d.
var __extends = (this && this.__extends) || (function () { | ||
var extendStatics = function (d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
return function (d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
})(); | ||
// Interop helper (an existing `this.__importDefault` is reused): a truthy value flagged
// `__esModule` is returned unchanged; anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
CollectingHandler.prototype.restart = function() { | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
for (var i = 0, len = this.events.length; i < len; i++) { | ||
if (this._cbs[this.events[i][0]]) { | ||
var num = this.events[i].length; | ||
if (num === 1) { | ||
this._cbs[this.events[i][0]](); | ||
} else if (num === 2) { | ||
this._cbs[this.events[i][0]](this.events[i][1]); | ||
} else { | ||
this._cbs[this.events[i][0]]( | ||
this.events[i][1], | ||
this.events[i][2] | ||
); | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var MultiplexHandler_1 = __importDefault(require("./MultiplexHandler")); | ||
var CollectingHandler = /** @class */ (function (_super) { | ||
__extends(CollectingHandler, _super); | ||
function CollectingHandler(cbs) { | ||
if (cbs === void 0) { cbs = {}; } | ||
var _this = _super.call(this, function (name) { | ||
var _a; | ||
var args = []; | ||
for (var _i = 1; _i < arguments.length; _i++) { | ||
args[_i - 1] = arguments[_i]; | ||
} | ||
_this.events.push([name].concat(args)); | ||
// @ts-ignore | ||
if (_this._cbs[name]) | ||
(_a = _this._cbs)[name].apply(_a, args); | ||
}) || this; | ||
_this._cbs = cbs; | ||
_this.events = []; | ||
return _this; | ||
} | ||
CollectingHandler.prototype.onreset = function () { | ||
this.events = []; | ||
if (this._cbs.onreset) | ||
this._cbs.onreset(); | ||
}; | ||
CollectingHandler.prototype.restart = function () { | ||
var _a; | ||
if (this._cbs.onreset) | ||
this._cbs.onreset(); | ||
for (var i = 0; i < this.events.length; i++) { | ||
var _b = this.events[i], name_1 = _b[0], args = _b.slice(1); | ||
if (!this._cbs[name_1]) { | ||
continue; | ||
} | ||
// @ts-ignore | ||
(_a = this._cbs)[name_1].apply(_a, args); | ||
} | ||
} | ||
}; | ||
}; | ||
return CollectingHandler; | ||
}(MultiplexHandler_1.default)); | ||
exports.CollectingHandler = CollectingHandler; |
@@ -1,13 +0,119 @@ | ||
var DomHandler = require("domhandler"); | ||
var DomUtils = require("domutils"); | ||
//TODO: make this a streamable handler | ||
function FeedHandler(callback, options) { | ||
this.init(callback, options); | ||
} | ||
require("inherits")(FeedHandler, DomHandler); | ||
FeedHandler.prototype.init = DomHandler; | ||
"use strict"; | ||
// Inheritance helper; an already-present `this.__extends` is reused instead of redefining it.
// `extendStatics` chooses a strategy on its first call (Object.setPrototypeOf, else a
// `__proto__` assignment, else copying b's own properties) and overwrites itself with it.
// The returned function applies extendStatics(d, b), then replaces d.prototype:
// Object.create(null) when b is null, otherwise an object that delegates to
// b.prototype and whose `constructor` is d.
var __extends = (this && this.__extends) || (function () { | ||
var extendStatics = function (d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
return function (d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
})(); | ||
// Interop helper (an existing `this.__importDefault` is reused): a truthy value flagged
// `__esModule` is returned unchanged; anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
// Namespace-import helper (an existing `this.__importStar` is reused): a truthy value
// flagged `__esModule` is returned unchanged. Otherwise a fresh object is built that
// receives the value's own enumerable properties (skipped when the value is
// null/undefined) and a `default` property pointing at the original value.
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; | ||
result["default"] = mod; | ||
return result; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var domhandler_1 = __importDefault(require("domhandler")); | ||
var DomUtils = __importStar(require("domutils")); | ||
var Parser_1 = require("./Parser"); | ||
//TODO: Consume data as it is coming in | ||
var FeedHandler = /** @class */ (function (_super) {
    __extends(FeedHandler, _super);
    /**
     * DOM handler that, when parsing ends, condenses the collected DOM into a
     * plain feed summary stored on `this.feed`.
     *
     * @param callback Forwarded to the base handler. If an object is passed
     *                 here instead, it is treated as `options` and no callback
     *                 is forwarded.
     * @param options  Forwarded to the base handler.
     */
    function FeedHandler(callback, options) {
        var _this = this;
        if (typeof callback === "object" && callback !== null) {
            // Called as `new FeedHandler(options)`. The options have to be
            // captured BEFORE the callback slot is cleared; the reverse order
            // assigned `undefined` to both and silently dropped the options.
            options = callback;
            callback = undefined;
        }
        _this = _super.call(this, callback, options) || this;
        return _this;
    }
    /**
     * Builds `this.feed` from `this.dom` and reports completion through
     * `handleCallback`: `null` when a feed root ("rss", "feed" or "rdf:RDF")
     * was found, otherwise an Error.
     *
     * Feed fields are only set when the matching element has non-empty text:
     * `type`, `id`, `title`, `link`, `description`, `updated` (Date),
     * `author`, and `items` (each with `id`, `title`, `link`, `description`,
     * `pubDate`).
     */
    FeedHandler.prototype.onend = function () {
        var feed = {};
        var feedRoot = getOneElement(isValidFeed, this.dom);
        if (feedRoot) {
            if (feedRoot.name === "feed") {
                // Atom: metadata and <entry> elements hang off the root itself.
                var childs = feedRoot.children;
                feed.type = "atom";
                addConditionally(feed, "id", "id", childs);
                addConditionally(feed, "title", "title", childs);
                // Atom links carry their target in the `href` attribute.
                var href = getAttribute("href", getOneElement("link", childs));
                if (href) {
                    feed.link = href;
                }
                addConditionally(feed, "description", "subtitle", childs);
                // `fetch` is this file's text-extraction helper, not the global.
                var updated = fetch("updated", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }
                // Searched recursively (last argument).
                addConditionally(feed, "author", "email", childs, true);
                feed.items = getElements("entry", childs).map(function (item) {
                    var entry = {};
                    var children = item.children;
                    addConditionally(entry, "id", "id", children);
                    addConditionally(entry, "title", "title", children);
                    var href = getAttribute("href", getOneElement("link", children));
                    if (href) {
                        entry.link = href;
                    }
                    // Prefer <summary>; fall back to <content>.
                    var description = fetch("summary", children) ||
                        fetch("content", children);
                    if (description) {
                        entry.description = description;
                    }
                    var pubDate = fetch("updated", children);
                    if (pubDate) {
                        entry.pubDate = new Date(pubDate);
                    }
                    return entry;
                });
            }
            else {
                // RSS / RDF: metadata lives under <channel>. A root without a
                // <channel> previously threw a TypeError on `.children`; it is
                // now treated as a channel with no children.
                var channel = getOneElement("channel", feedRoot.children);
                var childs = channel ? channel.children : [];
                // "rss" or "rdf" (first three characters of the root name).
                feed.type = feedRoot.name.substr(0, 3);
                feed.id = "";
                addConditionally(feed, "title", "title", childs);
                addConditionally(feed, "link", "link", childs);
                addConditionally(feed, "description", "description", childs);
                var updated = fetch("lastBuildDate", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }
                addConditionally(feed, "author", "managingEditor", childs, true);
                // Items are collected from the root's children, not only from
                // the channel.
                feed.items = getElements("item", feedRoot.children).map(function (item) {
                    var entry = {};
                    var children = item.children;
                    addConditionally(entry, "id", "guid", children);
                    addConditionally(entry, "title", "title", children);
                    addConditionally(entry, "link", "link", children);
                    addConditionally(entry, "description", "description", children);
                    var pubDate = fetch("pubDate", children);
                    if (pubDate) {
                        entry.pubDate = new Date(pubDate);
                    }
                    return entry;
                });
            }
        }
        this.feed = feed;
        this.handleCallback(feedRoot ? null : Error("couldn't find root of feed"));
    };
    return FeedHandler;
}(domhandler_1.default));
exports.FeedHandler = FeedHandler; | ||
function getElements(what, where) { | ||
@@ -20,96 +126,35 @@ return DomUtils.getElementsByTagName(what, where, true); | ||
function fetch(what, where, recurse) { | ||
return DomUtils.getText( | ||
DomUtils.getElementsByTagName(what, where, recurse, 1) | ||
).trim(); | ||
if (recurse === void 0) { recurse = false; } | ||
return DomUtils.getText(DomUtils.getElementsByTagName(what, where, recurse, 1)).trim(); | ||
} | ||
/**
 * Reads a single attribute from an element.
 *
 * @param name Attribute name to look up.
 * @param elem Element to read from; may be null/undefined.
 * @returns The attribute value (`undefined` when the attribute is not set), or
 *          `null` when there is no element or the element has no `attribs`.
 */
function getAttribute(name, elem) {
    // Guard `attribs` as well as the element itself, so a node without an
    // attribute map yields null instead of throwing.
    if (!elem || !elem.attribs) {
        return null;
    }
    return elem.attribs[name];
}
function addConditionally(obj, prop, what, where, recurse) { | ||
if (recurse === void 0) { recurse = false; } | ||
var tmp = fetch(what, where, recurse); | ||
if (tmp) obj[prop] = tmp; | ||
// @ts-ignore | ||
if (tmp) | ||
obj[prop] = tmp; | ||
} | ||
var isValidFeed = function(value) { | ||
function isValidFeed(value) { | ||
return value === "rss" || value === "feed" || value === "rdf:RDF"; | ||
}; | ||
FeedHandler.prototype.onend = function() { | ||
var feed = {}, | ||
feedRoot = getOneElement(isValidFeed, this.dom), | ||
tmp, | ||
childs; | ||
if (feedRoot) { | ||
if (feedRoot.name === "feed") { | ||
childs = feedRoot.children; | ||
feed.type = "atom"; | ||
addConditionally(feed, "id", "id", childs); | ||
addConditionally(feed, "title", "title", childs); | ||
if ( | ||
(tmp = getOneElement("link", childs)) && | ||
(tmp = tmp.attribs) && | ||
(tmp = tmp.href) | ||
) | ||
feed.link = tmp; | ||
addConditionally(feed, "description", "subtitle", childs); | ||
if ((tmp = fetch("updated", childs))) feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "email", childs, true); | ||
feed.items = getElements("entry", childs).map(function(item) { | ||
var entry = {}, | ||
tmp; | ||
item = item.children; | ||
addConditionally(entry, "id", "id", item); | ||
addConditionally(entry, "title", "title", item); | ||
if ( | ||
(tmp = getOneElement("link", item)) && | ||
(tmp = tmp.attribs) && | ||
(tmp = tmp.href) | ||
) | ||
entry.link = tmp; | ||
if ((tmp = fetch("summary", item) || fetch("content", item))) | ||
entry.description = tmp; | ||
if ((tmp = fetch("updated", item))) | ||
entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} else { | ||
childs = getOneElement("channel", feedRoot.children).children; | ||
feed.type = feedRoot.name.substr(0, 3); | ||
feed.id = ""; | ||
addConditionally(feed, "title", "title", childs); | ||
addConditionally(feed, "link", "link", childs); | ||
addConditionally(feed, "description", "description", childs); | ||
if ((tmp = fetch("lastBuildDate", childs))) | ||
feed.updated = new Date(tmp); | ||
addConditionally(feed, "author", "managingEditor", childs, true); | ||
feed.items = getElements("item", feedRoot.children).map(function( | ||
item | ||
) { | ||
var entry = {}, | ||
tmp; | ||
item = item.children; | ||
addConditionally(entry, "id", "guid", item); | ||
addConditionally(entry, "title", "title", item); | ||
addConditionally(entry, "link", "link", item); | ||
addConditionally(entry, "description", "description", item); | ||
if ((tmp = fetch("pubDate", item))) | ||
entry.pubDate = new Date(tmp); | ||
return entry; | ||
}); | ||
} | ||
} | ||
this.dom = feed; | ||
DomHandler.prototype._handleCallback.call( | ||
this, | ||
feedRoot ? null : Error("couldn't find root of feed") | ||
); | ||
}; | ||
module.exports = FeedHandler; | ||
} | ||
var defaultOptions = { xmlMode: true }; | ||
/**
 * Parses a feed document in one go and returns the extracted feed object.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optional parsing options; `defaultOptions` is used when
 *                omitted. If you pass your own, you probably want to set
 *                `xmlMode` to `true`.
 */
function parseFeed(feed, options) {
    // Only an omitted/undefined argument falls back to the defaults.
    var settings = options === undefined ? defaultOptions : options;
    var feedHandler = new FeedHandler(settings);
    var parser = new Parser_1.Parser(feedHandler, settings);
    parser.end(feed);
    return feedHandler.feed;
}
exports.parseFeed = parseFeed; |
144
lib/index.js
@@ -1,72 +0,76 @@ | ||
var Parser = require("./Parser.js"); | ||
var DomHandler = require("domhandler"); | ||
function defineProp(name, value) { | ||
delete module.exports[name]; | ||
module.exports[name] = value; | ||
return value; | ||
"use strict"; | ||
// Re-export helper: copies every property enumerated by `for...in` on `m` onto
// `exports`, skipping names that `exports` already owns.
function __export(m) { | ||
for (var p in m) if (!exports.hasOwnProperty(p)) exports[p] = m[p]; | ||
} | ||
module.exports = { | ||
Parser: Parser, | ||
Tokenizer: require("./Tokenizer.js"), | ||
ElementType: require("domelementtype"), | ||
DomHandler: DomHandler, | ||
get FeedHandler() { | ||
return defineProp("FeedHandler", require("./FeedHandler.js")); | ||
}, | ||
get Stream() { | ||
return defineProp("Stream", require("./Stream.js")); | ||
}, | ||
get WritableStream() { | ||
return defineProp("WritableStream", require("./WritableStream.js")); | ||
}, | ||
get ProxyHandler() { | ||
return defineProp("ProxyHandler", require("./ProxyHandler.js")); | ||
}, | ||
get DomUtils() { | ||
return defineProp("DomUtils", require("domutils")); | ||
}, | ||
get CollectingHandler() { | ||
return defineProp( | ||
"CollectingHandler", | ||
require("./CollectingHandler.js") | ||
); | ||
}, | ||
// For legacy support | ||
DefaultHandler: DomHandler, | ||
get RssHandler() { | ||
return defineProp("RssHandler", this.FeedHandler); | ||
}, | ||
//helper methods | ||
parseDOM: function(data, options) { | ||
var handler = new DomHandler(options); | ||
new Parser(handler, options).end(data); | ||
return handler.dom; | ||
}, | ||
parseFeed: function(feed, options) { | ||
var handler = new module.exports.FeedHandler(options); | ||
new Parser(handler, options).end(feed); | ||
return handler.dom; | ||
}, | ||
createDomStream: function(cb, options, elementCb) { | ||
var handler = new DomHandler(cb, options, elementCb); | ||
return new Parser(handler, options); | ||
}, | ||
// List of all events that the parser emits | ||
EVENTS: { | ||
/* Format: eventname: number of arguments */ | ||
attribute: 2, | ||
cdatastart: 0, | ||
cdataend: 0, | ||
text: 1, | ||
processinginstruction: 2, | ||
comment: 1, | ||
commentend: 0, | ||
closetag: 1, | ||
opentag: 2, | ||
opentagname: 1, | ||
error: 1, | ||
end: 0 | ||
} | ||
// Namespace-import helper (an existing `this.__importStar` is reused): a truthy value
// flagged `__esModule` is returned unchanged. Otherwise a fresh object is built that
// receives the value's own enumerable properties (skipped when the value is
// null/undefined) and a `default` property pointing at the original value.
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; | ||
result["default"] = mod; | ||
return result; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var Parser_1 = require("./Parser"); | ||
exports.Parser = Parser_1.Parser; | ||
var domhandler_1 = require("domhandler"); | ||
exports.DomHandler = domhandler_1.DomHandler; | ||
exports.DefaultHandler = domhandler_1.DomHandler; | ||
// Helper methods | ||
/**
 * Parses the given data in a single pass and returns the DOM that the
 * handler built.
 *
 * @param data The data that should be parsed.
 * @param options Optional options, handed to both the parser and the DOM builder.
 */
function parseDOM(data, options) {
    // No completion callback is needed: the DOM is read back synchronously.
    var domHandler = new domhandler_1.DomHandler(undefined, options);
    var parser = new Parser_1.Parser(domHandler, options);
    parser.end(data);
    return domHandler.dom;
}
exports.parseDOM = parseDOM; | ||
/**
 * Builds a parser that feeds a freshly created DOM handler.
 *
 * @param cb Callback invoked once parsing has been completed.
 * @param options Optional options, handed to both the parser and the DOM builder.
 * @param elementCb Optional callback invoked every time a tag has been completed inside of the DOM.
 */
function createDomStream(cb, options, elementCb) {
    return new Parser_1.Parser(new domhandler_1.DomHandler(cb, options, elementCb), options);
}
exports.createDomStream = createDomStream; | ||
var Tokenizer_1 = require("./Tokenizer"); | ||
exports.Tokenizer = Tokenizer_1.default; | ||
var ElementType = __importStar(require("domelementtype")); | ||
exports.ElementType = ElementType; | ||
/** | ||
* List of all events that the parser emits. | ||
* | ||
* Format: eventname: number of arguments. | ||
*/ | ||
exports.EVENTS = { | ||
attribute: 2, | ||
cdatastart: 0, | ||
cdataend: 0, | ||
text: 1, | ||
processinginstruction: 2, | ||
comment: 1, | ||
commentend: 0, | ||
closetag: 1, | ||
opentag: 2, | ||
opentagname: 1, | ||
error: 1, | ||
end: 0 | ||
}; | ||
/* | ||
All of the following exports exist for backwards-compatibility. | ||
They should probably be removed eventually. | ||
*/ | ||
__export(require("./FeedHandler")); | ||
__export(require("./WritableStream")); | ||
__export(require("./CollectingHandler")); | ||
var DomUtils = __importStar(require("domutils")); | ||
exports.DomUtils = DomUtils; | ||
var FeedHandler_1 = require("./FeedHandler"); | ||
exports.RssHandler = FeedHandler_1.FeedHandler; |
@@ -1,49 +0,44 @@ | ||
var Tokenizer = require("./Tokenizer.js"); | ||
/* | ||
Options: | ||
xmlMode: Disables the special behavior for script/style tags (false by default) | ||
lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`) | ||
lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`) | ||
*/ | ||
/* | ||
Callbacks: | ||
oncdataend, | ||
oncdatastart, | ||
onclosetag, | ||
oncomment, | ||
oncommentend, | ||
onerror, | ||
onopentag, | ||
onprocessinginstruction, | ||
onreset, | ||
ontext | ||
*/ | ||
var formTags = { | ||
input: true, | ||
option: true, | ||
optgroup: true, | ||
select: true, | ||
button: true, | ||
datalist: true, | ||
textarea: true | ||
"use strict"; | ||
// Inheritance helper; an already-present `this.__extends` is reused instead of redefining it.
// `extendStatics` chooses a strategy on its first call (Object.setPrototypeOf, else a
// `__proto__` assignment, else copying b's own properties) and overwrites itself with it.
// The returned function applies extendStatics(d, b), then replaces d.prototype:
// Object.create(null) when b is null, otherwise an object that delegates to
// b.prototype and whose `constructor` is d.
var __extends = (this && this.__extends) || (function () { | ||
var extendStatics = function (d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
return function (d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
})(); | ||
// Interop helper (an existing `this.__importDefault` is reused): a truthy value flagged
// `__esModule` is returned unchanged; anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var Tokenizer_1 = __importDefault(require("./Tokenizer")); | ||
var events_1 = require("events"); | ||
var formTags = new Set([ | ||
"input", | ||
"option", | ||
"optgroup", | ||
"select", | ||
"button", | ||
"datalist", | ||
"textarea" | ||
]); | ||
var pTag = new Set(["p"]); | ||
var openImpliesClose = { | ||
tr: { tr: true, th: true, td: true }, | ||
th: { th: true }, | ||
td: { thead: true, th: true, td: true }, | ||
body: { head: true, link: true, script: true }, | ||
li: { li: true }, | ||
p: { p: true }, | ||
h1: { p: true }, | ||
h2: { p: true }, | ||
h3: { p: true }, | ||
h4: { p: true }, | ||
h5: { p: true }, | ||
h6: { p: true }, | ||
tr: new Set(["tr", "th", "td"]), | ||
th: new Set(["th"]), | ||
td: new Set(["thead", "th", "td"]), | ||
body: new Set(["head", "link", "script"]), | ||
li: new Set(["li"]), | ||
p: pTag, | ||
h1: pTag, | ||
h2: pTag, | ||
h3: pTag, | ||
h4: pTag, | ||
h5: pTag, | ||
h6: pTag, | ||
select: formTags, | ||
@@ -55,329 +50,322 @@ input: formTags, | ||
textarea: formTags, | ||
option: { option: true }, | ||
optgroup: { optgroup: true } | ||
option: new Set(["option"]), | ||
optgroup: new Set(["optgroup", "option"]), | ||
dd: new Set(["dt", "dd"]), | ||
dt: new Set(["dt", "dd"]), | ||
address: pTag, | ||
article: pTag, | ||
aside: pTag, | ||
blockquote: pTag, | ||
details: pTag, | ||
div: pTag, | ||
dl: pTag, | ||
fieldset: pTag, | ||
figcaption: pTag, | ||
figure: pTag, | ||
footer: pTag, | ||
form: pTag, | ||
header: pTag, | ||
hr: pTag, | ||
main: pTag, | ||
nav: pTag, | ||
ol: pTag, | ||
pre: pTag, | ||
section: pTag, | ||
table: pTag, | ||
ul: pTag, | ||
rt: new Set(["rt", "rp"]), | ||
rp: new Set(["rt", "rp"]), | ||
tbody: new Set(["thead", "tbody"]), | ||
tfoot: new Set(["thead", "tbody"]) | ||
}; | ||
var voidElements = { | ||
__proto__: null, | ||
area: true, | ||
base: true, | ||
basefont: true, | ||
br: true, | ||
col: true, | ||
command: true, | ||
embed: true, | ||
frame: true, | ||
hr: true, | ||
img: true, | ||
input: true, | ||
isindex: true, | ||
keygen: true, | ||
link: true, | ||
meta: true, | ||
param: true, | ||
source: true, | ||
track: true, | ||
wbr: true | ||
}; | ||
var foreignContextElements = { | ||
__proto__: null, | ||
math: true, | ||
svg: true | ||
}; | ||
var htmlIntegrationElements = { | ||
__proto__: null, | ||
mi: true, | ||
mo: true, | ||
mn: true, | ||
ms: true, | ||
mtext: true, | ||
"annotation-xml": true, | ||
foreignObject: true, | ||
desc: true, | ||
title: true | ||
}; | ||
var re_nameEnd = /\s|\//; | ||
function Parser(cbs, options) { | ||
this._options = options || {}; | ||
this._cbs = cbs || {}; | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
this._foreignContext = []; | ||
this.startIndex = 0; | ||
this.endIndex = null; | ||
this._lowerCaseTagNames = | ||
"lowerCaseTags" in this._options | ||
? !!this._options.lowerCaseTags | ||
: !this._options.xmlMode; | ||
this._lowerCaseAttributeNames = | ||
"lowerCaseAttributeNames" in this._options | ||
? !!this._options.lowerCaseAttributeNames | ||
: !this._options.xmlMode; | ||
if (this._options.Tokenizer) { | ||
Tokenizer = this._options.Tokenizer; | ||
var voidElements = new Set([ | ||
"area", | ||
"base", | ||
"basefont", | ||
"br", | ||
"col", | ||
"command", | ||
"embed", | ||
"frame", | ||
"hr", | ||
"img", | ||
"input", | ||
"isindex", | ||
"keygen", | ||
"link", | ||
"meta", | ||
"param", | ||
"source", | ||
"track", | ||
"wbr" | ||
]); | ||
var foreignContextElements = new Set(["math", "svg"]); | ||
var htmlIntegrationElements = new Set([ | ||
"mi", | ||
"mo", | ||
"mn", | ||
"ms", | ||
"mtext", | ||
"annotation-xml", | ||
"foreignObject", | ||
"desc", | ||
"title" | ||
]); | ||
var reNameEnd = /\s|\//; | ||
var Parser = /** @class */ (function (_super) { | ||
__extends(Parser, _super); | ||
function Parser(cbs, options) { | ||
var _this = _super.call(this) || this; | ||
_this._tagname = ""; | ||
_this._attribname = ""; | ||
_this._attribvalue = ""; | ||
_this._attribs = null; | ||
_this._stack = []; | ||
_this._foreignContext = []; | ||
_this.startIndex = 0; | ||
_this.endIndex = null; | ||
// Aliases for backwards compatibility | ||
_this.parseChunk = Parser.prototype.write; | ||
_this.done = Parser.prototype.end; | ||
_this._options = options || {}; | ||
_this._cbs = cbs || {}; | ||
_this._tagname = ""; | ||
_this._attribname = ""; | ||
_this._attribvalue = ""; | ||
_this._attribs = null; | ||
_this._stack = []; | ||
_this._foreignContext = []; | ||
_this.startIndex = 0; | ||
_this.endIndex = null; | ||
_this._lowerCaseTagNames = | ||
"lowerCaseTags" in _this._options | ||
? !!_this._options.lowerCaseTags | ||
: !_this._options.xmlMode; | ||
_this._lowerCaseAttributeNames = | ||
"lowerCaseAttributeNames" in _this._options | ||
? !!_this._options.lowerCaseAttributeNames | ||
: !_this._options.xmlMode; | ||
_this._tokenizer = new (_this._options.Tokenizer || Tokenizer_1.default)(_this._options, _this); | ||
if (_this._cbs.onparserinit) | ||
_this._cbs.onparserinit(_this); | ||
return _this; | ||
} | ||
this._tokenizer = new Tokenizer(this._options, this); | ||
if (this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
} | ||
require("inherits")(Parser, require("events").EventEmitter); | ||
Parser.prototype._updatePosition = function(initialOffset) { | ||
if (this.endIndex === null) { | ||
if (this._tokenizer._sectionStart <= initialOffset) { | ||
this.startIndex = 0; | ||
} else { | ||
this.startIndex = this._tokenizer._sectionStart - initialOffset; | ||
Parser.prototype._updatePosition = function (initialOffset) { | ||
if (this.endIndex === null) { | ||
if (this._tokenizer._sectionStart <= initialOffset) { | ||
this.startIndex = 0; | ||
} | ||
else { | ||
this.startIndex = this._tokenizer._sectionStart - initialOffset; | ||
} | ||
} | ||
} else this.startIndex = this.endIndex + 1; | ||
this.endIndex = this._tokenizer.getAbsoluteIndex(); | ||
}; | ||
//Tokenizer event handlers | ||
Parser.prototype.ontext = function(data) { | ||
this._updatePosition(1); | ||
this.endIndex--; | ||
if (this._cbs.ontext) this._cbs.ontext(data); | ||
}; | ||
Parser.prototype.onopentagname = function(name) { | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._tagname = name; | ||
if (!this._options.xmlMode && name in openImpliesClose) { | ||
for ( | ||
var el; | ||
(el = this._stack[this._stack.length - 1]) in | ||
openImpliesClose[name]; | ||
this.onclosetag(el) | ||
); | ||
} | ||
if (this._options.xmlMode || !(name in voidElements)) { | ||
this._stack.push(name); | ||
if (name in foreignContextElements) this._foreignContext.push(true); | ||
else if (name in htmlIntegrationElements) | ||
this._foreignContext.push(false); | ||
} | ||
if (this._cbs.onopentagname) this._cbs.onopentagname(name); | ||
if (this._cbs.onopentag) this._attribs = {}; | ||
}; | ||
Parser.prototype.onopentagend = function() { | ||
this._updatePosition(1); | ||
if (this._attribs) { | ||
else | ||
this.startIndex = this.endIndex + 1; | ||
this.endIndex = this._tokenizer.getAbsoluteIndex(); | ||
}; | ||
//Tokenizer event handlers | ||
Parser.prototype.ontext = function (data) { | ||
this._updatePosition(1); | ||
// @ts-ignore | ||
this.endIndex--; | ||
if (this._cbs.ontext) | ||
this._cbs.ontext(data); | ||
}; | ||
Parser.prototype.onopentagname = function (name) { | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._tagname = name; | ||
if (!this._options.xmlMode && name in openImpliesClose) { | ||
for (var el = void 0; | ||
// @ts-ignore | ||
openImpliesClose[name].has((el = this._stack[this._stack.length - 1])); this.onclosetag(el)) | ||
; | ||
} | ||
if (this._options.xmlMode || !voidElements.has(name)) { | ||
this._stack.push(name); | ||
if (foreignContextElements.has(name)) { | ||
this._foreignContext.push(true); | ||
} | ||
else if (htmlIntegrationElements.has(name)) { | ||
this._foreignContext.push(false); | ||
} | ||
} | ||
if (this._cbs.onopentagname) | ||
this._cbs.onopentagname(name); | ||
if (this._cbs.onopentag) | ||
this._cbs.onopentag(this._tagname, this._attribs); | ||
this._attribs = null; | ||
} | ||
if ( | ||
!this._options.xmlMode && | ||
this._cbs.onclosetag && | ||
this._tagname in voidElements | ||
) { | ||
this._cbs.onclosetag(this._tagname); | ||
} | ||
this._tagname = ""; | ||
}; | ||
Parser.prototype.onclosetag = function(name) { | ||
this._updatePosition(1); | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
if (name in foreignContextElements || name in htmlIntegrationElements) { | ||
this._foreignContext.pop(); | ||
} | ||
if ( | ||
this._stack.length && | ||
(!(name in voidElements) || this._options.xmlMode) | ||
) { | ||
var pos = this._stack.lastIndexOf(name); | ||
if (pos !== -1) { | ||
if (this._cbs.onclosetag) { | ||
pos = this._stack.length - pos; | ||
while (pos--) this._cbs.onclosetag(this._stack.pop()); | ||
} else this._stack.length = pos; | ||
} else if (name === "p" && !this._options.xmlMode) { | ||
this._attribs = {}; | ||
}; | ||
Parser.prototype.onopentagend = function () { | ||
this._updatePosition(1); | ||
if (this._attribs) { | ||
if (this._cbs.onopentag) { | ||
this._cbs.onopentag(this._tagname, this._attribs); | ||
} | ||
this._attribs = null; | ||
} | ||
if (!this._options.xmlMode && | ||
this._cbs.onclosetag && | ||
voidElements.has(this._tagname)) { | ||
this._cbs.onclosetag(this._tagname); | ||
} | ||
this._tagname = ""; | ||
}; | ||
Parser.prototype.onclosetag = function (name) { | ||
this._updatePosition(1); | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
if (foreignContextElements.has(name) || | ||
htmlIntegrationElements.has(name)) { | ||
this._foreignContext.pop(); | ||
} | ||
if (this._stack.length && | ||
(this._options.xmlMode || !voidElements.has(name))) { | ||
var pos = this._stack.lastIndexOf(name); | ||
if (pos !== -1) { | ||
if (this._cbs.onclosetag) { | ||
pos = this._stack.length - pos; | ||
// @ts-ignore | ||
while (pos--) | ||
this._cbs.onclosetag(this._stack.pop()); | ||
} | ||
else | ||
this._stack.length = pos; | ||
} | ||
else if (name === "p" && !this._options.xmlMode) { | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
} | ||
else if (!this._options.xmlMode && (name === "br" || name === "p")) { | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
} else if (!this._options.xmlMode && (name === "br" || name === "p")) { | ||
this.onopentagname(name); | ||
this._closeCurrentTag(); | ||
} | ||
}; | ||
Parser.prototype.onselfclosingtag = function() { | ||
if ( | ||
this._options.xmlMode || | ||
this._options.recognizeSelfClosing || | ||
this._foreignContext[this._foreignContext.length - 1] | ||
) { | ||
this._closeCurrentTag(); | ||
} else { | ||
}; | ||
Parser.prototype.onselfclosingtag = function () { | ||
if (this._options.xmlMode || | ||
this._options.recognizeSelfClosing || | ||
this._foreignContext[this._foreignContext.length - 1]) { | ||
this._closeCurrentTag(); | ||
} | ||
else { | ||
this.onopentagend(); | ||
} | ||
}; | ||
Parser.prototype._closeCurrentTag = function () { | ||
var name = this._tagname; | ||
this.onopentagend(); | ||
} | ||
}; | ||
Parser.prototype._closeCurrentTag = function() { | ||
var name = this._tagname; | ||
this.onopentagend(); | ||
//self-closing tags will be on the top of the stack | ||
//(cheaper check than in onclosetag) | ||
if (this._stack[this._stack.length - 1] === name) { | ||
//self-closing tags will be on the top of the stack | ||
//(cheaper check than in onclosetag) | ||
if (this._stack[this._stack.length - 1] === name) { | ||
if (this._cbs.onclosetag) { | ||
this._cbs.onclosetag(name); | ||
} | ||
this._stack.pop(); | ||
} | ||
}; | ||
Parser.prototype.onattribname = function (name) { | ||
if (this._lowerCaseAttributeNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._attribname = name; | ||
}; | ||
Parser.prototype.onattribdata = function (value) { | ||
this._attribvalue += value; | ||
}; | ||
Parser.prototype.onattribend = function () { | ||
if (this._cbs.onattribute) | ||
this._cbs.onattribute(this._attribname, this._attribvalue); | ||
if (this._attribs && | ||
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)) { | ||
this._attribs[this._attribname] = this._attribvalue; | ||
} | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
}; | ||
Parser.prototype._getInstructionName = function (value) { | ||
var idx = value.search(reNameEnd); | ||
var name = idx < 0 ? value : value.substr(0, idx); | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
return name; | ||
}; | ||
Parser.prototype.ondeclaration = function (value) { | ||
if (this._cbs.onprocessinginstruction) { | ||
var name_1 = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("!" + name_1, "!" + value); | ||
} | ||
}; | ||
/**
 * Forwards a processing instruction to the handler's
 * `onprocessinginstruction` callback, prefixing both the extracted name and
 * the raw text with "?". Does nothing when the handler has no such callback.
 *
 * @param {string} value Processing instruction text.
 */
Parser.prototype.onprocessinginstruction = function (value) {
    if (!this._cbs.onprocessinginstruction) {
        return;
    }
    var instructionName = this._getInstructionName(value);
    this._cbs.onprocessinginstruction("?" + instructionName, "?" + value);
};
/**
 * Reports a comment to the handler: calls `_updatePosition(4)`, then the
 * optional `oncomment` callback with the comment text, then the optional
 * `oncommentend` callback.
 *
 * @param {string} value Comment text.
 */
Parser.prototype.oncomment = function (value) {
    this._updatePosition(4);
    if (this._cbs.oncomment) {
        this._cbs.oncomment(value);
    }
    if (this._cbs.oncommentend) {
        this._cbs.oncommentend();
    }
};
/**
 * Reports a CDATA section.
 *
 * With the `xmlMode` or `recognizeCDATA` option set, the section is emitted
 * as `oncdatastart` / `ontext` / `oncdataend` (each callback is optional).
 * Otherwise it is reported as a comment whose text is "[CDATA[" + value + "]]".
 *
 * @param {string} value Text between the CDATA delimiters.
 */
Parser.prototype.oncdata = function (value) {
    this._updatePosition(1);
    var treatAsCdata = this._options.xmlMode || this._options.recognizeCDATA;
    if (!treatAsCdata) {
        this.oncomment("[CDATA[" + value + "]]");
        return;
    }
    if (this._cbs.oncdatastart) {
        this._cbs.oncdatastart();
    }
    if (this._cbs.ontext) {
        this._cbs.ontext(value);
    }
    if (this._cbs.oncdataend) {
        this._cbs.oncdataend();
    }
};
/**
 * Passes an error on to the handler's `onerror` callback, if it has one.
 *
 * @param {*} err The error to report.
 */
Parser.prototype.onerror = function (err) {
    if (!this._cbs.onerror) {
        return;
    }
    this._cbs.onerror(err);
};
Parser.prototype.onend = function () { | ||
if (this._cbs.onclosetag) { | ||
this._cbs.onclosetag(name); | ||
for (var i = this._stack.length; i > 0; this._cbs.onclosetag(this._stack[--i])) | ||
; | ||
} | ||
this._stack.pop(); | ||
} | ||
}; | ||
Parser.prototype.onattribname = function(name) { | ||
if (this._lowerCaseAttributeNames) { | ||
name = name.toLowerCase(); | ||
} | ||
this._attribname = name; | ||
}; | ||
Parser.prototype.onattribdata = function(value) { | ||
this._attribvalue += value; | ||
}; | ||
Parser.prototype.onattribend = function() { | ||
if (this._cbs.onattribute) | ||
this._cbs.onattribute(this._attribname, this._attribvalue); | ||
if ( | ||
this._attribs && | ||
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname) | ||
) { | ||
this._attribs[this._attribname] = this._attribvalue; | ||
} | ||
this._attribname = ""; | ||
this._attribvalue = ""; | ||
}; | ||
Parser.prototype._getInstructionName = function(value) { | ||
var idx = value.search(re_nameEnd), | ||
name = idx < 0 ? value : value.substr(0, idx); | ||
if (this._lowerCaseTagNames) { | ||
name = name.toLowerCase(); | ||
} | ||
return name; | ||
}; | ||
Parser.prototype.ondeclaration = function(value) { | ||
if (this._cbs.onprocessinginstruction) { | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("!" + name, "!" + value); | ||
} | ||
}; | ||
Parser.prototype.onprocessinginstruction = function(value) { | ||
if (this._cbs.onprocessinginstruction) { | ||
var name = this._getInstructionName(value); | ||
this._cbs.onprocessinginstruction("?" + name, "?" + value); | ||
} | ||
}; | ||
Parser.prototype.oncomment = function(value) { | ||
this._updatePosition(4); | ||
if (this._cbs.oncomment) this._cbs.oncomment(value); | ||
if (this._cbs.oncommentend) this._cbs.oncommentend(); | ||
}; | ||
Parser.prototype.oncdata = function(value) { | ||
this._updatePosition(1); | ||
if (this._options.xmlMode || this._options.recognizeCDATA) { | ||
if (this._cbs.oncdatastart) this._cbs.oncdatastart(); | ||
if (this._cbs.ontext) this._cbs.ontext(value); | ||
if (this._cbs.oncdataend) this._cbs.oncdataend(); | ||
} else { | ||
this.oncomment("[CDATA[" + value + "]]"); | ||
} | ||
}; | ||
Parser.prototype.onerror = function(err) { | ||
if (this._cbs.onerror) this._cbs.onerror(err); | ||
}; | ||
Parser.prototype.onend = function() { | ||
if (this._cbs.onclosetag) { | ||
for ( | ||
var i = this._stack.length; | ||
i > 0; | ||
this._cbs.onclosetag(this._stack[--i]) | ||
); | ||
} | ||
if (this._cbs.onend) this._cbs.onend(); | ||
}; | ||
//Resets the parser to a blank state, ready to parse a new HTML document | ||
Parser.prototype.reset = function() { | ||
if (this._cbs.onreset) this._cbs.onreset(); | ||
this._tokenizer.reset(); | ||
this._tagname = ""; | ||
this._attribname = ""; | ||
this._attribs = null; | ||
this._stack = []; | ||
if (this._cbs.onparserinit) this._cbs.onparserinit(this); | ||
}; | ||
//Parses a complete HTML document and pushes it to the handler | ||
Parser.prototype.parseComplete = function(data) { | ||
this.reset(); | ||
this.end(data); | ||
}; | ||
Parser.prototype.write = function(chunk) { | ||
this._tokenizer.write(chunk); | ||
}; | ||
Parser.prototype.end = function(chunk) { | ||
this._tokenizer.end(chunk); | ||
}; | ||
Parser.prototype.pause = function() { | ||
this._tokenizer.pause(); | ||
}; | ||
Parser.prototype.resume = function() { | ||
this._tokenizer.resume(); | ||
}; | ||
//alias for backwards compat | ||
Parser.prototype.parseChunk = Parser.prototype.write; | ||
Parser.prototype.done = Parser.prototype.end; | ||
module.exports = Parser; | ||
if (this._cbs.onend) | ||
this._cbs.onend(); | ||
}; | ||
/**
 * Resets the parser to a blank state, ready to parse a new HTML document.
 *
 * Order of operations: the optional `onreset` callback is invoked, the
 * tokenizer is reset, the parser's per-document fields are cleared, and
 * finally the optional `onparserinit` callback is invoked with this parser.
 */
Parser.prototype.reset = function () {
    if (this._cbs.onreset) {
        this._cbs.onreset();
    }
    this._tokenizer.reset();
    this._tagname = "";
    this._attribname = "";
    // onattribdata() appends to _attribvalue and only onattribend() clears it.
    // Without this, text left over from an attribute that never ended would be
    // prepended to the first attribute value of the next document.
    this._attribvalue = "";
    this._attribs = null;
    this._stack = [];
    if (this._cbs.onparserinit) {
        this._cbs.onparserinit(this);
    }
};
/**
 * Parses a complete HTML document and pushes it to the handler:
 * resets the parser, then ends parsing with `data` as the only input.
 *
 * @param {string} data The whole document.
 */
Parser.prototype.parseComplete = function (data) {
    this.reset();
    this.end(data);
};
/**
 * Hands a chunk of input to the tokenizer's `write`.
 *
 * @param {string} chunk Next piece of the document.
 */
Parser.prototype.write = function (chunk) {
    var tokenizer = this._tokenizer;
    tokenizer.write(chunk);
};
/**
 * Hands an optional final chunk to the tokenizer's `end`.
 *
 * @param {string} [chunk] Last piece of the document, if any.
 */
Parser.prototype.end = function (chunk) {
    var tokenizer = this._tokenizer;
    tokenizer.end(chunk);
};
/** Pauses parsing by delegating to the tokenizer's `pause`. */
Parser.prototype.pause = function () {
    var tokenizer = this._tokenizer;
    tokenizer.pause();
};
/** Resumes parsing by delegating to the tokenizer's `resume`. */
Parser.prototype.resume = function () {
    var tokenizer = this._tokenizer;
    tokenizer.resume();
};
return Parser; | ||
}(events_1.EventEmitter)); | ||
exports.Parser = Parser; |
1731
lib/Tokenizer.js
@@ -1,970 +0,859 @@ | ||
module.exports = Tokenizer; | ||
var decodeCodePoint = require("entities/lib/decode_codepoint.js"); | ||
var entityMap = require("entities/maps/entities.json"); | ||
var legacyMap = require("entities/maps/legacy.json"); | ||
var xmlMap = require("entities/maps/xml.json"); | ||
var i = 0; | ||
var TEXT = i++; | ||
var BEFORE_TAG_NAME = i++; //after < | ||
var IN_TAG_NAME = i++; | ||
var IN_SELF_CLOSING_TAG = i++; | ||
var BEFORE_CLOSING_TAG_NAME = i++; | ||
var IN_CLOSING_TAG_NAME = i++; | ||
var AFTER_CLOSING_TAG_NAME = i++; | ||
//attributes | ||
var BEFORE_ATTRIBUTE_NAME = i++; | ||
var IN_ATTRIBUTE_NAME = i++; | ||
var AFTER_ATTRIBUTE_NAME = i++; | ||
var BEFORE_ATTRIBUTE_VALUE = i++; | ||
var IN_ATTRIBUTE_VALUE_DQ = i++; // " | ||
var IN_ATTRIBUTE_VALUE_SQ = i++; // ' | ||
var IN_ATTRIBUTE_VALUE_NQ = i++; | ||
//declarations | ||
var BEFORE_DECLARATION = i++; // ! | ||
var IN_DECLARATION = i++; | ||
//processing instructions | ||
var IN_PROCESSING_INSTRUCTION = i++; // ? | ||
//comments | ||
var BEFORE_COMMENT = i++; | ||
var IN_COMMENT = i++; | ||
var AFTER_COMMENT_1 = i++; | ||
var AFTER_COMMENT_2 = i++; | ||
//cdata | ||
var BEFORE_CDATA_1 = i++; // [ | ||
var BEFORE_CDATA_2 = i++; // C | ||
var BEFORE_CDATA_3 = i++; // D | ||
var BEFORE_CDATA_4 = i++; // A | ||
var BEFORE_CDATA_5 = i++; // T | ||
var BEFORE_CDATA_6 = i++; // A | ||
var IN_CDATA = i++; // [ | ||
var AFTER_CDATA_1 = i++; // ] | ||
var AFTER_CDATA_2 = i++; // ] | ||
//special tags | ||
var BEFORE_SPECIAL = i++; //S | ||
var BEFORE_SPECIAL_END = i++; //S | ||
var BEFORE_SCRIPT_1 = i++; //C | ||
var BEFORE_SCRIPT_2 = i++; //R | ||
var BEFORE_SCRIPT_3 = i++; //I | ||
var BEFORE_SCRIPT_4 = i++; //P | ||
var BEFORE_SCRIPT_5 = i++; //T | ||
var AFTER_SCRIPT_1 = i++; //C | ||
var AFTER_SCRIPT_2 = i++; //R | ||
var AFTER_SCRIPT_3 = i++; //I | ||
var AFTER_SCRIPT_4 = i++; //P | ||
var AFTER_SCRIPT_5 = i++; //T | ||
var BEFORE_STYLE_1 = i++; //T | ||
var BEFORE_STYLE_2 = i++; //Y | ||
var BEFORE_STYLE_3 = i++; //L | ||
var BEFORE_STYLE_4 = i++; //E | ||
var AFTER_STYLE_1 = i++; //T | ||
var AFTER_STYLE_2 = i++; //Y | ||
var AFTER_STYLE_3 = i++; //L | ||
var AFTER_STYLE_4 = i++; //E | ||
var BEFORE_ENTITY = i++; //& | ||
var BEFORE_NUMERIC_ENTITY = i++; //# | ||
var IN_NAMED_ENTITY = i++; | ||
var IN_NUMERIC_ENTITY = i++; | ||
var IN_HEX_ENTITY = i++; //X | ||
var j = 0; | ||
var SPECIAL_NONE = j++; | ||
var SPECIAL_SCRIPT = j++; | ||
var SPECIAL_STYLE = j++; | ||
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint")); | ||
var entities_json_1 = __importDefault(require("entities/lib/maps/entities.json")); | ||
var legacy_json_1 = __importDefault(require("entities/lib/maps/legacy.json")); | ||
var xml_json_1 = __importDefault(require("entities/lib/maps/xml.json")); | ||
/**
 * Tells whether `c` is one of the five characters the tokenizer treats as
 * whitespace: space, line feed, tab, form feed or carriage return.
 *
 * @param {string} c A single character.
 * @returns {boolean} `true` for whitespace, `false` otherwise.
 */
function whitespace(c) {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
function ifElseState(upper, SUCCESS, FAILURE) { | ||
var lower = upper.toLowerCase(); | ||
if (upper === lower) { | ||
return function(c) { | ||
return function (t, c) { | ||
if (c === lower) { | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
t._state = SUCCESS; | ||
} | ||
else { | ||
t._state = FAILURE; | ||
t._index--; | ||
} | ||
}; | ||
} else { | ||
return function(c) { | ||
} | ||
else { | ||
return function (t, c) { | ||
if (c === lower || c === upper) { | ||
this._state = SUCCESS; | ||
} else { | ||
this._state = FAILURE; | ||
this._index--; | ||
t._state = SUCCESS; | ||
} | ||
else { | ||
t._state = FAILURE; | ||
t._index--; | ||
} | ||
}; | ||
} | ||
} | ||
function consumeSpecialNameChar(upper, NEXT_STATE) { | ||
var lower = upper.toLowerCase(); | ||
return function(c) { | ||
return function (t, c) { | ||
if (c === lower || c === upper) { | ||
this._state = NEXT_STATE; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
t._state = NEXT_STATE; | ||
} | ||
else { | ||
t._state = 3 /* InTagName */; | ||
t._index--; //consume the token again | ||
} | ||
}; | ||
} | ||
function Tokenizer(options, cbs) { | ||
this._state = TEXT; | ||
this._buffer = ""; | ||
this._sectionStart = 0; | ||
this._index = 0; | ||
this._bufferOffset = 0; //chars removed from _buffer | ||
this._baseState = TEXT; | ||
this._special = SPECIAL_NONE; | ||
this._cbs = cbs; | ||
this._running = true; | ||
this._ended = false; | ||
this._xmlMode = !!(options && options.xmlMode); | ||
this._decodeEntities = !!(options && options.decodeEntities); | ||
} | ||
Tokenizer.prototype._stateText = function(c) { | ||
if (c === "<") { | ||
if (this._index > this._sectionStart) { | ||
this._cbs.ontext(this._getSection()); | ||
var stateBeforeCdata1 = ifElseState("C", 23 /* BeforeCdata2 */, 16 /* InDeclaration */); | ||
var stateBeforeCdata2 = ifElseState("D", 24 /* BeforeCdata3 */, 16 /* InDeclaration */); | ||
var stateBeforeCdata3 = ifElseState("A", 25 /* BeforeCdata4 */, 16 /* InDeclaration */); | ||
var stateBeforeCdata4 = ifElseState("T", 26 /* BeforeCdata5 */, 16 /* InDeclaration */); | ||
var stateBeforeCdata5 = ifElseState("A", 27 /* BeforeCdata6 */, 16 /* InDeclaration */); | ||
var stateBeforeScript1 = consumeSpecialNameChar("R", 34 /* BeforeScript2 */); | ||
var stateBeforeScript2 = consumeSpecialNameChar("I", 35 /* BeforeScript3 */); | ||
var stateBeforeScript3 = consumeSpecialNameChar("P", 36 /* BeforeScript4 */); | ||
var stateBeforeScript4 = consumeSpecialNameChar("T", 37 /* BeforeScript5 */); | ||
var stateAfterScript1 = ifElseState("R", 39 /* AfterScript2 */, 1 /* Text */); | ||
var stateAfterScript2 = ifElseState("I", 40 /* AfterScript3 */, 1 /* Text */); | ||
var stateAfterScript3 = ifElseState("P", 41 /* AfterScript4 */, 1 /* Text */); | ||
var stateAfterScript4 = ifElseState("T", 42 /* AfterScript5 */, 1 /* Text */); | ||
var stateBeforeStyle1 = consumeSpecialNameChar("Y", 44 /* BeforeStyle2 */); | ||
var stateBeforeStyle2 = consumeSpecialNameChar("L", 45 /* BeforeStyle3 */); | ||
var stateBeforeStyle3 = consumeSpecialNameChar("E", 46 /* BeforeStyle4 */); | ||
var stateAfterStyle1 = ifElseState("Y", 48 /* AfterStyle2 */, 1 /* Text */); | ||
var stateAfterStyle2 = ifElseState("L", 49 /* AfterStyle3 */, 1 /* Text */); | ||
var stateAfterStyle3 = ifElseState("E", 50 /* AfterStyle4 */, 1 /* Text */); | ||
var stateBeforeEntity = ifElseState("#", 52 /* BeforeNumericEntity */, 53 /* InNamedEntity */); | ||
var stateBeforeNumericEntity = ifElseState("X", 55 /* InHexEntity */, 54 /* InNumericEntity */); | ||
var Tokenizer = /** @class */ (function () { | ||
function Tokenizer(options, cbs) { | ||
/** The current state the tokenizer is in. */ | ||
this._state = 1 /* Text */; | ||
/** The read buffer. */ | ||
this._buffer = ""; | ||
/** The beginning of the section that is currently being read. */ | ||
this._sectionStart = 0; | ||
/** The index within the buffer that we are currently looking at. */ | ||
this._index = 0; | ||
/** | ||
* Data that has already been processed will be removed from the buffer occasionally. | ||
* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. | ||
*/ | ||
this._bufferOffset = 0; | ||
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ | ||
this._baseState = 1 /* Text */; | ||
/** For special parsing behavior inside of script and style tags. */ | ||
this._special = 1 /* None */; | ||
/** Indicates whether the tokenizer has been paused. */ | ||
this._running = true; | ||
/** Indicates whether the tokenizer has finished running / `.end` has been called. */ | ||
this._ended = false; | ||
this._cbs = cbs; | ||
this._xmlMode = !!(options && options.xmlMode); | ||
this._decodeEntities = !!(options && options.decodeEntities); | ||
} | ||
/**
 * Puts the tokenizer back into its starting state: Text state, empty buffer,
 * all positions at zero, no special-tag mode, running and not ended.
 */
Tokenizer.prototype.reset = function () {
    // State machine
    this._state = 1 /* Text */;
    this._baseState = 1 /* Text */;
    this._special = 1 /* None */;
    // Buffer and positions
    this._buffer = "";
    this._sectionStart = 0;
    this._index = 0;
    this._bufferOffset = 0;
    // Lifecycle flags
    this._running = true;
    this._ended = false;
};
Tokenizer.prototype._stateText = function (c) { | ||
if (c === "<") { | ||
if (this._index > this._sectionStart) { | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._state = 2 /* BeforeTagName */; | ||
this._sectionStart = this._index; | ||
} | ||
this._state = BEFORE_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} else if ( | ||
this._decodeEntities && | ||
this._special === SPECIAL_NONE && | ||
c === "&" | ||
) { | ||
if (this._index > this._sectionStart) { | ||
else if (this._decodeEntities && | ||
this._special === 1 /* None */ && | ||
c === "&") { | ||
if (this._index > this._sectionStart) { | ||
this._cbs.ontext(this._getSection()); | ||
} | ||
this._baseState = 1 /* Text */; | ||
this._state = 51 /* BeforeEntity */; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeTagName = function (c) { | ||
if (c === "/") { | ||
this._state = 5 /* BeforeClosingTagName */; | ||
} | ||
else if (c === "<") { | ||
this._cbs.ontext(this._getSection()); | ||
this._sectionStart = this._index; | ||
} | ||
this._baseState = TEXT; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeTagName = function(c) { | ||
if (c === "/") { | ||
this._state = BEFORE_CLOSING_TAG_NAME; | ||
} else if (c === "<") { | ||
this._cbs.ontext(this._getSection()); | ||
this._sectionStart = this._index; | ||
} else if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) { | ||
this._state = TEXT; | ||
} else if (c === "!") { | ||
this._state = BEFORE_DECLARATION; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "?") { | ||
this._state = IN_PROCESSING_INSTRUCTION; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
else if (c === ">" || | ||
this._special !== 1 /* None */ || | ||
whitespace(c)) { | ||
this._state = 1 /* Text */; | ||
} | ||
else if (c === "!") { | ||
this._state = 15 /* BeforeDeclaration */; | ||
this._sectionStart = this._index + 1; | ||
} | ||
else if (c === "?") { | ||
this._state = 17 /* InProcessingInstruction */; | ||
this._sectionStart = this._index + 1; | ||
} | ||
else { | ||
this._state = | ||
!this._xmlMode && (c === "s" || c === "S") | ||
? 31 /* BeforeSpecial */ | ||
: 3 /* InTagName */; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
/**
 * Handles a character while an opening tag name is being read.
 * A "/", ">" or whitespace character ends the name: the name is emitted as
 * `onopentagname` and the same character is re-read in BeforeAttributeName.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInTagName = function (c) {
    var endsName = c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._emitToken("onopentagname");
    this._state = 8 /* BeforeAttributeName */;
    this._index--; // step back so the terminating character is handled again
};
/**
 * Handles a character before a closing tag name has started.
 *
 * Whitespace is ignored and ">" returns to Text. While a special tag is
 * active (`_special` is not None) only "s"/"S" continues, towards
 * BeforeSpecialEnd; any other character goes back to Text and is re-read.
 * Otherwise the character starts a closing tag name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
    if (whitespace(c)) {
        return; // ignore
    }
    if (c === ">") {
        this._state = 1 /* Text */;
        return;
    }
    if (this._special === 1 /* None */) {
        this._state = 6 /* InClosingTagName */;
        this._sectionStart = this._index;
        return;
    }
    if (c === "s" || c === "S") {
        this._state = 32 /* BeforeSpecialEnd */;
        return;
    }
    this._state = 1 /* Text */;
    this._index--;
};
/**
 * Handles a character while a closing tag name is being read.
 * ">" or whitespace ends the name: it is emitted as `onclosetag` and the
 * same character is re-read in AfterClosingTagName.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInCloseingTagName = function (c) {
    var endsName = c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._emitToken("onclosetag");
    this._state = 7 /* AfterClosingTagName */;
    this._index--;
};
/**
 * Handles a character after a closing tag name: everything is skipped until
 * ">", which returns to Text with the next section starting right after it.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
    if (c !== ">") {
        return; // skip everything until ">"
    }
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
/**
 * Handles a character between a tag name / attribute and the next attribute.
 *
 * ">" ends the opening tag (`onopentagend`) and returns to Text, "/" moves
 * to InSelfClosingTag, whitespace is skipped, and any other character starts
 * an attribute name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateBeforeAttributeName = function (c) {
    switch (c) {
        case ">":
            this._cbs.onopentagend();
            this._state = 1 /* Text */;
            this._sectionStart = this._index + 1;
            break;
        case "/":
            this._state = 4 /* InSelfClosingTag */;
            break;
        default:
            if (!whitespace(c)) {
                this._state = 9 /* InAttributeName */;
                this._sectionStart = this._index;
            }
    }
};
/**
 * Handles a character after the "/" inside an opening tag.
 *
 * ">" completes the self-closing tag (`onselfclosingtag`) and returns to
 * Text. Whitespace is skipped. Any other character is re-read in
 * BeforeAttributeName.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInSelfClosingTag = function (c) {
    if (c === ">") {
        this._cbs.onselfclosingtag();
        this._state = 1 /* Text */;
        this._sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 8 /* BeforeAttributeName */;
    this._index--;
};
/**
 * Handles a character while an attribute name is being read.
 * "=", "/", ">" or whitespace ends the name: the current section is reported
 * through `onattribname`, the section start is set to -1, and the same
 * character is re-read in AfterAttributeName.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInAttributeName = function (c) {
    var endsName = c === "=" || c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._cbs.onattribname(this._getSection());
    this._sectionStart = -1;
    this._state = 10 /* AfterAttributeName */;
    this._index--;
};
/**
 * Handles a character after an attribute name.
 *
 * "=" moves on to the attribute value and whitespace is skipped. Anything
 * else ends the value-less attribute (`onattribend`): "/" and ">" are
 * re-read in BeforeAttributeName, any other character starts the next
 * attribute name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateAfterAttributeName = function (c) {
    if (c === "=") {
        this._state = 11 /* BeforeAttributeValue */;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._cbs.onattribend();
    if (c === "/" || c === ">") {
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
    }
    else {
        this._state = 9 /* InAttributeName */;
        this._sectionStart = this._index;
    }
};
/**
 * Handles a character after the "=" of an attribute.
 *
 * A double or single quote opens a quoted value that starts at the next
 * character. Whitespace is skipped. Any other character starts an unquoted
 * value and is re-read in that state.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
    if (c === '"' || c === "'") {
        this._state = c === '"'
            ? 12 /* InAttributeValueDq */
            : 13 /* InAttributeValueSq */;
        this._sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 14 /* InAttributeValueNq */;
    this._sectionStart = this._index;
    this._index--; //reconsume token
};
/**
 * Handles a character inside a double-quoted attribute value.
 *
 * The closing quote emits the pending text as `onattribdata`, ends the
 * attribute and moves to BeforeAttributeName. With entity decoding enabled,
 * "&" emits the pending text, remembers the current state in `_baseState`
 * and switches to BeforeEntity.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
    if (c === '"') {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        return;
    }
    if (c === "&" && this._decodeEntities) {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = 51 /* BeforeEntity */;
        this._sectionStart = this._index;
    }
};
/**
 * Handles a character inside a single-quoted attribute value.
 *
 * The closing quote emits the pending text as `onattribdata`, ends the
 * attribute and moves to BeforeAttributeName. With entity decoding enabled,
 * "&" emits the pending text, remembers the current state in `_baseState`
 * and switches to BeforeEntity.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
    if (c === "'") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        return;
    }
    if (c === "&" && this._decodeEntities) {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = 51 /* BeforeEntity */;
        this._sectionStart = this._index;
    }
};
/**
 * Handles a character inside an unquoted attribute value.
 *
 * Whitespace or ">" ends the value: the pending text is emitted as
 * `onattribdata`, the attribute is ended, and the same character is re-read
 * in BeforeAttributeName. With entity decoding enabled, "&" emits the
 * pending text, remembers the current state in `_baseState` and switches to
 * BeforeEntity.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
    if (whitespace(c) || c === ">") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
        return;
    }
    if (c === "&" && this._decodeEntities) {
        this._emitToken("onattribdata");
        this._baseState = this._state;
        this._state = 51 /* BeforeEntity */;
        this._sectionStart = this._index;
    }
};
Tokenizer.prototype._stateBeforeDeclaration = function (c) { | ||
this._state = | ||
!this._xmlMode && (c === "s" || c === "S") | ||
? BEFORE_SPECIAL | ||
: IN_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInTagName = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._emitToken("onopentagname"); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeCloseingTagName = function(c) { | ||
if (whitespace(c)); | ||
else if (c === ">") { | ||
this._state = TEXT; | ||
} else if (this._special !== SPECIAL_NONE) { | ||
if (c === "s" || c === "S") { | ||
this._state = BEFORE_SPECIAL_END; | ||
} else { | ||
this._state = TEXT; | ||
c === "[" | ||
? 22 /* BeforeCdata1 */ | ||
: c === "-" | ||
? 18 /* BeforeComment */ | ||
: 16 /* InDeclaration */; | ||
}; | ||
/**
 * Handles a character inside a declaration: ">" reports the current section
 * through `ondeclaration` and returns to Text, with the next section
 * starting after the ">". Every other character is left in the section.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInDeclaration = function (c) {
    if (c !== ">") {
        return;
    }
    this._cbs.ondeclaration(this._getSection());
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
/**
 * Handles a character inside a processing instruction: ">" reports the
 * current section through `onprocessinginstruction` and returns to Text,
 * with the next section starting after the ">". Every other character is
 * left in the section.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInProcessingInstruction = function (c) {
    if (c !== ">") {
        return;
    }
    this._cbs.onprocessinginstruction(this._getSection());
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
/**
 * Handles the character after the first "-" of a possible comment opener.
 * A second "-" starts a comment whose text begins at the next character;
 * anything else switches to InDeclaration.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateBeforeComment = function (c) {
    if (c !== "-") {
        this._state = 16 /* InDeclaration */;
        return;
    }
    this._state = 19 /* InComment */;
    this._sectionStart = this._index + 1;
};
/**
 * Handles a character inside a comment: a "-" moves to AfterComment1,
 * every other character leaves the state unchanged.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateInComment = function (c) {
    if (c !== "-") {
        return;
    }
    this._state = 20 /* AfterComment1 */;
};
/**
 * Handles the character after one "-" inside a comment: a second "-" moves
 * to AfterComment2, anything else goes back to InComment.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateAfterComment1 = function (c) {
    this._state = c === "-"
        ? 21 /* AfterComment2 */
        : 19 /* InComment */;
};
/**
 * Handles the character after "--" inside a comment.
 *
 * ">" ends the comment: the buffer from the section start up to two
 * characters before the current index (dropping the trailing "--") is
 * reported through `oncomment`, and the tokenizer returns to Text.
 * Another "-" stays in this state (`--->`); anything else goes back to
 * InComment.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype._stateAfterComment2 = function (c) {
    switch (c) {
        case ">":
            //remove 2 trailing chars
            this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
            this._state = 1 /* Text */;
            this._sectionStart = this._index + 1;
            break;
        case "-":
            // stay in AFTER_COMMENT_2 (`--->`)
            break;
        default:
            this._state = 19 /* InComment */;
    }
};
Tokenizer.prototype._stateBeforeCdata6 = function (c) { | ||
if (c === "[") { | ||
this._state = 28 /* InCdata */; | ||
this._sectionStart = this._index + 1; | ||
} | ||
else { | ||
this._state = 16 /* InDeclaration */; | ||
this._index--; | ||
} | ||
} else { | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInCloseingTagName = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._emitToken("onclosetag"); | ||
this._state = AFTER_CLOSING_TAG_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterCloseingTagName = function(c) { | ||
//skip everything until ">" | ||
if (c === ">") { | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeAttributeName = function(c) { | ||
if (c === ">") { | ||
this._cbs.onopentagend(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "/") { | ||
this._state = IN_SELF_CLOSING_TAG; | ||
} else if (!whitespace(c)) { | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInSelfClosingTag = function(c) { | ||
if (c === ">") { | ||
this._cbs.onselfclosingtag(); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (!whitespace(c)) { | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeName = function(c) { | ||
if (c === "=" || c === "/" || c === ">" || whitespace(c)) { | ||
this._cbs.onattribname(this._getSection()); | ||
this._sectionStart = -1; | ||
this._state = AFTER_ATTRIBUTE_NAME; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterAttributeName = function(c) { | ||
if (c === "=") { | ||
this._state = BEFORE_ATTRIBUTE_VALUE; | ||
} else if (c === "/" || c === ">") { | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if (!whitespace(c)) { | ||
this._cbs.onattribend(); | ||
this._state = IN_ATTRIBUTE_NAME; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeAttributeValue = function(c) { | ||
if (c === '"') { | ||
this._state = IN_ATTRIBUTE_VALUE_DQ; | ||
this._sectionStart = this._index + 1; | ||
} else if (c === "'") { | ||
this._state = IN_ATTRIBUTE_VALUE_SQ; | ||
this._sectionStart = this._index + 1; | ||
} else if (!whitespace(c)) { | ||
this._state = IN_ATTRIBUTE_VALUE_NQ; | ||
this._sectionStart = this._index; | ||
this._index--; //reconsume token | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c) { | ||
if (c === '"') { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c) { | ||
if (c === "'") { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c) { | ||
if (whitespace(c) || c === ">") { | ||
this._emitToken("onattribdata"); | ||
this._cbs.onattribend(); | ||
this._state = BEFORE_ATTRIBUTE_NAME; | ||
this._index--; | ||
} else if (this._decodeEntities && c === "&") { | ||
this._emitToken("onattribdata"); | ||
this._baseState = this._state; | ||
this._state = BEFORE_ENTITY; | ||
this._sectionStart = this._index; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeDeclaration = function(c) { | ||
this._state = | ||
c === "[" | ||
? BEFORE_CDATA_1 | ||
: c === "-" | ||
? BEFORE_COMMENT | ||
: IN_DECLARATION; | ||
}; | ||
Tokenizer.prototype._stateInDeclaration = function(c) { | ||
if (c === ">") { | ||
this._cbs.ondeclaration(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInProcessingInstruction = function(c) { | ||
if (c === ">") { | ||
this._cbs.onprocessinginstruction(this._getSection()); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeComment = function(c) { | ||
if (c === "-") { | ||
this._state = IN_COMMENT; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInComment = function(c) { | ||
if (c === "-") this._state = AFTER_COMMENT_1; | ||
}; | ||
Tokenizer.prototype._stateAfterComment1 = function(c) { | ||
if (c === "-") { | ||
this._state = AFTER_COMMENT_2; | ||
} else { | ||
this._state = IN_COMMENT; | ||
} | ||
}; | ||
Tokenizer.prototype._stateAfterComment2 = function(c) { | ||
if (c === ">") { | ||
//remove 2 trailing chars | ||
this._cbs.oncomment( | ||
this._buffer.substring(this._sectionStart, this._index - 2) | ||
); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "-") { | ||
this._state = IN_COMMENT; | ||
} | ||
// else: stay in AFTER_COMMENT_2 (`--->`) | ||
}; | ||
Tokenizer.prototype._stateBeforeCdata1 = ifElseState( | ||
"C", | ||
BEFORE_CDATA_2, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata2 = ifElseState( | ||
"D", | ||
BEFORE_CDATA_3, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata3 = ifElseState( | ||
"A", | ||
BEFORE_CDATA_4, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata4 = ifElseState( | ||
"T", | ||
BEFORE_CDATA_5, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata5 = ifElseState( | ||
"A", | ||
BEFORE_CDATA_6, | ||
IN_DECLARATION | ||
); | ||
Tokenizer.prototype._stateBeforeCdata6 = function(c) { | ||
if (c === "[") { | ||
this._state = IN_CDATA; | ||
this._sectionStart = this._index + 1; | ||
} else { | ||
this._state = IN_DECLARATION; | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInCdata = function(c) { | ||
if (c === "]") this._state = AFTER_CDATA_1; | ||
}; | ||
Tokenizer.prototype._stateAfterCdata1 = function(c) { | ||
if (c === "]") this._state = AFTER_CDATA_2; | ||
else this._state = IN_CDATA; | ||
}; | ||
Tokenizer.prototype._stateAfterCdata2 = function(c) { | ||
if (c === ">") { | ||
//remove 2 trailing chars | ||
this._cbs.oncdata( | ||
this._buffer.substring(this._sectionStart, this._index - 2) | ||
); | ||
this._state = TEXT; | ||
this._sectionStart = this._index + 1; | ||
} else if (c !== "]") { | ||
this._state = IN_CDATA; | ||
} | ||
//else: stay in AFTER_CDATA_2 (`]]]>`) | ||
}; | ||
Tokenizer.prototype._stateBeforeSpecial = function(c) { | ||
if (c === "c" || c === "C") { | ||
this._state = BEFORE_SCRIPT_1; | ||
} else if (c === "t" || c === "T") { | ||
this._state = BEFORE_STYLE_1; | ||
} else { | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
} | ||
}; | ||
Tokenizer.prototype._stateBeforeSpecialEnd = function(c) { | ||
if (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) { | ||
this._state = AFTER_SCRIPT_1; | ||
} else if (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) { | ||
this._state = AFTER_STYLE_1; | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar( | ||
"R", | ||
BEFORE_SCRIPT_2 | ||
); | ||
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar( | ||
"I", | ||
BEFORE_SCRIPT_3 | ||
); | ||
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar( | ||
"P", | ||
BEFORE_SCRIPT_4 | ||
); | ||
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar( | ||
"T", | ||
BEFORE_SCRIPT_5 | ||
); | ||
Tokenizer.prototype._stateBeforeScript5 = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_SCRIPT; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
}; | ||
Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT); | ||
Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT); | ||
Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT); | ||
Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT); | ||
Tokenizer.prototype._stateAfterScript5 = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 6; | ||
this._index--; //reconsume the token | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar( | ||
"Y", | ||
BEFORE_STYLE_2 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar( | ||
"L", | ||
BEFORE_STYLE_3 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar( | ||
"E", | ||
BEFORE_STYLE_4 | ||
); | ||
Tokenizer.prototype._stateBeforeStyle4 = function(c) { | ||
if (c === "/" || c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_STYLE; | ||
} | ||
this._state = IN_TAG_NAME; | ||
this._index--; //consume the token again | ||
}; | ||
Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT); | ||
Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT); | ||
Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT); | ||
Tokenizer.prototype._stateAfterStyle4 = function(c) { | ||
if (c === ">" || whitespace(c)) { | ||
this._special = SPECIAL_NONE; | ||
this._state = IN_CLOSING_TAG_NAME; | ||
this._sectionStart = this._index - 5; | ||
this._index--; //reconsume the token | ||
} else this._state = TEXT; | ||
}; | ||
Tokenizer.prototype._stateBeforeEntity = ifElseState( | ||
"#", | ||
BEFORE_NUMERIC_ENTITY, | ||
IN_NAMED_ENTITY | ||
); | ||
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState( | ||
"X", | ||
IN_HEX_ENTITY, | ||
IN_NUMERIC_ENTITY | ||
); | ||
//for entities terminated with a semicolon | ||
Tokenizer.prototype._parseNamedEntityStrict = function() { | ||
//offset = 1 | ||
if (this._sectionStart + 1 < this._index) { | ||
var entity = this._buffer.substring( | ||
this._sectionStart + 1, | ||
this._index | ||
), | ||
map = this._xmlMode ? xmlMap : entityMap; | ||
if (map.hasOwnProperty(entity)) { | ||
this._emitPartial(map[entity]); | ||
}; | ||
// Handler for the InCdata state: only a "]" changes state (to AfterCdata1);
// every other character leaves the tokenizer where it is.
Tokenizer.prototype._stateInCdata = function (c) {
    if (c !== "]") {
        return;
    }
    this._state = 29 /* AfterCdata1 */;
};
// Handler for the AfterCdata1 state: a second "]" advances to AfterCdata2,
// anything else falls back to InCdata.
Tokenizer.prototype._stateAfterCdata1 = function (c) {
    this._state = c === "]" ? 30 /* AfterCdata2 */ : 28 /* InCdata */;
};
Tokenizer.prototype._stateAfterCdata2 = function (c) { | ||
if (c === ">") { | ||
//remove 2 trailing chars | ||
this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2)); | ||
this._state = 1 /* Text */; | ||
this._sectionStart = this._index + 1; | ||
} | ||
} | ||
}; | ||
//parses legacy entities (without trailing semicolon) | ||
Tokenizer.prototype._parseLegacyEntity = function() { | ||
var start = this._sectionStart + 1, | ||
limit = this._index - start; | ||
if (limit > 6) limit = 6; //the max length of legacy entities is 6 | ||
while (limit >= 2) { | ||
//the min length of legacy entities is 2 | ||
var entity = this._buffer.substr(start, limit); | ||
if (legacyMap.hasOwnProperty(entity)) { | ||
this._emitPartial(legacyMap[entity]); | ||
this._sectionStart += limit + 1; | ||
return; | ||
} else { | ||
limit--; | ||
else if (c !== "]") { | ||
this._state = 28 /* InCdata */; | ||
} | ||
} | ||
}; | ||
Tokenizer.prototype._stateInNamedEntity = function(c) { | ||
if (c === ";") { | ||
this._parseNamedEntityStrict(); | ||
if (this._sectionStart + 1 < this._index && !this._xmlMode) { | ||
this._parseLegacyEntity(); | ||
//else: stay in AFTER_CDATA_2 (`]]]>`) | ||
}; | ||
Tokenizer.prototype._stateBeforeSpecial = function (c) { | ||
if (c === "c" || c === "C") { | ||
this._state = 33 /* BeforeScript1 */; | ||
} | ||
this._state = this._baseState; | ||
} else if ( | ||
(c < "a" || c > "z") && | ||
(c < "A" || c > "Z") && | ||
(c < "0" || c > "9") | ||
) { | ||
if (this._xmlMode); | ||
else if (this._sectionStart + 1 === this._index); | ||
else if (this._baseState !== TEXT) { | ||
if (c !== "=") { | ||
this._parseNamedEntityStrict(); | ||
else if (c === "t" || c === "T") { | ||
this._state = 43 /* BeforeStyle1 */; | ||
} | ||
else { | ||
this._state = 3 /* InTagName */; | ||
this._index--; //consume the token again | ||
} | ||
}; | ||
// Handler for the BeforeSpecialEnd state.
// Picks the next state from the kind of special tag currently open
// (`this._special`) and the character `c`; defaults to Text when the
// character does not match the open special tag.
Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
    var next = 1 /* Text */;
    if (this._special === 2 /* Script */) {
        if (c === "c" || c === "C") {
            next = 38 /* AfterScript1 */;
        }
    }
    else if (this._special === 3 /* Style */) {
        if (c === "t" || c === "T") {
            next = 47 /* AfterStyle1 */;
        }
    }
    this._state = next;
};
// Handler for the BeforeScript5 state.
// Flags the tokenizer as being inside a script element when `c` is "/", ">"
// or whitespace, then always hands `c` over to InTagName.
Tokenizer.prototype._stateBeforeScript5 = function (c) {
    var nameEnded = c === "/" || c === ">" || whitespace(c);
    if (nameEnded) {
        this._special = 2 /* Script */;
    }
    // Step back one position so `c` is processed again by InTagName.
    this._index--;
    this._state = 3 /* InTagName */;
};
// Handler for the AfterScript5 state.
// If `c` is ">" or whitespace, special-tag mode is switched off and the
// tokenizer continues in InClosingTagName; otherwise it returns to Text.
Tokenizer.prototype._stateAfterScript5 = function (c) {
    var terminatesName = c === ">" || whitespace(c);
    if (!terminatesName) {
        this._state = 1 /* Text */;
        return;
    }
    this._special = 1 /* None */;
    this._state = 6 /* InClosingTagName */;
    // Rewind the section start by 6 characters (presumably the length of
    // "script" -- confirm against the preceding states).
    this._sectionStart = this._index - 6;
    // Step back one position so `c` is handled again by the new state.
    this._index--;
};
// Handler for the BeforeStyle4 state.
// Flags the tokenizer as being inside a style element when `c` is "/", ">"
// or whitespace, then always hands `c` over to InTagName.
Tokenizer.prototype._stateBeforeStyle4 = function (c) {
    var nameEnded = c === "/" || c === ">" || whitespace(c);
    if (nameEnded) {
        this._special = 3 /* Style */;
    }
    // Step back one position so `c` is processed again by InTagName.
    this._index--;
    this._state = 3 /* InTagName */;
};
// Handler for the AfterStyle4 state.
// If `c` is ">" or whitespace, special-tag mode is switched off and the
// tokenizer continues in InClosingTagName; otherwise it returns to Text.
Tokenizer.prototype._stateAfterStyle4 = function (c) {
    var terminatesName = c === ">" || whitespace(c);
    if (!terminatesName) {
        this._state = 1 /* Text */;
        return;
    }
    this._special = 1 /* None */;
    this._state = 6 /* InClosingTagName */;
    // Rewind the section start by 5 characters (presumably the length of
    // "style" -- confirm against the preceding states).
    this._sectionStart = this._index - 5;
    // Step back one position so `c` is handled again by the new state.
    this._index--;
};
//for entities terminated with a semicolon | ||
Tokenizer.prototype._parseNamedEntityStrict = function () { | ||
//offset = 1 | ||
if (this._sectionStart + 1 < this._index) { | ||
var entity = this._buffer.substring(this._sectionStart + 1, this._index), map = this._xmlMode ? xml_json_1.default : entities_json_1.default; | ||
if (Object.prototype.hasOwnProperty.call(map, entity)) { | ||
// @ts-ignore | ||
this._emitPartial(map[entity]); | ||
this._sectionStart = this._index + 1; | ||
} | ||
} else { | ||
this._parseLegacyEntity(); | ||
} | ||
}; | ||
// Parses legacy entities (entities written without a trailing semicolon).
//
// Looks at the text between `_sectionStart + 1` and `_index` and tries the
// longest candidate prefix first (at most 6 characters), shrinking down to
// 2 characters. On the first prefix found in the legacy entity table the
// decoded value is emitted via `_emitPartial` and `_sectionStart` is moved
// past the consumed name. If nothing matches, no state is changed.
Tokenizer.prototype._parseLegacyEntity = function () {
    var start = this._sectionStart + 1;
    var available = this._index - start;
    var legacy = legacy_json_1.default;
    // Legacy entity names are between 2 and 6 characters long.
    for (var length = Math.min(available, 6); length >= 2; length--) {
        // `substring` replaces the deprecated `substr`; `start` is never
        // negative here, so both select the same characters.
        var entity = this._buffer.substring(start, start + length);
        if (Object.prototype.hasOwnProperty.call(legacy, entity)) {
            this._emitPartial(legacy[entity]);
            this._sectionStart += length + 1;
            return;
        }
    }
};
// Handler for the InNamedEntity state.
//
// ";" ends the entity: a strict lookup is attempted first and, outside XML
// mode, a legacy lookup follows if the section was not consumed.
// ASCII letters and digits keep the tokenizer in this state.
// Any other character ends the entity without a semicolon and is handed
// back to the base state for re-processing.
Tokenizer.prototype._stateInNamedEntity = function (c) {
    if (c === ";") {
        this._parseNamedEntityStrict();
        var stillPending = this._sectionStart + 1 < this._index;
        if (stillPending && !this._xmlMode) {
            this._parseLegacyEntity();
        }
        this._state = this._baseState;
        return;
    }
    var isNameChar = (c >= "a" && c <= "z") ||
        (c >= "A" && c <= "Z") ||
        (c >= "0" && c <= "9");
    if (isNameChar) {
        return;
    }
    // Nothing is decoded in XML mode or when no name characters were read.
    var hasName = this._sectionStart + 1 !== this._index;
    if (!this._xmlMode && hasName) {
        if (this._baseState === 1 /* Text */) {
            this._parseLegacyEntity();
        }
        else if (c !== "=") {
            // Outside of text, a name followed by "=" is left undecoded.
            this._parseNamedEntityStrict();
        }
    }
    this._state = this._baseState;
    // Step back one position so `c` is processed again by the base state.
    this._index--;
};
Tokenizer.prototype._decodeNumericEntity = function (offset, base) { | ||
var sectionStart = this._sectionStart + offset; | ||
if (sectionStart !== this._index) { | ||
//parse entity | ||
var entity = this._buffer.substring(sectionStart, this._index); | ||
var parsed = parseInt(entity, base); | ||
this._emitPartial(decode_codepoint_1.default(parsed)); | ||
this._sectionStart = this._index; | ||
} | ||
else { | ||
this._sectionStart--; | ||
} | ||
this._state = this._baseState; | ||
this._index--; | ||
} | ||
}; | ||
// Decodes a numeric character reference and returns to the base state.
//   offset - number of characters between `_sectionStart` and the first digit
//   base   - radix handed to parseInt (callers in this file pass 10 or 16)
// When at least one digit is present, the decoded value is emitted via
// `_emitPartial` and the section restarts at `_index`; otherwise
// `_sectionStart` is moved back by one.
Tokenizer.prototype._decodeNumericEntity = function(offset, base) {
    var digitsStart = this._sectionStart + offset;
    if (digitsStart === this._index) {
        this._sectionStart--;
    } else {
        var digits = this._buffer.substring(digitsStart, this._index);
        var codePoint = parseInt(digits, base);
        this._emitPartial(decodeCodePoint(codePoint));
        this._sectionStart = this._index;
    }
    this._state = this._baseState;
};
Tokenizer.prototype._stateInNumericEntity = function(c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(2, 10); | ||
this._sectionStart++; | ||
} else if (c < "0" || c > "9") { | ||
if (!this._xmlMode) { | ||
}; | ||
Tokenizer.prototype._stateInNumericEntity = function (c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(2, 10); | ||
} else { | ||
this._state = this._baseState; | ||
this._sectionStart++; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInHexEntity = function(c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(3, 16); | ||
this._sectionStart++; | ||
} else if ( | ||
(c < "a" || c > "f") && | ||
(c < "A" || c > "F") && | ||
(c < "0" || c > "9") | ||
) { | ||
if (!this._xmlMode) { | ||
else if (c < "0" || c > "9") { | ||
if (!this._xmlMode) { | ||
this._decodeNumericEntity(2, 10); | ||
} | ||
else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._stateInHexEntity = function (c) { | ||
if (c === ";") { | ||
this._decodeNumericEntity(3, 16); | ||
} else { | ||
this._state = this._baseState; | ||
this._sectionStart++; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._cleanup = function() { | ||
if (this._sectionStart < 0) { | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if (this._running) { | ||
if (this._state === TEXT) { | ||
if (this._sectionStart !== this._index) { | ||
this._cbs.ontext(this._buffer.substr(this._sectionStart)); | ||
else if ((c < "a" || c > "f") && | ||
(c < "A" || c > "F") && | ||
(c < "0" || c > "9")) { | ||
if (!this._xmlMode) { | ||
this._decodeNumericEntity(3, 16); | ||
} | ||
else { | ||
this._state = this._baseState; | ||
} | ||
this._index--; | ||
} | ||
}; | ||
Tokenizer.prototype._cleanup = function () { | ||
if (this._sectionStart < 0) { | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else if (this._sectionStart === this._index) { | ||
//the section just started | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} else { | ||
//remove everything unnecessary | ||
this._buffer = this._buffer.substr(this._sectionStart); | ||
this._index -= this._sectionStart; | ||
this._bufferOffset += this._sectionStart; | ||
} | ||
this._sectionStart = 0; | ||
} | ||
}; | ||
//TODO make events conditional | ||
// Appends `chunk` to the internal buffer and runs the parse loop.
// Writing after `end()` is reported through `onerror`; the chunk is then
// discarded instead of being parsed into an already finished document.
Tokenizer.prototype.write = function(chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".write() after done!"));
        return;
    }
    this._buffer += chunk;
    this._parse();
};
Tokenizer.prototype._parse = function() { | ||
while (this._index < this._buffer.length && this._running) { | ||
var c = this._buffer.charAt(this._index); | ||
if (this._state === TEXT) { | ||
this._stateText(c); | ||
} else if (this._state === BEFORE_TAG_NAME) { | ||
this._stateBeforeTagName(c); | ||
} else if (this._state === IN_TAG_NAME) { | ||
this._stateInTagName(c); | ||
} else if (this._state === BEFORE_CLOSING_TAG_NAME) { | ||
this._stateBeforeCloseingTagName(c); | ||
} else if (this._state === IN_CLOSING_TAG_NAME) { | ||
this._stateInCloseingTagName(c); | ||
} else if (this._state === AFTER_CLOSING_TAG_NAME) { | ||
this._stateAfterCloseingTagName(c); | ||
} else if (this._state === IN_SELF_CLOSING_TAG) { | ||
this._stateInSelfClosingTag(c); | ||
} else if (this._state === BEFORE_ATTRIBUTE_NAME) { | ||
/* | ||
* attributes | ||
*/ | ||
this._stateBeforeAttributeName(c); | ||
} else if (this._state === IN_ATTRIBUTE_NAME) { | ||
this._stateInAttributeName(c); | ||
} else if (this._state === AFTER_ATTRIBUTE_NAME) { | ||
this._stateAfterAttributeName(c); | ||
} else if (this._state === BEFORE_ATTRIBUTE_VALUE) { | ||
this._stateBeforeAttributeValue(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_DQ) { | ||
this._stateInAttributeValueDoubleQuotes(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_SQ) { | ||
this._stateInAttributeValueSingleQuotes(c); | ||
} else if (this._state === IN_ATTRIBUTE_VALUE_NQ) { | ||
this._stateInAttributeValueNoQuotes(c); | ||
} else if (this._state === BEFORE_DECLARATION) { | ||
/* | ||
* declarations | ||
*/ | ||
this._stateBeforeDeclaration(c); | ||
} else if (this._state === IN_DECLARATION) { | ||
this._stateInDeclaration(c); | ||
} else if (this._state === IN_PROCESSING_INSTRUCTION) { | ||
/* | ||
* processing instructions | ||
*/ | ||
this._stateInProcessingInstruction(c); | ||
} else if (this._state === BEFORE_COMMENT) { | ||
/* | ||
* comments | ||
*/ | ||
this._stateBeforeComment(c); | ||
} else if (this._state === IN_COMMENT) { | ||
this._stateInComment(c); | ||
} else if (this._state === AFTER_COMMENT_1) { | ||
this._stateAfterComment1(c); | ||
} else if (this._state === AFTER_COMMENT_2) { | ||
this._stateAfterComment2(c); | ||
} else if (this._state === BEFORE_CDATA_1) { | ||
/* | ||
* cdata | ||
*/ | ||
this._stateBeforeCdata1(c); | ||
} else if (this._state === BEFORE_CDATA_2) { | ||
this._stateBeforeCdata2(c); | ||
} else if (this._state === BEFORE_CDATA_3) { | ||
this._stateBeforeCdata3(c); | ||
} else if (this._state === BEFORE_CDATA_4) { | ||
this._stateBeforeCdata4(c); | ||
} else if (this._state === BEFORE_CDATA_5) { | ||
this._stateBeforeCdata5(c); | ||
} else if (this._state === BEFORE_CDATA_6) { | ||
this._stateBeforeCdata6(c); | ||
} else if (this._state === IN_CDATA) { | ||
this._stateInCdata(c); | ||
} else if (this._state === AFTER_CDATA_1) { | ||
this._stateAfterCdata1(c); | ||
} else if (this._state === AFTER_CDATA_2) { | ||
this._stateAfterCdata2(c); | ||
} else if (this._state === BEFORE_SPECIAL) { | ||
/* | ||
* special tags | ||
*/ | ||
this._stateBeforeSpecial(c); | ||
} else if (this._state === BEFORE_SPECIAL_END) { | ||
this._stateBeforeSpecialEnd(c); | ||
} else if (this._state === BEFORE_SCRIPT_1) { | ||
/* | ||
* script | ||
*/ | ||
this._stateBeforeScript1(c); | ||
} else if (this._state === BEFORE_SCRIPT_2) { | ||
this._stateBeforeScript2(c); | ||
} else if (this._state === BEFORE_SCRIPT_3) { | ||
this._stateBeforeScript3(c); | ||
} else if (this._state === BEFORE_SCRIPT_4) { | ||
this._stateBeforeScript4(c); | ||
} else if (this._state === BEFORE_SCRIPT_5) { | ||
this._stateBeforeScript5(c); | ||
} else if (this._state === AFTER_SCRIPT_1) { | ||
this._stateAfterScript1(c); | ||
} else if (this._state === AFTER_SCRIPT_2) { | ||
this._stateAfterScript2(c); | ||
} else if (this._state === AFTER_SCRIPT_3) { | ||
this._stateAfterScript3(c); | ||
} else if (this._state === AFTER_SCRIPT_4) { | ||
this._stateAfterScript4(c); | ||
} else if (this._state === AFTER_SCRIPT_5) { | ||
this._stateAfterScript5(c); | ||
} else if (this._state === BEFORE_STYLE_1) { | ||
/* | ||
* style | ||
*/ | ||
this._stateBeforeStyle1(c); | ||
} else if (this._state === BEFORE_STYLE_2) { | ||
this._stateBeforeStyle2(c); | ||
} else if (this._state === BEFORE_STYLE_3) { | ||
this._stateBeforeStyle3(c); | ||
} else if (this._state === BEFORE_STYLE_4) { | ||
this._stateBeforeStyle4(c); | ||
} else if (this._state === AFTER_STYLE_1) { | ||
this._stateAfterStyle1(c); | ||
} else if (this._state === AFTER_STYLE_2) { | ||
this._stateAfterStyle2(c); | ||
} else if (this._state === AFTER_STYLE_3) { | ||
this._stateAfterStyle3(c); | ||
} else if (this._state === AFTER_STYLE_4) { | ||
this._stateAfterStyle4(c); | ||
} else if (this._state === BEFORE_ENTITY) { | ||
/* | ||
* entities | ||
*/ | ||
this._stateBeforeEntity(c); | ||
} else if (this._state === BEFORE_NUMERIC_ENTITY) { | ||
this._stateBeforeNumericEntity(c); | ||
} else if (this._state === IN_NAMED_ENTITY) { | ||
this._stateInNamedEntity(c); | ||
} else if (this._state === IN_NUMERIC_ENTITY) { | ||
this._stateInNumericEntity(c); | ||
} else if (this._state === IN_HEX_ENTITY) { | ||
this._stateInHexEntity(c); | ||
} else { | ||
this._cbs.onerror(Error("unknown _state"), this._state); | ||
else if (this._running) { | ||
if (this._state === 1 /* Text */) { | ||
if (this._sectionStart !== this._index) { | ||
this._cbs.ontext(this._buffer.substr(this._sectionStart)); | ||
} | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} | ||
else if (this._sectionStart === this._index) { | ||
//the section just started | ||
this._buffer = ""; | ||
this._bufferOffset += this._index; | ||
this._index = 0; | ||
} | ||
else { | ||
//remove everything unnecessary | ||
this._buffer = this._buffer.substr(this._sectionStart); | ||
this._index -= this._sectionStart; | ||
this._bufferOffset += this._sectionStart; | ||
} | ||
this._sectionStart = 0; | ||
} | ||
this._index++; | ||
} | ||
this._cleanup(); | ||
}; | ||
Tokenizer.prototype.pause = function() { | ||
this._running = false; | ||
}; | ||
Tokenizer.prototype.resume = function() { | ||
this._running = true; | ||
if (this._index < this._buffer.length) { | ||
}; | ||
//TODO make events conditional | ||
Tokenizer.prototype.write = function (chunk) { | ||
if (this._ended) | ||
this._cbs.onerror(Error(".write() after done!")); | ||
this._buffer += chunk; | ||
this._parse(); | ||
} | ||
if (this._ended) { | ||
this._finish(); | ||
} | ||
}; | ||
// Signals the end of input, optionally writing a final `chunk` first.
// Calling `end()` a second time is reported through `onerror` and otherwise
// ignored, so `_finish()` (and therefore `onend`) is not triggered twice.
// While the tokenizer is paused, `_finish()` is not called here.
Tokenizer.prototype.end = function(chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".end() after done!"));
        return;
    }
    if (chunk) this.write(chunk);
    this._ended = true;
    if (this._running) this._finish();
};
// Completes parsing: flushes whatever lies between `_sectionStart` and
// `_index` through `_handleTrailingData`, then fires `onend`.
Tokenizer.prototype._finish = function() {
    var hasTrailingData = this._sectionStart < this._index;
    if (hasTrailingData) {
        // If there is remaining data, emit it in a reasonable way.
        this._handleTrailingData();
    }
    this._cbs.onend();
};
Tokenizer.prototype._handleTrailingData = function() { | ||
var data = this._buffer.substr(this._sectionStart); | ||
if ( | ||
this._state === IN_CDATA || | ||
this._state === AFTER_CDATA_1 || | ||
this._state === AFTER_CDATA_2 | ||
) { | ||
this._cbs.oncdata(data); | ||
} else if ( | ||
this._state === IN_COMMENT || | ||
this._state === AFTER_COMMENT_1 || | ||
this._state === AFTER_COMMENT_2 | ||
) { | ||
this._cbs.oncomment(data); | ||
} else if (this._state === IN_NAMED_ENTITY && !this._xmlMode) { | ||
this._parseLegacyEntity(); | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
}; | ||
// Iterates through the buffer, calling the function corresponding to the current state. | ||
// States that are more likely to be hit are higher up, as a performance improvement. | ||
Tokenizer.prototype._parse = function () { | ||
while (this._index < this._buffer.length && this._running) { | ||
var c = this._buffer.charAt(this._index); | ||
if (this._state === 1 /* Text */) { | ||
this._stateText(c); | ||
} | ||
else if (this._state === 12 /* InAttributeValueDq */) { | ||
this._stateInAttributeValueDoubleQuotes(c); | ||
} | ||
else if (this._state === 9 /* InAttributeName */) { | ||
this._stateInAttributeName(c); | ||
} | ||
else if (this._state === 19 /* InComment */) { | ||
this._stateInComment(c); | ||
} | ||
else if (this._state === 8 /* BeforeAttributeName */) { | ||
this._stateBeforeAttributeName(c); | ||
} | ||
else if (this._state === 3 /* InTagName */) { | ||
this._stateInTagName(c); | ||
} | ||
else if (this._state === 6 /* InClosingTagName */) { | ||
this._stateInCloseingTagName(c); | ||
} | ||
else if (this._state === 2 /* BeforeTagName */) { | ||
this._stateBeforeTagName(c); | ||
} | ||
else if (this._state === 10 /* AfterAttributeName */) { | ||
this._stateAfterAttributeName(c); | ||
} | ||
else if (this._state === 13 /* InAttributeValueSq */) { | ||
this._stateInAttributeValueSingleQuotes(c); | ||
} | ||
else if (this._state === 11 /* BeforeAttributeValue */) { | ||
this._stateBeforeAttributeValue(c); | ||
} | ||
else if (this._state === 5 /* BeforeClosingTagName */) { | ||
this._stateBeforeCloseingTagName(c); | ||
} | ||
else if (this._state === 7 /* AfterClosingTagName */) { | ||
this._stateAfterCloseingTagName(c); | ||
} | ||
else if (this._state === 31 /* BeforeSpecial */) { | ||
this._stateBeforeSpecial(c); | ||
} | ||
else if (this._state === 20 /* AfterComment1 */) { | ||
this._stateAfterComment1(c); | ||
} | ||
else if (this._state === 14 /* InAttributeValueNq */) { | ||
this._stateInAttributeValueNoQuotes(c); | ||
} | ||
else if (this._state === 4 /* InSelfClosingTag */) { | ||
this._stateInSelfClosingTag(c); | ||
} | ||
else if (this._state === 16 /* InDeclaration */) { | ||
this._stateInDeclaration(c); | ||
} | ||
else if (this._state === 15 /* BeforeDeclaration */) { | ||
this._stateBeforeDeclaration(c); | ||
} | ||
else if (this._state === 21 /* AfterComment2 */) { | ||
this._stateAfterComment2(c); | ||
} | ||
else if (this._state === 18 /* BeforeComment */) { | ||
this._stateBeforeComment(c); | ||
} | ||
else if (this._state === 32 /* BeforeSpecialEnd */) { | ||
this._stateBeforeSpecialEnd(c); | ||
} | ||
else if (this._state === 38 /* AfterScript1 */) { | ||
stateAfterScript1(this, c); | ||
} | ||
else if (this._state === 39 /* AfterScript2 */) { | ||
stateAfterScript2(this, c); | ||
} | ||
else if (this._state === 40 /* AfterScript3 */) { | ||
stateAfterScript3(this, c); | ||
} | ||
else if (this._state === 33 /* BeforeScript1 */) { | ||
stateBeforeScript1(this, c); | ||
} | ||
else if (this._state === 34 /* BeforeScript2 */) { | ||
stateBeforeScript2(this, c); | ||
} | ||
else if (this._state === 35 /* BeforeScript3 */) { | ||
stateBeforeScript3(this, c); | ||
} | ||
else if (this._state === 36 /* BeforeScript4 */) { | ||
stateBeforeScript4(this, c); | ||
} | ||
else if (this._state === 37 /* BeforeScript5 */) { | ||
this._stateBeforeScript5(c); | ||
} | ||
else if (this._state === 41 /* AfterScript4 */) { | ||
stateAfterScript4(this, c); | ||
} | ||
else if (this._state === 42 /* AfterScript5 */) { | ||
this._stateAfterScript5(c); | ||
} | ||
else if (this._state === 43 /* BeforeStyle1 */) { | ||
stateBeforeStyle1(this, c); | ||
} | ||
else if (this._state === 28 /* InCdata */) { | ||
this._stateInCdata(c); | ||
} | ||
else if (this._state === 44 /* BeforeStyle2 */) { | ||
stateBeforeStyle2(this, c); | ||
} | ||
else if (this._state === 45 /* BeforeStyle3 */) { | ||
stateBeforeStyle3(this, c); | ||
} | ||
else if (this._state === 46 /* BeforeStyle4 */) { | ||
this._stateBeforeStyle4(c); | ||
} | ||
else if (this._state === 47 /* AfterStyle1 */) { | ||
stateAfterStyle1(this, c); | ||
} | ||
else if (this._state === 48 /* AfterStyle2 */) { | ||
stateAfterStyle2(this, c); | ||
} | ||
else if (this._state === 49 /* AfterStyle3 */) { | ||
stateAfterStyle3(this, c); | ||
} | ||
else if (this._state === 50 /* AfterStyle4 */) { | ||
this._stateAfterStyle4(c); | ||
} | ||
else if (this._state === 17 /* InProcessingInstruction */) { | ||
this._stateInProcessingInstruction(c); | ||
} | ||
else if (this._state === 53 /* InNamedEntity */) { | ||
this._stateInNamedEntity(c); | ||
} | ||
else if (this._state === 22 /* BeforeCdata1 */) { | ||
stateBeforeCdata1(this, c); | ||
} | ||
else if (this._state === 51 /* BeforeEntity */) { | ||
stateBeforeEntity(this, c); | ||
} | ||
else if (this._state === 23 /* BeforeCdata2 */) { | ||
stateBeforeCdata2(this, c); | ||
} | ||
else if (this._state === 24 /* BeforeCdata3 */) { | ||
stateBeforeCdata3(this, c); | ||
} | ||
else if (this._state === 29 /* AfterCdata1 */) { | ||
this._stateAfterCdata1(c); | ||
} | ||
else if (this._state === 30 /* AfterCdata2 */) { | ||
this._stateAfterCdata2(c); | ||
} | ||
else if (this._state === 25 /* BeforeCdata4 */) { | ||
stateBeforeCdata4(this, c); | ||
} | ||
else if (this._state === 26 /* BeforeCdata5 */) { | ||
stateBeforeCdata5(this, c); | ||
} | ||
else if (this._state === 27 /* BeforeCdata6 */) { | ||
this._stateBeforeCdata6(c); | ||
} | ||
else if (this._state === 55 /* InHexEntity */) { | ||
this._stateInHexEntity(c); | ||
} | ||
else if (this._state === 54 /* InNumericEntity */) { | ||
this._stateInNumericEntity(c); | ||
} | ||
else if (this._state === 52 /* BeforeNumericEntity */) { | ||
stateBeforeNumericEntity(this, c); | ||
} | ||
else { | ||
this._cbs.onerror(Error("unknown _state"), this._state); | ||
} | ||
this._index++; | ||
} | ||
} else if (this._state === IN_NUMERIC_ENTITY && !this._xmlMode) { | ||
this._decodeNumericEntity(2, 10); | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
this._cleanup(); | ||
}; | ||
Tokenizer.prototype.pause = function () { | ||
this._running = false; | ||
}; | ||
Tokenizer.prototype.resume = function () { | ||
this._running = true; | ||
if (this._index < this._buffer.length) { | ||
this._parse(); | ||
} | ||
} else if (this._state === IN_HEX_ENTITY && !this._xmlMode) { | ||
this._decodeNumericEntity(3, 16); | ||
if (this._ended) { | ||
this._finish(); | ||
} | ||
}; | ||
// Signals the end of input, optionally writing a final `chunk` first.
// Calling `end()` a second time is reported through `onerror` and otherwise
// ignored, so `_finish()` (and therefore `onend`) is not triggered twice.
// While the tokenizer is paused, `_finish()` is not called here.
Tokenizer.prototype.end = function (chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".end() after done!"));
        return;
    }
    if (chunk)
        this.write(chunk);
    this._ended = true;
    if (this._running)
        this._finish();
};
Tokenizer.prototype._finish = function () { | ||
//if there is remaining data, emit it in a reasonable way | ||
if (this._sectionStart < this._index) { | ||
this._state = this._baseState; | ||
this._handleTrailingData(); | ||
} | ||
} else if ( | ||
this._state !== IN_TAG_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_NAME && | ||
this._state !== BEFORE_ATTRIBUTE_VALUE && | ||
this._state !== AFTER_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_NAME && | ||
this._state !== IN_ATTRIBUTE_VALUE_SQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_DQ && | ||
this._state !== IN_ATTRIBUTE_VALUE_NQ && | ||
this._state !== IN_CLOSING_TAG_NAME | ||
) { | ||
this._cbs.ontext(data); | ||
} | ||
//else, ignore remaining data | ||
//TODO add a way to remove current tag | ||
}; | ||
Tokenizer.prototype.reset = function() { | ||
Tokenizer.call( | ||
this, | ||
{ xmlMode: this._xmlMode, decodeEntities: this._decodeEntities }, | ||
this._cbs | ||
); | ||
}; | ||
Tokenizer.prototype.getAbsoluteIndex = function() { | ||
return this._bufferOffset + this._index; | ||
}; | ||
Tokenizer.prototype._getSection = function() { | ||
return this._buffer.substring(this._sectionStart, this._index); | ||
}; | ||
Tokenizer.prototype._emitToken = function(name) { | ||
this._cbs[name](this._getSection()); | ||
this._sectionStart = -1; | ||
}; | ||
// Forwards a decoded fragment to the handler: `ontext` when the base state
// is TEXT, `onattribdata` for every other base state.
Tokenizer.prototype._emitPartial = function(value) {
    if (this._baseState === TEXT) {
        this._cbs.ontext(value);
        return;
    }
    this._cbs.onattribdata(value); //TODO implement the new event
};
this._cbs.onend(); | ||
}; | ||
// Emits whatever is left in the buffer (from `_sectionStart` on) in a way
// that fits the state the tokenizer is currently in:
//   - CDATA states   -> `oncdata`
//   - comment states -> `oncomment`
//   - entity states (outside XML mode) -> decode the entity, then handle any
//     remainder again from the base state
//   - tag / attribute states -> the remaining data is dropped
//   - everything else -> `ontext`
Tokenizer.prototype._handleTrailingData = function () {
    var state = this._state;
    var data = this._buffer.substr(this._sectionStart);
    if (state === 28 /* InCdata */ ||
        state === 29 /* AfterCdata1 */ ||
        state === 30 /* AfterCdata2 */) {
        this._cbs.oncdata(data);
        return;
    }
    if (state === 19 /* InComment */ ||
        state === 20 /* AfterComment1 */ ||
        state === 21 /* AfterComment2 */) {
        this._cbs.oncomment(data);
        return;
    }
    var inEntity = state === 53 /* InNamedEntity */ ||
        state === 54 /* InNumericEntity */ ||
        state === 55 /* InHexEntity */;
    if (inEntity && !this._xmlMode) {
        if (state === 53 /* InNamedEntity */) {
            this._parseLegacyEntity();
        }
        else if (state === 54 /* InNumericEntity */) {
            this._decodeNumericEntity(2, 10);
        }
        else {
            this._decodeNumericEntity(3, 16);
        }
        // Anything the entity decoder did not consume is handled once more,
        // this time from the base state.
        if (this._sectionStart < this._index) {
            this._state = this._baseState;
            this._handleTrailingData();
        }
        return;
    }
    switch (state) {
        case 3 /* InTagName */:
        case 8 /* BeforeAttributeName */:
        case 11 /* BeforeAttributeValue */:
        case 10 /* AfterAttributeName */:
        case 9 /* InAttributeName */:
        case 13 /* InAttributeValueSq */:
        case 12 /* InAttributeValueDq */:
        case 14 /* InAttributeValueNq */:
        case 6 /* InClosingTagName */:
            //ignore remaining data
            //TODO add a way to remove current tag
            return;
        default:
            this._cbs.ontext(data);
    }
};
Tokenizer.prototype.getAbsoluteIndex = function () { | ||
return this._bufferOffset + this._index; | ||
}; | ||
Tokenizer.prototype._getSection = function () { | ||
return this._buffer.substring(this._sectionStart, this._index); | ||
}; | ||
Tokenizer.prototype._emitToken = function (name) { | ||
this._cbs[name](this._getSection()); | ||
this._sectionStart = -1; | ||
}; | ||
// Forwards a decoded fragment to the handler: `ontext` when the base state
// is Text, `onattribdata` for every other base state.
Tokenizer.prototype._emitPartial = function (value) {
    if (this._baseState === 1 /* Text */) {
        this._cbs.ontext(value);
        return;
    }
    this._cbs.onattribdata(value); //TODO implement the new event
};
return Tokenizer; | ||
}()); | ||
exports.default = Tokenizer; |
@@ -1,25 +0,48 @@ | ||
module.exports = Stream; | ||
var Parser = require("./Parser.js"); | ||
var WritableStream = require("readable-stream").Writable; | ||
var StringDecoder = require("string_decoder").StringDecoder; | ||
var Buffer = require("buffer").Buffer; | ||
function Stream(cbs, options) { | ||
var parser = (this._parser = new Parser(cbs, options)); | ||
var decoder = (this._decoder = new StringDecoder()); | ||
WritableStream.call(this, { decodeStrings: false }); | ||
this.once("finish", function() { | ||
parser.end(decoder.end()); | ||
}); | ||
"use strict"; | ||
var __extends = (this && this.__extends) || (function () { | ||
var extendStatics = function (d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
return function (d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
})(); | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var Parser_1 = require("./Parser"); | ||
var stream_1 = require("stream"); | ||
var string_decoder_1 = require("string_decoder"); | ||
// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream | ||
function isBuffer(_chunk, encoding) { | ||
return encoding === "buffer"; | ||
} | ||
require("inherits")(Stream, WritableStream); | ||
Stream.prototype._write = function(chunk, encoding, cb) { | ||
if (chunk instanceof Buffer) chunk = this._decoder.write(chunk); | ||
this._parser.write(chunk); | ||
cb(); | ||
}; | ||
/**
 * WritableStream exposes the `Parser` interface as a NodeJS writable stream.
 *
 * The base stream is constructed with `decodeStrings: false`. Chunks that
 * arrive with the "buffer" encoding are run through a `StringDecoder` before
 * being written to the parser; all other chunks are passed on unchanged.
 *
 * @see Parser
 */
var WritableStream = /** @class */ (function (_super) {
    __extends(WritableStream, _super);
    /**
     * @param cbs Handler callbacks, forwarded to the `Parser` constructor.
     * @param options Parser options, forwarded to the `Parser` constructor.
     */
    function WritableStream(cbs, options) {
        var self = _super.call(this, { decodeStrings: false }) || this;
        self._decoder = new string_decoder_1.StringDecoder();
        self._parser = new Parser_1.Parser(cbs, options);
        return self;
    }
    // Feeds one chunk to the parser, decoding it first when it is a buffer.
    WritableStream.prototype._write = function (chunk, encoding, cb) {
        var text = isBuffer(chunk, encoding)
            ? this._decoder.write(chunk)
            : chunk;
        this._parser.write(text);
        cb();
    };
    // Flushes whatever the decoder still holds and ends the parser.
    WritableStream.prototype._final = function (cb) {
        var remainder = this._decoder.end();
        this._parser.end(remainder);
        cb();
    };
    return WritableStream;
}(stream_1.Writable));
exports.WritableStream = WritableStream;
{ | ||
"name": "htmlparser2", | ||
"description": "Fast & forgiving HTML/XML/RSS parser", | ||
"version": "3.10.1", | ||
"version": "4.0.0", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
@@ -20,6 +20,2 @@ "keywords": [ | ||
}, | ||
"bugs": { | ||
"mail": "me@feedic.com", | ||
"url": "http://github.com/fb55/htmlparser2/issues" | ||
}, | ||
"directories": { | ||
@@ -30,27 +26,37 @@ "lib": "lib/" | ||
"files": [ | ||
"lib" | ||
"lib/**/*" | ||
], | ||
"browser": { | ||
"./lib/WritableStream.js": false | ||
}, | ||
"scripts": { | ||
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec", | ||
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)", | ||
"test": "mocha && npm run lint", | ||
"lint": "eslint lib test" | ||
"test": "jest --coverage -u && npm run lint", | ||
"coverage": "cat coverage/lcov.info | coveralls", | ||
"lint": "eslint src/**/*.ts", | ||
"format": "prettier --write '**/*.{ts,md,json}'", | ||
"build": "tsc", | ||
"prepare": "npm run build" | ||
}, | ||
"dependencies": { | ||
"domelementtype": "^1.3.1", | ||
"domhandler": "^2.3.0", | ||
"domutils": "^1.5.1", | ||
"entities": "^1.1.1", | ||
"inherits": "^2.0.1", | ||
"readable-stream": "^3.1.1" | ||
"domelementtype": "^2.0.1", | ||
"domhandler": "^3.0.0", | ||
"domutils": "^2.0.0", | ||
"entities": "^2.0.0" | ||
}, | ||
"devDependencies": { | ||
"@types/jest": "^24.0.16", | ||
"@types/node": "^12.6.8", | ||
"@typescript-eslint/eslint-plugin": "^1.13.0", | ||
"@typescript-eslint/parser": "^1.13.0", | ||
"coveralls": "^3.0.1", | ||
"eslint": "^5.13.0", | ||
"istanbul": "^0.4.3", | ||
"mocha": "^5.2.0", | ||
"mocha-lcov-reporter": "^1.2.0" | ||
"eslint": "^6.0.0", | ||
"eslint-config-prettier": "^6.0.0", | ||
"jest": "^24.8.0", | ||
"prettier": "^1.18.2", | ||
"ts-jest": "^24.0.2", | ||
"typescript": "^3.5.3" | ||
}, | ||
"browser": { | ||
"readable-stream": false | ||
"jest": { | ||
"preset": "ts-jest", | ||
"testEnvironment": "node" | ||
}, | ||
@@ -57,0 +63,0 @@ "license": "MIT", |
@@ -8,7 +8,9 @@ # htmlparser2 | ||
A forgiving HTML/XML/RSS parser. The parser can handle streams and provides a callback interface. | ||
A forgiving HTML/XML/RSS parser. | ||
The parser can handle streams and provides a callback interface. | ||
## Installation | ||
npm install htmlparser2 | ||
npm install htmlparser2 | ||
A live demo of htmlparser2 is available [here](https://astexplorer.net/#/2AmVrGuGVJ). | ||
@@ -19,19 +21,24 @@ | ||
```javascript | ||
var htmlparser = require("htmlparser2"); | ||
var parser = new htmlparser.Parser({ | ||
onopentag: function(name, attribs){ | ||
if(name === "script" && attribs.type === "text/javascript"){ | ||
console.log("JS! Hooray!"); | ||
} | ||
}, | ||
ontext: function(text){ | ||
console.log("-->", text); | ||
}, | ||
onclosetag: function(tagname){ | ||
if(tagname === "script"){ | ||
console.log("That's it?!"); | ||
} | ||
} | ||
}, {decodeEntities: true}); | ||
parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>"); | ||
const htmlparser2 = require("htmlparser2"); | ||
const parser = new htmlparser2.Parser( | ||
{ | ||
onopentag(name, attribs) { | ||
if (name === "script" && attribs.type === "text/javascript") { | ||
console.log("JS! Hooray!"); | ||
} | ||
}, | ||
ontext(text) { | ||
console.log("-->", text); | ||
}, | ||
onclosetag(tagname) { | ||
if (tagname === "script") { | ||
console.log("That's it?!"); | ||
} | ||
} | ||
}, | ||
{ decodeEntities: true } | ||
); | ||
parser.write( | ||
"Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>" | ||
); | ||
parser.end(); | ||
@@ -54,2 +61,3 @@ ``` | ||
## Get a DOM | ||
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper. | ||
@@ -62,12 +70,10 @@ | ||
```javascript | ||
new htmlparser.FeedHandler(function(<error> error, <object> feed){ | ||
... | ||
}); | ||
const feed = htmlparser2.parseFeed(content, options); | ||
``` | ||
Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained. | ||
Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained. | ||
## Performance | ||
After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites. | |
After having some artificial benchmarks for some time, **@AndreasMadsen** published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites. | |
@@ -91,6 +97,16 @@ At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests): | ||
This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore. | ||
This module started as a fork of the `htmlparser` module. | ||
The main difference is that `htmlparser2` is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). | ||
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore. | ||
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). As a result, old handlers won't work anymore. | ||
The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). | ||
As a result, old handlers won't work anymore. | ||
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected. | |
## Security contact information | ||
To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). | ||
Tidelift will coordinate the fix and disclosure. | ||
[Get supported htmlparser2 with the Tidelift Subscription](https://tidelift.com/subscription/pkg/npm-htmlparser2?utm_source=npm-htmlparser2&utm_medium=referral&utm_campaign=readme) |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
90614
4
24
2030
108
11
1
8
+ Added dom-serializer@1.4.1 (transitive)
+ Added domhandler@3.3.0, 4.3.1 (transitive)
+ Added domutils@2.8.0 (transitive)
- Removed inherits@^2.0.1
- Removed readable-stream@^3.1.1
- Removed dom-serializer@0.2.2 (transitive)
- Removed domelementtype@1.3.1 (transitive)
- Removed domhandler@2.4.2 (transitive)
- Removed domutils@1.7.0 (transitive)
- Removed entities@1.1.2 (transitive)
- Removed inherits@2.0.4 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed safe-buffer@5.2.1 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
Updated domelementtype@^2.0.1
Updated domhandler@^3.0.0
Updated domutils@^2.0.0
Updated entities@^2.0.0