Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

htmlparser2

Package Overview
Dependencies
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

htmlparser2 - npm Package Compare versions

Comparing version 3.10.1 to 4.0.0

lib/CollectingHandler.d.ts

110

lib/CollectingHandler.js

@@ -1,57 +0,59 @@

module.exports = CollectingHandler;
function CollectingHandler(cbs) {
this._cbs = cbs || {};
this.events = [];
}
var EVENTS = require("./").EVENTS;
Object.keys(EVENTS).forEach(function(name) {
if (EVENTS[name] === 0) {
name = "on" + name;
CollectingHandler.prototype[name] = function() {
this.events.push([name]);
if (this._cbs[name]) this._cbs[name]();
};
} else if (EVENTS[name] === 1) {
name = "on" + name;
CollectingHandler.prototype[name] = function(a) {
this.events.push([name, a]);
if (this._cbs[name]) this._cbs[name](a);
};
} else if (EVENTS[name] === 2) {
name = "on" + name;
CollectingHandler.prototype[name] = function(a, b) {
this.events.push([name, a, b]);
if (this._cbs[name]) this._cbs[name](a, b);
};
} else {
throw Error("wrong number of arguments");
}
});
CollectingHandler.prototype.onreset = function() {
this.events = [];
if (this._cbs.onreset) this._cbs.onreset();
"use strict";
// Inheritance helper: `__extends(d, b)` links constructor `d` to base `b`
// (static side and prototype side). An already-present `this.__extends` wins.
var __extends = (this && this.__extends) || (function () {
    // Chooses the static-linking strategy on first use, then replaces itself
    // with the chosen function so the choice is made only once.
    var linkStatics = function (derived, base) {
        if (Object.setPrototypeOf) {
            linkStatics = Object.setPrototypeOf;
        }
        else if ({ __proto__: [] } instanceof Array) {
            linkStatics = function (target, parent) { target.__proto__ = parent; };
        }
        else {
            // Last resort: copy the base's own properties onto the derived constructor.
            linkStatics = function (target, parent) {
                for (var key in parent) {
                    if (parent.hasOwnProperty(key)) {
                        target[key] = parent[key];
                    }
                }
            };
        }
        return linkStatics(derived, base);
    };
    return function (d, b) {
        linkStatics(d, b);
        // Surrogate constructor: yields an object that inherits from
        // b.prototype and reports `d` as its constructor.
        function Surrogate() { this.constructor = d; }
        if (b === null) {
            d.prototype = Object.create(b);
        }
        else {
            Surrogate.prototype = b.prototype;
            d.prototype = new Surrogate();
        }
    };
})();
// Default-import interop: modules flagged `__esModule` are returned unchanged;
// anything else is wrapped so it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
CollectingHandler.prototype.restart = function() {
if (this._cbs.onreset) this._cbs.onreset();
for (var i = 0, len = this.events.length; i < len; i++) {
if (this._cbs[this.events[i][0]]) {
var num = this.events[i].length;
if (num === 1) {
this._cbs[this.events[i][0]]();
} else if (num === 2) {
this._cbs[this.events[i][0]](this.events[i][1]);
} else {
this._cbs[this.events[i][0]](
this.events[i][1],
this.events[i][2]
);
Object.defineProperty(exports, "__esModule", { value: true });
var MultiplexHandler_1 = __importDefault(require("./MultiplexHandler"));
var CollectingHandler = /** @class */ (function (_super) {
__extends(CollectingHandler, _super);
function CollectingHandler(cbs) {
if (cbs === void 0) { cbs = {}; }
var _this = _super.call(this, function (name) {
var _a;
var args = [];
for (var _i = 1; _i < arguments.length; _i++) {
args[_i - 1] = arguments[_i];
}
_this.events.push([name].concat(args));
// @ts-ignore
if (_this._cbs[name])
(_a = _this._cbs)[name].apply(_a, args);
}) || this;
_this._cbs = cbs;
_this.events = [];
return _this;
}
CollectingHandler.prototype.onreset = function () {
this.events = [];
if (this._cbs.onreset)
this._cbs.onreset();
};
CollectingHandler.prototype.restart = function () {
var _a;
if (this._cbs.onreset)
this._cbs.onreset();
for (var i = 0; i < this.events.length; i++) {
var _b = this.events[i], name_1 = _b[0], args = _b.slice(1);
if (!this._cbs[name_1]) {
continue;
}
// @ts-ignore
(_a = this._cbs)[name_1].apply(_a, args);
}
}
};
};
return CollectingHandler;
}(MultiplexHandler_1.default));
exports.CollectingHandler = CollectingHandler;

@@ -1,13 +0,119 @@

var DomHandler = require("domhandler");
var DomUtils = require("domutils");
//TODO: make this a streamable handler
function FeedHandler(callback, options) {
this.init(callback, options);
}
require("inherits")(FeedHandler, DomHandler);
FeedHandler.prototype.init = DomHandler;
"use strict";
// Inheritance helper: `__extends(d, b)` links constructor `d` to base `b`
// (static side and prototype side). An already-present `this.__extends` wins.
var __extends = (this && this.__extends) || (function () {
    // Chooses the static-linking strategy on first use, then replaces itself
    // with the chosen function so the choice is made only once.
    var linkStatics = function (derived, base) {
        if (Object.setPrototypeOf) {
            linkStatics = Object.setPrototypeOf;
        }
        else if ({ __proto__: [] } instanceof Array) {
            linkStatics = function (target, parent) { target.__proto__ = parent; };
        }
        else {
            // Last resort: copy the base's own properties onto the derived constructor.
            linkStatics = function (target, parent) {
                for (var key in parent) {
                    if (parent.hasOwnProperty(key)) {
                        target[key] = parent[key];
                    }
                }
            };
        }
        return linkStatics(derived, base);
    };
    return function (d, b) {
        linkStatics(d, b);
        // Surrogate constructor: yields an object that inherits from
        // b.prototype and reports `d` as its constructor.
        function Surrogate() { this.constructor = d; }
        if (b === null) {
            d.prototype = Object.create(b);
        }
        else {
            Surrogate.prototype = b.prototype;
            d.prototype = new Surrogate();
        }
    };
})();
// Default-import interop: modules flagged `__esModule` are returned unchanged;
// anything else is wrapped so it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Namespace-import interop: modules flagged `__esModule` are returned
// unchanged; otherwise a fresh object is built holding the module's own
// enumerable properties plus the module itself under `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (Object.hasOwnProperty.call(mod, key)) {
                namespace[key] = mod[key];
            }
        }
    }
    // Assigned last, so a module's own `default` key is overwritten by the module itself.
    namespace["default"] = mod;
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
var domhandler_1 = __importDefault(require("domhandler"));
var DomUtils = __importStar(require("domutils"));
var Parser_1 = require("./Parser");
//TODO: Consume data as it is coming in
var FeedHandler = /** @class */ (function (_super) {
    __extends(FeedHandler, _super);
    /**
     * Handler that extends the base DOM handler and, once parsing ends,
     * condenses the collected DOM into a plain feed object on `this.feed`.
     *
     * @param callback Forwarded to the base handler. When an object is passed
     *   here it is treated as `options` and no callback is forwarded.
     * @param options Forwarded to the base handler.
     */
    function FeedHandler(callback, options) {
        var _this = this;
        if (typeof callback === "object" && callback !== null) {
            // Options were supplied in the callback slot. Copy them over
            // BEFORE clearing `callback`; doing it the other way round
            // discards the caller's options.
            options = callback;
            callback = undefined;
        }
        _this = _super.call(this, callback, options) || this;
        return _this;
    }
    /**
     * Builds `this.feed` from `this.dom`, then reports completion through
     * `this.handleCallback` (with an Error if no feed root was found).
     *
     * The feed object carries `type`, `id`, `title`, `link`, `description`,
     * `updated`, `author` and `items`. Each item carries `id`, `title`,
     * `link`, `description` and `pubDate`. Optional fields are only set when
     * the source element yields a non-empty value.
     */
    FeedHandler.prototype.onend = function () {
        var feed = {};
        var feedRoot = getOneElement(isValidFeed, this.dom);
        if (feedRoot) {
            if (feedRoot.name === "feed") {
                // Atom: metadata and entries are direct children of <feed>.
                var childs = feedRoot.children;
                feed.type = "atom";
                addConditionally(feed, "id", "id", childs);
                addConditionally(feed, "title", "title", childs);
                var href = getAttribute("href", getOneElement("link", childs));
                if (href) {
                    feed.link = href;
                }
                addConditionally(feed, "description", "subtitle", childs);
                var updated = fetch("updated", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }
                addConditionally(feed, "author", "email", childs, true);
                feed.items = getElements("entry", childs).map(function (item) {
                    var entry = {};
                    var children = item.children;
                    addConditionally(entry, "id", "id", children);
                    addConditionally(entry, "title", "title", children);
                    var href = getAttribute("href", getOneElement("link", children));
                    if (href) {
                        entry.link = href;
                    }
                    // Prefer <summary>, fall back to <content>.
                    var description = fetch("summary", children) ||
                        fetch("content", children);
                    if (description) {
                        entry.description = description;
                    }
                    var pubDate = fetch("updated", children);
                    if (pubDate) {
                        entry.pubDate = new Date(pubDate);
                    }
                    return entry;
                });
            }
            else {
                // RSS / RDF: metadata lives under <channel>. A root without a
                // <channel> is treated as having no metadata instead of
                // throwing on `.children` of a missing element.
                var channel = getOneElement("channel", feedRoot.children);
                var childs = channel ? channel.children : [];
                // First three characters of the root tag name, e.g. "rss" or "rdf".
                feed.type = feedRoot.name.substr(0, 3);
                feed.id = "";
                addConditionally(feed, "title", "title", childs);
                addConditionally(feed, "link", "link", childs);
                addConditionally(feed, "description", "description", childs);
                var updated = fetch("lastBuildDate", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }
                addConditionally(feed, "author", "managingEditor", childs, true);
                // Items are looked up from the root's children, not the channel's.
                feed.items = getElements("item", feedRoot.children).map(function (item) {
                    var entry = {};
                    var children = item.children;
                    addConditionally(entry, "id", "guid", children);
                    addConditionally(entry, "title", "title", children);
                    addConditionally(entry, "link", "link", children);
                    addConditionally(entry, "description", "description", children);
                    var pubDate = fetch("pubDate", children);
                    if (pubDate)
                        entry.pubDate = new Date(pubDate);
                    return entry;
                });
            }
        }
        this.feed = feed;
        this.handleCallback(feedRoot ? null : Error("couldn't find root of feed"));
    };
    return FeedHandler;
}(domhandler_1.default));
exports.FeedHandler = FeedHandler;
function getElements(what, where) {

@@ -20,96 +126,35 @@ return DomUtils.getElementsByTagName(what, where, true);

function fetch(what, where, recurse) {
return DomUtils.getText(
DomUtils.getElementsByTagName(what, where, recurse, 1)
).trim();
if (recurse === void 0) { recurse = false; }
return DomUtils.getText(DomUtils.getElementsByTagName(what, where, recurse, 1)).trim();
}
/**
 * Reads one attribute from an element.
 *
 * @param name Attribute name to look up.
 * @param elem Element to read from; may be null/undefined.
 * @returns `null` when `elem` is falsy, otherwise `elem.attribs[name]`
 *   (which is `undefined` when the attribute is absent).
 */
function getAttribute(name, elem) {
    return elem ? elem.attribs[name] : null;
}
function addConditionally(obj, prop, what, where, recurse) {
if (recurse === void 0) { recurse = false; }
var tmp = fetch(what, where, recurse);
if (tmp) obj[prop] = tmp;
// @ts-ignore
if (tmp)
obj[prop] = tmp;
}
var isValidFeed = function(value) {
function isValidFeed(value) {
return value === "rss" || value === "feed" || value === "rdf:RDF";
};
FeedHandler.prototype.onend = function() {
var feed = {},
feedRoot = getOneElement(isValidFeed, this.dom),
tmp,
childs;
if (feedRoot) {
if (feedRoot.name === "feed") {
childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
if (
(tmp = getOneElement("link", childs)) &&
(tmp = tmp.attribs) &&
(tmp = tmp.href)
)
feed.link = tmp;
addConditionally(feed, "description", "subtitle", childs);
if ((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function(item) {
var entry = {},
tmp;
item = item.children;
addConditionally(entry, "id", "id", item);
addConditionally(entry, "title", "title", item);
if (
(tmp = getOneElement("link", item)) &&
(tmp = tmp.attribs) &&
(tmp = tmp.href)
)
entry.link = tmp;
if ((tmp = fetch("summary", item) || fetch("content", item)))
entry.description = tmp;
if ((tmp = fetch("updated", item)))
entry.pubDate = new Date(tmp);
return entry;
});
} else {
childs = getOneElement("channel", feedRoot.children).children;
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
if ((tmp = fetch("lastBuildDate", childs)))
feed.updated = new Date(tmp);
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function(
item
) {
var entry = {},
tmp;
item = item.children;
addConditionally(entry, "id", "guid", item);
addConditionally(entry, "title", "title", item);
addConditionally(entry, "link", "link", item);
addConditionally(entry, "description", "description", item);
if ((tmp = fetch("pubDate", item)))
entry.pubDate = new Date(tmp);
return entry;
});
}
}
this.dom = feed;
DomHandler.prototype._handleCallback.call(
this,
feedRoot ? null : Error("couldn't find root of feed")
);
};
module.exports = FeedHandler;
}
var defaultOptions = { xmlMode: true };
/**
 * Parses a feed document in one go and returns the extracted feed object.
 *
 * @param feed The feed to parse, as a string.
 * @param options Optional parser/handler options. When omitted,
 *   `defaultOptions` is used. A supplied object replaces the defaults
 *   entirely, so callers usually want to include `xmlMode: true` themselves.
 * @returns The handler's `feed` property after parsing has ended.
 */
function parseFeed(feed, options) {
    var effectiveOptions = options === void 0 ? defaultOptions : options;
    var feedHandler = new FeedHandler(effectiveOptions);
    var parser = new Parser_1.Parser(feedHandler, effectiveOptions);
    parser.end(feed);
    return feedHandler.feed;
}
exports.parseFeed = parseFeed;

@@ -1,72 +0,76 @@

var Parser = require("./Parser.js");
var DomHandler = require("domhandler");
function defineProp(name, value) {
delete module.exports[name];
module.exports[name] = value;
return value;
"use strict";
// Re-exports every enumerable property of `m` onto this module's `exports`,
// leaving alone any name `exports` already owns.
function __export(m) {
    for (var key in m) {
        if (exports.hasOwnProperty(key)) {
            continue;
        }
        exports[key] = m[key];
    }
}
module.exports = {
Parser: Parser,
Tokenizer: require("./Tokenizer.js"),
ElementType: require("domelementtype"),
DomHandler: DomHandler,
get FeedHandler() {
return defineProp("FeedHandler", require("./FeedHandler.js"));
},
get Stream() {
return defineProp("Stream", require("./Stream.js"));
},
get WritableStream() {
return defineProp("WritableStream", require("./WritableStream.js"));
},
get ProxyHandler() {
return defineProp("ProxyHandler", require("./ProxyHandler.js"));
},
get DomUtils() {
return defineProp("DomUtils", require("domutils"));
},
get CollectingHandler() {
return defineProp(
"CollectingHandler",
require("./CollectingHandler.js")
);
},
// For legacy support
DefaultHandler: DomHandler,
get RssHandler() {
return defineProp("RssHandler", this.FeedHandler);
},
//helper methods
parseDOM: function(data, options) {
var handler = new DomHandler(options);
new Parser(handler, options).end(data);
return handler.dom;
},
parseFeed: function(feed, options) {
var handler = new module.exports.FeedHandler(options);
new Parser(handler, options).end(feed);
return handler.dom;
},
createDomStream: function(cb, options, elementCb) {
var handler = new DomHandler(cb, options, elementCb);
return new Parser(handler, options);
},
// List of all events that the parser emits
EVENTS: {
/* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
// Namespace-import interop: modules flagged `__esModule` are returned
// unchanged; otherwise a fresh object is built holding the module's own
// enumerable properties plus the module itself under `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (Object.hasOwnProperty.call(mod, key)) {
                namespace[key] = mod[key];
            }
        }
    }
    // Assigned last, so a module's own `default` key is overwritten by the module itself.
    namespace["default"] = mod;
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
var Parser_1 = require("./Parser");
exports.Parser = Parser_1.Parser;
var domhandler_1 = require("domhandler");
exports.DomHandler = domhandler_1.DomHandler;
exports.DefaultHandler = domhandler_1.DomHandler;
// Helper methods
/**
 * Parses a complete document and returns the DOM built from it.
 *
 * @param data The data that should be parsed.
 * @param options Optional options, passed to both the parser and the DOM handler.
 * @returns The handler's `dom` property after parsing has ended.
 */
function parseDOM(data, options) {
    // No completion callback is passed; the result is read from the handler afterwards.
    var domBuilder = new domhandler_1.DomHandler(void 0, options);
    var parser = new Parser_1.Parser(domBuilder, options);
    parser.end(data);
    return domBuilder.dom;
}
exports.parseDOM = parseDOM;
/**
 * Builds a parser that is wired to a new DOM handler.
 *
 * @param cb Callback handed to the DOM handler (called once parsing has completed).
 * @param options Optional options, passed to both the parser and the DOM handler.
 * @param elementCb Optional callback handed to the DOM handler (called every time a tag has been completed inside of the DOM).
 * @returns The newly created parser; feed it data to populate the DOM.
 */
function createDomStream(cb, options, elementCb) {
    var domBuilder = new domhandler_1.DomHandler(cb, options, elementCb);
    var parser = new Parser_1.Parser(domBuilder, options);
    return parser;
}
exports.createDomStream = createDomStream;
var Tokenizer_1 = require("./Tokenizer");
exports.Tokenizer = Tokenizer_1.default;
var ElementType = __importStar(require("domelementtype"));
exports.ElementType = ElementType;
/**
* List of all events that the parser emits.
*
* Format: eventname: number of arguments.
*/
exports.EVENTS = {
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
};
/*
All of the following exports exist for backwards-compatibility.
They should probably be removed eventually.
*/
__export(require("./FeedHandler"));
__export(require("./WritableStream"));
__export(require("./CollectingHandler"));
var DomUtils = __importStar(require("domutils"));
exports.DomUtils = DomUtils;
var FeedHandler_1 = require("./FeedHandler");
exports.RssHandler = FeedHandler_1.FeedHandler;

@@ -1,49 +0,44 @@

var Tokenizer = require("./Tokenizer.js");
/*
Options:
xmlMode: Disables the special behavior for script/style tags (false by default)
lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
*/
/*
Callbacks:
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onprocessinginstruction,
onreset,
ontext
*/
var formTags = {
input: true,
option: true,
optgroup: true,
select: true,
button: true,
datalist: true,
textarea: true
"use strict";
// Inheritance helper: `__extends(d, b)` links constructor `d` to base `b`
// (static side and prototype side). An already-present `this.__extends` wins.
var __extends = (this && this.__extends) || (function () {
    // Chooses the static-linking strategy on first use, then replaces itself
    // with the chosen function so the choice is made only once.
    var linkStatics = function (derived, base) {
        if (Object.setPrototypeOf) {
            linkStatics = Object.setPrototypeOf;
        }
        else if ({ __proto__: [] } instanceof Array) {
            linkStatics = function (target, parent) { target.__proto__ = parent; };
        }
        else {
            // Last resort: copy the base's own properties onto the derived constructor.
            linkStatics = function (target, parent) {
                for (var key in parent) {
                    if (parent.hasOwnProperty(key)) {
                        target[key] = parent[key];
                    }
                }
            };
        }
        return linkStatics(derived, base);
    };
    return function (d, b) {
        linkStatics(d, b);
        // Surrogate constructor: yields an object that inherits from
        // b.prototype and reports `d` as its constructor.
        function Surrogate() { this.constructor = d; }
        if (b === null) {
            d.prototype = Object.create(b);
        }
        else {
            Surrogate.prototype = b.prototype;
            d.prototype = new Surrogate();
        }
    };
})();
// Default-import interop: modules flagged `__esModule` are returned unchanged;
// anything else is wrapped so it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var Tokenizer_1 = __importDefault(require("./Tokenizer"));
var events_1 = require("events");
// Form-control tag names. One Set instance is shared as the value of several
// openImpliesClose entries (e.g. `select: formTags`, `textarea: formTags`).
var formTags = new Set([
"input",
"option",
"optgroup",
"select",
"button",
"datalist",
"textarea"
]);
// Single-member Set shared by every openImpliesClose entry whose value is
// `pTag` (p, h1-h6, address, div, ...).
var pTag = new Set(["p"]);
var openImpliesClose = {
tr: { tr: true, th: true, td: true },
th: { th: true },
td: { thead: true, th: true, td: true },
body: { head: true, link: true, script: true },
li: { li: true },
p: { p: true },
h1: { p: true },
h2: { p: true },
h3: { p: true },
h4: { p: true },
h5: { p: true },
h6: { p: true },
tr: new Set(["tr", "th", "td"]),
th: new Set(["th"]),
td: new Set(["thead", "th", "td"]),
body: new Set(["head", "link", "script"]),
li: new Set(["li"]),
p: pTag,
h1: pTag,
h2: pTag,
h3: pTag,
h4: pTag,
h5: pTag,
h6: pTag,
select: formTags,

@@ -55,329 +50,322 @@ input: formTags,

textarea: formTags,
option: { option: true },
optgroup: { optgroup: true }
option: new Set(["option"]),
optgroup: new Set(["optgroup", "option"]),
dd: new Set(["dt", "dd"]),
dt: new Set(["dt", "dd"]),
address: pTag,
article: pTag,
aside: pTag,
blockquote: pTag,
details: pTag,
div: pTag,
dl: pTag,
fieldset: pTag,
figcaption: pTag,
figure: pTag,
footer: pTag,
form: pTag,
header: pTag,
hr: pTag,
main: pTag,
nav: pTag,
ol: pTag,
pre: pTag,
section: pTag,
table: pTag,
ul: pTag,
rt: new Set(["rt", "rp"]),
rp: new Set(["rt", "rp"]),
tbody: new Set(["thead", "tbody"]),
tfoot: new Set(["thead", "tbody"])
};
var voidElements = {
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
command: true,
embed: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
keygen: true,
link: true,
meta: true,
param: true,
source: true,
track: true,
wbr: true
};
var foreignContextElements = {
__proto__: null,
math: true,
svg: true
};
var htmlIntegrationElements = {
__proto__: null,
mi: true,
mo: true,
mn: true,
ms: true,
mtext: true,
"annotation-xml": true,
foreignObject: true,
desc: true,
title: true
};
var re_nameEnd = /\s|\//;
function Parser(cbs, options) {
this._options = options || {};
this._cbs = cbs || {};
this._tagname = "";
this._attribname = "";
this._attribvalue = "";
this._attribs = null;
this._stack = [];
this._foreignContext = [];
this.startIndex = 0;
this.endIndex = null;
this._lowerCaseTagNames =
"lowerCaseTags" in this._options
? !!this._options.lowerCaseTags
: !this._options.xmlMode;
this._lowerCaseAttributeNames =
"lowerCaseAttributeNames" in this._options
? !!this._options.lowerCaseAttributeNames
: !this._options.xmlMode;
if (this._options.Tokenizer) {
Tokenizer = this._options.Tokenizer;
// Tag names that onopentagname does not push onto the open-element stack
// unless xmlMode is set; onopentagend emits onclosetag for them right away
// (again only outside xmlMode, and only if an onclosetag callback exists).
var voidElements = new Set([
"area",
"base",
"basefont",
"br",
"col",
"command",
"embed",
"frame",
"hr",
"img",
"input",
"isindex",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr"
]);
// Opening one of these tags pushes `true` onto the parser's _foreignContext stack.
var foreignContextElements = new Set(["math", "svg"]);
// Opening one of these tags pushes `false` onto the parser's _foreignContext stack.
var htmlIntegrationElements = new Set([
"mi",
"mo",
"mn",
"ms",
"mtext",
"annotation-xml",
"foreignObject",
"desc",
"title"
]);
// Matches a whitespace character or "/"; _getInstructionName cuts the
// instruction name off at the first match.
var reNameEnd = /\s|\//;
var Parser = /** @class */ (function (_super) {
__extends(Parser, _super);
function Parser(cbs, options) {
var _this = _super.call(this) || this;
_this._tagname = "";
_this._attribname = "";
_this._attribvalue = "";
_this._attribs = null;
_this._stack = [];
_this._foreignContext = [];
_this.startIndex = 0;
_this.endIndex = null;
// Aliases for backwards compatibility
_this.parseChunk = Parser.prototype.write;
_this.done = Parser.prototype.end;
_this._options = options || {};
_this._cbs = cbs || {};
_this._tagname = "";
_this._attribname = "";
_this._attribvalue = "";
_this._attribs = null;
_this._stack = [];
_this._foreignContext = [];
_this.startIndex = 0;
_this.endIndex = null;
_this._lowerCaseTagNames =
"lowerCaseTags" in _this._options
? !!_this._options.lowerCaseTags
: !_this._options.xmlMode;
_this._lowerCaseAttributeNames =
"lowerCaseAttributeNames" in _this._options
? !!_this._options.lowerCaseAttributeNames
: !_this._options.xmlMode;
_this._tokenizer = new (_this._options.Tokenizer || Tokenizer_1.default)(_this._options, _this);
if (_this._cbs.onparserinit)
_this._cbs.onparserinit(_this);
return _this;
}
this._tokenizer = new Tokenizer(this._options, this);
if (this._cbs.onparserinit) this._cbs.onparserinit(this);
}
require("inherits")(Parser, require("events").EventEmitter);
Parser.prototype._updatePosition = function(initialOffset) {
if (this.endIndex === null) {
if (this._tokenizer._sectionStart <= initialOffset) {
this.startIndex = 0;
} else {
this.startIndex = this._tokenizer._sectionStart - initialOffset;
Parser.prototype._updatePosition = function (initialOffset) {
if (this.endIndex === null) {
if (this._tokenizer._sectionStart <= initialOffset) {
this.startIndex = 0;
}
else {
this.startIndex = this._tokenizer._sectionStart - initialOffset;
}
}
} else this.startIndex = this.endIndex + 1;
this.endIndex = this._tokenizer.getAbsoluteIndex();
};
//Tokenizer event handlers
Parser.prototype.ontext = function(data) {
this._updatePosition(1);
this.endIndex--;
if (this._cbs.ontext) this._cbs.ontext(data);
};
Parser.prototype.onopentagname = function(name) {
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
this._tagname = name;
if (!this._options.xmlMode && name in openImpliesClose) {
for (
var el;
(el = this._stack[this._stack.length - 1]) in
openImpliesClose[name];
this.onclosetag(el)
);
}
if (this._options.xmlMode || !(name in voidElements)) {
this._stack.push(name);
if (name in foreignContextElements) this._foreignContext.push(true);
else if (name in htmlIntegrationElements)
this._foreignContext.push(false);
}
if (this._cbs.onopentagname) this._cbs.onopentagname(name);
if (this._cbs.onopentag) this._attribs = {};
};
Parser.prototype.onopentagend = function() {
this._updatePosition(1);
if (this._attribs) {
else
this.startIndex = this.endIndex + 1;
this.endIndex = this._tokenizer.getAbsoluteIndex();
};
//Tokenizer event handlers
Parser.prototype.ontext = function (data) {
this._updatePosition(1);
// @ts-ignore
this.endIndex--;
if (this._cbs.ontext)
this._cbs.ontext(data);
};
Parser.prototype.onopentagname = function (name) {
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
this._tagname = name;
if (!this._options.xmlMode && name in openImpliesClose) {
for (var el = void 0;
// @ts-ignore
openImpliesClose[name].has((el = this._stack[this._stack.length - 1])); this.onclosetag(el))
;
}
if (this._options.xmlMode || !voidElements.has(name)) {
this._stack.push(name);
if (foreignContextElements.has(name)) {
this._foreignContext.push(true);
}
else if (htmlIntegrationElements.has(name)) {
this._foreignContext.push(false);
}
}
if (this._cbs.onopentagname)
this._cbs.onopentagname(name);
if (this._cbs.onopentag)
this._cbs.onopentag(this._tagname, this._attribs);
this._attribs = null;
}
if (
!this._options.xmlMode &&
this._cbs.onclosetag &&
this._tagname in voidElements
) {
this._cbs.onclosetag(this._tagname);
}
this._tagname = "";
};
Parser.prototype.onclosetag = function(name) {
this._updatePosition(1);
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
if (name in foreignContextElements || name in htmlIntegrationElements) {
this._foreignContext.pop();
}
if (
this._stack.length &&
(!(name in voidElements) || this._options.xmlMode)
) {
var pos = this._stack.lastIndexOf(name);
if (pos !== -1) {
if (this._cbs.onclosetag) {
pos = this._stack.length - pos;
while (pos--) this._cbs.onclosetag(this._stack.pop());
} else this._stack.length = pos;
} else if (name === "p" && !this._options.xmlMode) {
this._attribs = {};
};
Parser.prototype.onopentagend = function () {
this._updatePosition(1);
if (this._attribs) {
if (this._cbs.onopentag) {
this._cbs.onopentag(this._tagname, this._attribs);
}
this._attribs = null;
}
if (!this._options.xmlMode &&
this._cbs.onclosetag &&
voidElements.has(this._tagname)) {
this._cbs.onclosetag(this._tagname);
}
this._tagname = "";
};
Parser.prototype.onclosetag = function (name) {
this._updatePosition(1);
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
if (foreignContextElements.has(name) ||
htmlIntegrationElements.has(name)) {
this._foreignContext.pop();
}
if (this._stack.length &&
(this._options.xmlMode || !voidElements.has(name))) {
var pos = this._stack.lastIndexOf(name);
if (pos !== -1) {
if (this._cbs.onclosetag) {
pos = this._stack.length - pos;
// @ts-ignore
while (pos--)
this._cbs.onclosetag(this._stack.pop());
}
else
this._stack.length = pos;
}
else if (name === "p" && !this._options.xmlMode) {
this.onopentagname(name);
this._closeCurrentTag();
}
}
else if (!this._options.xmlMode && (name === "br" || name === "p")) {
this.onopentagname(name);
this._closeCurrentTag();
}
} else if (!this._options.xmlMode && (name === "br" || name === "p")) {
this.onopentagname(name);
this._closeCurrentTag();
}
};
Parser.prototype.onselfclosingtag = function() {
if (
this._options.xmlMode ||
this._options.recognizeSelfClosing ||
this._foreignContext[this._foreignContext.length - 1]
) {
this._closeCurrentTag();
} else {
};
Parser.prototype.onselfclosingtag = function () {
if (this._options.xmlMode ||
this._options.recognizeSelfClosing ||
this._foreignContext[this._foreignContext.length - 1]) {
this._closeCurrentTag();
}
else {
this.onopentagend();
}
};
Parser.prototype._closeCurrentTag = function () {
var name = this._tagname;
this.onopentagend();
}
};
Parser.prototype._closeCurrentTag = function() {
var name = this._tagname;
this.onopentagend();
//self-closing tags will be on the top of the stack
//(cheaper check than in onclosetag)
if (this._stack[this._stack.length - 1] === name) {
//self-closing tags will be on the top of the stack
//(cheaper check than in onclosetag)
if (this._stack[this._stack.length - 1] === name) {
if (this._cbs.onclosetag) {
this._cbs.onclosetag(name);
}
this._stack.pop();
}
};
Parser.prototype.onattribname = function (name) {
if (this._lowerCaseAttributeNames) {
name = name.toLowerCase();
}
this._attribname = name;
};
Parser.prototype.onattribdata = function (value) {
this._attribvalue += value;
};
Parser.prototype.onattribend = function () {
if (this._cbs.onattribute)
this._cbs.onattribute(this._attribname, this._attribvalue);
if (this._attribs &&
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)) {
this._attribs[this._attribname] = this._attribvalue;
}
this._attribname = "";
this._attribvalue = "";
};
Parser.prototype._getInstructionName = function (value) {
var idx = value.search(reNameEnd);
var name = idx < 0 ? value : value.substr(0, idx);
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
return name;
};
Parser.prototype.ondeclaration = function (value) {
if (this._cbs.onprocessinginstruction) {
var name_1 = this._getInstructionName(value);
this._cbs.onprocessinginstruction("!" + name_1, "!" + value);
}
};
Parser.prototype.onprocessinginstruction = function (value) {
if (this._cbs.onprocessinginstruction) {
var name_2 = this._getInstructionName(value);
this._cbs.onprocessinginstruction("?" + name_2, "?" + value);
}
};
Parser.prototype.oncomment = function (value) {
this._updatePosition(4);
if (this._cbs.oncomment)
this._cbs.oncomment(value);
if (this._cbs.oncommentend)
this._cbs.oncommentend();
};
Parser.prototype.oncdata = function (value) {
this._updatePosition(1);
if (this._options.xmlMode || this._options.recognizeCDATA) {
if (this._cbs.oncdatastart)
this._cbs.oncdatastart();
if (this._cbs.ontext)
this._cbs.ontext(value);
if (this._cbs.oncdataend)
this._cbs.oncdataend();
}
else {
this.oncomment("[CDATA[" + value + "]]");
}
};
Parser.prototype.onerror = function (err) {
if (this._cbs.onerror)
this._cbs.onerror(err);
};
Parser.prototype.onend = function () {
if (this._cbs.onclosetag) {
this._cbs.onclosetag(name);
for (var i = this._stack.length; i > 0; this._cbs.onclosetag(this._stack[--i]))
;
}
this._stack.pop();
}
};
Parser.prototype.onattribname = function(name) {
if (this._lowerCaseAttributeNames) {
name = name.toLowerCase();
}
this._attribname = name;
};
Parser.prototype.onattribdata = function(value) {
this._attribvalue += value;
};
Parser.prototype.onattribend = function() {
if (this._cbs.onattribute)
this._cbs.onattribute(this._attribname, this._attribvalue);
if (
this._attribs &&
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
) {
this._attribs[this._attribname] = this._attribvalue;
}
this._attribname = "";
this._attribvalue = "";
};
Parser.prototype._getInstructionName = function(value) {
var idx = value.search(re_nameEnd),
name = idx < 0 ? value : value.substr(0, idx);
if (this._lowerCaseTagNames) {
name = name.toLowerCase();
}
return name;
};
Parser.prototype.ondeclaration = function(value) {
if (this._cbs.onprocessinginstruction) {
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("!" + name, "!" + value);
}
};
Parser.prototype.onprocessinginstruction = function(value) {
if (this._cbs.onprocessinginstruction) {
var name = this._getInstructionName(value);
this._cbs.onprocessinginstruction("?" + name, "?" + value);
}
};
Parser.prototype.oncomment = function(value) {
this._updatePosition(4);
if (this._cbs.oncomment) this._cbs.oncomment(value);
if (this._cbs.oncommentend) this._cbs.oncommentend();
};
Parser.prototype.oncdata = function(value) {
this._updatePosition(1);
if (this._options.xmlMode || this._options.recognizeCDATA) {
if (this._cbs.oncdatastart) this._cbs.oncdatastart();
if (this._cbs.ontext) this._cbs.ontext(value);
if (this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
}
};
Parser.prototype.onerror = function(err) {
if (this._cbs.onerror) this._cbs.onerror(err);
};
Parser.prototype.onend = function() {
if (this._cbs.onclosetag) {
for (
var i = this._stack.length;
i > 0;
this._cbs.onclosetag(this._stack[--i])
);
}
if (this._cbs.onend) this._cbs.onend();
};
//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function() {
if (this._cbs.onreset) this._cbs.onreset();
this._tokenizer.reset();
this._tagname = "";
this._attribname = "";
this._attribs = null;
this._stack = [];
if (this._cbs.onparserinit) this._cbs.onparserinit(this);
};
//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data) {
this.reset();
this.end(data);
};
Parser.prototype.write = function(chunk) {
this._tokenizer.write(chunk);
};
Parser.prototype.end = function(chunk) {
this._tokenizer.end(chunk);
};
Parser.prototype.pause = function() {
this._tokenizer.pause();
};
Parser.prototype.resume = function() {
this._tokenizer.resume();
};
//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;
module.exports = Parser;
if (this._cbs.onend)
this._cbs.onend();
};
//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function () {
if (this._cbs.onreset)
this._cbs.onreset();
this._tokenizer.reset();
this._tagname = "";
this._attribname = "";
this._attribs = null;
this._stack = [];
if (this._cbs.onparserinit)
this._cbs.onparserinit(this);
};
//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function (data) {
this.reset();
this.end(data);
};
Parser.prototype.write = function (chunk) {
this._tokenizer.write(chunk);
};
Parser.prototype.end = function (chunk) {
this._tokenizer.end(chunk);
};
Parser.prototype.pause = function () {
this._tokenizer.pause();
};
Parser.prototype.resume = function () {
this._tokenizer.resume();
};
return Parser;
}(events_1.EventEmitter));
exports.Parser = Parser;

@@ -1,970 +0,859 @@

module.exports = Tokenizer;
var decodeCodePoint = require("entities/lib/decode_codepoint.js");
var entityMap = require("entities/maps/entities.json");
var legacyMap = require("entities/maps/legacy.json");
var xmlMap = require("entities/maps/xml.json");
//Tokenizer state ids. Each constant takes the next value of the running
//counter `i`, so TEXT is 0 and every later state is one higher than the one
//before it; the numeric values therefore depend on declaration order.
//A trailing comment names the input character that leads into that state.
var i = 0;
var TEXT = i++;
var BEFORE_TAG_NAME = i++; //after <
var IN_TAG_NAME = i++;
var IN_SELF_CLOSING_TAG = i++;
var BEFORE_CLOSING_TAG_NAME = i++;
var IN_CLOSING_TAG_NAME = i++;
var AFTER_CLOSING_TAG_NAME = i++;
//attributes
var BEFORE_ATTRIBUTE_NAME = i++;
var IN_ATTRIBUTE_NAME = i++;
var AFTER_ATTRIBUTE_NAME = i++;
var BEFORE_ATTRIBUTE_VALUE = i++;
var IN_ATTRIBUTE_VALUE_DQ = i++; // "
var IN_ATTRIBUTE_VALUE_SQ = i++; // '
var IN_ATTRIBUTE_VALUE_NQ = i++;
//declarations
var BEFORE_DECLARATION = i++; // !
var IN_DECLARATION = i++;
//processing instructions
var IN_PROCESSING_INSTRUCTION = i++; // ?
//comments
var BEFORE_COMMENT = i++;
var IN_COMMENT = i++;
var AFTER_COMMENT_1 = i++;
var AFTER_COMMENT_2 = i++;
//cdata
var BEFORE_CDATA_1 = i++; // [
var BEFORE_CDATA_2 = i++; // C
var BEFORE_CDATA_3 = i++; // D
var BEFORE_CDATA_4 = i++; // A
var BEFORE_CDATA_5 = i++; // T
var BEFORE_CDATA_6 = i++; // A
var IN_CDATA = i++; // [
var AFTER_CDATA_1 = i++; // ]
var AFTER_CDATA_2 = i++; // ]
//special tags
var BEFORE_SPECIAL = i++; //S
var BEFORE_SPECIAL_END = i++; //S
var BEFORE_SCRIPT_1 = i++; //C
var BEFORE_SCRIPT_2 = i++; //R
var BEFORE_SCRIPT_3 = i++; //I
var BEFORE_SCRIPT_4 = i++; //P
var BEFORE_SCRIPT_5 = i++; //T
var AFTER_SCRIPT_1 = i++; //C
var AFTER_SCRIPT_2 = i++; //R
var AFTER_SCRIPT_3 = i++; //I
var AFTER_SCRIPT_4 = i++; //P
var AFTER_SCRIPT_5 = i++; //T
var BEFORE_STYLE_1 = i++; //T
var BEFORE_STYLE_2 = i++; //Y
var BEFORE_STYLE_3 = i++; //L
var BEFORE_STYLE_4 = i++; //E
var AFTER_STYLE_1 = i++; //T
var AFTER_STYLE_2 = i++; //Y
var AFTER_STYLE_3 = i++; //L
var AFTER_STYLE_4 = i++; //E
//entities
var BEFORE_ENTITY = i++; //&
var BEFORE_NUMERIC_ENTITY = i++; //#
var IN_NAMED_ENTITY = i++;
var IN_NUMERIC_ENTITY = i++;
var IN_HEX_ENTITY = i++; //X
//Values for the tokenizer's `_special` field, numbered from a second counter
//`j`: SPECIAL_NONE is 0, SPECIAL_SCRIPT is 1 and SPECIAL_STYLE is 2.
var j = 0;
var SPECIAL_NONE = j++;
var SPECIAL_SCRIPT = j++;
var SPECIAL_STYLE = j++;
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint"));
var entities_json_1 = __importDefault(require("entities/lib/maps/entities.json"));
var legacy_json_1 = __importDefault(require("entities/lib/maps/legacy.json"));
var xml_json_1 = __importDefault(require("entities/lib/maps/xml.json"));
//Tells whether `c` is exactly one of the five characters the tokenizer treats
//as whitespace: space, line feed, tab, form feed or carriage return.
function whitespace(c) {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
function ifElseState(upper, SUCCESS, FAILURE) {
var lower = upper.toLowerCase();
if (upper === lower) {
return function(c) {
return function (t, c) {
if (c === lower) {
this._state = SUCCESS;
} else {
this._state = FAILURE;
this._index--;
t._state = SUCCESS;
}
else {
t._state = FAILURE;
t._index--;
}
};
} else {
return function(c) {
}
else {
return function (t, c) {
if (c === lower || c === upper) {
this._state = SUCCESS;
} else {
this._state = FAILURE;
this._index--;
t._state = SUCCESS;
}
else {
t._state = FAILURE;
t._index--;
}
};
}
}
function consumeSpecialNameChar(upper, NEXT_STATE) {
var lower = upper.toLowerCase();
return function(c) {
return function (t, c) {
if (c === lower || c === upper) {
this._state = NEXT_STATE;
} else {
this._state = IN_TAG_NAME;
this._index--; //consume the token again
t._state = NEXT_STATE;
}
else {
t._state = 3 /* InTagName */;
t._index--; //consume the token again
}
};
}
//Creates a tokenizer that reports to the callback object `cbs`.
//`options` may be omitted; `xmlMode` and `decodeEntities` are coerced to booleans.
function Tokenizer(options, cbs) {
    //state machine position and read buffer
    this._state = TEXT;
    this._buffer = "";
    this._sectionStart = 0;
    this._index = 0;
    this._bufferOffset = 0; //chars removed from _buffer
    this._baseState = TEXT;
    this._special = SPECIAL_NONE;

    this._cbs = cbs;

    //run flags
    this._running = true;
    this._ended = false;

    //options
    this._xmlMode = Boolean(options && options.xmlMode);
    this._decodeEntities = Boolean(options && options.decodeEntities);
}
Tokenizer.prototype._stateText = function(c) {
if (c === "<") {
if (this._index > this._sectionStart) {
this._cbs.ontext(this._getSection());
// Precomputed state handlers built by the two factories above. The numeric
// arguments are state ids; the inline comments give their names.
// CDATA prefix: on the expected letter advance to the next BeforeCdata state,
// otherwise fall back to InDeclaration.
var stateBeforeCdata1 = ifElseState("C", 23 /* BeforeCdata2 */, 16 /* InDeclaration */);
var stateBeforeCdata2 = ifElseState("D", 24 /* BeforeCdata3 */, 16 /* InDeclaration */);
var stateBeforeCdata3 = ifElseState("A", 25 /* BeforeCdata4 */, 16 /* InDeclaration */);
var stateBeforeCdata4 = ifElseState("T", 26 /* BeforeCdata5 */, 16 /* InDeclaration */);
var stateBeforeCdata5 = ifElseState("A", 27 /* BeforeCdata6 */, 16 /* InDeclaration */);
// Opening "script" name: a mismatch is handled by consumeSpecialNameChar.
var stateBeforeScript1 = consumeSpecialNameChar("R", 34 /* BeforeScript2 */);
var stateBeforeScript2 = consumeSpecialNameChar("I", 35 /* BeforeScript3 */);
var stateBeforeScript3 = consumeSpecialNameChar("P", 36 /* BeforeScript4 */);
var stateBeforeScript4 = consumeSpecialNameChar("T", 37 /* BeforeScript5 */);
// Closing "script" name: a mismatch goes back to Text.
var stateAfterScript1 = ifElseState("R", 39 /* AfterScript2 */, 1 /* Text */);
var stateAfterScript2 = ifElseState("I", 40 /* AfterScript3 */, 1 /* Text */);
var stateAfterScript3 = ifElseState("P", 41 /* AfterScript4 */, 1 /* Text */);
var stateAfterScript4 = ifElseState("T", 42 /* AfterScript5 */, 1 /* Text */);
// Opening "style" name.
var stateBeforeStyle1 = consumeSpecialNameChar("Y", 44 /* BeforeStyle2 */);
var stateBeforeStyle2 = consumeSpecialNameChar("L", 45 /* BeforeStyle3 */);
var stateBeforeStyle3 = consumeSpecialNameChar("E", 46 /* BeforeStyle4 */);
// Closing "style" name: a mismatch goes back to Text.
var stateAfterStyle1 = ifElseState("Y", 48 /* AfterStyle2 */, 1 /* Text */);
var stateAfterStyle2 = ifElseState("L", 49 /* AfterStyle3 */, 1 /* Text */);
var stateAfterStyle3 = ifElseState("E", 50 /* AfterStyle4 */, 1 /* Text */);
// Entities: "#" selects a numeric entity, anything else a named one; after
// "#", "X" selects hexadecimal, anything else decimal.
var stateBeforeEntity = ifElseState("#", 52 /* BeforeNumericEntity */, 53 /* InNamedEntity */);
var stateBeforeNumericEntity = ifElseState("X", 55 /* InHexEntity */, 54 /* InNumericEntity */);
var Tokenizer = /** @class */ (function () {
/**
 * Creates a tokenizer that reports to the callback object `cbs`.
 * `options` may be omitted; `xmlMode` and `decodeEntities` are coerced to booleans.
 */
function Tokenizer(options, cbs) {
    /** State the tokenizer is currently in. */
    this._state = 1 /* Text */;
    /** Buffer that input is read from. */
    this._buffer = "";
    /** Where the section currently being read begins. */
    this._sectionStart = 0;
    /** Position in the buffer that is currently being looked at. */
    this._index = 0;
    /**
     * Processed data is occasionally dropped from the buffer; this counts the
     * dropped characters so position information stays accurate.
     */
    this._bufferOffset = 0;
    /** State to come back to after work done on behalf of another state, e.g. entity decoding. */
    this._baseState = 1 /* Text */;
    /** Selects the special parsing behavior used inside script and style tags. */
    this._special = 1 /* None */;
    /** False while the tokenizer is paused. */
    this._running = true;
    /** True once the tokenizer has finished / `.end` has been called. */
    this._ended = false;
    this._cbs = cbs;
    this._xmlMode = Boolean(options && options.xmlMode);
    this._decodeEntities = Boolean(options && options.decodeEntities);
}
// Puts every piece of parsing state back to its initial value.
// The callbacks and the xmlMode / decodeEntities options are left untouched.
Tokenizer.prototype.reset = function () {
    // state machine
    this._state = 1 /* Text */;

    // buffer bookkeeping
    this._buffer = "";
    this._sectionStart = 0;
    this._index = 0;
    this._bufferOffset = 0;

    // modes
    this._baseState = 1 /* Text */;
    this._special = 1 /* None */;

    // run flags
    this._running = true;
    this._ended = false;
};
Tokenizer.prototype._stateText = function (c) {
if (c === "<") {
if (this._index > this._sectionStart) {
this._cbs.ontext(this._getSection());
}
this._state = 2 /* BeforeTagName */;
this._sectionStart = this._index;
}
this._state = BEFORE_TAG_NAME;
this._sectionStart = this._index;
} else if (
this._decodeEntities &&
this._special === SPECIAL_NONE &&
c === "&"
) {
if (this._index > this._sectionStart) {
else if (this._decodeEntities &&
this._special === 1 /* None */ &&
c === "&") {
if (this._index > this._sectionStart) {
this._cbs.ontext(this._getSection());
}
this._baseState = 1 /* Text */;
this._state = 51 /* BeforeEntity */;
this._sectionStart = this._index;
}
};
Tokenizer.prototype._stateBeforeTagName = function (c) {
if (c === "/") {
this._state = 5 /* BeforeClosingTagName */;
}
else if (c === "<") {
this._cbs.ontext(this._getSection());
this._sectionStart = this._index;
}
this._baseState = TEXT;
this._state = BEFORE_ENTITY;
this._sectionStart = this._index;
}
};
Tokenizer.prototype._stateBeforeTagName = function(c) {
if (c === "/") {
this._state = BEFORE_CLOSING_TAG_NAME;
} else if (c === "<") {
this._cbs.ontext(this._getSection());
this._sectionStart = this._index;
} else if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
this._state = TEXT;
} else if (c === "!") {
this._state = BEFORE_DECLARATION;
this._sectionStart = this._index + 1;
} else if (c === "?") {
this._state = IN_PROCESSING_INSTRUCTION;
this._sectionStart = this._index + 1;
} else {
else if (c === ">" ||
this._special !== 1 /* None */ ||
whitespace(c)) {
this._state = 1 /* Text */;
}
else if (c === "!") {
this._state = 15 /* BeforeDeclaration */;
this._sectionStart = this._index + 1;
}
else if (c === "?") {
this._state = 17 /* InProcessingInstruction */;
this._sectionStart = this._index + 1;
}
else {
this._state =
!this._xmlMode && (c === "s" || c === "S")
? 31 /* BeforeSpecial */
: 3 /* InTagName */;
this._sectionStart = this._index;
}
};
// Inside an opening tag's name: "/", ">" or whitespace ends the name.
Tokenizer.prototype._stateInTagName = function (c) {
    var endsName = c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._emitToken("onopentagname");
    this._state = 8 /* BeforeAttributeName */;
    this._index--; //reconsume the terminating character
};
// State reached after "</": decides what the closing tag turns into.
Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
    if (whitespace(c)) {
        // whitespace in front of the name is skipped
        return;
    }
    if (c === ">") {
        this._state = 1 /* Text */;
        return;
    }
    if (this._special === 1 /* None */) {
        // ordinary closing tag: the name starts here
        this._state = 6 /* InClosingTagName */;
        this._sectionStart = this._index;
        return;
    }
    // special mode: only a name starting with "s"/"S" is examined further
    if (c === "s" || c === "S") {
        this._state = 32 /* BeforeSpecialEnd */;
    }
    else {
        this._state = 1 /* Text */;
        this._index--;
    }
};
// Inside a closing tag's name: ">" or whitespace ends the name.
Tokenizer.prototype._stateInCloseingTagName = function (c) {
    var endsName = c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._emitToken("onclosetag");
    this._state = 7 /* AfterClosingTagName */;
    this._index--; //reconsume the terminating character
};
// After a closing tag's name: everything up to ">" is skipped.
Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
    if (c !== ">") {
        return;
    }
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
// Between attributes: ">" ends the opening tag, "/" may start a self-closing
// tag, whitespace is skipped and anything else starts an attribute name.
Tokenizer.prototype._stateBeforeAttributeName = function (c) {
    switch (c) {
        case ">":
            this._cbs.onopentagend();
            this._state = 1 /* Text */;
            this._sectionStart = this._index + 1;
            break;
        case "/":
            this._state = 4 /* InSelfClosingTag */;
            break;
        default:
            if (!whitespace(c)) {
                this._state = 9 /* InAttributeName */;
                this._sectionStart = this._index;
            }
    }
};
// After "/" inside a tag: ">" completes a self-closing tag, whitespace is
// skipped, and any other character is handed back to BeforeAttributeName.
Tokenizer.prototype._stateInSelfClosingTag = function (c) {
    if (c === ">") {
        this._cbs.onselfclosingtag();
        this._state = 1 /* Text */;
        this._sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 8 /* BeforeAttributeName */;
    this._index--;
};
// Inside an attribute name: "=", "/", ">" or whitespace ends the name.
Tokenizer.prototype._stateInAttributeName = function (c) {
    var endsName = c === "=" || c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this._cbs.onattribname(this._getSection());
    this._sectionStart = -1;
    this._state = 10 /* AfterAttributeName */;
    this._index--; //reconsume the terminating character
};
// After an attribute name: "=" introduces a value; "/" or ">" finish the
// attribute and are reconsumed; whitespace is skipped; any other character
// finishes the attribute and begins the next attribute name.
Tokenizer.prototype._stateAfterAttributeName = function (c) {
    if (c === "=") {
        this._state = 11 /* BeforeAttributeValue */;
        return;
    }
    if (c === "/" || c === ">") {
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._cbs.onattribend();
    this._state = 9 /* InAttributeName */;
    this._sectionStart = this._index;
};
// After "=": picks the quoting style of the attribute value. Quoted values
// start after the quote; an unquoted value starts at the current character.
Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
    switch (c) {
        case '"':
            this._state = 12 /* InAttributeValueDq */;
            this._sectionStart = this._index + 1;
            break;
        case "'":
            this._state = 13 /* InAttributeValueSq */;
            this._sectionStart = this._index + 1;
            break;
        default:
            if (!whitespace(c)) {
                this._state = 14 /* InAttributeValueNq */;
                this._sectionStart = this._index;
                this._index--; //reconsume token
            }
    }
};
// Inside a double-quoted attribute value: the closing quote ends the value;
// with entity decoding enabled, "&" flushes the data so far and starts an entity.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
    if (c === '"') {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        return;
    }
    if (!this._decodeEntities || c !== "&") {
        return;
    }
    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = 51 /* BeforeEntity */;
    this._sectionStart = this._index;
};
// Inside a single-quoted attribute value: the closing quote ends the value;
// with entity decoding enabled, "&" flushes the data so far and starts an entity.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
    if (c === "'") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        return;
    }
    if (!this._decodeEntities || c !== "&") {
        return;
    }
    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = 51 /* BeforeEntity */;
    this._sectionStart = this._index;
};
// Inside an unquoted attribute value: whitespace or ">" ends the value and is
// reconsumed; with entity decoding enabled, "&" starts an entity.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
    if (whitespace(c) || c === ">") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
        return;
    }
    if (!this._decodeEntities || c !== "&") {
        return;
    }
    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = 51 /* BeforeEntity */;
    this._sectionStart = this._index;
};
Tokenizer.prototype._stateBeforeDeclaration = function (c) {
this._state =
!this._xmlMode && (c === "s" || c === "S")
? BEFORE_SPECIAL
: IN_TAG_NAME;
this._sectionStart = this._index;
}
};
Tokenizer.prototype._stateInTagName = function(c) {
if (c === "/" || c === ">" || whitespace(c)) {
this._emitToken("onopentagname");
this._state = BEFORE_ATTRIBUTE_NAME;
this._index--;
}
};
Tokenizer.prototype._stateBeforeCloseingTagName = function(c) {
if (whitespace(c));
else if (c === ">") {
this._state = TEXT;
} else if (this._special !== SPECIAL_NONE) {
if (c === "s" || c === "S") {
this._state = BEFORE_SPECIAL_END;
} else {
this._state = TEXT;
c === "["
? 22 /* BeforeCdata1 */
: c === "-"
? 18 /* BeforeComment */
: 16 /* InDeclaration */;
};
// Inside a declaration: ">" ends it and the collected section is reported.
Tokenizer.prototype._stateInDeclaration = function (c) {
    if (c !== ">") {
        return;
    }
    this._cbs.ondeclaration(this._getSection());
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
// Inside a processing instruction: ">" ends it and the collected section is reported.
Tokenizer.prototype._stateInProcessingInstruction = function (c) {
    if (c !== ">") {
        return;
    }
    this._cbs.onprocessinginstruction(this._getSection());
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
// After the first "-" of a possible comment: a second "-" opens the comment
// (its content starts at the next character); anything else makes it a declaration.
Tokenizer.prototype._stateBeforeComment = function (c) {
    if (c !== "-") {
        this._state = 16 /* InDeclaration */;
        return;
    }
    this._state = 19 /* InComment */;
    this._sectionStart = this._index + 1;
};
// Inside a comment: a "-" may be the start of the closing sequence.
Tokenizer.prototype._stateInComment = function (c) {
    if (c !== "-") {
        return;
    }
    this._state = 20 /* AfterComment1 */;
};
// After one "-" inside a comment: a second "-" moves on, anything else goes
// back to the comment body.
Tokenizer.prototype._stateAfterComment1 = function (c) {
    this._state = c === "-" ? 21 /* AfterComment2 */ : 19 /* InComment */;
};
// After "--" inside a comment: ">" closes the comment, a further "-" keeps
// waiting (`--->`), anything else returns to the comment body.
Tokenizer.prototype._stateAfterComment2 = function (c) {
    if (c === "-") {
        // stay in AfterComment2
        return;
    }
    if (c !== ">") {
        this._state = 19 /* InComment */;
        return;
    }
    //the two trailing dashes are not part of the comment text
    var text = this._buffer.substring(this._sectionStart, this._index - 2);
    this._cbs.oncomment(text);
    this._state = 1 /* Text */;
    this._sectionStart = this._index + 1;
};
Tokenizer.prototype._stateBeforeCdata6 = function (c) {
if (c === "[") {
this._state = 28 /* InCdata */;
this._sectionStart = this._index + 1;
}
else {
this._state = 16 /* InDeclaration */;
this._index--;
}
} else {
this._state = IN_CLOSING_TAG_NAME;
this._sectionStart = this._index;
}
};
//Inside a closing tag's name: ">" or whitespace ends the name.
Tokenizer.prototype._stateInCloseingTagName = function(c) {
    var nameContinues = c !== ">" && !whitespace(c);
    if (nameContinues) return;

    this._emitToken("onclosetag");
    this._state = AFTER_CLOSING_TAG_NAME;
    this._index--; //reconsume the terminating character
};
//After a closing tag's name: every character up to ">" is skipped.
Tokenizer.prototype._stateAfterCloseingTagName = function(c) {
    if (c !== ">") return;

    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
//Between attributes: ">" ends the opening tag, "/" may start a self-closing
//tag, whitespace is skipped and anything else starts an attribute name.
Tokenizer.prototype._stateBeforeAttributeName = function(c) {
    if (c === ">") {
        this._cbs.onopentagend();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
        return;
    }
    if (c === "/") {
        this._state = IN_SELF_CLOSING_TAG;
        return;
    }
    if (whitespace(c)) return;

    this._state = IN_ATTRIBUTE_NAME;
    this._sectionStart = this._index;
};
//After "/" inside a tag: ">" completes a self-closing tag, whitespace is
//skipped, and any other character is handed back to BEFORE_ATTRIBUTE_NAME.
Tokenizer.prototype._stateInSelfClosingTag = function(c) {
    if (c === ">") {
        this._cbs.onselfclosingtag();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) return;

    this._state = BEFORE_ATTRIBUTE_NAME;
    this._index--;
};
//Inside an attribute name: "=", "/", ">" or whitespace ends the name.
Tokenizer.prototype._stateInAttributeName = function(c) {
    var nameContinues = c !== "=" && c !== "/" && c !== ">" && !whitespace(c);
    if (nameContinues) return;

    this._cbs.onattribname(this._getSection());
    this._sectionStart = -1;
    this._state = AFTER_ATTRIBUTE_NAME;
    this._index--; //reconsume the terminating character
};
//After an attribute name: "=" introduces a value; "/" or ">" finish the
//attribute and are reconsumed; whitespace is skipped; any other character
//finishes the attribute and begins the next attribute name.
Tokenizer.prototype._stateAfterAttributeName = function(c) {
    switch (c) {
        case "=":
            this._state = BEFORE_ATTRIBUTE_VALUE;
            break;
        case "/":
        case ">":
            this._cbs.onattribend();
            this._state = BEFORE_ATTRIBUTE_NAME;
            this._index--;
            break;
        default:
            if (!whitespace(c)) {
                this._cbs.onattribend();
                this._state = IN_ATTRIBUTE_NAME;
                this._sectionStart = this._index;
            }
    }
};
//After "=": picks the quoting style of the attribute value. Quoted values
//start after the quote; an unquoted value starts at the current character.
Tokenizer.prototype._stateBeforeAttributeValue = function(c) {
    if (c === '"' || c === "'") {
        this._state = c === '"' ? IN_ATTRIBUTE_VALUE_DQ : IN_ATTRIBUTE_VALUE_SQ;
        this._sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) return;

    this._state = IN_ATTRIBUTE_VALUE_NQ;
    this._sectionStart = this._index;
    this._index--; //reconsume token
};
//Inside a double-quoted attribute value: the closing quote ends the value;
//with entity decoding enabled, "&" flushes the data so far and starts an entity.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c) {
    if (c === '"') {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        return;
    }
    if (!(this._decodeEntities && c === "&")) return;

    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = BEFORE_ENTITY;
    this._sectionStart = this._index;
};
//Inside a single-quoted attribute value: the closing quote ends the value;
//with entity decoding enabled, "&" flushes the data so far and starts an entity.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c) {
    if (c === "'") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        return;
    }
    if (!(this._decodeEntities && c === "&")) return;

    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = BEFORE_ENTITY;
    this._sectionStart = this._index;
};
//Inside an unquoted attribute value: whitespace or ">" ends the value and is
//reconsumed; with entity decoding enabled, "&" starts an entity.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c) {
    if (whitespace(c) || c === ">") {
        this._emitToken("onattribdata");
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
        return;
    }
    if (!(this._decodeEntities && c === "&")) return;

    this._emitToken("onattribdata");
    this._baseState = this._state; //state to return to after the entity
    this._state = BEFORE_ENTITY;
    this._sectionStart = this._index;
};
//First character after "<!": "[" may start CDATA, "-" may start a comment,
//anything else is treated as a declaration.
Tokenizer.prototype._stateBeforeDeclaration = function(c) {
    if (c === "[") {
        this._state = BEFORE_CDATA_1;
    } else if (c === "-") {
        this._state = BEFORE_COMMENT;
    } else {
        this._state = IN_DECLARATION;
    }
};
//Inside a declaration: ">" ends it and the collected section is reported.
Tokenizer.prototype._stateInDeclaration = function(c) {
    if (c !== ">") return;

    this._cbs.ondeclaration(this._getSection());
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
//Inside a processing instruction: ">" ends it and the collected section is reported.
Tokenizer.prototype._stateInProcessingInstruction = function(c) {
    if (c !== ">") return;

    this._cbs.onprocessinginstruction(this._getSection());
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
//After the first "-" of a possible comment: a second "-" opens the comment
//(its content starts at the next character); anything else makes it a declaration.
Tokenizer.prototype._stateBeforeComment = function(c) {
    if (c !== "-") {
        this._state = IN_DECLARATION;
        return;
    }
    this._state = IN_COMMENT;
    this._sectionStart = this._index + 1;
};
//Inside a comment: a "-" may be the start of the closing sequence.
Tokenizer.prototype._stateInComment = function(c) {
    if (c !== "-") {
        return;
    }
    this._state = AFTER_COMMENT_1;
};
//After one "-" inside a comment: a second "-" moves on, anything else goes
//back to the comment body.
Tokenizer.prototype._stateAfterComment1 = function(c) {
    this._state = c === "-" ? AFTER_COMMENT_2 : IN_COMMENT;
};
//After "--" inside a comment: ">" closes the comment, a further "-" keeps
//waiting (`--->`), anything else returns to the comment body.
Tokenizer.prototype._stateAfterComment2 = function(c) {
    switch (c) {
        case ">":
            //the two trailing dashes are not part of the comment text
            this._cbs.oncomment(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = TEXT;
            this._sectionStart = this._index + 1;
            break;
        case "-":
            //stay in AFTER_COMMENT_2
            break;
        default:
            this._state = IN_COMMENT;
    }
};
//Handlers for the letters C, D, A, T, A of a CDATA opener. Each one advances
//to the next BEFORE_CDATA state on its expected letter and otherwise hands the
//character over to IN_DECLARATION.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState(
"C",
BEFORE_CDATA_2,
IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState(
"D",
BEFORE_CDATA_3,
IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState(
"A",
BEFORE_CDATA_4,
IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState(
"T",
BEFORE_CDATA_5,
IN_DECLARATION
);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState(
"A",
BEFORE_CDATA_6,
IN_DECLARATION
);
//Last step of the CDATA opener: "[" starts the CDATA content at the next
//character; anything else is reconsumed as part of a declaration.
Tokenizer.prototype._stateBeforeCdata6 = function(c) {
    if (c !== "[") {
        this._state = IN_DECLARATION;
        this._index--;
        return;
    }
    this._state = IN_CDATA;
    this._sectionStart = this._index + 1;
};
//Inside CDATA: a "]" may be the start of the closing sequence.
Tokenizer.prototype._stateInCdata = function(c) {
    if (c !== "]") {
        return;
    }
    this._state = AFTER_CDATA_1;
};
//After one "]" inside CDATA: a second "]" moves on, anything else goes back
//to the CDATA body.
Tokenizer.prototype._stateAfterCdata1 = function(c) {
    this._state = c === "]" ? AFTER_CDATA_2 : IN_CDATA;
};
//After "]]" inside CDATA: ">" closes the section, a further "]" keeps waiting
//(`]]]>`), anything else returns to the CDATA body.
Tokenizer.prototype._stateAfterCdata2 = function(c) {
    switch (c) {
        case ">":
            //the two trailing brackets are not part of the CDATA text
            this._cbs.oncdata(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = TEXT;
            this._sectionStart = this._index + 1;
            break;
        case "]":
            //stay in AFTER_CDATA_2
            break;
        default:
            this._state = IN_CDATA;
    }
};
//Second character of a tag name that began with "s": "c" may continue
//"script", "t" may continue "style"; anything else is an ordinary tag name.
Tokenizer.prototype._stateBeforeSpecial = function(c) {
    switch (c) {
        case "c":
        case "C":
            this._state = BEFORE_SCRIPT_1;
            break;
        case "t":
        case "T":
            this._state = BEFORE_STYLE_1;
            break;
        default:
            this._state = IN_TAG_NAME;
            this._index--; //consume the token again
    }
};
//Second character of a closing tag name that began with "s" while in special
//mode: it must match the special tag currently open, otherwise back to TEXT.
Tokenizer.prototype._stateBeforeSpecialEnd = function(c) {
    var next = TEXT;
    if (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) {
        next = AFTER_SCRIPT_1;
    } else if (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) {
        next = AFTER_STYLE_1;
    }
    this._state = next;
};
//Handlers for the letters R, I, P, T of an opening "script" tag name, built by
//consumeSpecialNameChar: the expected letter advances to the next
//BEFORE_SCRIPT state.
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar(
"R",
BEFORE_SCRIPT_2
);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar(
"I",
BEFORE_SCRIPT_3
);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar(
"P",
BEFORE_SCRIPT_4
);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar(
"T",
BEFORE_SCRIPT_5
);
//Character right after the letters "script": if the name ends here ("/", ">"
//or whitespace) script mode is switched on. Either way the character is
//reconsumed in IN_TAG_NAME.
Tokenizer.prototype._stateBeforeScript5 = function(c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);
    if (nameEnds) {
        this._special = SPECIAL_SCRIPT;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};
//Handlers for the letters R, I, P, T of a closing "script" tag name: the
//expected letter advances to the next AFTER_SCRIPT state, anything else
//falls back to TEXT.
Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT);
Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
//Character right after the letters of a closing "script" name: ">" or
//whitespace leaves special mode and re-enters IN_CLOSING_TAG_NAME with the
//section starting 6 characters back; anything else returns to TEXT.
Tokenizer.prototype._stateAfterScript5 = function(c) {
    var nameEnds = c === ">" || whitespace(c);
    if (!nameEnds) {
        this._state = TEXT;
        return;
    }
    this._special = SPECIAL_NONE;
    this._state = IN_CLOSING_TAG_NAME;
    this._sectionStart = this._index - 6;
    this._index--; //reconsume the token
};
//Handlers for the letters Y, L, E of an opening "style" tag name, built by
//consumeSpecialNameChar: the expected letter advances to the next
//BEFORE_STYLE state.
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar(
"Y",
BEFORE_STYLE_2
);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar(
"L",
BEFORE_STYLE_3
);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar(
"E",
BEFORE_STYLE_4
);
//Character right after the letters "style": if the name ends here ("/", ">"
//or whitespace) style mode is switched on. Either way the character is
//reconsumed in IN_TAG_NAME.
Tokenizer.prototype._stateBeforeStyle4 = function(c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);
    if (nameEnds) {
        this._special = SPECIAL_STYLE;
    }
    this._state = IN_TAG_NAME;
    this._index--; //consume the token again
};
//Handlers for the letters Y, L, E of a closing "style" tag name: the expected
//letter advances to the next AFTER_STYLE state, anything else falls back to TEXT.
Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
//Character right after the letters of a closing "style" name: ">" or
//whitespace leaves special mode and re-enters IN_CLOSING_TAG_NAME with the
//section starting 5 characters back; anything else returns to TEXT.
Tokenizer.prototype._stateAfterStyle4 = function(c) {
    var nameEnds = c === ">" || whitespace(c);
    if (!nameEnds) {
        this._state = TEXT;
        return;
    }
    this._special = SPECIAL_NONE;
    this._state = IN_CLOSING_TAG_NAME;
    this._sectionStart = this._index - 5;
    this._index--; //reconsume the token
};
//First character after "&": "#" selects a numeric entity, anything else is
//handed over to IN_NAMED_ENTITY.
Tokenizer.prototype._stateBeforeEntity = ifElseState(
"#",
BEFORE_NUMERIC_ENTITY,
IN_NAMED_ENTITY
);
//First character after "&#": "X" selects a hexadecimal entity, anything else
//is handed over to IN_NUMERIC_ENTITY.
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState(
"X",
IN_HEX_ENTITY,
IN_NUMERIC_ENTITY
);
//for entities terminated with a semicolon
Tokenizer.prototype._parseNamedEntityStrict = function() {
//offset = 1
if (this._sectionStart + 1 < this._index) {
var entity = this._buffer.substring(
this._sectionStart + 1,
this._index
),
map = this._xmlMode ? xmlMap : entityMap;
if (map.hasOwnProperty(entity)) {
this._emitPartial(map[entity]);
};
// Inside CDATA: a "]" may be the start of the closing sequence.
Tokenizer.prototype._stateInCdata = function (c) {
    if (c !== "]") {
        return;
    }
    this._state = 29 /* AfterCdata1 */;
};
// After one "]" inside CDATA: a second "]" moves on, anything else goes back
// to the CDATA body.
Tokenizer.prototype._stateAfterCdata1 = function (c) {
    this._state = c === "]" ? 30 /* AfterCdata2 */ : 28 /* InCdata */;
};
Tokenizer.prototype._stateAfterCdata2 = function (c) {
if (c === ">") {
//remove 2 trailing chars
this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
this._state = 1 /* Text */;
this._sectionStart = this._index + 1;
}
}
};
//parses legacy entities (without trailing semicolon)
Tokenizer.prototype._parseLegacyEntity = function() {
var start = this._sectionStart + 1,
limit = this._index - start;
if (limit > 6) limit = 6; //the max length of legacy entities is 6
while (limit >= 2) {
//the min length of legacy entities is 2
var entity = this._buffer.substr(start, limit);
if (legacyMap.hasOwnProperty(entity)) {
this._emitPartial(legacyMap[entity]);
this._sectionStart += limit + 1;
return;
} else {
limit--;
else if (c !== "]") {
this._state = 28 /* InCdata */;
}
}
};
Tokenizer.prototype._stateInNamedEntity = function(c) {
if (c === ";") {
this._parseNamedEntityStrict();
if (this._sectionStart + 1 < this._index && !this._xmlMode) {
this._parseLegacyEntity();
//else: stay in AFTER_CDATA_2 (`]]]>`)
};
Tokenizer.prototype._stateBeforeSpecial = function (c) {
if (c === "c" || c === "C") {
this._state = 33 /* BeforeScript1 */;
}
this._state = this._baseState;
} else if (
(c < "a" || c > "z") &&
(c < "A" || c > "Z") &&
(c < "0" || c > "9")
) {
if (this._xmlMode);
else if (this._sectionStart + 1 === this._index);
else if (this._baseState !== TEXT) {
if (c !== "=") {
this._parseNamedEntityStrict();
else if (c === "t" || c === "T") {
this._state = 43 /* BeforeStyle1 */;
}
else {
this._state = 3 /* InTagName */;
this._index--; //consume the token again
}
};
// Second character of a closing tag name that began with "s" while in special
// mode: it must match the special tag currently open, otherwise back to Text.
Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
    var closesScript = this._special === 2 /* Script */ && (c === "c" || c === "C");
    if (closesScript) {
        this._state = 38 /* AfterScript1 */;
        return;
    }
    var closesStyle = this._special === 3 /* Style */ && (c === "t" || c === "T");
    this._state = closesStyle ? 47 /* AfterStyle1 */ : 1 /* Text */;
};
// Character right after the letters "script": if the name ends here ("/", ">"
// or whitespace) script mode is switched on. Either way the character is
// reconsumed in InTagName.
Tokenizer.prototype._stateBeforeScript5 = function (c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);
    if (nameEnds) {
        this._special = 2 /* Script */;
    }
    this._state = 3 /* InTagName */;
    this._index--; //consume the token again
};
// Character right after the letters of a closing "script" name: ">" or
// whitespace leaves special mode and re-enters InClosingTagName with the
// section starting 6 characters back; anything else returns to Text.
Tokenizer.prototype._stateAfterScript5 = function (c) {
    var nameEnds = c === ">" || whitespace(c);
    if (!nameEnds) {
        this._state = 1 /* Text */;
        return;
    }
    this._special = 1 /* None */;
    this._state = 6 /* InClosingTagName */;
    this._sectionStart = this._index - 6;
    this._index--; //reconsume the token
};
// Character right after the letters "style": if the name ends here ("/", ">"
// or whitespace) style mode is switched on. Either way the character is
// reconsumed in InTagName.
Tokenizer.prototype._stateBeforeStyle4 = function (c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);
    if (nameEnds) {
        this._special = 3 /* Style */;
    }
    this._state = 3 /* InTagName */;
    this._index--; //consume the token again
};
// Character right after the letters of a closing "style" name: ">" or
// whitespace leaves special mode and re-enters InClosingTagName with the
// section starting 5 characters back; anything else returns to Text.
Tokenizer.prototype._stateAfterStyle4 = function (c) {
    var nameEnds = c === ">" || whitespace(c);
    if (!nameEnds) {
        this._state = 1 /* Text */;
        return;
    }
    this._special = 1 /* None */;
    this._state = 6 /* InClosingTagName */;
    this._sectionStart = this._index - 5;
    this._index--; //reconsume the token
};
//for entities terminated with a semicolon
Tokenizer.prototype._parseNamedEntityStrict = function () {
//offset = 1
if (this._sectionStart + 1 < this._index) {
var entity = this._buffer.substring(this._sectionStart + 1, this._index), map = this._xmlMode ? xml_json_1.default : entities_json_1.default;
if (Object.prototype.hasOwnProperty.call(map, entity)) {
// @ts-ignore
this._emitPartial(map[entity]);
this._sectionStart = this._index + 1;
}
} else {
this._parseLegacyEntity();
}
};
// Parses legacy entities (those written without a trailing semicolon).
// Tries the longest candidate name first (at most 6 characters) and shortens
// it down to 2 characters; the first name found in the legacy map is emitted
// and the section start is moved past it.
Tokenizer.prototype._parseLegacyEntity = function () {
    var start = this._sectionStart + 1;
    for (var length = Math.min(this._index - start, 6); length >= 2; length--) {
        var candidate = this._buffer.substr(start, length);
        if (!Object.prototype.hasOwnProperty.call(legacy_json_1.default, candidate)) {
            continue;
        }
        // @ts-ignore
        this._emitPartial(legacy_json_1.default[candidate]);
        this._sectionStart += length + 1;
        return;
    }
};
// Inside a named entity. ";" ends it strictly (with a legacy fallback outside
// XML mode). Any other character that is not a letter or digit ends it
// implicitly and is reconsumed by the base state.
Tokenizer.prototype._stateInNamedEntity = function (c) {
    if (c === ";") {
        this._parseNamedEntityStrict();
        var stillUnparsed = this._sectionStart + 1 < this._index;
        if (stillUnparsed && !this._xmlMode) {
            this._parseLegacyEntity();
        }
        this._state = this._baseState;
        return;
    }
    var endsEntity = (c < "a" || c > "z") &&
        (c < "A" || c > "Z") &&
        (c < "0" || c > "9");
    if (!endsEntity) {
        return;
    }
    // Nothing is decoded in XML mode or when no name character followed "&".
    var mayDecode = !this._xmlMode && this._sectionStart + 1 !== this._index;
    if (mayDecode) {
        if (this._baseState === 1 /* Text */) {
            this._parseLegacyEntity();
        }
        else if (c !== "=") {
            // outside Text, a name followed by "=" is left undecoded
            this._parseNamedEntityStrict();
        }
    }
    this._state = this._baseState;
    this._index--;
};
Tokenizer.prototype._decodeNumericEntity = function (offset, base) {
var sectionStart = this._sectionStart + offset;
if (sectionStart !== this._index) {
//parse entity
var entity = this._buffer.substring(sectionStart, this._index);
var parsed = parseInt(entity, base);
this._emitPartial(decode_codepoint_1.default(parsed));
this._sectionStart = this._index;
}
else {
this._sectionStart--;
}
this._state = this._baseState;
this._index--;
}
};
//Decodes the numeric entity that ends at the current index. `offset` is the
//distance from the section start to the first digit and `base` the radix.
//When no digits are present nothing is emitted and the section start moves
//back by one. The tokenizer always returns to its base state.
Tokenizer.prototype._decodeNumericEntity = function(offset, base) {
    var digitsStart = this._sectionStart + offset;
    if (digitsStart === this._index) {
        this._sectionStart--;
    } else {
        //parse entity
        var digits = this._buffer.substring(digitsStart, this._index);
        this._emitPartial(decodeCodePoint(parseInt(digits, base)));
        this._sectionStart = this._index;
    }
    this._state = this._baseState;
};
Tokenizer.prototype._stateInNumericEntity = function(c) {
if (c === ";") {
this._decodeNumericEntity(2, 10);
this._sectionStart++;
} else if (c < "0" || c > "9") {
if (!this._xmlMode) {
};
Tokenizer.prototype._stateInNumericEntity = function (c) {
if (c === ";") {
this._decodeNumericEntity(2, 10);
} else {
this._state = this._baseState;
this._sectionStart++;
}
this._index--;
}
};
Tokenizer.prototype._stateInHexEntity = function(c) {
if (c === ";") {
this._decodeNumericEntity(3, 16);
this._sectionStart++;
} else if (
(c < "a" || c > "f") &&
(c < "A" || c > "F") &&
(c < "0" || c > "9")
) {
if (!this._xmlMode) {
else if (c < "0" || c > "9") {
if (!this._xmlMode) {
this._decodeNumericEntity(2, 10);
}
else {
this._state = this._baseState;
}
this._index--;
}
};
Tokenizer.prototype._stateInHexEntity = function (c) {
if (c === ";") {
this._decodeNumericEntity(3, 16);
} else {
this._state = this._baseState;
this._sectionStart++;
}
this._index--;
}
};
Tokenizer.prototype._cleanup = function() {
if (this._sectionStart < 0) {
this._buffer = "";
this._bufferOffset += this._index;
this._index = 0;
} else if (this._running) {
if (this._state === TEXT) {
if (this._sectionStart !== this._index) {
this._cbs.ontext(this._buffer.substr(this._sectionStart));
else if ((c < "a" || c > "f") &&
(c < "A" || c > "F") &&
(c < "0" || c > "9")) {
if (!this._xmlMode) {
this._decodeNumericEntity(3, 16);
}
else {
this._state = this._baseState;
}
this._index--;
}
};
Tokenizer.prototype._cleanup = function () {
if (this._sectionStart < 0) {
this._buffer = "";
this._bufferOffset += this._index;
this._index = 0;
} else if (this._sectionStart === this._index) {
//the section just started
this._buffer = "";
this._bufferOffset += this._index;
this._index = 0;
} else {
//remove everything unnecessary
this._buffer = this._buffer.substr(this._sectionStart);
this._index -= this._sectionStart;
this._bufferOffset += this._sectionStart;
}
this._sectionStart = 0;
}
};
//TODO make events conditional
//Appends `chunk` to the read buffer and continues parsing.
//Writing after the tokenizer has ended is reported through the onerror
//callback and the chunk is discarded; previously the chunk was still appended
//and parsed even though the stream had already been finished.
Tokenizer.prototype.write = function(chunk) {
    if (this._ended) {
        this._cbs.onerror(new Error(".write() after done!"));
        return;
    }
    this._buffer += chunk;
    this._parse();
};
Tokenizer.prototype._parse = function() {
while (this._index < this._buffer.length && this._running) {
var c = this._buffer.charAt(this._index);
if (this._state === TEXT) {
this._stateText(c);
} else if (this._state === BEFORE_TAG_NAME) {
this._stateBeforeTagName(c);
} else if (this._state === IN_TAG_NAME) {
this._stateInTagName(c);
} else if (this._state === BEFORE_CLOSING_TAG_NAME) {
this._stateBeforeCloseingTagName(c);
} else if (this._state === IN_CLOSING_TAG_NAME) {
this._stateInCloseingTagName(c);
} else if (this._state === AFTER_CLOSING_TAG_NAME) {
this._stateAfterCloseingTagName(c);
} else if (this._state === IN_SELF_CLOSING_TAG) {
this._stateInSelfClosingTag(c);
} else if (this._state === BEFORE_ATTRIBUTE_NAME) {
/*
* attributes
*/
this._stateBeforeAttributeName(c);
} else if (this._state === IN_ATTRIBUTE_NAME) {
this._stateInAttributeName(c);
} else if (this._state === AFTER_ATTRIBUTE_NAME) {
this._stateAfterAttributeName(c);
} else if (this._state === BEFORE_ATTRIBUTE_VALUE) {
this._stateBeforeAttributeValue(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_DQ) {
this._stateInAttributeValueDoubleQuotes(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_SQ) {
this._stateInAttributeValueSingleQuotes(c);
} else if (this._state === IN_ATTRIBUTE_VALUE_NQ) {
this._stateInAttributeValueNoQuotes(c);
} else if (this._state === BEFORE_DECLARATION) {
/*
* declarations
*/
this._stateBeforeDeclaration(c);
} else if (this._state === IN_DECLARATION) {
this._stateInDeclaration(c);
} else if (this._state === IN_PROCESSING_INSTRUCTION) {
/*
* processing instructions
*/
this._stateInProcessingInstruction(c);
} else if (this._state === BEFORE_COMMENT) {
/*
* comments
*/
this._stateBeforeComment(c);
} else if (this._state === IN_COMMENT) {
this._stateInComment(c);
} else if (this._state === AFTER_COMMENT_1) {
this._stateAfterComment1(c);
} else if (this._state === AFTER_COMMENT_2) {
this._stateAfterComment2(c);
} else if (this._state === BEFORE_CDATA_1) {
/*
* cdata
*/
this._stateBeforeCdata1(c);
} else if (this._state === BEFORE_CDATA_2) {
this._stateBeforeCdata2(c);
} else if (this._state === BEFORE_CDATA_3) {
this._stateBeforeCdata3(c);
} else if (this._state === BEFORE_CDATA_4) {
this._stateBeforeCdata4(c);
} else if (this._state === BEFORE_CDATA_5) {
this._stateBeforeCdata5(c);
} else if (this._state === BEFORE_CDATA_6) {
this._stateBeforeCdata6(c);
} else if (this._state === IN_CDATA) {
this._stateInCdata(c);
} else if (this._state === AFTER_CDATA_1) {
this._stateAfterCdata1(c);
} else if (this._state === AFTER_CDATA_2) {
this._stateAfterCdata2(c);
} else if (this._state === BEFORE_SPECIAL) {
/*
* special tags
*/
this._stateBeforeSpecial(c);
} else if (this._state === BEFORE_SPECIAL_END) {
this._stateBeforeSpecialEnd(c);
} else if (this._state === BEFORE_SCRIPT_1) {
/*
* script
*/
this._stateBeforeScript1(c);
} else if (this._state === BEFORE_SCRIPT_2) {
this._stateBeforeScript2(c);
} else if (this._state === BEFORE_SCRIPT_3) {
this._stateBeforeScript3(c);
} else if (this._state === BEFORE_SCRIPT_4) {
this._stateBeforeScript4(c);
} else if (this._state === BEFORE_SCRIPT_5) {
this._stateBeforeScript5(c);
} else if (this._state === AFTER_SCRIPT_1) {
this._stateAfterScript1(c);
} else if (this._state === AFTER_SCRIPT_2) {
this._stateAfterScript2(c);
} else if (this._state === AFTER_SCRIPT_3) {
this._stateAfterScript3(c);
} else if (this._state === AFTER_SCRIPT_4) {
this._stateAfterScript4(c);
} else if (this._state === AFTER_SCRIPT_5) {
this._stateAfterScript5(c);
} else if (this._state === BEFORE_STYLE_1) {
/*
* style
*/
this._stateBeforeStyle1(c);
} else if (this._state === BEFORE_STYLE_2) {
this._stateBeforeStyle2(c);
} else if (this._state === BEFORE_STYLE_3) {
this._stateBeforeStyle3(c);
} else if (this._state === BEFORE_STYLE_4) {
this._stateBeforeStyle4(c);
} else if (this._state === AFTER_STYLE_1) {
this._stateAfterStyle1(c);
} else if (this._state === AFTER_STYLE_2) {
this._stateAfterStyle2(c);
} else if (this._state === AFTER_STYLE_3) {
this._stateAfterStyle3(c);
} else if (this._state === AFTER_STYLE_4) {
this._stateAfterStyle4(c);
} else if (this._state === BEFORE_ENTITY) {
/*
* entities
*/
this._stateBeforeEntity(c);
} else if (this._state === BEFORE_NUMERIC_ENTITY) {
this._stateBeforeNumericEntity(c);
} else if (this._state === IN_NAMED_ENTITY) {
this._stateInNamedEntity(c);
} else if (this._state === IN_NUMERIC_ENTITY) {
this._stateInNumericEntity(c);
} else if (this._state === IN_HEX_ENTITY) {
this._stateInHexEntity(c);
} else {
this._cbs.onerror(Error("unknown _state"), this._state);
else if (this._running) {
if (this._state === 1 /* Text */) {
if (this._sectionStart !== this._index) {
this._cbs.ontext(this._buffer.substr(this._sectionStart));
}
this._buffer = "";
this._bufferOffset += this._index;
this._index = 0;
}
else if (this._sectionStart === this._index) {
//the section just started
this._buffer = "";
this._bufferOffset += this._index;
this._index = 0;
}
else {
//remove everything unnecessary
this._buffer = this._buffer.substr(this._sectionStart);
this._index -= this._sectionStart;
this._bufferOffset += this._sectionStart;
}
this._sectionStart = 0;
}
this._index++;
}
this._cleanup();
};
/**
 * Clears the `_running` flag. The `_parse` loop checks this flag on every
 * iteration, so tokenizing stops until the flag is set again.
 */
Tokenizer.prototype.pause = function () {
    this._running = false;
};
Tokenizer.prototype.resume = function() {
this._running = true;
if (this._index < this._buffer.length) {
};
//TODO make events conditional
Tokenizer.prototype.write = function (chunk) {
if (this._ended)
this._cbs.onerror(Error(".write() after done!"));
this._buffer += chunk;
this._parse();
}
if (this._ended) {
this._finish();
}
};
/**
 * Marks the input as finished, optionally writing one last chunk first.
 *
 * A repeated call is reported through `onerror` but otherwise proceeds.
 * `_finish` is only invoked here while the tokenizer is running.
 *
 * @param {string} [chunk] Final data to write before ending.
 */
Tokenizer.prototype.end = function (chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".end() after done!"));
    }
    if (chunk) {
        this.write(chunk);
    }
    this._ended = true;
    if (this._running) {
        this._finish();
    }
};
/**
 * Flushes whatever is left in the current section, then signals the end of
 * input through the `onend` callback.
 */
Tokenizer.prototype._finish = function () {
    // A section start before the current index means unemitted data remains.
    var hasTrailingData = this._sectionStart < this._index;
    if (hasTrailingData) {
        this._handleTrailingData();
    }
    this._cbs.onend();
};
Tokenizer.prototype._handleTrailingData = function() {
var data = this._buffer.substr(this._sectionStart);
if (
this._state === IN_CDATA ||
this._state === AFTER_CDATA_1 ||
this._state === AFTER_CDATA_2
) {
this._cbs.oncdata(data);
} else if (
this._state === IN_COMMENT ||
this._state === AFTER_COMMENT_1 ||
this._state === AFTER_COMMENT_2
) {
this._cbs.oncomment(data);
} else if (this._state === IN_NAMED_ENTITY && !this._xmlMode) {
this._parseLegacyEntity();
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
};
// Iterates through the buffer, calling the function corresponding to the current state.
// States that are more likely to be hit are higher up, as a performance improvement.
Tokenizer.prototype._parse = function () {
while (this._index < this._buffer.length && this._running) {
var c = this._buffer.charAt(this._index);
if (this._state === 1 /* Text */) {
this._stateText(c);
}
else if (this._state === 12 /* InAttributeValueDq */) {
this._stateInAttributeValueDoubleQuotes(c);
}
else if (this._state === 9 /* InAttributeName */) {
this._stateInAttributeName(c);
}
else if (this._state === 19 /* InComment */) {
this._stateInComment(c);
}
else if (this._state === 8 /* BeforeAttributeName */) {
this._stateBeforeAttributeName(c);
}
else if (this._state === 3 /* InTagName */) {
this._stateInTagName(c);
}
else if (this._state === 6 /* InClosingTagName */) {
this._stateInCloseingTagName(c);
}
else if (this._state === 2 /* BeforeTagName */) {
this._stateBeforeTagName(c);
}
else if (this._state === 10 /* AfterAttributeName */) {
this._stateAfterAttributeName(c);
}
else if (this._state === 13 /* InAttributeValueSq */) {
this._stateInAttributeValueSingleQuotes(c);
}
else if (this._state === 11 /* BeforeAttributeValue */) {
this._stateBeforeAttributeValue(c);
}
else if (this._state === 5 /* BeforeClosingTagName */) {
this._stateBeforeCloseingTagName(c);
}
else if (this._state === 7 /* AfterClosingTagName */) {
this._stateAfterCloseingTagName(c);
}
else if (this._state === 31 /* BeforeSpecial */) {
this._stateBeforeSpecial(c);
}
else if (this._state === 20 /* AfterComment1 */) {
this._stateAfterComment1(c);
}
else if (this._state === 14 /* InAttributeValueNq */) {
this._stateInAttributeValueNoQuotes(c);
}
else if (this._state === 4 /* InSelfClosingTag */) {
this._stateInSelfClosingTag(c);
}
else if (this._state === 16 /* InDeclaration */) {
this._stateInDeclaration(c);
}
else if (this._state === 15 /* BeforeDeclaration */) {
this._stateBeforeDeclaration(c);
}
else if (this._state === 21 /* AfterComment2 */) {
this._stateAfterComment2(c);
}
else if (this._state === 18 /* BeforeComment */) {
this._stateBeforeComment(c);
}
else if (this._state === 32 /* BeforeSpecialEnd */) {
this._stateBeforeSpecialEnd(c);
}
else if (this._state === 38 /* AfterScript1 */) {
stateAfterScript1(this, c);
}
else if (this._state === 39 /* AfterScript2 */) {
stateAfterScript2(this, c);
}
else if (this._state === 40 /* AfterScript3 */) {
stateAfterScript3(this, c);
}
else if (this._state === 33 /* BeforeScript1 */) {
stateBeforeScript1(this, c);
}
else if (this._state === 34 /* BeforeScript2 */) {
stateBeforeScript2(this, c);
}
else if (this._state === 35 /* BeforeScript3 */) {
stateBeforeScript3(this, c);
}
else if (this._state === 36 /* BeforeScript4 */) {
stateBeforeScript4(this, c);
}
else if (this._state === 37 /* BeforeScript5 */) {
this._stateBeforeScript5(c);
}
else if (this._state === 41 /* AfterScript4 */) {
stateAfterScript4(this, c);
}
else if (this._state === 42 /* AfterScript5 */) {
this._stateAfterScript5(c);
}
else if (this._state === 43 /* BeforeStyle1 */) {
stateBeforeStyle1(this, c);
}
else if (this._state === 28 /* InCdata */) {
this._stateInCdata(c);
}
else if (this._state === 44 /* BeforeStyle2 */) {
stateBeforeStyle2(this, c);
}
else if (this._state === 45 /* BeforeStyle3 */) {
stateBeforeStyle3(this, c);
}
else if (this._state === 46 /* BeforeStyle4 */) {
this._stateBeforeStyle4(c);
}
else if (this._state === 47 /* AfterStyle1 */) {
stateAfterStyle1(this, c);
}
else if (this._state === 48 /* AfterStyle2 */) {
stateAfterStyle2(this, c);
}
else if (this._state === 49 /* AfterStyle3 */) {
stateAfterStyle3(this, c);
}
else if (this._state === 50 /* AfterStyle4 */) {
this._stateAfterStyle4(c);
}
else if (this._state === 17 /* InProcessingInstruction */) {
this._stateInProcessingInstruction(c);
}
else if (this._state === 53 /* InNamedEntity */) {
this._stateInNamedEntity(c);
}
else if (this._state === 22 /* BeforeCdata1 */) {
stateBeforeCdata1(this, c);
}
else if (this._state === 51 /* BeforeEntity */) {
stateBeforeEntity(this, c);
}
else if (this._state === 23 /* BeforeCdata2 */) {
stateBeforeCdata2(this, c);
}
else if (this._state === 24 /* BeforeCdata3 */) {
stateBeforeCdata3(this, c);
}
else if (this._state === 29 /* AfterCdata1 */) {
this._stateAfterCdata1(c);
}
else if (this._state === 30 /* AfterCdata2 */) {
this._stateAfterCdata2(c);
}
else if (this._state === 25 /* BeforeCdata4 */) {
stateBeforeCdata4(this, c);
}
else if (this._state === 26 /* BeforeCdata5 */) {
stateBeforeCdata5(this, c);
}
else if (this._state === 27 /* BeforeCdata6 */) {
this._stateBeforeCdata6(c);
}
else if (this._state === 55 /* InHexEntity */) {
this._stateInHexEntity(c);
}
else if (this._state === 54 /* InNumericEntity */) {
this._stateInNumericEntity(c);
}
else if (this._state === 52 /* BeforeNumericEntity */) {
stateBeforeNumericEntity(this, c);
}
else {
this._cbs.onerror(Error("unknown _state"), this._state);
}
this._index++;
}
} else if (this._state === IN_NUMERIC_ENTITY && !this._xmlMode) {
this._decodeNumericEntity(2, 10);
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
this._cleanup();
};
/**
 * Stops tokenizing by clearing `_running`; the `_parse` loop only continues
 * while that flag is set.
 */
Tokenizer.prototype.pause = function () {
    this._running = false;
};
Tokenizer.prototype.resume = function () {
this._running = true;
if (this._index < this._buffer.length) {
this._parse();
}
} else if (this._state === IN_HEX_ENTITY && !this._xmlMode) {
this._decodeNumericEntity(3, 16);
if (this._ended) {
this._finish();
}
};
/**
 * Ends the input. An optional last chunk is written first; calling this on
 * an already ended tokenizer is reported via `onerror`. `_finish` runs
 * immediately only when the tokenizer is currently running.
 *
 * @param {string} [chunk] Final data to write before ending.
 */
Tokenizer.prototype.end = function (chunk) {
    var alreadyEnded = this._ended;
    if (alreadyEnded) {
        this._cbs.onerror(Error(".end() after done!"));
    }
    if (chunk) {
        this.write(chunk);
    }
    this._ended = true;
    if (this._running) {
        this._finish();
    }
};
Tokenizer.prototype._finish = function () {
//if there is remaining data, emit it in a reasonable way
if (this._sectionStart < this._index) {
this._state = this._baseState;
this._handleTrailingData();
}
} else if (
this._state !== IN_TAG_NAME &&
this._state !== BEFORE_ATTRIBUTE_NAME &&
this._state !== BEFORE_ATTRIBUTE_VALUE &&
this._state !== AFTER_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_NAME &&
this._state !== IN_ATTRIBUTE_VALUE_SQ &&
this._state !== IN_ATTRIBUTE_VALUE_DQ &&
this._state !== IN_ATTRIBUTE_VALUE_NQ &&
this._state !== IN_CLOSING_TAG_NAME
) {
this._cbs.ontext(data);
}
//else, ignore remaining data
//TODO add a way to remove current tag
};
/**
 * Re-initializes this instance by invoking the `Tokenizer` constructor on it
 * again, carrying over the current `xmlMode` / `decodeEntities` settings and
 * the existing callbacks object.
 */
Tokenizer.prototype.reset = function () {
    var options = {
        xmlMode: this._xmlMode,
        decodeEntities: this._decodeEntities
    };
    Tokenizer.call(this, options, this._cbs);
};
/**
 * @returns {number} The sum of `_bufferOffset` and the current `_index`.
 */
Tokenizer.prototype.getAbsoluteIndex = function () {
    var offset = this._bufferOffset;
    var position = this._index;
    return offset + position;
};
/**
 * @returns {string} The buffer contents between `_sectionStart` and `_index`.
 */
Tokenizer.prototype._getSection = function () {
    var from = this._sectionStart;
    var to = this._index;
    return this._buffer.substring(from, to);
};
/**
 * Passes the current section to the callback called `name`, then sets
 * `_sectionStart` to -1.
 *
 * @param {string} name Key of the callback on `_cbs` to invoke.
 */
Tokenizer.prototype._emitToken = function (name) {
    var section = this._getSection();
    this._cbs[name](section);
    this._sectionStart = -1;
};
/**
 * Routes a partial value to `ontext` when the base state is TEXT, and to
 * `onattribdata` for any other base state.
 *
 * @param {string} value The partial data to emit.
 */
Tokenizer.prototype._emitPartial = function (value) {
    //TODO implement the new event (onattribdata)
    var handler = this._baseState === TEXT ? "ontext" : "onattribdata";
    this._cbs[handler](value);
};
this._cbs.onend();
};
/**
 * Emits the data left in the buffer from `_sectionStart` onwards, choosing
 * the callback from the state the tokenizer is currently in.
 */
Tokenizer.prototype._handleTrailingData = function () {
    var trailing = this._buffer.substr(this._sectionStart);
    var state = this._state;

    var inCdata = state === 28 /* InCdata */ ||
        state === 29 /* AfterCdata1 */ ||
        state === 30 /* AfterCdata2 */;
    if (inCdata) {
        this._cbs.oncdata(trailing);
        return;
    }

    var inComment = state === 19 /* InComment */ ||
        state === 20 /* AfterComment1 */ ||
        state === 21 /* AfterComment2 */;
    if (inComment) {
        this._cbs.oncomment(trailing);
        return;
    }

    var inEntity = state === 53 /* InNamedEntity */ ||
        state === 54 /* InNumericEntity */ ||
        state === 55 /* InHexEntity */;
    if (inEntity && !this._xmlMode) {
        if (state === 53 /* InNamedEntity */) {
            this._parseLegacyEntity();
        }
        else if (state === 54 /* InNumericEntity */) {
            this._decodeNumericEntity(2, 10);
        }
        else {
            this._decodeNumericEntity(3, 16);
        }
        // Anything the entity step left over is handled again from the base state.
        if (this._sectionStart < this._index) {
            this._state = this._baseState;
            this._handleTrailingData();
        }
        return;
    }

    switch (state) {
        case 3 /* InTagName */:
        case 8 /* BeforeAttributeName */:
        case 11 /* BeforeAttributeValue */:
        case 10 /* AfterAttributeName */:
        case 9 /* InAttributeName */:
        case 13 /* InAttributeValueSq */:
        case 12 /* InAttributeValueDq */:
        case 14 /* InAttributeValueNq */:
        case 6 /* InClosingTagName */:
            //ignore remaining data
            //TODO add a way to remove current tag
            return;
        default:
            this._cbs.ontext(trailing);
    }
};
/**
 * @returns {number} `_bufferOffset` plus the current `_index`.
 */
Tokenizer.prototype.getAbsoluteIndex = function () {
    var base = this._bufferOffset;
    var local = this._index;
    return base + local;
};
/**
 * @returns {string} The slice of the buffer from `_sectionStart` up to `_index`.
 */
Tokenizer.prototype._getSection = function () {
    var begin = this._sectionStart;
    var finish = this._index;
    return this._buffer.substring(begin, finish);
};
/**
 * Hands the current section to the `_cbs` callback stored under `name` and
 * then resets `_sectionStart` to -1.
 *
 * @param {string} name Key of the callback on `_cbs` to invoke.
 */
Tokenizer.prototype._emitToken = function (name) {
    var token = this._getSection();
    this._cbs[name](token);
    this._sectionStart = -1;
};
/**
 * Sends a partial value to `ontext` when the base state is Text (1), and to
 * `onattribdata` for every other base state.
 *
 * @param {string} value The partial data to emit.
 */
Tokenizer.prototype._emitPartial = function (value) {
    //TODO implement the new event (onattribdata)
    var inText = this._baseState === 1 /* Text */;
    var handler = inText ? "ontext" : "onattribdata";
    this._cbs[handler](value);
};
return Tokenizer;
}());
exports.default = Tokenizer;

@@ -1,25 +0,48 @@

module.exports = Stream;
var Parser = require("./Parser.js");
var WritableStream = require("readable-stream").Writable;
var StringDecoder = require("string_decoder").StringDecoder;
var Buffer = require("buffer").Buffer;
function Stream(cbs, options) {
var parser = (this._parser = new Parser(cbs, options));
var decoder = (this._decoder = new StringDecoder());
WritableStream.call(this, { decodeStrings: false });
this.once("finish", function() {
parser.end(decoder.end());
});
"use strict";
// Inheritance helper: `__extends(d, b)` makes constructor `d` derive from
// constructor `b`. If `this` already carries an `__extends`, that one is reused
// instead of building a new helper.
var __extends = (this && this.__extends) || (function () {
// Links `d` to `b` so that static members of `b` are reachable from `d`.
// On its first call it replaces itself with the first available strategy
// below and then delegates to that strategy.
var extendStatics = function (d, b) {
extendStatics = Object.setPrototypeOf ||
// Fallback 1: assign `__proto__` directly when object literals honor it.
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
// Fallback 2: copy the own enumerable properties of `b` onto `d`.
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
// `__` is a throwaway constructor: `new __()` yields an object that inherits
// from `b.prototype` and whose `constructor` property is `d`.
function __() { this.constructor = d; }
// A `null` base produces a prototype object created via Object.create(null).
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
var Parser_1 = require("./Parser");
var stream_1 = require("stream");
var string_decoder_1 = require("string_decoder");
// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream
/**
 * Decides whether a chunk needs decoding, judged only by its encoding label:
 * the result is true exactly when `encoding` is the string "buffer".
 *
 * @param {*} _chunk The chunk itself (not inspected).
 * @param {string} encoding Encoding label passed along with the chunk.
 * @returns {boolean}
 */
function isBuffer(_chunk, encoding) {
    var bufferLabel = "buffer";
    return bufferLabel === encoding;
}
require("inherits")(Stream, WritableStream);
/**
 * Feeds one chunk to the parser. Buffer chunks go through the string decoder
 * first; anything else is passed on unchanged. The completion callback is
 * invoked right after the parser call returns.
 */
Stream.prototype._write = function (chunk, encoding, cb) {
    var text = chunk instanceof Buffer ? this._decoder.write(chunk) : chunk;
    this._parser.write(text);
    cb();
};
/**
* WritableStream makes the `Parser` interface available as a NodeJS stream.
*
* @see Parser
*/
var WritableStream = /** @class */ (function (Base) {
    __extends(WritableStream, Base);
    // Sets up the writable base without string decoding, then creates the
    // string decoder and the parser that receives the decoded text.
    function WritableStream(cbs, options) {
        var self = Base.call(this, { decodeStrings: false }) || this;
        self._decoder = new string_decoder_1.StringDecoder();
        self._parser = new Parser_1.Parser(cbs, options);
        return self;
    }
    // Decodes buffer chunks, forwards the text to the parser, then acknowledges.
    WritableStream.prototype._write = function (chunk, encoding, cb) {
        var text = isBuffer(chunk, encoding) ? this._decoder.write(chunk) : chunk;
        this._parser.write(text);
        cb();
    };
    // Flushes whatever the decoder still holds into the parser and ends it.
    WritableStream.prototype._final = function (cb) {
        var remainder = this._decoder.end();
        this._parser.end(remainder);
        cb();
    };
    return WritableStream;
}(stream_1.Writable));
exports.WritableStream = WritableStream;
{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML/RSS parser",
"version": "3.10.1",
"version": "4.0.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -20,6 +20,2 @@ "keywords": [

},
"bugs": {
"mail": "me@feedic.com",
"url": "http://github.com/fb55/htmlparser2/issues"
},
"directories": {

@@ -30,27 +26,37 @@ "lib": "lib/"

"files": [
"lib"
"lib/**/*"
],
"browser": {
"./lib/WritableStream.js": false
},
"scripts": {
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
"test": "mocha && npm run lint",
"lint": "eslint lib test"
"test": "jest --coverage -u && npm run lint",
"coverage": "cat coverage/lcov.info | coveralls",
"lint": "eslint src/**/*.ts",
"format": "prettier --write '**/*.{ts,md,json}'",
"build": "tsc",
"prepare": "npm run build"
},
"dependencies": {
"domelementtype": "^1.3.1",
"domhandler": "^2.3.0",
"domutils": "^1.5.1",
"entities": "^1.1.1",
"inherits": "^2.0.1",
"readable-stream": "^3.1.1"
"domelementtype": "^2.0.1",
"domhandler": "^3.0.0",
"domutils": "^2.0.0",
"entities": "^2.0.0"
},
"devDependencies": {
"@types/jest": "^24.0.16",
"@types/node": "^12.6.8",
"@typescript-eslint/eslint-plugin": "^1.13.0",
"@typescript-eslint/parser": "^1.13.0",
"coveralls": "^3.0.1",
"eslint": "^5.13.0",
"istanbul": "^0.4.3",
"mocha": "^5.2.0",
"mocha-lcov-reporter": "^1.2.0"
"eslint": "^6.0.0",
"eslint-config-prettier": "^6.0.0",
"jest": "^24.8.0",
"prettier": "^1.18.2",
"ts-jest": "^24.0.2",
"typescript": "^3.5.3"
},
"browser": {
"readable-stream": false
"jest": {
"preset": "ts-jest",
"testEnvironment": "node"
},

@@ -57,0 +63,0 @@ "license": "MIT",

@@ -8,7 +8,9 @@ # htmlparser2

A forgiving HTML/XML/RSS parser. The parser can handle streams and provides a callback interface.
A forgiving HTML/XML/RSS parser.
The parser can handle streams and provides a callback interface.
## Installation
npm install htmlparser2
npm install htmlparser2
A live demo of htmlparser2 is available [here](https://astexplorer.net/#/2AmVrGuGVJ).

@@ -19,19 +21,24 @@

```javascript
var htmlparser = require("htmlparser2");
var parser = new htmlparser.Parser({
onopentag: function(name, attribs){
if(name === "script" && attribs.type === "text/javascript"){
console.log("JS! Hooray!");
}
},
ontext: function(text){
console.log("-->", text);
},
onclosetag: function(tagname){
if(tagname === "script"){
console.log("That's it?!");
}
}
}, {decodeEntities: true});
parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>");
const htmlparser2 = require("htmlparser2");
const parser = new htmlparser2.Parser(
{
onopentag(name, attribs) {
if (name === "script" && attribs.type === "text/javascript") {
console.log("JS! Hooray!");
}
},
ontext(text) {
console.log("-->", text);
},
onclosetag(tagname) {
if (tagname === "script") {
console.log("That's it?!");
}
}
},
{ decodeEntities: true }
);
parser.write(
"Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>"
);
parser.end();

@@ -54,2 +61,3 @@ ```

## Get a DOM
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.

@@ -62,12 +70,10 @@

```javascript
new htmlparser.FeedHandler(function(<error> error, <object> feed){
...
});
const feed = htmlparser2.parseFeed(content, options);
```
Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.
Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.
## Performance
After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites.
After having some artificial benchmarks for some time, **@AndreasMadsen** published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites.

@@ -91,6 +97,16 @@ At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):

This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
This module started as a fork of the `htmlparser` module.
The main difference is that `htmlparser2` is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)).
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). As a result, old handlers won't work anymore.
The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)).
As a result, old handlers won't work anymore.
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
## Security contact information
To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security).
Tidelift will coordinate the fix and disclosure.
[Get supported htmlparser2 with the Tidelift Subscription](https://tidelift.com/subscription/pkg/npm-htmlparser2?utm_source=npm-htmlparser2&utm_medium=referral&utm_campaign=readme)
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc