Socket
Socket
Sign in | Demo | Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 6.1.0 to 7.0.0

14

lib/FeedHandler.d.ts

@@ -1,2 +0,2 @@

import DomHandler, { DomHandlerOptions } from "domhandler";
import DomHandler, { DomHandlerOptions, Node } from "domhandler";
import { ParserOptions } from "./Parser";

@@ -49,2 +49,3 @@ declare enum FeedItemMediaMedium {

}
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */
export declare class FeedHandler extends DomHandler {

@@ -61,9 +62,16 @@ feed?: Feed;

/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
export declare function getFeed(dom: Node[]): Feed | null;
/**
* Parse a feed.
*
* @param feed The feed that should be parsed, as a string.
* @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`.
* @param options Optionally, options for parsing. When using this, you should set `xmlMode` to `true`.
*/
export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | undefined;
export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | null;
export {};
//# sourceMappingURL=FeedHandler.d.ts.map

@@ -40,3 +40,3 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.parseFeed = exports.FeedHandler = void 0;
exports.parseFeed = exports.getFeed = exports.FeedHandler = void 0;
var domhandler_1 = __importDefault(require("domhandler"));

@@ -59,3 +59,3 @@ var DomUtils = __importStar(require("domutils"));

})(FeedItemMediaExpression || (FeedItemMediaExpression = {}));
// TODO: Consume data as it is coming in
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */
var FeedHandler = /** @class */ (function (_super) {

@@ -78,77 +78,91 @@ __extends(FeedHandler, _super);

FeedHandler.prototype.onend = function () {
var _a, _b;
var feedRoot = getOneElement(isValidFeed, this.dom);
if (!feedRoot) {
var feed = getFeed(this.dom);
if (feed) {
this.feed = feed;
this.handleCallback(null);
}
else {
this.handleCallback(new Error("couldn't find root of feed"));
return;
}
var feed = {};
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
};
return FeedHandler;
}(domhandler_1.default));
exports.FeedHandler = FeedHandler;
/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
function getFeed(dom) {
var _a, _b;
var feedRoot = getOneElement(isValidFeed, dom);
if (!feedRoot)
return null;
var feed = {};
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
if (href) {
feed.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
feed.link = href;
entry.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
var description = fetch("summary", children) || fetch("content", children);
if (description) {
entry.description = description;
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
entry.link = href;
}
var description = fetch("summary", children) || fetch("content", children);
if (description) {
entry.description = description;
}
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
}
this.feed = feed;
this.handleCallback(null);
};
return FeedHandler;
}(domhandler_1.default));
exports.FeedHandler = FeedHandler;
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
}
return feed;
}
exports.getFeed = getFeed;
function getMediaElements(where) {

@@ -208,3 +222,3 @@ return getElements("media:content", where).map(function (elem) {

if (recurse === void 0) { recurse = false; }
return DomUtils.getText(DomUtils.getElementsByTagName(tagName, where, recurse, 1)).trim();
return DomUtils.textContent(DomUtils.getElementsByTagName(tagName, where, recurse, 1)).trim();
}

@@ -231,10 +245,10 @@ function getAttribute(name, elem) {

* @param feed The feed that should be parsed, as a string.
* @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`.
* @param options Optionally, options for parsing. When using this, you should set `xmlMode` to `true`.
*/
function parseFeed(feed, options) {
if (options === void 0) { options = { xmlMode: true }; }
var handler = new FeedHandler(options);
var handler = new domhandler_1.default(null, options);
new Parser_1.Parser(handler, options).end(feed);
return handler.feed;
return getFeed(handler.dom);
}
exports.parseFeed = parseFeed;

@@ -80,6 +80,7 @@ import Tokenizer from "./Tokenizer";

export declare class Parser {
private readonly options;
/** The start index of the last event. */
startIndex: number;
/** The end index of the last event. */
endIndex: number | null;
endIndex: number;
private tagname;

@@ -92,9 +93,9 @@ private attribname;

private readonly cbs;
private readonly options;
private readonly lowerCaseTagNames;
private readonly lowerCaseAttributeNames;
private readonly tokenizer;
constructor(cbs: Partial<Handler> | null, options?: ParserOptions);
constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
private updatePosition;
ontext(data: string): void;
protected isVoidElement(name: string): boolean;
onopentagname(name: string): void;

@@ -101,0 +102,0 @@ onopentagend(): void;

@@ -18,51 +18,54 @@ "use strict";

var pTag = new Set(["p"]);
var openImpliesClose = {
tr: new Set(["tr", "th", "td"]),
th: new Set(["th"]),
td: new Set(["thead", "th", "td"]),
body: new Set(["head", "link", "script"]),
li: new Set(["li"]),
p: pTag,
h1: pTag,
h2: pTag,
h3: pTag,
h4: pTag,
h5: pTag,
h6: pTag,
select: formTags,
input: formTags,
output: formTags,
button: formTags,
datalist: formTags,
textarea: formTags,
option: new Set(["option"]),
optgroup: new Set(["optgroup", "option"]),
dd: new Set(["dt", "dd"]),
dt: new Set(["dt", "dd"]),
address: pTag,
article: pTag,
aside: pTag,
blockquote: pTag,
details: pTag,
div: pTag,
dl: pTag,
fieldset: pTag,
figcaption: pTag,
figure: pTag,
footer: pTag,
form: pTag,
header: pTag,
hr: pTag,
main: pTag,
nav: pTag,
ol: pTag,
pre: pTag,
section: pTag,
table: pTag,
ul: pTag,
rt: new Set(["rt", "rp"]),
rp: new Set(["rt", "rp"]),
tbody: new Set(["thead", "tbody"]),
tfoot: new Set(["thead", "tbody"]),
};
var tableSectionTags = new Set(["thead", "tbody"]);
var ddtTags = new Set(["dd", "dt"]);
var rtpTags = new Set(["rt", "rp"]);
var openImpliesClose = new Map([
["tr", new Set(["tr", "th", "td"])],
["th", new Set(["th"])],
["td", new Set(["thead", "th", "td"])],
["body", new Set(["head", "link", "script"])],
["li", new Set(["li"])],
["p", pTag],
["h1", pTag],
["h2", pTag],
["h3", pTag],
["h4", pTag],
["h5", pTag],
["h6", pTag],
["select", formTags],
["input", formTags],
["output", formTags],
["button", formTags],
["datalist", formTags],
["textarea", formTags],
["option", new Set(["option"])],
["optgroup", new Set(["optgroup", "option"])],
["dd", ddtTags],
["dt", ddtTags],
["address", pTag],
["article", pTag],
["aside", pTag],
["blockquote", pTag],
["details", pTag],
["div", pTag],
["dl", pTag],
["fieldset", pTag],
["figcaption", pTag],
["figure", pTag],
["footer", pTag],
["form", pTag],
["header", pTag],
["hr", pTag],
["main", pTag],
["nav", pTag],
["ol", pTag],
["pre", pTag],
["section", pTag],
["table", pTag],
["ul", pTag],
["rt", rtpTags],
["rp", rtpTags],
["tbody", tableSectionTags],
["tfoot", tableSectionTags],
]);
var voidElements = new Set([

@@ -106,6 +109,7 @@ "area",

var _a, _b, _c, _d, _e;
this.options = options;
/** The start index of the last event. */
this.startIndex = 0;
/** The end index of the last event. */
this.endIndex = null;
this.endIndex = 0;
this.tagname = "";

@@ -125,14 +129,4 @@ this.attribname = "";

}
Parser.prototype.updatePosition = function (initialOffset) {
if (this.endIndex === null) {
if (this.tokenizer.sectionStart <= initialOffset) {
this.startIndex = 0;
}
else {
this.startIndex = this.tokenizer.sectionStart - initialOffset;
}
}
else {
this.startIndex = this.endIndex + 1;
}
Parser.prototype.updatePosition = function (offset) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();

@@ -143,8 +137,12 @@ };

var _a, _b;
this.updatePosition(1);
this.endIndex--;
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
};
Parser.prototype.isVoidElement = function (name) {
return !this.options.xmlMode && voidElements.has(name);
};
Parser.prototype.onopentagname = function (name) {
var _a, _b;
var _a, _b, _c, _d;
this.updatePosition(1);
if (this.lowerCaseTagNames) {

@@ -154,11 +152,11 @@ name = name.toLowerCase();

this.tagname = name;
if (!this.options.xmlMode &&
Object.prototype.hasOwnProperty.call(openImpliesClose, name)) {
var el = void 0;
var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
if (impliesClose) {
while (this.stack.length > 0 &&
openImpliesClose[name].has((el = this.stack[this.stack.length - 1]))) {
this.onclosetag(el);
impliesClose.has(this.stack[this.stack.length - 1])) {
var el = this.stack.pop();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el);
}
}
if (this.options.xmlMode || !voidElements.has(name)) {
if (!this.isVoidElement(name)) {
this.stack.push(name);

@@ -172,3 +170,3 @@ if (foreignContextElements.has(name)) {

}
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name);
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
if (this.cbs.onopentag)

@@ -179,3 +177,3 @@ this.attribs = {};

var _a, _b;
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.attribs) {

@@ -185,5 +183,3 @@ (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs);

}
if (!this.options.xmlMode &&
this.cbs.onclosetag &&
voidElements.has(this.tagname)) {
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname);

@@ -202,4 +198,3 @@ }

}
if (this.stack.length &&
(this.options.xmlMode || !voidElements.has(name))) {
if (this.stack.length && !this.isVoidElement(name)) {
var pos = this.stack.lastIndexOf(name);

@@ -279,2 +274,3 @@ if (pos !== -1) {

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_1 = this.getInstructionName(value);

@@ -286,2 +282,3 @@ this.cbs.onprocessinginstruction("!" + name_1, "!" + value);

if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_2 = this.getInstructionName(value);

@@ -288,0 +285,0 @@ this.cbs.onprocessinginstruction("?" + name_2, "?" + value);

@@ -87,2 +87,3 @@ /** All the states the tokenizer can be in. */

export default class Tokenizer {
private readonly cbs;
/** The current state the tokenizer is in. */

@@ -109,5 +110,5 @@ _state: State;

private ended;
private readonly cbs;
private readonly xmlMode;
private readonly decodeEntities;
private readonly entityTrie;
constructor(options: {

@@ -123,2 +124,6 @@ xmlMode?: boolean;

/**
* The start of the current section.
*/
getAbsoluteSectionStart(): number;
/**
* The current index within all of the written data.

@@ -165,8 +170,13 @@ */

private stateAfterSpecialLast;
private parseFixedEntity;
private parseLegacyEntity;
private trieIndex;
private trieCurrent;
private trieResult;
private trieExcess;
private stateBeforeEntity;
private stateInNamedEntity;
private emitNamedEntity;
private decodeNumericEntity;
private stateInNumericEntity;
private stateInHexEntity;
private allowLegacyEntity;
private cleanup;

@@ -182,3 +192,2 @@ /**

private getSection;
private emitToken;
private emitPartial;

@@ -185,0 +194,0 @@ }

@@ -7,26 +7,19 @@ "use strict";

var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint"));
var entities_json_1 = __importDefault(require("entities/lib/maps/entities.json"));
var legacy_json_1 = __importDefault(require("entities/lib/maps/legacy.json"));
var xml_json_1 = __importDefault(require("entities/lib/maps/xml.json"));
var decode_1 = require("entities/lib/decode");
function whitespace(c) {
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
return (c === 32 /* Space */ ||
c === 10 /* NewLine */ ||
c === 9 /* Tab */ ||
c === 12 /* FormFeed */ ||
c === 13 /* CarriageReturn */);
}
function isASCIIAlpha(c) {
return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z");
return ((c >= 97 /* LowerA */ && c <= 122 /* LowerZ */) ||
(c >= 65 /* UpperA */ && c <= 90 /* UpperZ */));
}
function ifElseState(upper, SUCCESS, FAILURE) {
var lower = upper.toLowerCase();
if (upper === lower) {
return function (t, c) {
if (c === lower) {
t._state = SUCCESS;
}
else {
t._state = FAILURE;
t._index--;
}
};
}
var upperCode = upper.charCodeAt(0);
var lowerCode = upper.toLowerCase().charCodeAt(0);
return function (t, c) {
if (c === lower || c === upper) {
if (c === lowerCode || c === upperCode) {
t._state = SUCCESS;

@@ -40,14 +33,2 @@ }

}
function consumeSpecialNameChar(upper, NEXT_STATE) {
var lower = upper.toLowerCase();
return function (t, c) {
if (c === lower || c === upper) {
t._state = NEXT_STATE;
}
else {
t._state = 3 /* InTagName */;
t._index--; // Consume the token again
}
};
}
var stateBeforeCdata1 = ifElseState("C", 24 /* BeforeCdata2 */, 16 /* InDeclaration */);

@@ -58,6 +39,6 @@ var stateBeforeCdata2 = ifElseState("D", 25 /* BeforeCdata3 */, 16 /* InDeclaration */);

var stateBeforeCdata5 = ifElseState("A", 28 /* BeforeCdata6 */, 16 /* InDeclaration */);
var stateBeforeScript1 = consumeSpecialNameChar("R", 35 /* BeforeScript2 */);
var stateBeforeScript2 = consumeSpecialNameChar("I", 36 /* BeforeScript3 */);
var stateBeforeScript3 = consumeSpecialNameChar("P", 37 /* BeforeScript4 */);
var stateBeforeScript4 = consumeSpecialNameChar("T", 38 /* BeforeScript5 */);
var stateBeforeScript1 = ifElseState("R", 35 /* BeforeScript2 */, 3 /* InTagName */);
var stateBeforeScript2 = ifElseState("I", 36 /* BeforeScript3 */, 3 /* InTagName */);
var stateBeforeScript3 = ifElseState("P", 37 /* BeforeScript4 */, 3 /* InTagName */);
var stateBeforeScript4 = ifElseState("T", 38 /* BeforeScript5 */, 3 /* InTagName */);
var stateAfterScript1 = ifElseState("R", 40 /* AfterScript2 */, 1 /* Text */);

@@ -67,17 +48,16 @@ var stateAfterScript2 = ifElseState("I", 41 /* AfterScript3 */, 1 /* Text */);

var stateAfterScript4 = ifElseState("T", 43 /* AfterScript5 */, 1 /* Text */);
var stateBeforeStyle1 = consumeSpecialNameChar("Y", 45 /* BeforeStyle2 */);
var stateBeforeStyle2 = consumeSpecialNameChar("L", 46 /* BeforeStyle3 */);
var stateBeforeStyle3 = consumeSpecialNameChar("E", 47 /* BeforeStyle4 */);
var stateBeforeStyle1 = ifElseState("Y", 45 /* BeforeStyle2 */, 3 /* InTagName */);
var stateBeforeStyle2 = ifElseState("L", 46 /* BeforeStyle3 */, 3 /* InTagName */);
var stateBeforeStyle3 = ifElseState("E", 47 /* BeforeStyle4 */, 3 /* InTagName */);
var stateAfterStyle1 = ifElseState("Y", 49 /* AfterStyle2 */, 1 /* Text */);
var stateAfterStyle2 = ifElseState("L", 50 /* AfterStyle3 */, 1 /* Text */);
var stateAfterStyle3 = ifElseState("E", 51 /* AfterStyle4 */, 1 /* Text */);
var stateBeforeSpecialT = consumeSpecialNameChar("I", 54 /* BeforeTitle1 */);
var stateBeforeTitle1 = consumeSpecialNameChar("T", 55 /* BeforeTitle2 */);
var stateBeforeTitle2 = consumeSpecialNameChar("L", 56 /* BeforeTitle3 */);
var stateBeforeTitle3 = consumeSpecialNameChar("E", 57 /* BeforeTitle4 */);
var stateAfterSpecialTEnd = ifElseState("I", 58 /* AfterTitle1 */, 1 /* Text */);
var stateBeforeSpecialT = ifElseState("I", 54 /* BeforeTitle1 */, 3 /* InTagName */);
var stateBeforeTitle1 = ifElseState("T", 55 /* BeforeTitle2 */, 3 /* InTagName */);
var stateBeforeTitle2 = ifElseState("L", 56 /* BeforeTitle3 */, 3 /* InTagName */);
var stateBeforeTitle3 = ifElseState("E", 57 /* BeforeTitle4 */, 3 /* InTagName */);
var stateBeforeSpecialTEnd = ifElseState("I", 58 /* AfterTitle1 */, 1 /* Text */);
var stateAfterTitle1 = ifElseState("T", 59 /* AfterTitle2 */, 1 /* Text */);
var stateAfterTitle2 = ifElseState("L", 60 /* AfterTitle3 */, 1 /* Text */);
var stateAfterTitle3 = ifElseState("E", 61 /* AfterTitle4 */, 1 /* Text */);
var stateBeforeEntity = ifElseState("#", 63 /* BeforeNumericEntity */, 64 /* InNamedEntity */);
var stateBeforeNumericEntity = ifElseState("X", 66 /* InHexEntity */, 65 /* InNumericEntity */);

@@ -87,2 +67,3 @@ var Tokenizer = /** @class */ (function () {

var _a;
this.cbs = cbs;
/** The current state the tokenizer is in. */

@@ -109,5 +90,10 @@ this._state = 1 /* Text */;

this.ended = false;
this.trieIndex = 0;
this.trieCurrent = 0;
this.trieResult = null;
this.trieExcess = 0;
this.cbs = cbs;
this.xmlMode = !!(options === null || options === void 0 ? void 0 : options.xmlMode);
this.decodeEntities = (_a = options === null || options === void 0 ? void 0 : options.decodeEntities) !== null && _a !== void 0 ? _a : true;
this.entityTrie = this.xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
}

@@ -153,2 +139,8 @@ Tokenizer.prototype.reset = function () {

/**
* The start of the current section.
*/
Tokenizer.prototype.getAbsoluteSectionStart = function () {
return this.sectionStart + this.bufferOffset;
};
/**
* The current index within all of the written data.

@@ -160,3 +152,3 @@ */

Tokenizer.prototype.stateText = function (c) {
if (c === "<") {
if (c === 60 /* Lt */) {
if (this._index > this.sectionStart) {

@@ -169,3 +161,3 @@ this.cbs.ontext(this.getSection());

else if (this.decodeEntities &&
c === "&" &&
c === 38 /* Amp */ &&
(this.special === 1 /* None */ || this.special === 4 /* Title */)) {

@@ -188,13 +180,16 @@ if (this._index > this.sectionStart) {

return (isASCIIAlpha(c) ||
(this.xmlMode && !whitespace(c) && c !== "/" && c !== ">"));
(this.xmlMode &&
!whitespace(c) &&
c !== 47 /* Slash */ &&
c !== 62 /* Gt */));
};
Tokenizer.prototype.stateBeforeTagName = function (c) {
if (c === "/") {
if (c === 47 /* Slash */) {
this._state = 5 /* BeforeClosingTagName */;
}
else if (c === "<") {
else if (c === 60 /* Lt */) {
this.cbs.ontext(this.getSection());
this.sectionStart = this._index;
}
else if (c === ">" ||
else if (c === 62 /* Gt */ ||
this.special !== 1 /* None */ ||

@@ -204,7 +199,7 @@ whitespace(c)) {

}
else if (c === "!") {
else if (c === 33 /* ExclamationMark */) {
this._state = 15 /* BeforeDeclaration */;
this.sectionStart = this._index + 1;
}
else if (c === "?") {
else if (c === 63 /* Questionmark */) {
this._state = 17 /* InProcessingInstruction */;

@@ -218,5 +213,7 @@ this.sectionStart = this._index + 1;

this._state =
!this.xmlMode && (c === "s" || c === "S")
!this.xmlMode &&
(c === 115 /* LowerS */ || c === 83 /* UpperS */)
? 32 /* BeforeSpecialS */
: !this.xmlMode && (c === "t" || c === "T")
: !this.xmlMode &&
(c === 116 /* LowerT */ || c === 84 /* UpperT */)
? 52 /* BeforeSpecialT */

@@ -228,6 +225,7 @@ : 3 /* InTagName */;

Tokenizer.prototype.stateInTagName = function (c) {
if (c === "/" || c === ">" || whitespace(c)) {
this.emitToken("onopentagname");
if (c === 47 /* Slash */ || c === 62 /* Gt */ || whitespace(c)) {
this.cbs.onopentagname(this.getSection());
this.sectionStart = -1;
this._state = 8 /* BeforeAttributeName */;
this._index--;
this.stateBeforeAttributeName(c);
}

@@ -239,11 +237,12 @@ };

}
else if (c === ">") {
else if (c === 62 /* Gt */) {
this._state = 1 /* Text */;
}
else if (this.special !== 1 /* None */) {
if (this.special !== 4 /* Title */ && (c === "s" || c === "S")) {
if (this.special !== 4 /* Title */ &&
(c === 115 /* LowerS */ || c === 83 /* UpperS */)) {
this._state = 33 /* BeforeSpecialSEnd */;
}
else if (this.special === 4 /* Title */ &&
(c === "t" || c === "T")) {
(c === 116 /* LowerT */ || c === 84 /* UpperT */)) {
this._state = 53 /* BeforeSpecialTEnd */;

@@ -253,3 +252,3 @@ }

this._state = 1 /* Text */;
this._index--;
this.stateText(c);
}

@@ -267,6 +266,7 @@ }

Tokenizer.prototype.stateInClosingTagName = function (c) {
if (c === ">" || whitespace(c)) {
this.emitToken("onclosetag");
if (c === 62 /* Gt */ || whitespace(c)) {
this.cbs.onclosetag(this.getSection());
this.sectionStart = -1;
this._state = 7 /* AfterClosingTagName */;
this._index--;
this.stateAfterClosingTagName(c);
}

@@ -276,3 +276,3 @@ };

// Skip everything until ">"
if (c === ">") {
if (c === 62 /* Gt */) {
this._state = 1 /* Text */;

@@ -283,3 +283,3 @@ this.sectionStart = this._index + 1;

Tokenizer.prototype.stateBeforeAttributeName = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
this.cbs.onopentagend();

@@ -289,3 +289,3 @@ this._state = 1 /* Text */;

}
else if (c === "/") {
else if (c === 47 /* Slash */) {
this._state = 4 /* InSelfClosingTag */;

@@ -299,3 +299,3 @@ }

Tokenizer.prototype.stateInSelfClosingTag = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
this.cbs.onselfclosingtag();

@@ -308,21 +308,24 @@ this._state = 1 /* Text */;

this._state = 8 /* BeforeAttributeName */;
this._index--;
this.stateBeforeAttributeName(c);
}
};
Tokenizer.prototype.stateInAttributeName = function (c) {
if (c === "=" || c === "/" || c === ">" || whitespace(c)) {
if (c === 61 /* Eq */ ||
c === 47 /* Slash */ ||
c === 62 /* Gt */ ||
whitespace(c)) {
this.cbs.onattribname(this.getSection());
this.sectionStart = -1;
this._state = 10 /* AfterAttributeName */;
this._index--;
this.stateAfterAttributeName(c);
}
};
Tokenizer.prototype.stateAfterAttributeName = function (c) {
if (c === "=") {
if (c === 61 /* Eq */) {
this._state = 11 /* BeforeAttributeValue */;
}
else if (c === "/" || c === ">") {
else if (c === 47 /* Slash */ || c === 62 /* Gt */) {
this.cbs.onattribend(undefined);
this._state = 8 /* BeforeAttributeName */;
this._index--;
this.stateBeforeAttributeName(c);
}

@@ -336,7 +339,7 @@ else if (!whitespace(c)) {

Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
if (c === '"') {
if (c === 34 /* DoubleQuote */) {
this._state = 12 /* InAttributeValueDq */;
this.sectionStart = this._index + 1;
}
else if (c === "'") {
else if (c === 39 /* SingleQuote */) {
this._state = 13 /* InAttributeValueSq */;

@@ -346,5 +349,5 @@ this.sectionStart = this._index + 1;

else if (!whitespace(c)) {
this.sectionStart = this._index;
this._state = 14 /* InAttributeValueNq */;
this.sectionStart = this._index;
this._index--; // Reconsume token
this.stateInAttributeValueNoQuotes(c); // Reconsume token
}

@@ -354,8 +357,9 @@ };

if (c === quote) {
this.emitToken("onattribdata");
this.cbs.onattribend(quote);
this.cbs.onattribdata(this.getSection());
this.sectionStart = -1;
this.cbs.onattribend(String.fromCharCode(quote));
this._state = 8 /* BeforeAttributeName */;
}
else if (this.decodeEntities && c === "&") {
this.emitToken("onattribdata");
else if (this.decodeEntities && c === 38 /* Amp */) {
this.cbs.onattribdata(this.getSection());
this.baseState = this._state;

@@ -367,16 +371,17 @@ this._state = 62 /* BeforeEntity */;

Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
this.handleInAttributeValue(c, '"');
this.handleInAttributeValue(c, 34 /* DoubleQuote */);
};
Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
this.handleInAttributeValue(c, "'");
this.handleInAttributeValue(c, 39 /* SingleQuote */);
};
Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
if (whitespace(c) || c === ">") {
this.emitToken("onattribdata");
if (whitespace(c) || c === 62 /* Gt */) {
this.cbs.onattribdata(this.getSection());
this.sectionStart = -1;
this.cbs.onattribend(null);
this._state = 8 /* BeforeAttributeName */;
this._index--;
this.stateBeforeAttributeName(c);
}
else if (this.decodeEntities && c === "&") {
this.emitToken("onattribdata");
else if (this.decodeEntities && c === 38 /* Amp */) {
this.cbs.onattribdata(this.getSection());
this.baseState = this._state;

@@ -389,5 +394,5 @@ this._state = 62 /* BeforeEntity */;

this._state =
c === "["
c === 91 /* OpeningSquareBracket */
? 23 /* BeforeCdata1 */
: c === "-"
: c === 45 /* Dash */
? 18 /* BeforeComment */

@@ -397,3 +402,3 @@ : 16 /* InDeclaration */;

Tokenizer.prototype.stateInDeclaration = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
this.cbs.ondeclaration(this.getSection());

@@ -405,3 +410,3 @@ this._state = 1 /* Text */;

Tokenizer.prototype.stateInProcessingInstruction = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
this.cbs.onprocessinginstruction(this.getSection());

@@ -413,3 +418,3 @@ this._state = 1 /* Text */;

Tokenizer.prototype.stateBeforeComment = function (c) {
if (c === "-") {
if (c === 45 /* Dash */) {
this._state = 19 /* InComment */;

@@ -423,7 +428,7 @@ this.sectionStart = this._index + 1;

Tokenizer.prototype.stateInComment = function (c) {
if (c === "-")
if (c === 45 /* Dash */)
this._state = 21 /* AfterComment1 */;
};
Tokenizer.prototype.stateInSpecialComment = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
this.cbs.oncomment(this.buffer.substring(this.sectionStart, this._index));

@@ -435,3 +440,3 @@ this._state = 1 /* Text */;

Tokenizer.prototype.stateAfterComment1 = function (c) {
if (c === "-") {
if (c === 45 /* Dash */) {
this._state = 22 /* AfterComment2 */;

@@ -444,3 +449,3 @@ }

Tokenizer.prototype.stateAfterComment2 = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
// Remove 2 trailing chars

@@ -451,3 +456,3 @@ this.cbs.oncomment(this.buffer.substring(this.sectionStart, this._index - 2));

}
else if (c !== "-") {
else if (c !== 45 /* Dash */) {
this._state = 19 /* InComment */;

@@ -458,3 +463,3 @@ }

Tokenizer.prototype.stateBeforeCdata6 = function (c) {
if (c === "[") {
if (c === 91 /* OpeningSquareBracket */) {
this._state = 29 /* InCdata */;

@@ -465,11 +470,11 @@ this.sectionStart = this._index + 1;

this._state = 16 /* InDeclaration */;
this._index--;
this.stateInDeclaration(c);
}
};
Tokenizer.prototype.stateInCdata = function (c) {
if (c === "]")
if (c === 93 /* ClosingSquareBracket */)
this._state = 30 /* AfterCdata1 */;
};
Tokenizer.prototype.stateAfterCdata1 = function (c) {
if (c === "]")
if (c === 93 /* ClosingSquareBracket */)
this._state = 31 /* AfterCdata2 */;

@@ -480,3 +485,3 @@ else

Tokenizer.prototype.stateAfterCdata2 = function (c) {
if (c === ">") {
if (c === 62 /* Gt */) {
// Remove 2 trailing chars

@@ -487,3 +492,3 @@ this.cbs.oncdata(this.buffer.substring(this.sectionStart, this._index - 2));

}
else if (c !== "]") {
else if (c !== 93 /* ClosingSquareBracket */) {
this._state = 29 /* InCdata */;

@@ -494,6 +499,6 @@ }

Tokenizer.prototype.stateBeforeSpecialS = function (c) {
if (c === "c" || c === "C") {
if (c === 99 /* LowerC */ || c === 67 /* UpperC */) {
this._state = 34 /* BeforeScript1 */;
}
else if (c === "t" || c === "T") {
else if (c === 116 /* LowerT */ || c === 84 /* UpperT */) {
this._state = 44 /* BeforeStyle1 */;

@@ -503,10 +508,12 @@ }

this._state = 3 /* InTagName */;
this._index--; // Consume the token again
this.stateInTagName(c); // Consume the token again
}
};
Tokenizer.prototype.stateBeforeSpecialSEnd = function (c) {
if (this.special === 2 /* Script */ && (c === "c" || c === "C")) {
if (this.special === 2 /* Script */ &&
(c === 99 /* LowerC */ || c === 67 /* UpperC */)) {
this._state = 39 /* AfterScript1 */;
}
else if (this.special === 3 /* Style */ && (c === "t" || c === "T")) {
else if (this.special === 3 /* Style */ &&
(c === 116 /* LowerT */ || c === 84 /* UpperT */)) {
this._state = 48 /* AfterStyle1 */;

@@ -518,14 +525,14 @@ }

Tokenizer.prototype.stateBeforeSpecialLast = function (c, special) {
if (c === "/" || c === ">" || whitespace(c)) {
if (c === 47 /* Slash */ || c === 62 /* Gt */ || whitespace(c)) {
this.special = special;
}
this._state = 3 /* InTagName */;
this._index--; // Consume the token again
this.stateInTagName(c); // Consume the token again
};
Tokenizer.prototype.stateAfterSpecialLast = function (c, sectionStartOffset) {
if (c === ">" || whitespace(c)) {
if (c === 62 /* Gt */ || whitespace(c)) {
this.sectionStart = this._index - sectionStartOffset;
this.special = 1 /* None */;
this._state = 6 /* InClosingTagName */;
this.sectionStart = this._index - sectionStartOffset;
this._index--; // Reconsume the token
this.stateInClosingTagName(c); // Reconsume the token
}

@@ -535,60 +542,56 @@ else

};
// For entities terminated with a semicolon
Tokenizer.prototype.parseFixedEntity = function (map) {
if (map === void 0) { map = this.xmlMode ? xml_json_1.default : entities_json_1.default; }
// Offset = 1
if (this.sectionStart + 1 < this._index) {
var entity = this.buffer.substring(this.sectionStart + 1, this._index);
if (Object.prototype.hasOwnProperty.call(map, entity)) {
this.emitPartial(map[entity]);
this.sectionStart = this._index + 1;
}
Tokenizer.prototype.stateBeforeEntity = function (c) {
if (c === 35 /* Num */) {
this._state = 63 /* BeforeNumericEntity */;
}
};
// Parses legacy entities (without trailing semicolon)
Tokenizer.prototype.parseLegacyEntity = function () {
var start = this.sectionStart + 1;
// The max length of legacy entities is 6
var limit = Math.min(this._index - start, 6);
while (limit >= 2) {
// The min length of legacy entities is 2
var entity = this.buffer.substr(start, limit);
if (Object.prototype.hasOwnProperty.call(legacy_json_1.default, entity)) {
this.emitPartial(legacy_json_1.default[entity]);
this.sectionStart += limit + 1;
return;
}
limit--;
else if (c === 38 /* Amp */) {
// We have two `&` characters in a row. Emit the first one.
this.emitPartial(this.getSection());
this.sectionStart = this._index;
}
else {
this._state = 64 /* InNamedEntity */;
this.trieIndex = 0;
this.trieCurrent = this.entityTrie[0];
this.trieResult = null;
// Start excess with 1 to include the '&'
this.trieExcess = 1;
this._index--;
}
};
Tokenizer.prototype.stateInNamedEntity = function (c) {
if (c === ";") {
this.parseFixedEntity();
// Retry as legacy entity if entity wasn't parsed
if (this.baseState === 1 /* Text */ &&
this.sectionStart + 1 < this._index &&
!this.xmlMode) {
this.parseLegacyEntity();
}
this._state = this.baseState;
this.trieExcess += 1;
this.trieIndex = decode_1.determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
return;
}
else if ((c < "0" || c > "9") && !isASCIIAlpha(c)) {
if (this.xmlMode || this.sectionStart + 1 === this._index) {
// Ignore
this.trieCurrent = this.entityTrie[this.trieIndex];
// If the branch is a value, store it and continue
if (this.trieCurrent & decode_1.BinTrieFlags.HAS_VALUE) {
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== 59 /* Semi */) {
// No need to consider multi-byte values, as the legacy entity is always a single byte
this.trieIndex += 1;
}
else if (this.baseState !== 1 /* Text */) {
if (c !== "=") {
// Parse as legacy entity, without allowing additional characters.
this.parseFixedEntity(legacy_json_1.default);
}
}
else {
this.parseLegacyEntity();
// If this is a surrogate pair, combine the higher bits from the node with the next byte
this.trieResult =
this.trieCurrent & decode_1.BinTrieFlags.MULTI_BYTE
? String.fromCharCode(this.entityTrie[++this.trieIndex], this.entityTrie[++this.trieIndex])
: String.fromCharCode(this.entityTrie[++this.trieIndex]);
this.trieExcess = 0;
}
this._state = this.baseState;
this._index--;
}
};
Tokenizer.prototype.decodeNumericEntity = function (offset, base, strict) {
var sectionStart = this.sectionStart + offset;
/**
 * Finish a named-entity match: emit the value stored in `trieResult` (if any),
 * reposition the section start, and return to the state saved in `baseState`.
 */
Tokenizer.prototype.emitNamedEntity = function () {
// Nothing is emitted when no value was recorded for this entity.
if (this.trieResult) {
this.emitPartial(this.trieResult);
}
// The new section start is computed from the pre-decrement `_index`, so this
// assignment has to stay ahead of the `_index--` below.
// NOTE(review): `trieExcess` appears to count the characters read since the
// last stored value (starting at 1 for the '&'), which would leave those
// characters inside the section as literal text — confirm.
this.sectionStart = this._index - this.trieExcess + 1;
this._state = this.baseState;
// Step back one position; presumably so the current character is handled
// again by the restored state — confirm against the parse loop.
this._index--;
};
Tokenizer.prototype.decodeNumericEntity = function (base, strict) {
var sectionStart = this.sectionStart + 2 + (base >> 4);
if (sectionStart !== this._index) {

@@ -604,8 +607,8 @@ // Parse entity

Tokenizer.prototype.stateInNumericEntity = function (c) {
if (c === ";") {
this.decodeNumericEntity(2, 10, true);
if (c === 59 /* Semi */) {
this.decodeNumericEntity(10, true);
}
else if (c < "0" || c > "9") {
if (!this.xmlMode) {
this.decodeNumericEntity(2, 10, false);
else if (c < 48 /* Zero */ || c > 57 /* Nine */) {
if (this.allowLegacyEntity()) {
this.decodeNumericEntity(10, false);
}

@@ -619,10 +622,10 @@ else {

Tokenizer.prototype.stateInHexEntity = function (c) {
if (c === ";") {
this.decodeNumericEntity(3, 16, true);
if (c === 59 /* Semi */) {
this.decodeNumericEntity(16, true);
}
else if ((c < "a" || c > "f") &&
(c < "A" || c > "F") &&
(c < "0" || c > "9")) {
if (!this.xmlMode) {
this.decodeNumericEntity(3, 16, false);
else if ((c < 97 /* LowerA */ || c > 102 /* LowerF */) &&
(c < 65 /* UpperA */ || c > 70 /* UpperF */) &&
(c < 48 /* Zero */ || c > 57 /* Nine */)) {
if (this.allowLegacyEntity()) {
this.decodeNumericEntity(16, false);
}

@@ -635,2 +638,5 @@ else {

};
/**
 * Whether an entity that is not terminated by a semicolon may still be decoded
 * at the current position (the entity states consult this before accepting one).
 *
 * @returns `true` only when `xmlMode` is falsy and `baseState` is 1 (Text).
 */
Tokenizer.prototype.allowLegacyEntity = function () {
return !this.xmlMode && this.baseState === 1 /* Text */;
};
Tokenizer.prototype.cleanup = function () {

@@ -673,3 +679,3 @@ if (this.sectionStart < 0) {

while (this._index < this.buffer.length && this.running) {
var c = this.buffer.charAt(this._index);
var c = this.buffer.charCodeAt(this._index);
if (this._state === 1 /* Text */) {

@@ -745,3 +751,3 @@ this.stateText(c);

else if (this._state === 53 /* BeforeSpecialTEnd */) {
stateAfterSpecialTEnd(this, c);
stateBeforeSpecialTEnd(this, c);
}

@@ -842,3 +848,3 @@ else if (this._state === 39 /* AfterScript1 */) {

else if (this._state === 62 /* BeforeEntity */) {
stateBeforeEntity(this, c);
this.stateBeforeEntity(c);
}

@@ -903,3 +909,5 @@ else if (this._state === 24 /* BeforeCdata2 */) {

else if (this._state === 64 /* InNamedEntity */ && !this.xmlMode) {
this.parseLegacyEntity();
// Increase excess for EOF
this.trieExcess++;
this.emitNamedEntity();
if (this.sectionStart < this._index) {

@@ -911,3 +919,3 @@ this._state = this.baseState;

else if (this._state === 65 /* InNumericEntity */ && !this.xmlMode) {
this.decodeNumericEntity(2, 10, false);
this.decodeNumericEntity(10, false);
if (this.sectionStart < this._index) {

@@ -919,3 +927,3 @@ this._state = this.baseState;

else if (this._state === 66 /* InHexEntity */ && !this.xmlMode) {
this.decodeNumericEntity(3, 16, false);
this.decodeNumericEntity(16, false);
if (this.sectionStart < this._index) {

@@ -945,6 +953,2 @@ this._state = this.baseState;

};
/**
 * Call the callback stored under `name` on `this.cbs` with the current section,
 * then reset the section start.
 *
 * @param name Key of the callback on `this.cbs` to invoke.
 */
Tokenizer.prototype.emitToken = function (name) {
this.cbs[name](this.getSection());
// -1 presumably marks "no open section" (cleanup tests `sectionStart < 0`) — confirm.
this.sectionStart = -1;
};
Tokenizer.prototype.emitPartial = function (value) {

@@ -951,0 +955,0 @@ if (this.baseState !== 1 /* Text */) {

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "6.1.0",
"version": "7.0.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -38,3 +38,4 @@ "funding": [

"scripts": {
"test": "jest --coverage",
"test": "npm run test:jest && npm run lint",
"test:jest": "jest",
"lint": "npm run lint:es && npm run lint:prettier",

@@ -54,15 +55,15 @@ "lint:es": "eslint src",

"domutils": "^2.5.2",
"entities": "^2.0.0"
"entities": "^3.0.1"
},
"devDependencies": {
"@types/jest": "^26.0.0",
"@types/node": "^14.0.5",
"@typescript-eslint/eslint-plugin": "^4.9.1",
"@typescript-eslint/parser": "^4.9.1",
"eslint": "^7.15.0",
"@types/jest": "^27.0.1",
"@types/node": "^16.6.2",
"@typescript-eslint/eslint-plugin": "^4.29.2",
"@typescript-eslint/parser": "^4.29.2",
"eslint": "^7.32.0",
"eslint-config-prettier": "^8.1.0",
"jest": "^26.0.1",
"jest": "^27.0.1",
"prettier": "^2.1.1",
"ts-jest": "^26.0.0",
"typescript": "^4.0.2"
"ts-jest": "^27.0.5",
"typescript": "^4.3.5"
},

@@ -69,0 +70,0 @@ "jest": {

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc