🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
Book a Demo | Install | Sign in
Socket

htmlparser2

Package Overview
Dependencies
Maintainers
1
Versions
77
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

htmlparser2 - npm Package Compare versions

Comparing version

to
7.1.0

58

lib/FeedHandler.d.ts

@@ -1,49 +0,5 @@

import DomHandler, { DomHandlerOptions, Node } from "domhandler";
import DomHandler, { DomHandlerOptions } from "domhandler";
import { getFeed, Feed } from "domutils";
import { ParserOptions } from "./Parser";
declare enum FeedItemMediaMedium {
image = 0,
audio = 1,
video = 2,
document = 3,
executable = 4
}
declare enum FeedItemMediaExpression {
sample = 0,
full = 1,
nonstop = 2
}
interface FeedItemMedia {
url?: string;
fileSize?: number;
type?: string;
medium: FeedItemMediaMedium | undefined;
isDefault: boolean;
expression?: FeedItemMediaExpression;
bitrate?: number;
framerate?: number;
samplingrate?: number;
channels?: number;
duration?: number;
height?: number;
width?: number;
lang?: string;
}
interface FeedItem {
id?: string;
title?: string;
link?: string;
description?: string;
pubDate?: Date;
media?: FeedItemMedia[];
}
interface Feed {
type?: string;
id?: string;
title?: string;
link?: string;
description?: string;
updated?: Date;
author?: string;
items?: FeedItem[];
}
export { getFeed };
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */

@@ -61,9 +17,2 @@ export declare class FeedHandler extends DomHandler {

/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
export declare function getFeed(dom: Node[]): Feed | null;
/**
* Parse a feed.

@@ -75,3 +24,2 @@ *

export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | null;
export {};
//# sourceMappingURL=FeedHandler.d.ts.map

@@ -17,21 +17,2 @@ "use strict";

})();
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -41,20 +22,7 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

Object.defineProperty(exports, "__esModule", { value: true });
exports.parseFeed = exports.getFeed = exports.FeedHandler = void 0;
exports.parseFeed = exports.FeedHandler = exports.getFeed = void 0;
var domhandler_1 = __importDefault(require("domhandler"));
var DomUtils = __importStar(require("domutils"));
var domutils_1 = require("domutils");
Object.defineProperty(exports, "getFeed", { enumerable: true, get: function () { return domutils_1.getFeed; } });
var Parser_1 = require("./Parser");
var FeedItemMediaMedium;
(function (FeedItemMediaMedium) {
FeedItemMediaMedium[FeedItemMediaMedium["image"] = 0] = "image";
FeedItemMediaMedium[FeedItemMediaMedium["audio"] = 1] = "audio";
FeedItemMediaMedium[FeedItemMediaMedium["video"] = 2] = "video";
FeedItemMediaMedium[FeedItemMediaMedium["document"] = 3] = "document";
FeedItemMediaMedium[FeedItemMediaMedium["executable"] = 4] = "executable";
})(FeedItemMediaMedium || (FeedItemMediaMedium = {}));
var FeedItemMediaExpression;
(function (FeedItemMediaExpression) {
FeedItemMediaExpression[FeedItemMediaExpression["sample"] = 0] = "sample";
FeedItemMediaExpression[FeedItemMediaExpression["full"] = 1] = "full";
FeedItemMediaExpression[FeedItemMediaExpression["nonstop"] = 2] = "nonstop";
})(FeedItemMediaExpression || (FeedItemMediaExpression = {}));
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */

@@ -78,3 +46,3 @@ var FeedHandler = /** @class */ (function (_super) {

FeedHandler.prototype.onend = function () {
var feed = getFeed(this.dom);
var feed = (0, domutils_1.getFeed)(this.dom);
if (feed) {

@@ -92,151 +60,2 @@ this.feed = feed;

/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
function getFeed(dom) {
var _a, _b;
var feedRoot = getOneElement(isValidFeed, dom);
if (!feedRoot)
return null;
var feed = {};
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
if (href) {
feed.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
entry.link = href;
}
var description = fetch("summary", children) || fetch("content", children);
if (description) {
entry.description = description;
}
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
}
return feed;
}
exports.getFeed = getFeed;
function getMediaElements(where) {
return getElements("media:content", where).map(function (elem) {
var media = {
medium: elem.attribs.medium,
isDefault: !!elem.attribs.isDefault,
};
if (elem.attribs.url) {
media.url = elem.attribs.url;
}
if (elem.attribs.fileSize) {
media.fileSize = parseInt(elem.attribs.fileSize, 10);
}
if (elem.attribs.type) {
media.type = elem.attribs.type;
}
if (elem.attribs.expression) {
media.expression = elem.attribs
.expression;
}
if (elem.attribs.bitrate) {
media.bitrate = parseInt(elem.attribs.bitrate, 10);
}
if (elem.attribs.framerate) {
media.framerate = parseInt(elem.attribs.framerate, 10);
}
if (elem.attribs.samplingrate) {
media.samplingrate = parseInt(elem.attribs.samplingrate, 10);
}
if (elem.attribs.channels) {
media.channels = parseInt(elem.attribs.channels, 10);
}
if (elem.attribs.duration) {
media.duration = parseInt(elem.attribs.duration, 10);
}
if (elem.attribs.height) {
media.height = parseInt(elem.attribs.height, 10);
}
if (elem.attribs.width) {
media.width = parseInt(elem.attribs.width, 10);
}
if (elem.attribs.lang) {
media.lang = elem.attribs.lang;
}
return media;
});
}
function getElements(tagName, where) {
return DomUtils.getElementsByTagName(tagName, where, true);
}
function getOneElement(tagName, node) {
return DomUtils.getElementsByTagName(tagName, node, true, 1)[0];
}
function fetch(tagName, where, recurse) {
if (recurse === void 0) { recurse = false; }
return DomUtils.textContent(DomUtils.getElementsByTagName(tagName, where, recurse, 1)).trim();
}
function getAttribute(name, elem) {
if (!elem) {
return null;
}
var attribs = elem.attribs;
return attribs[name];
}
function addConditionally(obj, prop, what, where, recurse) {
if (recurse === void 0) { recurse = false; }
var tmp = fetch(what, where, recurse);
if (tmp)
obj[prop] = tmp;
}
function isValidFeed(value) {
return value === "rss" || value === "feed" || value === "rdf:RDF";
}
/**
* Parse a feed.

@@ -251,4 +70,4 @@ *

new Parser_1.Parser(handler, options).end(feed);
return getFeed(handler.dom);
return (0, domutils_1.getFeed)(handler.dom);
}
exports.parseFeed = parseFeed;

26

lib/Parser.d.ts

@@ -60,3 +60,3 @@ import Tokenizer from "./Tokenizer";

onerror(error: Error): void;
onclosetag(name: string): void;
onclosetag(name: string, isImplied: boolean): void;
onopentagname(name: string): void;

@@ -72,3 +72,3 @@ /**

[s: string]: string;
}): void;
}, isImplied: boolean): void;
ontext(data: string): void;

@@ -87,2 +87,7 @@ oncomment(data: string): void;

endIndex: number;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
private openTagStart;
private tagname;

@@ -99,19 +104,34 @@ private attribname;

constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
private updatePosition;
/** @internal */
ontext(data: string): void;
protected isVoidElement(name: string): boolean;
/** @internal */
onopentagname(name: string): void;
private emitOpenTag;
private endOpenTag;
/** @internal */
onopentagend(): void;
/** @internal */
onclosetag(name: string): void;
/** @internal */
onselfclosingtag(): void;
private closeCurrentTag;
/** @internal */
onattribname(name: string): void;
/** @internal */
onattribdata(value: string): void;
/** @internal */
onattribend(quote: string | undefined | null): void;
private getInstructionName;
/** @internal */
ondeclaration(value: string): void;
/** @internal */
onprocessinginstruction(value: string): void;
/** @internal */
oncomment(value: string): void;
/** @internal */
oncdata(value: string): void;
/** @internal */
onerror(err: Error): void;
/** @internal */
onend(): void;

@@ -118,0 +138,0 @@ /**

@@ -99,3 +99,3 @@ "use strict";

"annotation-xml",
"foreignObject",
"foreignobject",
"desc",

@@ -114,2 +114,7 @@ "title",

this.endIndex = 0;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
this.openTagStart = 0;
this.tagname = "";

@@ -121,3 +126,2 @@ this.attribname = "";

this.foreignContext = [];
this.options = options;
this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};

@@ -130,12 +134,10 @@ this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;

}
Parser.prototype.updatePosition = function (offset) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();
};
// Tokenizer event handlers
/** @internal */
Parser.prototype.ontext = function (data) {
var _a, _b;
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
var idx = this.tokenizer.getAbsoluteIndex();
this.endIndex = idx;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
this.startIndex = idx;
};

@@ -145,8 +147,13 @@ Parser.prototype.isVoidElement = function (name) {

};
/** @internal */
Parser.prototype.onopentagname = function (name) {
var _a, _b, _c, _d;
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
this.emitOpenTag(name);
};
Parser.prototype.emitOpenTag = function (name) {
var _a, _b, _c, _d;
this.openTagStart = this.startIndex;
this.tagname = name;

@@ -158,3 +165,3 @@ var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);

var el = this.stack.pop();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el);
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el, true);
}

@@ -175,16 +182,25 @@ }

};
Parser.prototype.onopentagend = function () {
Parser.prototype.endOpenTag = function (isImplied) {
var _a, _b;
this.startIndex = this.openTagStart;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.attribs) {
(_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs);
(_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);
this.attribs = null;
}
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname);
this.cbs.onclosetag(this.tagname, true);
}
this.tagname = "";
};
/** @internal */
Parser.prototype.onopentagend = function () {
this.endOpenTag(false);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onclosetag = function (name) {
this.updatePosition(1);
var _a, _b, _c, _d, _e, _f;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.lowerCaseTagNames) {

@@ -197,10 +213,10 @@ name = name.toLowerCase();

}
if (this.stack.length && !this.isVoidElement(name)) {
if (!this.isVoidElement(name)) {
var pos = this.stack.lastIndexOf(name);
if (pos !== -1) {
if (this.cbs.onclosetag) {
pos = this.stack.length - pos;
while (pos--) {
var count = this.stack.length - pos;
while (count--) {
// We know the stack has sufficient elements.
this.cbs.onclosetag(this.stack.pop());
this.cbs.onclosetag(this.stack.pop(), pos !== 0);
}

@@ -211,12 +227,17 @@ }

}
else if (name === "p" && !this.options.xmlMode) {
this.onopentagname(name);
this.closeCurrentTag();
else if (!this.options.xmlMode && name === "p") {
this.emitOpenTag(name);
this.closeCurrentTag(true);
}
}
else if (!this.options.xmlMode && (name === "br" || name === "p")) {
this.onopentagname(name);
this.closeCurrentTag();
else if (!this.options.xmlMode && name === "br") {
// We can't go through `emitOpenTag` here, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name);
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, name, {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, name, false);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onselfclosingtag = function () {

@@ -226,22 +247,23 @@ if (this.options.xmlMode ||

this.foreignContext[this.foreignContext.length - 1]) {
this.closeCurrentTag();
this.closeCurrentTag(false);
}
else {
// Ignore the fact that the tag is self-closing.
this.onopentagend();
}
};
Parser.prototype.closeCurrentTag = function () {
Parser.prototype.closeCurrentTag = function (isOpenImplied) {
var _a, _b;
var name = this.tagname;
this.onopentagend();
/*
* Self-closing tags will be on the top of the stack
* (cheaper check than in onclosetag)
*/
this.endOpenTag(isOpenImplied);
// Self-closing tags will be on the top of the stack
if (this.stack[this.stack.length - 1] === name) {
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name);
// If the opening tag isn't implied, the closing tag has to be implied.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
this.stack.pop();
}
};
/** @internal */
Parser.prototype.onattribname = function (name) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
if (this.lowerCaseAttributeNames) {

@@ -252,7 +274,10 @@ name = name.toLowerCase();

};
/** @internal */
Parser.prototype.onattribdata = function (value) {
this.attribvalue += value;
};
/** @internal */
Parser.prototype.onattribend = function (quote) {
var _a, _b;
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote);

@@ -274,25 +299,35 @@ if (this.attribs &&

};
/** @internal */
Parser.prototype.ondeclaration = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_1 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("!" + name_1, "!" + value);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onprocessinginstruction = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_2 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("?" + name_2, "?" + value);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.oncomment = function (value) {
var _a, _b, _c, _d;
this.updatePosition(4);
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, value);
(_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.oncdata = function (value) {
var _a, _b, _c, _d, _e, _f;
this.updatePosition(1);
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.options.xmlMode || this.options.recognizeCDATA) {

@@ -304,5 +339,9 @@ (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);

else {
this.oncomment("[CDATA[" + value + "]]");
(_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[" + value + "]]");
(_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onerror = function (err) {

@@ -312,6 +351,9 @@ var _a, _b;

};
/** @internal */
Parser.prototype.onend = function () {
var _a, _b;
if (this.cbs.onclosetag) {
for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i]))
// Set the end index for all remaining tags
this.endIndex = this.startIndex;
for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i], true))
;

@@ -332,2 +374,4 @@ }

this.stack = [];
this.startIndex = 0;
this.endIndex = 0;
(_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);

@@ -334,0 +378,0 @@ };

@@ -112,6 +112,6 @@ /** All the states the tokenizer can be in. */

private readonly entityTrie;
constructor(options: {
constructor({ xmlMode, decodeEntities, }: {
xmlMode?: boolean;
decodeEntities?: boolean;
} | null, cbs: Callbacks);
}, cbs: Callbacks);
reset(): void;

@@ -179,2 +179,5 @@ write(chunk: string): void;

private allowLegacyEntity;
/**
* Remove data that has already been consumed from the buffer.
*/
private cleanup;

@@ -188,2 +191,3 @@ /**

private finish;
/** Handle any trailing data. */
private handleTrailingData;

@@ -190,0 +194,0 @@ private getSection;

@@ -61,4 +61,4 @@ "use strict";

var Tokenizer = /** @class */ (function () {
function Tokenizer(options, cbs) {
var _a;
function Tokenizer(_a, cbs) {
var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c;
this.cbs = cbs;

@@ -90,6 +90,5 @@ /** The current state the tokenizer is in. */

this.trieExcess = 0;
this.cbs = cbs;
this.xmlMode = !!(options === null || options === void 0 ? void 0 : options.xmlMode);
this.decodeEntities = (_a = options === null || options === void 0 ? void 0 : options.decodeEntities) !== null && _a !== void 0 ? _a : true;
this.entityTrie = this.xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
}

@@ -109,4 +108,7 @@ Tokenizer.prototype.reset = function () {

if (this.ended)
this.cbs.onerror(Error(".write() after done!"));
this.buffer += chunk;
return this.cbs.onerror(Error(".write() after done!"));
if (this.buffer.length)
this.buffer += chunk;
else
this.buffer = chunk;
this.parse();

@@ -116,3 +118,3 @@ };

if (this.ended)
this.cbs.onerror(Error(".end() after done!"));
return this.cbs.onerror(Error(".end() after done!"));
if (chunk)

@@ -527,5 +529,6 @@ this.write(chunk);

this.trieExcess += 1;
this.trieIndex = decode_1.determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
this.trieIndex = (0, decode_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
this._index--;
return;

@@ -557,3 +560,2 @@ }

this._state = this.baseState;
this._index--;
};

@@ -566,4 +568,4 @@ Tokenizer.prototype.decodeNumericEntity = function (base, strict) {

var parsed = parseInt(entity, base);
this.emitPartial(decode_codepoint_1.default(parsed));
this.sectionStart = strict ? this._index + 1 : this._index;
this.emitPartial((0, decode_codepoint_1.default)(parsed));
this.sectionStart = this._index + Number(strict);
}

@@ -605,29 +607,20 @@ this._state = this.baseState;

};
/**
* Remove data that has already been consumed from the buffer.
*/
Tokenizer.prototype.cleanup = function () {
if (this.sectionStart < 0) {
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
// If we are inside of text, emit what we already have.
if (this.running &&
this._state === 1 /* Text */ &&
this.sectionStart !== this._index) {
// TODO: We could emit attribute data here as well.
this.cbs.ontext(this.buffer.substr(this.sectionStart));
this.sectionStart = this._index;
}
else if (this.running) {
if (this._state === 1 /* Text */) {
if (this.sectionStart !== this._index) {
this.cbs.ontext(this.buffer.substr(this.sectionStart));
}
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
}
else if (this.sectionStart === this._index) {
// The section just started
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
}
else {
// Remove everything unnecessary
this.buffer = this.buffer.substr(this.sectionStart);
this._index -= this.sectionStart;
this.bufferOffset += this.sectionStart;
}
var start = this.sectionStart < 0 ? this._index : this.sectionStart;
this.buffer =
start === this.buffer.length ? "" : this.buffer.substr(start);
this._index -= start;
this.bufferOffset += start;
if (this.sectionStart > 0) {
this.sectionStart = 0;

@@ -838,10 +831,7 @@ }

this.stateInNumericEntity(c);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
}
else if (this._state === 63 /* BeforeNumericEntity */) {
else {
// `this._state === State.BeforeNumericEntity`
stateBeforeNumericEntity(this, c);
}
else {
this.cbs.onerror(Error("unknown _state"), this._state);
}
this._index++;

@@ -858,2 +848,3 @@ }

};
/** Handle any trailing data. */
Tokenizer.prototype.handleTrailingData = function () {

@@ -882,29 +873,25 @@ var data = this.buffer.substr(this.sectionStart);

this.decodeNumericEntity(10, false);
if (this.sectionStart < this._index) {
this._state = this.baseState;
this.handleTrailingData();
}
// All trailing data will have been consumed
}
else if (this._state === 66 /* InHexEntity */ && !this.xmlMode) {
this.decodeNumericEntity(16, false);
if (this.sectionStart < this._index) {
this._state = this.baseState;
this.handleTrailingData();
}
// All trailing data will have been consumed
}
else if (this._state !== 3 /* InTagName */ &&
this._state !== 8 /* BeforeAttributeName */ &&
this._state !== 11 /* BeforeAttributeValue */ &&
this._state !== 10 /* AfterAttributeName */ &&
this._state !== 9 /* InAttributeName */ &&
this._state !== 13 /* InAttributeValueSq */ &&
this._state !== 12 /* InAttributeValueDq */ &&
this._state !== 14 /* InAttributeValueNq */ &&
this._state !== 6 /* InClosingTagName */) {
else if (this._state === 3 /* InTagName */ ||
this._state === 8 /* BeforeAttributeName */ ||
this._state === 11 /* BeforeAttributeValue */ ||
this._state === 10 /* AfterAttributeName */ ||
this._state === 9 /* InAttributeName */ ||
this._state === 13 /* InAttributeValueSq */ ||
this._state === 12 /* InAttributeValueDq */ ||
this._state === 14 /* InAttributeValueNq */ ||
this._state === 6 /* InClosingTagName */) {
/*
* If we are currently in an opening or closing tag, not calling the
* respective callback signals that the tag should be ignored.
*/
}
else {
this.cbs.ontext(data);
}
/*
* Else, ignore remaining data
* TODO add a way to remove current tag
*/
};

@@ -916,3 +903,3 @@ Tokenizer.prototype.getSection = function () {

if (this.baseState !== 1 /* Text */) {
this.cbs.onattribdata(value); // TODO implement the new event
this.cbs.onattribdata(value);
}

@@ -919,0 +906,0 @@ else {

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "7.0.0",
"version": "7.1.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -53,3 +53,3 @@ "funding": [

"domhandler": "^4.0.0",
"domutils": "^2.5.2",
"domutils": "^2.8.0",
"entities": "^3.0.1"

@@ -59,11 +59,11 @@ },

"@types/jest": "^27.0.1",
"@types/node": "^16.6.2",
"@typescript-eslint/eslint-plugin": "^4.29.2",
"@typescript-eslint/parser": "^4.29.2",
"@types/node": "^16.7.2",
"@typescript-eslint/eslint-plugin": "^4.29.3",
"@typescript-eslint/parser": "^4.29.3",
"eslint": "^7.32.0",
"eslint-config-prettier": "^8.1.0",
"jest": "^27.0.1",
"jest": "^27.1.0",
"prettier": "^2.1.1",
"ts-jest": "^27.0.5",
"typescript": "^4.3.5"
"typescript": "^4.4.2"
},

@@ -70,0 +70,0 @@ "jest": {

@@ -129,15 +129,18 @@ # htmlparser2

At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):
At the time of writing, the latest versions of all supported parsers show the following performance characteristics on GitHub Actions (sourced from [here](https://github.com/AndreasMadsen/htmlparser-benchmark/blob/e78cd8fc6c2adac08deedd4f274c33537451186b/stats.txt)):
```
gumbo-parser : 34.9208 ms/file ± 21.4238
html-parser : 24.8224 ms/file ± 15.8703
html5 : 419.597 ms/file ± 264.265
htmlparser : 60.0722 ms/file ± 384.844
htmlparser2-dom: 12.0749 ms/file ± 6.49474
htmlparser2 : 7.49130 ms/file ± 5.74368
hubbub : 30.4980 ms/file ± 16.4682
libxmljs : 14.1338 ms/file ± 18.6541
parse5 : 22.0439 ms/file ± 15.3743
sax : 49.6513 ms/file ± 26.6032
htmlparser2 : 2.17215 ms/file ± 3.81587
node-html-parser : 2.35983 ms/file ± 1.54487
html5parser : 2.43468 ms/file ± 2.81501
neutron-html5parser: 2.61356 ms/file ± 1.70324
htmlparser2-dom : 3.09034 ms/file ± 4.77033
html-dom-parser : 3.56804 ms/file ± 5.15621
libxmljs : 4.07490 ms/file ± 2.99869
htmljs-parser : 6.15812 ms/file ± 7.52497
parse5 : 9.70406 ms/file ± 6.74872
htmlparser : 15.0596 ms/file ± 89.0826
html-parser : 28.6282 ms/file ± 22.6652
saxes : 45.7921 ms/file ± 128.691
html5 : 120.844 ms/file ± 153.944
```

@@ -147,5 +150,4 @@

This module started as a fork of the `htmlparser` module.
The main difference is that `htmlparser2` is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)).
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
In 2011, this module started as a fork of the `htmlparser` module.
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.

@@ -152,0 +154,0 @@ The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)).

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet