Socket
Socket
Sign in | Demo | Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 7.0.0 to 7.1.0

58

lib/FeedHandler.d.ts

@@ -1,49 +0,5 @@

import DomHandler, { DomHandlerOptions, Node } from "domhandler";
import DomHandler, { DomHandlerOptions } from "domhandler";
import { getFeed, Feed } from "domutils";
import { ParserOptions } from "./Parser";
declare enum FeedItemMediaMedium {
image = 0,
audio = 1,
video = 2,
document = 3,
executable = 4
}
declare enum FeedItemMediaExpression {
sample = 0,
full = 1,
nonstop = 2
}
interface FeedItemMedia {
url?: string;
fileSize?: number;
type?: string;
medium: FeedItemMediaMedium | undefined;
isDefault: boolean;
expression?: FeedItemMediaExpression;
bitrate?: number;
framerate?: number;
samplingrate?: number;
channels?: number;
duration?: number;
height?: number;
width?: number;
lang?: string;
}
interface FeedItem {
id?: string;
title?: string;
link?: string;
description?: string;
pubDate?: Date;
media?: FeedItemMedia[];
}
interface Feed {
type?: string;
id?: string;
title?: string;
link?: string;
description?: string;
updated?: Date;
author?: string;
items?: FeedItem[];
}
export { getFeed };
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */

@@ -61,9 +17,2 @@ export declare class FeedHandler extends DomHandler {

/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
export declare function getFeed(dom: Node[]): Feed | null;
/**
* Parse a feed.

@@ -75,3 +24,2 @@ *

export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | null;
export {};
//# sourceMappingURL=FeedHandler.d.ts.map

@@ -17,21 +17,2 @@ "use strict";

})();
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -41,20 +22,7 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

Object.defineProperty(exports, "__esModule", { value: true });
exports.parseFeed = exports.getFeed = exports.FeedHandler = void 0;
exports.parseFeed = exports.FeedHandler = exports.getFeed = void 0;
var domhandler_1 = __importDefault(require("domhandler"));
var DomUtils = __importStar(require("domutils"));
var domutils_1 = require("domutils");
Object.defineProperty(exports, "getFeed", { enumerable: true, get: function () { return domutils_1.getFeed; } });
var Parser_1 = require("./Parser");
var FeedItemMediaMedium;
(function (FeedItemMediaMedium) {
FeedItemMediaMedium[FeedItemMediaMedium["image"] = 0] = "image";
FeedItemMediaMedium[FeedItemMediaMedium["audio"] = 1] = "audio";
FeedItemMediaMedium[FeedItemMediaMedium["video"] = 2] = "video";
FeedItemMediaMedium[FeedItemMediaMedium["document"] = 3] = "document";
FeedItemMediaMedium[FeedItemMediaMedium["executable"] = 4] = "executable";
})(FeedItemMediaMedium || (FeedItemMediaMedium = {}));
var FeedItemMediaExpression;
(function (FeedItemMediaExpression) {
FeedItemMediaExpression[FeedItemMediaExpression["sample"] = 0] = "sample";
FeedItemMediaExpression[FeedItemMediaExpression["full"] = 1] = "full";
FeedItemMediaExpression[FeedItemMediaExpression["nonstop"] = 2] = "nonstop";
})(FeedItemMediaExpression || (FeedItemMediaExpression = {}));
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */

@@ -78,3 +46,3 @@ var FeedHandler = /** @class */ (function (_super) {

FeedHandler.prototype.onend = function () {
var feed = getFeed(this.dom);
var feed = (0, domutils_1.getFeed)(this.dom);
if (feed) {

@@ -92,151 +60,2 @@ this.feed = feed;

/**
* Get the feed object from the root of a DOM tree.
*
* @param dom - The DOM to extract the feed from.
* @returns The feed.
*/
function getFeed(dom) {
var _a, _b;
var feedRoot = getOneElement(isValidFeed, dom);
if (!feedRoot)
return null;
var feed = {};
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
if (href) {
feed.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
entry.link = href;
}
var description = fetch("summary", children) || fetch("content", children);
if (description) {
entry.description = description;
}
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
}
return feed;
}
exports.getFeed = getFeed;
function getMediaElements(where) {
return getElements("media:content", where).map(function (elem) {
var media = {
medium: elem.attribs.medium,
isDefault: !!elem.attribs.isDefault,
};
if (elem.attribs.url) {
media.url = elem.attribs.url;
}
if (elem.attribs.fileSize) {
media.fileSize = parseInt(elem.attribs.fileSize, 10);
}
if (elem.attribs.type) {
media.type = elem.attribs.type;
}
if (elem.attribs.expression) {
media.expression = elem.attribs
.expression;
}
if (elem.attribs.bitrate) {
media.bitrate = parseInt(elem.attribs.bitrate, 10);
}
if (elem.attribs.framerate) {
media.framerate = parseInt(elem.attribs.framerate, 10);
}
if (elem.attribs.samplingrate) {
media.samplingrate = parseInt(elem.attribs.samplingrate, 10);
}
if (elem.attribs.channels) {
media.channels = parseInt(elem.attribs.channels, 10);
}
if (elem.attribs.duration) {
media.duration = parseInt(elem.attribs.duration, 10);
}
if (elem.attribs.height) {
media.height = parseInt(elem.attribs.height, 10);
}
if (elem.attribs.width) {
media.width = parseInt(elem.attribs.width, 10);
}
if (elem.attribs.lang) {
media.lang = elem.attribs.lang;
}
return media;
});
}
function getElements(tagName, where) {
return DomUtils.getElementsByTagName(tagName, where, true);
}
function getOneElement(tagName, node) {
return DomUtils.getElementsByTagName(tagName, node, true, 1)[0];
}
function fetch(tagName, where, recurse) {
if (recurse === void 0) { recurse = false; }
return DomUtils.textContent(DomUtils.getElementsByTagName(tagName, where, recurse, 1)).trim();
}
function getAttribute(name, elem) {
if (!elem) {
return null;
}
var attribs = elem.attribs;
return attribs[name];
}
function addConditionally(obj, prop, what, where, recurse) {
if (recurse === void 0) { recurse = false; }
var tmp = fetch(what, where, recurse);
if (tmp)
obj[prop] = tmp;
}
function isValidFeed(value) {
return value === "rss" || value === "feed" || value === "rdf:RDF";
}
/**
* Parse a feed.

@@ -251,4 +70,4 @@ *

new Parser_1.Parser(handler, options).end(feed);
return getFeed(handler.dom);
return (0, domutils_1.getFeed)(handler.dom);
}
exports.parseFeed = parseFeed;

26

lib/Parser.d.ts

@@ -60,3 +60,3 @@ import Tokenizer from "./Tokenizer";

onerror(error: Error): void;
onclosetag(name: string): void;
onclosetag(name: string, isImplied: boolean): void;
onopentagname(name: string): void;

@@ -72,3 +72,3 @@ /**

[s: string]: string;
}): void;
}, isImplied: boolean): void;
ontext(data: string): void;

@@ -87,2 +87,7 @@ oncomment(data: string): void;

endIndex: number;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
private openTagStart;
private tagname;

@@ -99,19 +104,34 @@ private attribname;

constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
private updatePosition;
/** @internal */
ontext(data: string): void;
protected isVoidElement(name: string): boolean;
/** @internal */
onopentagname(name: string): void;
private emitOpenTag;
private endOpenTag;
/** @internal */
onopentagend(): void;
/** @internal */
onclosetag(name: string): void;
/** @internal */
onselfclosingtag(): void;
private closeCurrentTag;
/** @internal */
onattribname(name: string): void;
/** @internal */
onattribdata(value: string): void;
/** @internal */
onattribend(quote: string | undefined | null): void;
private getInstructionName;
/** @internal */
ondeclaration(value: string): void;
/** @internal */
onprocessinginstruction(value: string): void;
/** @internal */
oncomment(value: string): void;
/** @internal */
oncdata(value: string): void;
/** @internal */
onerror(err: Error): void;
/** @internal */
onend(): void;

@@ -118,0 +138,0 @@ /**

@@ -99,3 +99,3 @@ "use strict";

"annotation-xml",
"foreignObject",
"foreignobject",
"desc",

@@ -114,2 +114,7 @@ "title",

this.endIndex = 0;
/**
* Store the start index of the current open tag,
* so we can update the start index for attributes.
*/
this.openTagStart = 0;
this.tagname = "";

@@ -121,3 +126,2 @@ this.attribname = "";

this.foreignContext = [];
this.options = options;
this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};

@@ -130,12 +134,10 @@ this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;

}
Parser.prototype.updatePosition = function (offset) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();
};
// Tokenizer event handlers
/** @internal */
Parser.prototype.ontext = function (data) {
var _a, _b;
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
var idx = this.tokenizer.getAbsoluteIndex();
this.endIndex = idx;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
this.startIndex = idx;
};

@@ -145,8 +147,13 @@ Parser.prototype.isVoidElement = function (name) {

};
/** @internal */
Parser.prototype.onopentagname = function (name) {
var _a, _b, _c, _d;
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
this.emitOpenTag(name);
};
Parser.prototype.emitOpenTag = function (name) {
var _a, _b, _c, _d;
this.openTagStart = this.startIndex;
this.tagname = name;

@@ -158,3 +165,3 @@ var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);

var el = this.stack.pop();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el);
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el, true);
}

@@ -175,16 +182,25 @@ }

};
Parser.prototype.onopentagend = function () {
Parser.prototype.endOpenTag = function (isImplied) {
var _a, _b;
this.startIndex = this.openTagStart;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.attribs) {
(_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs);
(_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);
this.attribs = null;
}
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
this.cbs.onclosetag(this.tagname);
this.cbs.onclosetag(this.tagname, true);
}
this.tagname = "";
};
/** @internal */
Parser.prototype.onopentagend = function () {
this.endOpenTag(false);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onclosetag = function (name) {
this.updatePosition(1);
var _a, _b, _c, _d, _e, _f;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.lowerCaseTagNames) {

@@ -197,10 +213,10 @@ name = name.toLowerCase();

}
if (this.stack.length && !this.isVoidElement(name)) {
if (!this.isVoidElement(name)) {
var pos = this.stack.lastIndexOf(name);
if (pos !== -1) {
if (this.cbs.onclosetag) {
pos = this.stack.length - pos;
while (pos--) {
var count = this.stack.length - pos;
while (count--) {
// We know the stack has sufficient elements.
this.cbs.onclosetag(this.stack.pop());
this.cbs.onclosetag(this.stack.pop(), pos !== 0);
}

@@ -211,12 +227,17 @@ }

}
else if (name === "p" && !this.options.xmlMode) {
this.onopentagname(name);
this.closeCurrentTag();
else if (!this.options.xmlMode && name === "p") {
this.emitOpenTag(name);
this.closeCurrentTag(true);
}
}
else if (!this.options.xmlMode && (name === "br" || name === "p")) {
this.onopentagname(name);
this.closeCurrentTag();
else if (!this.options.xmlMode && name === "br") {
// We can't go through `emitOpenTag` here, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name);
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, name, {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, name, false);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onselfclosingtag = function () {

@@ -226,22 +247,23 @@ if (this.options.xmlMode ||

this.foreignContext[this.foreignContext.length - 1]) {
this.closeCurrentTag();
this.closeCurrentTag(false);
}
else {
// Ignore the fact that the tag is self-closing.
this.onopentagend();
}
};
Parser.prototype.closeCurrentTag = function () {
Parser.prototype.closeCurrentTag = function (isOpenImplied) {
var _a, _b;
var name = this.tagname;
this.onopentagend();
/*
* Self-closing tags will be on the top of the stack
* (cheaper check than in onclosetag)
*/
this.endOpenTag(isOpenImplied);
// Self-closing tags will be on the top of the stack
if (this.stack[this.stack.length - 1] === name) {
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name);
// If the opening tag isn't implied, the closing tag has to be implied.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
this.stack.pop();
}
};
/** @internal */
Parser.prototype.onattribname = function (name) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
if (this.lowerCaseAttributeNames) {

@@ -252,7 +274,10 @@ name = name.toLowerCase();

};
/** @internal */
Parser.prototype.onattribdata = function (value) {
this.attribvalue += value;
};
/** @internal */
Parser.prototype.onattribend = function (quote) {
var _a, _b;
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote);

@@ -274,25 +299,35 @@ if (this.attribs &&

};
/** @internal */
Parser.prototype.ondeclaration = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_1 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("!" + name_1, "!" + value);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onprocessinginstruction = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
var name_2 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("?" + name_2, "?" + value);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.oncomment = function (value) {
var _a, _b, _c, _d;
this.updatePosition(4);
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, value);
(_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.oncdata = function (value) {
var _a, _b, _c, _d, _e, _f;
this.updatePosition(1);
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.options.xmlMode || this.options.recognizeCDATA) {

@@ -304,5 +339,9 @@ (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);

else {
this.oncomment("[CDATA[" + value + "]]");
(_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[" + value + "]]");
(_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
};
/** @internal */
Parser.prototype.onerror = function (err) {

@@ -312,6 +351,9 @@ var _a, _b;

};
/** @internal */
Parser.prototype.onend = function () {
var _a, _b;
if (this.cbs.onclosetag) {
for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i]))
// Set the end index for all remaining tags
this.endIndex = this.startIndex;
for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i], true))
;

@@ -332,2 +374,4 @@ }

this.stack = [];
this.startIndex = 0;
this.endIndex = 0;
(_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);

@@ -334,0 +378,0 @@ };

@@ -112,6 +112,6 @@ /** All the states the tokenizer can be in. */

private readonly entityTrie;
constructor(options: {
constructor({ xmlMode, decodeEntities, }: {
xmlMode?: boolean;
decodeEntities?: boolean;
} | null, cbs: Callbacks);
}, cbs: Callbacks);
reset(): void;

@@ -179,2 +179,5 @@ write(chunk: string): void;

private allowLegacyEntity;
/**
* Remove data that has already been consumed from the buffer.
*/
private cleanup;

@@ -188,2 +191,3 @@ /**

private finish;
/** Handle any trailing data. */
private handleTrailingData;

@@ -190,0 +194,0 @@ private getSection;

@@ -61,4 +61,4 @@ "use strict";

var Tokenizer = /** @class */ (function () {
function Tokenizer(options, cbs) {
var _a;
function Tokenizer(_a, cbs) {
var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c;
this.cbs = cbs;

@@ -90,6 +90,5 @@ /** The current state the tokenizer is in. */

this.trieExcess = 0;
this.cbs = cbs;
this.xmlMode = !!(options === null || options === void 0 ? void 0 : options.xmlMode);
this.decodeEntities = (_a = options === null || options === void 0 ? void 0 : options.decodeEntities) !== null && _a !== void 0 ? _a : true;
this.entityTrie = this.xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
}

@@ -109,4 +108,7 @@ Tokenizer.prototype.reset = function () {

if (this.ended)
this.cbs.onerror(Error(".write() after done!"));
this.buffer += chunk;
return this.cbs.onerror(Error(".write() after done!"));
if (this.buffer.length)
this.buffer += chunk;
else
this.buffer = chunk;
this.parse();

@@ -116,3 +118,3 @@ };

if (this.ended)
this.cbs.onerror(Error(".end() after done!"));
return this.cbs.onerror(Error(".end() after done!"));
if (chunk)

@@ -527,5 +529,6 @@ this.write(chunk);

this.trieExcess += 1;
this.trieIndex = decode_1.determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
this.trieIndex = (0, decode_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
this._index--;
return;

@@ -557,3 +560,2 @@ }

this._state = this.baseState;
this._index--;
};

@@ -566,4 +568,4 @@ Tokenizer.prototype.decodeNumericEntity = function (base, strict) {

var parsed = parseInt(entity, base);
this.emitPartial(decode_codepoint_1.default(parsed));
this.sectionStart = strict ? this._index + 1 : this._index;
this.emitPartial((0, decode_codepoint_1.default)(parsed));
this.sectionStart = this._index + Number(strict);
}

@@ -605,29 +607,20 @@ this._state = this.baseState;

};
/**
* Remove data that has already been consumed from the buffer.
*/
Tokenizer.prototype.cleanup = function () {
if (this.sectionStart < 0) {
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
// If we are inside of text, emit what we already have.
if (this.running &&
this._state === 1 /* Text */ &&
this.sectionStart !== this._index) {
// TODO: We could emit attribute data here as well.
this.cbs.ontext(this.buffer.substr(this.sectionStart));
this.sectionStart = this._index;
}
else if (this.running) {
if (this._state === 1 /* Text */) {
if (this.sectionStart !== this._index) {
this.cbs.ontext(this.buffer.substr(this.sectionStart));
}
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
}
else if (this.sectionStart === this._index) {
// The section just started
this.buffer = "";
this.bufferOffset += this._index;
this._index = 0;
}
else {
// Remove everything unnecessary
this.buffer = this.buffer.substr(this.sectionStart);
this._index -= this.sectionStart;
this.bufferOffset += this.sectionStart;
}
var start = this.sectionStart < 0 ? this._index : this.sectionStart;
this.buffer =
start === this.buffer.length ? "" : this.buffer.substr(start);
this._index -= start;
this.bufferOffset += start;
if (this.sectionStart > 0) {
this.sectionStart = 0;

@@ -838,10 +831,7 @@ }

this.stateInNumericEntity(c);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
}
else if (this._state === 63 /* BeforeNumericEntity */) {
else {
// `this._state === State.BeforeNumericEntity`
stateBeforeNumericEntity(this, c);
}
else {
this.cbs.onerror(Error("unknown _state"), this._state);
}
this._index++;

@@ -858,2 +848,3 @@ }

};
/** Handle any trailing data. */
Tokenizer.prototype.handleTrailingData = function () {

@@ -882,29 +873,25 @@ var data = this.buffer.substr(this.sectionStart);

this.decodeNumericEntity(10, false);
if (this.sectionStart < this._index) {
this._state = this.baseState;
this.handleTrailingData();
}
// All trailing data will have been consumed
}
else if (this._state === 66 /* InHexEntity */ && !this.xmlMode) {
this.decodeNumericEntity(16, false);
if (this.sectionStart < this._index) {
this._state = this.baseState;
this.handleTrailingData();
}
// All trailing data will have been consumed
}
else if (this._state !== 3 /* InTagName */ &&
this._state !== 8 /* BeforeAttributeName */ &&
this._state !== 11 /* BeforeAttributeValue */ &&
this._state !== 10 /* AfterAttributeName */ &&
this._state !== 9 /* InAttributeName */ &&
this._state !== 13 /* InAttributeValueSq */ &&
this._state !== 12 /* InAttributeValueDq */ &&
this._state !== 14 /* InAttributeValueNq */ &&
this._state !== 6 /* InClosingTagName */) {
else if (this._state === 3 /* InTagName */ ||
this._state === 8 /* BeforeAttributeName */ ||
this._state === 11 /* BeforeAttributeValue */ ||
this._state === 10 /* AfterAttributeName */ ||
this._state === 9 /* InAttributeName */ ||
this._state === 13 /* InAttributeValueSq */ ||
this._state === 12 /* InAttributeValueDq */ ||
this._state === 14 /* InAttributeValueNq */ ||
this._state === 6 /* InClosingTagName */) {
/*
* If we are currently in an opening or closing tag, not calling the
* respective callback signals that the tag should be ignored.
*/
}
else {
this.cbs.ontext(data);
}
/*
* Else, ignore remaining data
* TODO add a way to remove current tag
*/
};

@@ -916,3 +903,3 @@ Tokenizer.prototype.getSection = function () {

if (this.baseState !== 1 /* Text */) {
this.cbs.onattribdata(value); // TODO implement the new event
this.cbs.onattribdata(value);
}

@@ -919,0 +906,0 @@ else {

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "7.0.0",
"version": "7.1.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -53,3 +53,3 @@ "funding": [

"domhandler": "^4.0.0",
"domutils": "^2.5.2",
"domutils": "^2.8.0",
"entities": "^3.0.1"

@@ -59,11 +59,11 @@ },

"@types/jest": "^27.0.1",
"@types/node": "^16.6.2",
"@typescript-eslint/eslint-plugin": "^4.29.2",
"@typescript-eslint/parser": "^4.29.2",
"@types/node": "^16.7.2",
"@typescript-eslint/eslint-plugin": "^4.29.3",
"@typescript-eslint/parser": "^4.29.3",
"eslint": "^7.32.0",
"eslint-config-prettier": "^8.1.0",
"jest": "^27.0.1",
"jest": "^27.1.0",
"prettier": "^2.1.1",
"ts-jest": "^27.0.5",
"typescript": "^4.3.5"
"typescript": "^4.4.2"
},

@@ -70,0 +70,0 @@ "jest": {

@@ -129,15 +129,18 @@ # htmlparser2

At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):
At the time of writing, the latest versions of all supported parsers show the following performance characteristics on GitHub Actions (sourced from [here](https://github.com/AndreasMadsen/htmlparser-benchmark/blob/e78cd8fc6c2adac08deedd4f274c33537451186b/stats.txt)):
```
gumbo-parser : 34.9208 ms/file ± 21.4238
html-parser : 24.8224 ms/file ± 15.8703
html5 : 419.597 ms/file ± 264.265
htmlparser : 60.0722 ms/file ± 384.844
htmlparser2-dom: 12.0749 ms/file ± 6.49474
htmlparser2 : 7.49130 ms/file ± 5.74368
hubbub : 30.4980 ms/file ± 16.4682
libxmljs : 14.1338 ms/file ± 18.6541
parse5 : 22.0439 ms/file ± 15.3743
sax : 49.6513 ms/file ± 26.6032
htmlparser2 : 2.17215 ms/file ± 3.81587
node-html-parser : 2.35983 ms/file ± 1.54487
html5parser : 2.43468 ms/file ± 2.81501
neutron-html5parser: 2.61356 ms/file ± 1.70324
htmlparser2-dom : 3.09034 ms/file ± 4.77033
html-dom-parser : 3.56804 ms/file ± 5.15621
libxmljs : 4.07490 ms/file ± 2.99869
htmljs-parser : 6.15812 ms/file ± 7.52497
parse5 : 9.70406 ms/file ± 6.74872
htmlparser : 15.0596 ms/file ± 89.0826
html-parser : 28.6282 ms/file ± 22.6652
saxes : 45.7921 ms/file ± 128.691
html5 : 120.844 ms/file ± 153.944
```

@@ -147,5 +150,4 @@

This module started as a fork of the `htmlparser` module.
The main difference is that `htmlparser2` is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)).
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
In 2011, this module started as a fork of the `htmlparser` module.
`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.

@@ -152,0 +154,0 @@ The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)).

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket | SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc