htmlparser2 - npm Package Compare versions

21

lib/esm/index.d.ts

		@@ -10,3 +10,3 @@ import { Parser, ParserOptions } from "./Parser.js";
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		*/
		@@ -21,3 +21,3 @@ export declare function parseDocument(data: string, options?: Options): Document;
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		* @deprecated Use `parseDocument` instead.
		@@ -29,11 +29,20 @@ */
		*
		* @param callback A callback that will be called once parsing has been completed.
		* @param options Optional options for the parser and DOM builder.
		* @param callback A callback that will be called once parsing has been completed, with the resulting document.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		*/
		export declare function createDocumentStream(callback: (error: Error | null, document: Document) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
		/**
		* Creates a parser instance, with an attached DOM handler.
		*
		* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		* @deprecated Use `createDocumentStream` instead.
		*/
		export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
		export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
		export * as ElementType from "domelementtype";
		import { Feed } from "domutils";
		export { getFeed } from "domutils";
		import { type Feed } from "domutils";
		export { getFeed, type Feed } from "domutils";
		/**
		@@ -40,0 +49,0 @@ * Parse a feed.

20

lib/esm/index.js

		@@ -12,3 +12,3 @@ import { Parser } from "./Parser.js";
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		*/
		@@ -27,3 +27,3 @@ export function parseDocument(data, options) {
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		* @deprecated Use `parseDocument` instead.
		@@ -37,6 +37,18 @@ */
		*
		* @param callback A callback that will be called once parsing has been completed.
		* @param options Optional options for the parser and DOM builder.
		* @param callback A callback that will be called once parsing has been completed, with the resulting document.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		*/
		export function createDocumentStream(callback, options, elementCallback) {
		const handler = new DomHandler((error) => callback(error, handler.root), options, elementCallback);
		return new Parser(handler, options);
		}
		/**
		* Creates a parser instance, with an attached DOM handler.
		*
		* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		* @deprecated Use `createDocumentStream` instead.
		*/
		export function createDomStream(callback, options, elementCallback) {
		@@ -43,0 +55,0 @@ const handler = new DomHandler(callback, options, elementCallback);

9

lib/esm/Parser.d.ts

		@@ -95,2 +95,3 @@ import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";
		private readonly stack;
		/** Determines whether self-closing tags are recognized. */
		private readonly foreignContext;
		@@ -100,2 +101,4 @@ private readonly cbs;
		private readonly lowerCaseAttributeNames;
		/** We are parsing HTML. Inverse of the `xmlMode` option. */
		private readonly htmlMode;
		private readonly tokenizer;
		@@ -112,3 +115,7 @@ private readonly buffers;
		/** @internal */
		ontextentity(cp: number): void;
		ontextentity(cp: number, endIndex: number): void;
		/**
		* Checks if the current tag is a void element. Override this if you want
		* to specify your own additional void elements.
		*/
		protected isVoidElement(name: string): boolean;
		@@ -115,0 +122,0 @@ /** @internal */

95

lib/esm/Parser.js

		@@ -117,3 +117,2 @@ import Tokenizer, { QuoteType } from "./Tokenizer.js";
		this.stack = [];
		this.foreignContext = [];
		this.buffers = [];
		@@ -126,6 +125,8 @@ this.bufferOffset = 0;
		this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
		this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
		this.htmlMode = !this.options.xmlMode;
		this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
		this.lowerCaseAttributeNames =
		(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
		(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
		this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this);
		this.foreignContext = [!this.htmlMode];
		(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
		@@ -143,15 +144,14 @@ }
		/** @internal */
		ontextentity(cp) {
		ontextentity(cp, endIndex) {
		var _a, _b;
		/*
		* Entities can be emitted on the character, or directly after.
		* We use the section start here to get accurate indices.
		*/
		const index = this.tokenizer.getSectionStart();
		this.endIndex = index - 1;
		this.endIndex = endIndex - 1;
		(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, fromCodePoint(cp));
		this.startIndex = index;
		this.startIndex = endIndex;
		}
		/**
		* Checks if the current tag is a void element. Override this if you want
		* to specify your own additional void elements.
		*/
		isVoidElement(name) {
		return !this.options.xmlMode && voidElements.has(name);
		return this.htmlMode && voidElements.has(name);
		}
		@@ -171,7 +171,6 @@ /** @internal */
		this.tagname = name;
		const impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
		const impliesClose = this.htmlMode && openImpliesClose.get(name);
		if (impliesClose) {
		while (this.stack.length > 0 &&
		impliesClose.has(this.stack[this.stack.length - 1])) {
		const element = this.stack.pop();
		while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
		const element = this.stack.shift();
		(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);
		@@ -181,9 +180,11 @@ }
		if (!this.isVoidElement(name)) {
		this.stack.push(name);
		if (foreignContextElements.has(name)) {
		this.foreignContext.push(true);
		this.stack.unshift(name);
		if (this.htmlMode) {
		if (foreignContextElements.has(name)) {
		this.foreignContext.unshift(true);
		}
		else if (htmlIntegrationElements.has(name)) {
		this.foreignContext.unshift(false);
		}
		}
		else if (htmlIntegrationElements.has(name)) {
		this.foreignContext.push(false);
		}
		}
		@@ -215,3 +216,3 @@ (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
		onclosetag(start, endIndex) {
		var _a, _b, _c, _d, _e, _f;
		var _a, _b, _c, _d, _e, _f, _g, _h;
		this.endIndex = endIndex;
		@@ -222,20 +223,17 @@ let name = this.getSlice(start, endIndex);
		}
		if (foreignContextElements.has(name) ||
		htmlIntegrationElements.has(name)) {
		this.foreignContext.pop();
		if (this.htmlMode &&
		(foreignContextElements.has(name) ||
		htmlIntegrationElements.has(name))) {
		this.foreignContext.shift();
		}
		if (!this.isVoidElement(name)) {
		const pos = this.stack.lastIndexOf(name);
		const pos = this.stack.indexOf(name);
		if (pos !== -1) {
		if (this.cbs.onclosetag) {
		let count = this.stack.length - pos;
		while (count--) {
		// We know the stack has sufficient elements.
		this.cbs.onclosetag(this.stack.pop(), count !== 0);
		}
		for (let index = 0; index <= pos; index++) {
		const element = this.stack.shift();
		// We know the stack has sufficient elements.
		(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
		}
		else
		this.stack.length = pos;
		}
		else if (!this.options.xmlMode && name === "p") {
		else if (this.htmlMode && name === "p") {
		// Implicit open before close
		@@ -246,7 +244,7 @@ this.emitOpenTag("p");
		}
		else if (!this.options.xmlMode && name === "br") {
		else if (this.htmlMode && name === "br") {
		// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
		(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
		(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
		(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
		(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, "br");
		(_f = (_e = this.cbs).onopentag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
		(_h = (_g = this.cbs).onclosetag) === null || _h === void 0 ? void 0 : _h.call(_g, "br", false);
		}
		@@ -259,5 +257,3 @@ // Set `startIndex` for next node
		this.endIndex = endIndex;
		if (this.options.xmlMode ||
		this.options.recognizeSelfClosing ||
		this.foreignContext[this.foreignContext.length - 1]) {
		if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
		this.closeCurrentTag(false);
		@@ -277,6 +273,6 @@ // Set `startIndex` for next node
		// Self-closing tags will be on the top of the stack
		if (this.stack[this.stack.length - 1] === name) {
		if (this.stack[0] === name) {
		// If the opening tag isn't implied, the closing tag has to be implied.
		(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
		this.stack.pop();
		this.stack.shift();
		}
		@@ -361,3 +357,3 @@ }
		const value = this.getSlice(start, endIndex - offset);
		if (this.options.xmlMode || this.options.recognizeCDATA) {
		if (!this.htmlMode || this.options.recognizeCDATA) {
		(_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);
		@@ -380,4 +376,5 @@ (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);
		this.endIndex = this.startIndex;
		for (let index = this.stack.length; index > 0; this.cbs.onclosetag(this.stack[--index], true))
		;
		for (let index = 0; index < this.stack.length; index++) {
		this.cbs.onclosetag(this.stack[index], true);
		}
		}
		@@ -401,2 +398,4 @@ (_b = (_a = this.cbs).onend) === null \|\| _b === void 0 ? void 0 : _b.call(_a);
		this.buffers.length = 0;
		this.foreignContext.length = 0;
		this.foreignContext.unshift(!this.htmlMode);
		this.bufferOffset = 0;
		@@ -403,0 +402,0 @@ this.writeIndex = 0;

30

lib/esm/Tokenizer.d.ts

		@@ -22,3 +22,3 @@ export declare enum QuoteType {
		ontext(start: number, endIndex: number): void;
		ontextentity(codepoint: number): void;
		ontextentity(codepoint: number, endIndex: number): void;
		}
		@@ -35,2 +35,4 @@ export default class Tokenizer {
		private index;
		/** The start of the last entity. */
		private entityStart;
		/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
		@@ -46,3 +48,3 @@ private baseState;
		private readonly decodeEntities;
		private readonly entityTrie;
		private readonly entityDecoder;
		constructor({ xmlMode, decodeEntities, }: {
		@@ -57,10 +59,2 @@ xmlMode?: boolean;
		resume(): void;
		/**
		* The current index within all of the written data.
		*/
		getIndex(): number;
		/**
		* The start of the current section.
		*/
		getSectionStart(): number;
		private stateText;
		@@ -117,15 +111,4 @@ private currentSequence;
		private stateBeforeSpecialS;
		private trieIndex;
		private trieCurrent;
		/** For named entities, the index of the value. For numeric entities, the code point. */
		private entityResult;
		private entityExcess;
		private stateBeforeEntity;
		private stateInNamedEntity;
		private emitNamedEntity;
		private stateBeforeNumericEntity;
		private emitNumericEntity;
		private stateInNumericEntity;
		private stateInHexEntity;
		private allowLegacyEntity;
		private startEntity;
		private stateInEntity;
		/**
		@@ -145,5 +128,4 @@ * Remove data that has already been consumed from the buffer.
		private handleTrailingData;
		private emitPartial;
		private emitCodePoint;
		}
		//# sourceMappingURL=Tokenizer.d.ts.map

273

lib/esm/Tokenizer.js

		@@ -1,2 +0,2 @@
		import { htmlDecodeTree, xmlDecodeTree, BinTrieFlags, determineBranch, replaceCodePoint, } from "entities/lib/decode.js";
		import { EntityDecoder, DecodingMode, htmlDecodeTree, xmlDecodeTree, } from "entities/lib/decode.js";
		var CharCodes;
		@@ -64,7 +64,3 @@ (function (CharCodes) {
		State[State["InSpecialTag"] = 24] = "InSpecialTag";
		State[State["BeforeEntity"] = 25] = "BeforeEntity";
		State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity";
		State[State["InNamedEntity"] = 27] = "InNamedEntity";
		State[State["InNumericEntity"] = 28] = "InNumericEntity";
		State[State["InHexEntity"] = 29] = "InHexEntity";
		State[State["InEntity"] = 25] = "InEntity";
		})(State || (State = {}));
		@@ -81,5 +77,2 @@ function isWhitespace(c) {
		}
		function isNumber(c) {
		return c >= CharCodes.Zero && c <= CharCodes.Nine;
		}
		function isASCIIAlpha(c) {
		@@ -89,6 +82,2 @@ return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
		}
		function isHexDigit(c) {
		return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) ||
		(c >= CharCodes.LowerA && c <= CharCodes.LowerF));
		}
		export var QuoteType;
		@@ -126,2 +115,4 @@ (function (QuoteType) {
		this.index = 0;
		/** The start of the last entity. */
		this.entityStart = 0;
		/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
		@@ -137,10 +128,5 @@ this.baseState = State.Text;
		this.sequenceIndex = 0;
		this.trieIndex = 0;
		this.trieCurrent = 0;
		/** For named entities, the index of the value. For numeric entities, the code point. */
		this.entityResult = 0;
		this.entityExcess = 0;
		this.xmlMode = xmlMode;
		this.decodeEntities = decodeEntities;
		this.entityTrie = xmlMode ? xmlDecodeTree : htmlDecodeTree;
		this.entityDecoder = new EntityDecoder(xmlMode ? xmlDecodeTree : htmlDecodeTree, (cp, consumed) => this.emitCodePoint(cp, consumed));
		}
		@@ -175,14 +161,2 @@ reset() {
		}
		/**
		* The current index within all of the written data.
		*/
		getIndex() {
		return this.index;
		}
		/**
		* The start of the current section.
		*/
		getSectionStart() {
		return this.sectionStart;
		}
		stateText(c) {
		@@ -198,3 +172,3 @@ if (c === CharCodes.Lt ||
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -246,3 +220,3 @@ }
		if (this.decodeEntities && c === CharCodes.Amp) {
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -408,3 +382,2 @@ }
		this.state = State.Text;
		this.baseState = State.Text;
		this.sectionStart = this.index + 1;
		@@ -423,3 +396,2 @@ }
		}
		this.baseState = this.state;
		this.sectionStart = this.index + 1;
		@@ -439,3 +411,2 @@ }
		this.state = State.Text;
		this.baseState = State.Text;
		this.sectionStart = this.index + 1;
		@@ -498,4 +469,3 @@ this.isSpecial = false; // Reset special state, in case of self-closing special tags
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.baseState = this.state;
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -518,4 +488,3 @@ }
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.baseState = this.state;
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -581,146 +550,27 @@ }
		}
		stateBeforeEntity(c) {
		// Start excess with 1 to include the '&'
		this.entityExcess = 1;
		this.entityResult = 0;
		if (c === CharCodes.Number) {
		this.state = State.BeforeNumericEntity;
		}
		else if (c === CharCodes.Amp) {
		// We have two `&` characters in a row. Stay in the current state.
		}
		else {
		this.trieIndex = 0;
		this.trieCurrent = this.entityTrie[0];
		this.state = State.InNamedEntity;
		this.stateInNamedEntity(c);
		}
		startEntity() {
		this.baseState = this.state;
		this.state = State.InEntity;
		this.entityStart = this.index;
		this.entityDecoder.startEntity(this.xmlMode
		? DecodingMode.Strict
		: this.baseState === State.Text ||
		this.baseState === State.InSpecialTag
		? DecodingMode.Legacy
		: DecodingMode.Attribute);
		}
		stateInNamedEntity(c) {
		this.entityExcess += 1;
		this.trieIndex = determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
		if (this.trieIndex < 0) {
		this.emitNamedEntity();
		this.index--;
		return;
		}
		this.trieCurrent = this.entityTrie[this.trieIndex];
		const masked = this.trieCurrent & BinTrieFlags.VALUE_LENGTH;
		// If the branch is a value, store it and continue
		if (masked) {
		// The mask is the number of bytes of the value, including the current byte.
		const valueLength = (masked >> 14) - 1;
		// If we have a legacy entity while parsing strictly, just skip the number of bytes
		if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
		this.trieIndex += valueLength;
		stateInEntity() {
		const length = this.entityDecoder.write(this.buffer, this.index - this.offset);
		// If `length` is positive, we are done with the entity.
		if (length >= 0) {
		this.state = this.baseState;
		if (length === 0) {
		this.index = this.entityStart;
		}
		else {
		// Add 1 as we have already incremented the excess
		const entityStart = this.index - this.entityExcess + 1;
		if (entityStart > this.sectionStart) {
		this.emitPartial(this.sectionStart, entityStart);
		}
		// If this is a surrogate pair, consume the next two bytes
		this.entityResult = this.trieIndex;
		this.trieIndex += valueLength;
		this.entityExcess = 0;
		this.sectionStart = this.index + 1;
		if (valueLength === 0) {
		this.emitNamedEntity();
		}
		}
		}
		}
		emitNamedEntity() {
		this.state = this.baseState;
		if (this.entityResult === 0) {
		return;
		}
		const valueLength = (this.entityTrie[this.entityResult] & BinTrieFlags.VALUE_LENGTH) >>
		14;
		switch (valueLength) {
		case 1: {
		this.emitCodePoint(this.entityTrie[this.entityResult] &
		~BinTrieFlags.VALUE_LENGTH);
		break;
		}
		case 2: {
		this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
		break;
		}
		case 3: {
		this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
		this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
		}
		}
		}
		stateBeforeNumericEntity(c) {
		if ((c | 0x20) === CharCodes.LowerX) {
		this.entityExcess++;
		this.state = State.InHexEntity;
		}
		else {
		this.state = State.InNumericEntity;
		this.stateInNumericEntity(c);
		// Mark buffer as consumed.
		this.index = this.offset + this.buffer.length - 1;
		}
		}
		emitNumericEntity(strict) {
		const entityStart = this.index - this.entityExcess - 1;
		const numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
		if (numberStart !== this.index) {
		// Emit leading data if any
		if (entityStart > this.sectionStart) {
		this.emitPartial(this.sectionStart, entityStart);
		}
		this.sectionStart = this.index + Number(strict);
		this.emitCodePoint(replaceCodePoint(this.entityResult));
		}
		this.state = this.baseState;
		}
		stateInNumericEntity(c) {
		if (c === CharCodes.Semi) {
		this.emitNumericEntity(true);
		}
		else if (isNumber(c)) {
		this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
		this.entityExcess++;
		}
		else {
		if (this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		}
		else {
		this.state = this.baseState;
		}
		this.index--;
		}
		}
		stateInHexEntity(c) {
		if (c === CharCodes.Semi) {
		this.emitNumericEntity(true);
		}
		else if (isNumber(c)) {
		this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
		this.entityExcess++;
		}
		else if (isHexDigit(c)) {
		this.entityResult =
		this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
		this.entityExcess++;
		}
		else {
		if (this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		}
		else {
		this.state = this.baseState;
		}
		this.index--;
		}
		}
		allowLegacyEntity() {
		return (!this.xmlMode &&
		(this.baseState === State.Text ||
		this.baseState === State.InSpecialTag));
		}
		/**
		@@ -853,22 +703,6 @@ * Remove data that has already been consumed from the buffer.
		}
		case State.InNamedEntity: {
		this.stateInNamedEntity(c);
		case State.InEntity: {
		this.stateInEntity();
		break;
		}
		case State.BeforeEntity: {
		this.stateBeforeEntity(c);
		break;
		}
		case State.InHexEntity: {
		this.stateInHexEntity(c);
		break;
		}
		case State.InNumericEntity: {
		this.stateInNumericEntity(c);
		break;
		}
		default: {
		// `this._state === State.BeforeNumericEntity`
		this.stateBeforeNumericEntity(c);
		}
		}
		@@ -880,9 +714,7 @@ this.index++;
		finish() {
		if (this.state === State.InNamedEntity) {
		this.emitNamedEntity();
		if (this.state === State.InEntity) {
		this.entityDecoder.end();
		this.state = this.baseState;
		}
		// If there is remaining data, emit it in a reasonable way
		if (this.sectionStart < this.index) {
		this.handleTrailingData();
		}
		this.handleTrailingData();
		this.cbs.onend();
		@@ -893,2 +725,6 @@ }
		const endIndex = this.buffer.length + this.offset;
		// If there is no remaining data, we are done.
		if (this.sectionStart >= endIndex) {
		return;
		}
		if (this.state === State.InCommentLike) {
		@@ -902,12 +738,2 @@ if (this.currentSequence === Sequences.CdataEnd) {
		}
		else if (this.state === State.InNumericEntity &&
		this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		// All trailing data will have been consumed
		}
		else if (this.state === State.InHexEntity &&
		this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		// All trailing data will have been consumed
		}
		else if (this.state === State.InTagName ||
		@@ -931,18 +757,19 @@ this.state === State.BeforeAttributeName ||
		}
		emitPartial(start, endIndex) {
		emitCodePoint(cp, consumed) {
		if (this.baseState !== State.Text &&
		this.baseState !== State.InSpecialTag) {
		this.cbs.onattribdata(start, endIndex);
		}
		else {
		this.cbs.ontext(start, endIndex);
		}
		}
		emitCodePoint(cp) {
		if (this.baseState !== State.Text &&
		this.baseState !== State.InSpecialTag) {
		if (this.sectionStart < this.entityStart) {
		this.cbs.onattribdata(this.sectionStart, this.entityStart);
		}
		this.sectionStart = this.entityStart + consumed;
		this.index = this.sectionStart - 1;
		this.cbs.onattribentity(cp);
		}
		else {
		this.cbs.ontextentity(cp);
		if (this.sectionStart < this.entityStart) {
		this.cbs.ontext(this.sectionStart, this.entityStart);
		}
		this.sectionStart = this.entityStart + consumed;
		this.index = this.sectionStart - 1;
		this.cbs.ontextentity(cp, this.sectionStart);
		}
		@@ -949,0 +776,0 @@ }

21

lib/index.d.ts

		@@ -10,3 +10,3 @@ import { Parser, ParserOptions } from "./Parser.js";
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		*/
		@@ -21,3 +21,3 @@ export declare function parseDocument(data: string, options?: Options): Document;
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		* @deprecated Use `parseDocument` instead.
		@@ -29,11 +29,20 @@ */
		*
		* @param callback A callback that will be called once parsing has been completed.
		* @param options Optional options for the parser and DOM builder.
		* @param callback A callback that will be called once parsing has been completed, with the resulting document.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		*/
		export declare function createDocumentStream(callback: (error: Error | null, document: Document) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
		/**
		* Creates a parser instance, with an attached DOM handler.
		*
		* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		* @deprecated Use `createDocumentStream` instead.
		*/
		export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
		export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
		export * as ElementType from "domelementtype";
		import { Feed } from "domutils";
		export { getFeed } from "domutils";
		import { type Feed } from "domutils";
		export { getFeed, type Feed } from "domutils";
		/**
		@@ -40,0 +49,0 @@ * Parse a feed.

23

lib/index.js

		@@ -29,3 +29,3 @@ "use strict";
		Object.defineProperty(exports, "__esModule", { value: true });
		exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
		exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
		var Parser_js_1 = require("./Parser.js");
		@@ -44,3 +44,3 @@ var Parser_js_2 = require("./Parser.js");
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		*/
		@@ -60,3 +60,3 @@ function parseDocument(data, options) {
		* @param data The data that should be parsed.
		* @param options Optional options for the parser and DOM builder.
		* @param options Optional options for the parser and DOM handler.
		* @deprecated Use `parseDocument` instead.
		@@ -71,6 +71,19 @@ */
		*
		* @param callback A callback that will be called once parsing has been completed.
		* @param options Optional options for the parser and DOM builder.
		* @param callback A callback that will be called once parsing has been completed, with the resulting document.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		*/
		function createDocumentStream(callback, options, elementCallback) {
		var handler = new domhandler_1.DomHandler(function (error) { return callback(error, handler.root); }, options, elementCallback);
		return new Parser_js_1.Parser(handler, options);
		}
		exports.createDocumentStream = createDocumentStream;
		/**
		* Creates a parser instance, with an attached DOM handler.
		*
		* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
		* @param options Optional options for the parser and DOM handler.
		* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
		* @deprecated Use `createDocumentStream` instead.
		*/
		function createDomStream(callback, options, elementCallback) {
		@@ -77,0 +90,0 @@ var handler = new domhandler_1.DomHandler(callback, options, elementCallback);

9

lib/Parser.d.ts

		@@ -95,2 +95,3 @@ import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";
		private readonly stack;
		/** Determines whether self-closing tags are recognized. */
		private readonly foreignContext;
		@@ -100,2 +101,4 @@ private readonly cbs;
		private readonly lowerCaseAttributeNames;
		/** We are parsing HTML. Inverse of the `xmlMode` option. */
		private readonly htmlMode;
		private readonly tokenizer;
		@@ -112,3 +115,7 @@ private readonly buffers;
		/** @internal */
		ontextentity(cp: number): void;
		ontextentity(cp: number, endIndex: number): void;
		/**
		* Checks if the current tag is a void element. Override this if you want
		* to specify your own additional void elements.
		*/
		protected isVoidElement(name: string): boolean;
		@@ -115,0 +122,0 @@ /** @internal */

95

lib/Parser.js

		@@ -144,3 +144,2 @@ "use strict";
		this.stack = [];
		this.foreignContext = [];
		this.buffers = [];
		@@ -153,6 +152,8 @@ this.bufferOffset = 0;
		this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
		this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
		this.htmlMode = !this.options.xmlMode;
		this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
		this.lowerCaseAttributeNames =
		(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
		(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
		this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this);
		this.foreignContext = [!this.htmlMode];
		(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
		@@ -170,15 +171,14 @@ }
		/** @internal */
		Parser.prototype.ontextentity = function (cp) {
		Parser.prototype.ontextentity = function (cp, endIndex) {
		var _a, _b;
		/*
		* Entities can be emitted on the character, or directly after.
		* We use the section start here to get accurate indices.
		*/
		var index = this.tokenizer.getSectionStart();
		this.endIndex = index - 1;
		this.endIndex = endIndex - 1;
		(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, (0, decode_js_1.fromCodePoint)(cp));
		this.startIndex = index;
		this.startIndex = endIndex;
		};
		/**
		* Checks if the current tag is a void element. Override this if you want
		* to specify your own additional void elements.
		*/
		Parser.prototype.isVoidElement = function (name) {
		return !this.options.xmlMode && voidElements.has(name);
		return this.htmlMode && voidElements.has(name);
		};
		@@ -198,7 +198,6 @@ /** @internal */
		this.tagname = name;
		var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
		var impliesClose = this.htmlMode && openImpliesClose.get(name);
		if (impliesClose) {
		while (this.stack.length > 0 &&
		impliesClose.has(this.stack[this.stack.length - 1])) {
		var element = this.stack.pop();
		while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
		var element = this.stack.shift();
		(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);
		@@ -208,9 +207,11 @@ }
		if (!this.isVoidElement(name)) {
		this.stack.push(name);
		if (foreignContextElements.has(name)) {
		this.foreignContext.push(true);
		this.stack.unshift(name);
		if (this.htmlMode) {
		if (foreignContextElements.has(name)) {
		this.foreignContext.unshift(true);
		}
		else if (htmlIntegrationElements.has(name)) {
		this.foreignContext.unshift(false);
		}
		}
		else if (htmlIntegrationElements.has(name)) {
		this.foreignContext.push(false);
		}
		}
		@@ -242,3 +243,3 @@ (_d = (_c = this.cbs).onopentagname) === null \|\| _d === void 0 ? void 0 : _d.call(_c, name);
		Parser.prototype.onclosetag = function (start, endIndex) {
		var _a, _b, _c, _d, _e, _f;
		var _a, _b, _c, _d, _e, _f, _g, _h;
		this.endIndex = endIndex;
		@@ -249,20 +250,17 @@ var name = this.getSlice(start, endIndex);
		}
		if (foreignContextElements.has(name) \|\|
		htmlIntegrationElements.has(name)) {
		this.foreignContext.pop();
		if (this.htmlMode &&
		(foreignContextElements.has(name) \|\|
		htmlIntegrationElements.has(name))) {
		this.foreignContext.shift();
		}
		if (!this.isVoidElement(name)) {
		var pos = this.stack.lastIndexOf(name);
		var pos = this.stack.indexOf(name);
		if (pos !== -1) {
		if (this.cbs.onclosetag) {
		var count = this.stack.length - pos;
		while (count--) {
		// We know the stack has sufficient elements.
		this.cbs.onclosetag(this.stack.pop(), count !== 0);
		}
		for (var index = 0; index <= pos; index++) {
		var element = this.stack.shift();
		// We know the stack has sufficient elements.
		(_b = (_a = this.cbs).onclosetag) === null \|\| _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
		}
		else
		this.stack.length = pos;
		}
		else if (!this.options.xmlMode && name === "p") {
		else if (this.htmlMode && name === "p") {
		// Implicit open before close
		@@ -273,7 +271,7 @@ this.emitOpenTag("p");
		}
		else if (!this.options.xmlMode && name === "br") {
		else if (this.htmlMode && name === "br") {
		// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
		(_b = (_a = this.cbs).onopentagname) === null \|\| _b === void 0 ? void 0 : _b.call(_a, "br");
		(_d = (_c = this.cbs).onopentag) === null \|\| _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
		(_f = (_e = this.cbs).onclosetag) === null \|\| _f === void 0 ? void 0 : _f.call(_e, "br", false);
		(_d = (_c = this.cbs).onopentagname) === null \|\| _d === void 0 ? void 0 : _d.call(_c, "br");
		(_f = (_e = this.cbs).onopentag) === null \|\| _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
		(_h = (_g = this.cbs).onclosetag) === null \|\| _h === void 0 ? void 0 : _h.call(_g, "br", false);
		}
		@@ -286,5 +284,3 @@ // Set `startIndex` for next node
		this.endIndex = endIndex;
		if (this.options.xmlMode \|\|
		this.options.recognizeSelfClosing \|\|
		this.foreignContext[this.foreignContext.length - 1]) {
		if (this.options.recognizeSelfClosing \|\| this.foreignContext[0]) {
		this.closeCurrentTag(false);
		@@ -304,6 +300,6 @@ // Set `startIndex` for next node
		// Self-closing tags will be on the top of the stack
		if (this.stack[this.stack.length - 1] === name) {
		if (this.stack[0] === name) {
		// If the opening tag isn't implied, the closing tag has to be implied.
		(_b = (_a = this.cbs).onclosetag) === null \|\| _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
		this.stack.pop();
		this.stack.shift();
		}
		@@ -388,3 +384,3 @@ };
		var value = this.getSlice(start, endIndex - offset);
		if (this.options.xmlMode \|\| this.options.recognizeCDATA) {
		if (!this.htmlMode \|\| this.options.recognizeCDATA) {
		(_b = (_a = this.cbs).oncdatastart) === null \|\| _b === void 0 ? void 0 : _b.call(_a);
		@@ -407,4 +403,5 @@ (_d = (_c = this.cbs).ontext) === null \|\| _d === void 0 ? void 0 : _d.call(_c, value);
		this.endIndex = this.startIndex;
		for (var index = this.stack.length; index > 0; this.cbs.onclosetag(this.stack[--index], true))
		;
		for (var index = 0; index < this.stack.length; index++) {
		this.cbs.onclosetag(this.stack[index], true);
		}
		}
		@@ -428,2 +425,4 @@ (_b = (_a = this.cbs).onend) === null \|\| _b === void 0 ? void 0 : _b.call(_a);
		this.buffers.length = 0;
		this.foreignContext.length = 0;
		this.foreignContext.unshift(!this.htmlMode);
		this.bufferOffset = 0;
		@@ -430,0 +429,0 @@ this.writeIndex = 0;

30

lib/Tokenizer.d.ts

		@@ -22,3 +22,3 @@ export declare enum QuoteType {
		ontext(start: number, endIndex: number): void;
		ontextentity(codepoint: number): void;
		ontextentity(codepoint: number, endIndex: number): void;
		}
		@@ -35,2 +35,4 @@ export default class Tokenizer {
		private index;
		/** The start of the last entity. */
		private entityStart;
		/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
		@@ -46,3 +48,3 @@ private baseState;
		private readonly decodeEntities;
		private readonly entityTrie;
		private readonly entityDecoder;
		constructor({ xmlMode, decodeEntities, }: {
		@@ -57,10 +59,2 @@ xmlMode?: boolean;
		resume(): void;
		/**
		* The current index within all of the written data.
		*/
		getIndex(): number;
		/**
		* The start of the current section.
		*/
		getSectionStart(): number;
		private stateText;
		@@ -117,15 +111,4 @@ private currentSequence;
		private stateBeforeSpecialS;
		private trieIndex;
		private trieCurrent;
		/** For named entities, the index of the value. For numeric entities, the code point. */
		private entityResult;
		private entityExcess;
		private stateBeforeEntity;
		private stateInNamedEntity;
		private emitNamedEntity;
		private stateBeforeNumericEntity;
		private emitNumericEntity;
		private stateInNumericEntity;
		private stateInHexEntity;
		private allowLegacyEntity;
		private startEntity;
		private stateInEntity;
		/**
		@@ -145,5 +128,4 @@ * Remove data that has already been consumed from the buffer.
		private handleTrailingData;
		private emitPartial;
		private emitCodePoint;
		}
		//# sourceMappingURL=Tokenizer.d.ts.map

272

lib/Tokenizer.js

		@@ -67,7 +67,3 @@ "use strict";
		State[State["InSpecialTag"] = 24] = "InSpecialTag";
		State[State["BeforeEntity"] = 25] = "BeforeEntity";
		State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity";
		State[State["InNamedEntity"] = 27] = "InNamedEntity";
		State[State["InNumericEntity"] = 28] = "InNumericEntity";
		State[State["InHexEntity"] = 29] = "InHexEntity";
		State[State["InEntity"] = 25] = "InEntity";
		})(State \|\| (State = {}));
		@@ -84,5 +80,2 @@ function isWhitespace(c) {
		}
		function isNumber(c) {
		return c >= CharCodes.Zero && c <= CharCodes.Nine;
		}
		function isASCIIAlpha(c) {
		@@ -92,6 +85,2 @@ return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) \|\|
		}
		function isHexDigit(c) {
		return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) \|\|
		(c >= CharCodes.LowerA && c <= CharCodes.LowerF));
		}
		var QuoteType;
		@@ -121,2 +110,3 @@ (function (QuoteType) {
		var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c;
		var _this = this;
		this.cbs = cbs;
		@@ -131,2 +121,4 @@ /** The current state the tokenizer is in. */
		this.index = 0;
		/** The start of the last entity. */
		this.entityStart = 0;
		/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
		@@ -142,10 +134,5 @@ this.baseState = State.Text;
		this.sequenceIndex = 0;
		this.trieIndex = 0;
		this.trieCurrent = 0;
		/** For named entities, the index of the value. For numeric entities, the code point. */
		this.entityResult = 0;
		this.entityExcess = 0;
		this.xmlMode = xmlMode;
		this.decodeEntities = decodeEntities;
		this.entityTrie = xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree;
		this.entityDecoder = new decode_js_1.EntityDecoder(xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree, function (cp, consumed) { return _this.emitCodePoint(cp, consumed); });
		}
		@@ -180,14 +167,2 @@ Tokenizer.prototype.reset = function () {
		};
		/**
		* The current index within all of the written data.
		*/
		Tokenizer.prototype.getIndex = function () {
		return this.index;
		};
		/**
		* The start of the current section.
		*/
		Tokenizer.prototype.getSectionStart = function () {
		return this.sectionStart;
		};
		Tokenizer.prototype.stateText = function (c) {
		@@ -203,3 +178,3 @@ if (c === CharCodes.Lt \|\|
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -251,3 +226,3 @@ };
		if (this.decodeEntities && c === CharCodes.Amp) {
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -413,3 +388,2 @@ }
		this.state = State.Text;
		this.baseState = State.Text;
		this.sectionStart = this.index + 1;
		@@ -428,3 +402,2 @@ }
		}
		this.baseState = this.state;
		this.sectionStart = this.index + 1;
		@@ -444,3 +417,2 @@ }
		this.state = State.Text;
		this.baseState = State.Text;
		this.sectionStart = this.index + 1;
		@@ -503,4 +475,3 @@ this.isSpecial = false; // Reset special state, in case of self-closing special tags
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.baseState = this.state;
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -523,4 +494,3 @@ };
		else if (this.decodeEntities && c === CharCodes.Amp) {
		this.baseState = this.state;
		this.state = State.BeforeEntity;
		this.startEntity();
		}
		@@ -586,146 +556,27 @@ };
		};
		Tokenizer.prototype.stateBeforeEntity = function (c) {
		// Start excess with 1 to include the '&'
		this.entityExcess = 1;
		this.entityResult = 0;
		if (c === CharCodes.Number) {
		this.state = State.BeforeNumericEntity;
		}
		else if (c === CharCodes.Amp) {
		// We have two `&` characters in a row. Stay in the current state.
		}
		else {
		this.trieIndex = 0;
		this.trieCurrent = this.entityTrie[0];
		this.state = State.InNamedEntity;
		this.stateInNamedEntity(c);
		}
		Tokenizer.prototype.startEntity = function () {
		this.baseState = this.state;
		this.state = State.InEntity;
		this.entityStart = this.index;
		this.entityDecoder.startEntity(this.xmlMode
		? decode_js_1.DecodingMode.Strict
		: this.baseState === State.Text \|\|
		this.baseState === State.InSpecialTag
		? decode_js_1.DecodingMode.Legacy
		: decode_js_1.DecodingMode.Attribute);
		};
		Tokenizer.prototype.stateInNamedEntity = function (c) {
		this.entityExcess += 1;
		this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
		if (this.trieIndex < 0) {
		this.emitNamedEntity();
		this.index--;
		return;
		}
		this.trieCurrent = this.entityTrie[this.trieIndex];
		var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH;
		// If the branch is a value, store it and continue
		if (masked) {
		// The mask is the number of bytes of the value, including the current byte.
		var valueLength = (masked >> 14) - 1;
		// If we have a legacy entity while parsing strictly, just skip the number of bytes
		if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
		this.trieIndex += valueLength;
		Tokenizer.prototype.stateInEntity = function () {
		var length = this.entityDecoder.write(this.buffer, this.index - this.offset);
		// If `length` is positive, we are done with the entity.
		if (length >= 0) {
		this.state = this.baseState;
		if (length === 0) {
		this.index = this.entityStart;
		}
		else {
		// Add 1 as we have already incremented the excess
		var entityStart = this.index - this.entityExcess + 1;
		if (entityStart > this.sectionStart) {
		this.emitPartial(this.sectionStart, entityStart);
		}
		// If this is a surrogate pair, consume the next two bytes
		this.entityResult = this.trieIndex;
		this.trieIndex += valueLength;
		this.entityExcess = 0;
		this.sectionStart = this.index + 1;
		if (valueLength === 0) {
		this.emitNamedEntity();
		}
		}
		}
		};
		Tokenizer.prototype.emitNamedEntity = function () {
		this.state = this.baseState;
		if (this.entityResult === 0) {
		return;
		}
		var valueLength = (this.entityTrie[this.entityResult] & decode_js_1.BinTrieFlags.VALUE_LENGTH) >>
		14;
		switch (valueLength) {
		case 1: {
		this.emitCodePoint(this.entityTrie[this.entityResult] &
		~decode_js_1.BinTrieFlags.VALUE_LENGTH);
		break;
		}
		case 2: {
		this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
		break;
		}
		case 3: {
		this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
		this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
		}
		}
		};
		Tokenizer.prototype.stateBeforeNumericEntity = function (c) {
		if ((c \| 0x20) === CharCodes.LowerX) {
		this.entityExcess++;
		this.state = State.InHexEntity;
		}
		else {
		this.state = State.InNumericEntity;
		this.stateInNumericEntity(c);
		// Mark buffer as consumed.
		this.index = this.offset + this.buffer.length - 1;
		}
		};
		Tokenizer.prototype.emitNumericEntity = function (strict) {
		var entityStart = this.index - this.entityExcess - 1;
		var numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
		if (numberStart !== this.index) {
		// Emit leading data if any
		if (entityStart > this.sectionStart) {
		this.emitPartial(this.sectionStart, entityStart);
		}
		this.sectionStart = this.index + Number(strict);
		this.emitCodePoint((0, decode_js_1.replaceCodePoint)(this.entityResult));
		}
		this.state = this.baseState;
		};
		Tokenizer.prototype.stateInNumericEntity = function (c) {
		if (c === CharCodes.Semi) {
		this.emitNumericEntity(true);
		}
		else if (isNumber(c)) {
		this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
		this.entityExcess++;
		}
		else {
		if (this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		}
		else {
		this.state = this.baseState;
		}
		this.index--;
		}
		};
		Tokenizer.prototype.stateInHexEntity = function (c) {
		if (c === CharCodes.Semi) {
		this.emitNumericEntity(true);
		}
		else if (isNumber(c)) {
		this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
		this.entityExcess++;
		}
		else if (isHexDigit(c)) {
		this.entityResult =
		this.entityResult * 16 + ((c \| 0x20) - CharCodes.LowerA + 10);
		this.entityExcess++;
		}
		else {
		if (this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		}
		else {
		this.state = this.baseState;
		}
		this.index--;
		}
		};
		Tokenizer.prototype.allowLegacyEntity = function () {
		return (!this.xmlMode &&
		(this.baseState === State.Text \|\|
		this.baseState === State.InSpecialTag));
		};
		/**
		@@ -858,22 +709,6 @@ * Remove data that has already been consumed from the buffer.
		}
		case State.InNamedEntity: {
		this.stateInNamedEntity(c);
		case State.InEntity: {
		this.stateInEntity();
		break;
		}
		case State.BeforeEntity: {
		this.stateBeforeEntity(c);
		break;
		}
		case State.InHexEntity: {
		this.stateInHexEntity(c);
		break;
		}
		case State.InNumericEntity: {
		this.stateInNumericEntity(c);
		break;
		}
		default: {
		// `this._state === State.BeforeNumericEntity`
		this.stateBeforeNumericEntity(c);
		}
		}
		@@ -885,9 +720,7 @@ this.index++;
		Tokenizer.prototype.finish = function () {
		if (this.state === State.InNamedEntity) {
		this.emitNamedEntity();
		if (this.state === State.InEntity) {
		this.entityDecoder.end();
		this.state = this.baseState;
		}
		// If there is remaining data, emit it in a reasonable way
		if (this.sectionStart < this.index) {
		this.handleTrailingData();
		}
		this.handleTrailingData();
		this.cbs.onend();
		@@ -898,2 +731,6 @@ };
		var endIndex = this.buffer.length + this.offset;
		// If there is no remaining data, we are done.
		if (this.sectionStart >= endIndex) {
		return;
		}
		if (this.state === State.InCommentLike) {
		@@ -907,12 +744,2 @@ if (this.currentSequence === Sequences.CdataEnd) {
		}
		else if (this.state === State.InNumericEntity &&
		this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		// All trailing data will have been consumed
		}
		else if (this.state === State.InHexEntity &&
		this.allowLegacyEntity()) {
		this.emitNumericEntity(false);
		// All trailing data will have been consumed
		}
		else if (this.state === State.InTagName \|\|
		@@ -936,18 +763,19 @@ this.state === State.BeforeAttributeName \|\|
		};
		Tokenizer.prototype.emitPartial = function (start, endIndex) {
		Tokenizer.prototype.emitCodePoint = function (cp, consumed) {
		if (this.baseState !== State.Text &&
		this.baseState !== State.InSpecialTag) {
		this.cbs.onattribdata(start, endIndex);
		}
		else {
		this.cbs.ontext(start, endIndex);
		}
		};
		Tokenizer.prototype.emitCodePoint = function (cp) {
		if (this.baseState !== State.Text &&
		this.baseState !== State.InSpecialTag) {
		if (this.sectionStart < this.entityStart) {
		this.cbs.onattribdata(this.sectionStart, this.entityStart);
		}
		this.sectionStart = this.entityStart + consumed;
		this.index = this.sectionStart - 1;
		this.cbs.onattribentity(cp);
		}
		else {
		this.cbs.ontextentity(cp);
		if (this.sectionStart < this.entityStart) {
		this.cbs.ontext(this.sectionStart, this.entityStart);
		}
		this.sectionStart = this.entityStart + consumed;
		this.index = this.sectionStart - 1;
		this.cbs.ontextentity(cp, this.sectionStart);
		}
		@@ -954,0 +782,0 @@ };

26

package.json

		{
		"name": "htmlparser2",
		"description": "Fast & forgiving HTML/XML parser",
		"version": "8.0.2",
		"version": "9.0.0",
		"author": "Felix Boehm <me@feedic.com>",
		@@ -66,18 +66,18 @@ "funding": [
		"domhandler": "^5.0.3",
		"domutils": "^3.0.1",
		"entities": "^4.4.0"
		"domutils": "^3.1.0",
		"entities": "^4.5.0"
		},
		"devDependencies": {
		"@types/jest": "^29.5.0",
		"@types/node": "^18.15.5",
		"@typescript-eslint/eslint-plugin": "^5.56.0",
		"@typescript-eslint/parser": "^5.56.0",
		"eslint": "^8.36.0",
		"@types/jest": "^29.5.1",
		"@types/node": "^20.1.1",
		"@typescript-eslint/eslint-plugin": "^5.59.5",
		"@typescript-eslint/parser": "^5.59.5",
		"eslint": "^8.40.0",
		"eslint-config-prettier": "^8.8.0",
		"eslint-plugin-n": "^15.6.1",
		"eslint-plugin-unicorn": "^46.0.0",
		"eslint-plugin-n": "^15.7.0",
		"eslint-plugin-unicorn": "^47.0.0",
		"jest": "^29.5.0",
		"prettier": "^2.8.6",
		"ts-jest": "^29.0.5",
		"typescript": "^4.9.5"
		"prettier": "^2.8.8",
		"ts-jest": "^29.1.0",
		"typescript": "^5.0.4"
		},
		@@ -84,0 +84,0 @@ "jest": {

7

README.md

		@@ -120,4 +120,6 @@ # htmlparser2

		## Parsing RSS/RDF/Atom Feeds
		## Parsing Feeds

		`htmlparser2` makes it easy to parse RSS, RDF and Atom feeds, by providing a `parseFeed` method:

		```javascript
		@@ -127,5 +129,2 @@ const feed = htmlparser2.parseFeed(content, options);

		Note: While the provided feed handler works for most feeds,
		you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.

		## Performance
		@@ -132,0 +131,0 @@

lib/esm/index.d.ts.map