Socket
Socket
Sign in | Demo | Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 8.0.2 to 9.0.0

21

lib/esm/index.d.ts

@@ -10,3 +10,3 @@ import { Parser, ParserOptions } from "./Parser.js";

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
*/

@@ -21,3 +21,3 @@ export declare function parseDocument(data: string, options?: Options): Document;

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
* @deprecated Use `parseDocument` instead.

@@ -29,11 +29,20 @@ */

*
* @param callback A callback that will be called once parsing has been completed.
* @param options Optional options for the parser and DOM builder.
* @param callback A callback that will be called once parsing has been completed, with the resulting document.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
*/
export declare function createDocumentStream(callback: (error: Error | null, document: Document) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
/**
* Creates a parser instance, with an attached DOM handler.
*
* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
* @deprecated Use `createDocumentStream` instead.
*/
export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
export * as ElementType from "domelementtype";
import { Feed } from "domutils";
export { getFeed } from "domutils";
import { type Feed } from "domutils";
export { getFeed, type Feed } from "domutils";
/**

@@ -40,0 +49,0 @@ * Parse a feed.

@@ -12,3 +12,3 @@ import { Parser } from "./Parser.js";

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
*/

@@ -27,3 +27,3 @@ export function parseDocument(data, options) {

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
* @deprecated Use `parseDocument` instead.

@@ -37,6 +37,18 @@ */

*
* @param callback A callback that will be called once parsing has been completed.
* @param options Optional options for the parser and DOM builder.
* @param callback A callback that will be called once parsing has been completed, with the resulting document.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
*/
export function createDocumentStream(callback, options, elementCallback) {
    // Once the handler reports completion, pass along any error together
    // with the root document the handler has built.
    const onComplete = (error) => callback(error, domHandler.root);
    const domHandler = new DomHandler(onComplete, options, elementCallback);
    // The same options object configures both the handler and the parser.
    return new Parser(domHandler, options);
}
/**
* Creates a parser instance, with an attached DOM handler.
*
* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
* @deprecated Use `createDocumentStream` instead.
*/
export function createDomStream(callback, options, elementCallback) {

@@ -43,0 +55,0 @@ const handler = new DomHandler(callback, options, elementCallback);

@@ -95,2 +95,3 @@ import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";

private readonly stack;
/** Determines whether self-closing tags are recognized. */
private readonly foreignContext;

@@ -100,2 +101,4 @@ private readonly cbs;

private readonly lowerCaseAttributeNames;
/** We are parsing HTML. Inverse of the `xmlMode` option. */
private readonly htmlMode;
private readonly tokenizer;

@@ -112,3 +115,7 @@ private readonly buffers;

/** @internal */
ontextentity(cp: number): void;
ontextentity(cp: number, endIndex: number): void;
/**
* Checks if the current tag is a void element. Override this if you want
* to specify your own additional void elements.
*/
protected isVoidElement(name: string): boolean;

@@ -115,0 +122,0 @@ /** @internal */

@@ -117,3 +117,2 @@ import Tokenizer, { QuoteType } from "./Tokenizer.js";

this.stack = [];
this.foreignContext = [];
this.buffers = [];

@@ -126,6 +125,8 @@ this.bufferOffset = 0;

this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
this.htmlMode = !this.options.xmlMode;
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
this.lowerCaseAttributeNames =
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this);
this.foreignContext = [!this.htmlMode];
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);

@@ -143,15 +144,14 @@ }

/** @internal */
ontextentity(cp) {
ontextentity(cp, endIndex) {
var _a, _b;
/*
* Entities can be emitted on the character, or directly after.
* We use the section start here to get accurate indices.
*/
const index = this.tokenizer.getSectionStart();
this.endIndex = index - 1;
this.endIndex = endIndex - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, fromCodePoint(cp));
this.startIndex = index;
this.startIndex = endIndex;
}
/**
* Checks if the current tag is a void element. Override this if you want
* to specify your own additional void elements.
*/
isVoidElement(name) {
return !this.options.xmlMode && voidElements.has(name);
return this.htmlMode && voidElements.has(name);
}

@@ -171,7 +171,6 @@ /** @internal */

this.tagname = name;
const impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
const impliesClose = this.htmlMode && openImpliesClose.get(name);
if (impliesClose) {
while (this.stack.length > 0 &&
impliesClose.has(this.stack[this.stack.length - 1])) {
const element = this.stack.pop();
while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
const element = this.stack.shift();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);

@@ -181,9 +180,11 @@ }

if (!this.isVoidElement(name)) {
this.stack.push(name);
if (foreignContextElements.has(name)) {
this.foreignContext.push(true);
this.stack.unshift(name);
if (this.htmlMode) {
if (foreignContextElements.has(name)) {
this.foreignContext.unshift(true);
}
else if (htmlIntegrationElements.has(name)) {
this.foreignContext.unshift(false);
}
}
else if (htmlIntegrationElements.has(name)) {
this.foreignContext.push(false);
}
}

@@ -215,3 +216,3 @@ (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);

onclosetag(start, endIndex) {
var _a, _b, _c, _d, _e, _f;
var _a, _b, _c, _d, _e, _f, _g, _h;
this.endIndex = endIndex;

@@ -222,20 +223,17 @@ let name = this.getSlice(start, endIndex);

}
if (foreignContextElements.has(name) ||
htmlIntegrationElements.has(name)) {
this.foreignContext.pop();
if (this.htmlMode &&
(foreignContextElements.has(name) ||
htmlIntegrationElements.has(name))) {
this.foreignContext.shift();
}
if (!this.isVoidElement(name)) {
const pos = this.stack.lastIndexOf(name);
const pos = this.stack.indexOf(name);
if (pos !== -1) {
if (this.cbs.onclosetag) {
let count = this.stack.length - pos;
while (count--) {
// We know the stack has sufficient elements.
this.cbs.onclosetag(this.stack.pop(), count !== 0);
}
for (let index = 0; index <= pos; index++) {
const element = this.stack.shift();
// We know the stack has sufficient elements.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
}
else
this.stack.length = pos;
}
else if (!this.options.xmlMode && name === "p") {
else if (this.htmlMode && name === "p") {
// Implicit open before close

@@ -246,7 +244,7 @@ this.emitOpenTag("p");

}
else if (!this.options.xmlMode && name === "br") {
else if (this.htmlMode && name === "br") {
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, "br");
(_f = (_e = this.cbs).onopentag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
(_h = (_g = this.cbs).onclosetag) === null || _h === void 0 ? void 0 : _h.call(_g, "br", false);
}

@@ -259,5 +257,3 @@ // Set `startIndex` for next node

this.endIndex = endIndex;
if (this.options.xmlMode ||
this.options.recognizeSelfClosing ||
this.foreignContext[this.foreignContext.length - 1]) {
if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
this.closeCurrentTag(false);

@@ -277,6 +273,6 @@ // Set `startIndex` for next node

// Self-closing tags will be on the top of the stack
if (this.stack[this.stack.length - 1] === name) {
if (this.stack[0] === name) {
// If the opening tag isn't implied, the closing tag has to be implied.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
this.stack.pop();
this.stack.shift();
}

@@ -361,3 +357,3 @@ }

const value = this.getSlice(start, endIndex - offset);
if (this.options.xmlMode || this.options.recognizeCDATA) {
if (!this.htmlMode || this.options.recognizeCDATA) {
(_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);

@@ -380,4 +376,5 @@ (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);

this.endIndex = this.startIndex;
for (let index = this.stack.length; index > 0; this.cbs.onclosetag(this.stack[--index], true))
;
for (let index = 0; index < this.stack.length; index++) {
this.cbs.onclosetag(this.stack[index], true);
}
}

@@ -401,2 +398,4 @@ (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);

this.buffers.length = 0;
this.foreignContext.length = 0;
this.foreignContext.unshift(!this.htmlMode);
this.bufferOffset = 0;

@@ -403,0 +402,0 @@ this.writeIndex = 0;

@@ -22,3 +22,3 @@ export declare enum QuoteType {

ontext(start: number, endIndex: number): void;
ontextentity(codepoint: number): void;
ontextentity(codepoint: number, endIndex: number): void;
}

@@ -35,2 +35,4 @@ export default class Tokenizer {

private index;
/** The start of the last entity. */
private entityStart;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */

@@ -46,3 +48,3 @@ private baseState;

private readonly decodeEntities;
private readonly entityTrie;
private readonly entityDecoder;
constructor({ xmlMode, decodeEntities, }: {

@@ -57,10 +59,2 @@ xmlMode?: boolean;

resume(): void;
/**
* The current index within all of the written data.
*/
getIndex(): number;
/**
* The start of the current section.
*/
getSectionStart(): number;
private stateText;

@@ -117,15 +111,4 @@ private currentSequence;

private stateBeforeSpecialS;
private trieIndex;
private trieCurrent;
/** For named entities, the index of the value. For numeric entities, the code point. */
private entityResult;
private entityExcess;
private stateBeforeEntity;
private stateInNamedEntity;
private emitNamedEntity;
private stateBeforeNumericEntity;
private emitNumericEntity;
private stateInNumericEntity;
private stateInHexEntity;
private allowLegacyEntity;
private startEntity;
private stateInEntity;
/**

@@ -145,5 +128,4 @@ * Remove data that has already been consumed from the buffer.

private handleTrailingData;
private emitPartial;
private emitCodePoint;
}
//# sourceMappingURL=Tokenizer.d.ts.map

@@ -1,2 +0,2 @@

import { htmlDecodeTree, xmlDecodeTree, BinTrieFlags, determineBranch, replaceCodePoint, } from "entities/lib/decode.js";
import { EntityDecoder, DecodingMode, htmlDecodeTree, xmlDecodeTree, } from "entities/lib/decode.js";
var CharCodes;

@@ -64,7 +64,3 @@ (function (CharCodes) {

State[State["InSpecialTag"] = 24] = "InSpecialTag";
State[State["BeforeEntity"] = 25] = "BeforeEntity";
State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity";
State[State["InNamedEntity"] = 27] = "InNamedEntity";
State[State["InNumericEntity"] = 28] = "InNumericEntity";
State[State["InHexEntity"] = 29] = "InHexEntity";
State[State["InEntity"] = 25] = "InEntity";
})(State || (State = {}));

@@ -81,5 +77,2 @@ function isWhitespace(c) {

}
function isNumber(c) {
return c >= CharCodes.Zero && c <= CharCodes.Nine;
}
function isASCIIAlpha(c) {

@@ -89,6 +82,2 @@ return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||

}
function isHexDigit(c) {
return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) ||
(c >= CharCodes.LowerA && c <= CharCodes.LowerF));
}
export var QuoteType;

@@ -126,2 +115,4 @@ (function (QuoteType) {

this.index = 0;
/** The start of the last entity. */
this.entityStart = 0;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */

@@ -137,10 +128,5 @@ this.baseState = State.Text;

this.sequenceIndex = 0;
this.trieIndex = 0;
this.trieCurrent = 0;
/** For named entities, the index of the value. For numeric entities, the code point. */
this.entityResult = 0;
this.entityExcess = 0;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? xmlDecodeTree : htmlDecodeTree;
this.entityDecoder = new EntityDecoder(xmlMode ? xmlDecodeTree : htmlDecodeTree, (cp, consumed) => this.emitCodePoint(cp, consumed));
}

@@ -175,14 +161,2 @@ reset() {

}
/**
* The current index within all of the written data.
*/
getIndex() {
return this.index;
}
/**
* The start of the current section.
*/
getSectionStart() {
return this.sectionStart;
}
stateText(c) {

@@ -198,3 +172,3 @@ if (c === CharCodes.Lt ||

else if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -246,3 +220,3 @@ }

if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -408,3 +382,2 @@ }

this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;

@@ -423,3 +396,2 @@ }

}
this.baseState = this.state;
this.sectionStart = this.index + 1;

@@ -439,3 +411,2 @@ }

this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;

@@ -498,4 +469,3 @@ this.isSpecial = false; // Reset special state, in case of self-closing special tags

else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -518,4 +488,3 @@ }

else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -581,146 +550,27 @@ }

}
stateBeforeEntity(c) {
// Start excess with 1 to include the '&'
this.entityExcess = 1;
this.entityResult = 0;
if (c === CharCodes.Number) {
this.state = State.BeforeNumericEntity;
}
else if (c === CharCodes.Amp) {
// We have two `&` characters in a row. Stay in the current state.
}
else {
this.trieIndex = 0;
this.trieCurrent = this.entityTrie[0];
this.state = State.InNamedEntity;
this.stateInNamedEntity(c);
}
startEntity() {
this.baseState = this.state;
this.state = State.InEntity;
this.entityStart = this.index;
this.entityDecoder.startEntity(this.xmlMode
? DecodingMode.Strict
: this.baseState === State.Text ||
this.baseState === State.InSpecialTag
? DecodingMode.Legacy
: DecodingMode.Attribute);
}
stateInNamedEntity(c) {
this.entityExcess += 1;
this.trieIndex = determineBranch(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
this.index--;
return;
}
this.trieCurrent = this.entityTrie[this.trieIndex];
const masked = this.trieCurrent & BinTrieFlags.VALUE_LENGTH;
// If the branch is a value, store it and continue
if (masked) {
// The mask is the number of bytes of the value, including the current byte.
const valueLength = (masked >> 14) - 1;
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
this.trieIndex += valueLength;
stateInEntity() {
const length = this.entityDecoder.write(this.buffer, this.index - this.offset);
// If `length` is non-negative, we are done with the entity; a length of 0 rewinds to the entity start.
if (length >= 0) {
this.state = this.baseState;
if (length === 0) {
this.index = this.entityStart;
}
else {
// Add 1 as we have already incremented the excess
const entityStart = this.index - this.entityExcess + 1;
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
// If this is a surrogate pair, consume the next two bytes
this.entityResult = this.trieIndex;
this.trieIndex += valueLength;
this.entityExcess = 0;
this.sectionStart = this.index + 1;
if (valueLength === 0) {
this.emitNamedEntity();
}
}
}
}
emitNamedEntity() {
this.state = this.baseState;
if (this.entityResult === 0) {
return;
}
const valueLength = (this.entityTrie[this.entityResult] & BinTrieFlags.VALUE_LENGTH) >>
14;
switch (valueLength) {
case 1: {
this.emitCodePoint(this.entityTrie[this.entityResult] &
~BinTrieFlags.VALUE_LENGTH);
break;
}
case 2: {
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
break;
}
case 3: {
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
}
}
}
stateBeforeNumericEntity(c) {
if ((c | 0x20) === CharCodes.LowerX) {
this.entityExcess++;
this.state = State.InHexEntity;
}
else {
this.state = State.InNumericEntity;
this.stateInNumericEntity(c);
// Mark buffer as consumed.
this.index = this.offset + this.buffer.length - 1;
}
}
emitNumericEntity(strict) {
const entityStart = this.index - this.entityExcess - 1;
const numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
if (numberStart !== this.index) {
// Emit leading data if any
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
this.sectionStart = this.index + Number(strict);
this.emitCodePoint(replaceCodePoint(this.entityResult));
}
this.state = this.baseState;
}
stateInNumericEntity(c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
}
stateInHexEntity(c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
this.entityExcess++;
}
else if (isHexDigit(c)) {
this.entityResult =
this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
}
allowLegacyEntity() {
return (!this.xmlMode &&
(this.baseState === State.Text ||
this.baseState === State.InSpecialTag));
}
/**

@@ -853,22 +703,6 @@ * Remove data that has already been consumed from the buffer.

}
case State.InNamedEntity: {
this.stateInNamedEntity(c);
case State.InEntity: {
this.stateInEntity();
break;
}
case State.BeforeEntity: {
this.stateBeforeEntity(c);
break;
}
case State.InHexEntity: {
this.stateInHexEntity(c);
break;
}
case State.InNumericEntity: {
this.stateInNumericEntity(c);
break;
}
default: {
// `this._state === State.BeforeNumericEntity`
this.stateBeforeNumericEntity(c);
}
}

@@ -880,9 +714,7 @@ this.index++;

finish() {
if (this.state === State.InNamedEntity) {
this.emitNamedEntity();
if (this.state === State.InEntity) {
this.entityDecoder.end();
this.state = this.baseState;
}
// If there is remaining data, emit it in a reasonable way
if (this.sectionStart < this.index) {
this.handleTrailingData();
}
this.handleTrailingData();
this.cbs.onend();

@@ -893,2 +725,6 @@ }

const endIndex = this.buffer.length + this.offset;
// If there is no remaining data, we are done.
if (this.sectionStart >= endIndex) {
return;
}
if (this.state === State.InCommentLike) {

@@ -902,12 +738,2 @@ if (this.currentSequence === Sequences.CdataEnd) {

}
else if (this.state === State.InNumericEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InHexEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InTagName ||

@@ -931,18 +757,19 @@ this.state === State.BeforeAttributeName ||

}
emitPartial(start, endIndex) {
emitCodePoint(cp, consumed) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribdata(start, endIndex);
}
else {
this.cbs.ontext(start, endIndex);
}
}
emitCodePoint(cp) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart);
}
this.sectionStart = this.entityStart + consumed;
this.index = this.sectionStart - 1;
this.cbs.onattribentity(cp);
}
else {
this.cbs.ontextentity(cp);
if (this.sectionStart < this.entityStart) {
this.cbs.ontext(this.sectionStart, this.entityStart);
}
this.sectionStart = this.entityStart + consumed;
this.index = this.sectionStart - 1;
this.cbs.ontextentity(cp, this.sectionStart);
}

@@ -949,0 +776,0 @@ }

@@ -10,3 +10,3 @@ import { Parser, ParserOptions } from "./Parser.js";

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
*/

@@ -21,3 +21,3 @@ export declare function parseDocument(data: string, options?: Options): Document;

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
* @deprecated Use `parseDocument` instead.

@@ -29,11 +29,20 @@ */

*
* @param callback A callback that will be called once parsing has been completed.
* @param options Optional options for the parser and DOM builder.
* @param callback A callback that will be called once parsing has been completed, with the resulting document.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
*/
export declare function createDocumentStream(callback: (error: Error | null, document: Document) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
/**
* Creates a parser instance, with an attached DOM handler.
*
* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
* @deprecated Use `createDocumentStream` instead.
*/
export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
export * as ElementType from "domelementtype";
import { Feed } from "domutils";
export { getFeed } from "domutils";
import { type Feed } from "domutils";
export { getFeed, type Feed } from "domutils";
/**

@@ -40,0 +49,0 @@ * Parse a feed.

@@ -29,3 +29,3 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
var Parser_js_1 = require("./Parser.js");

@@ -44,3 +44,3 @@ var Parser_js_2 = require("./Parser.js");

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
*/

@@ -60,3 +60,3 @@ function parseDocument(data, options) {

* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @param options Optional options for the parser and DOM handler.
* @deprecated Use `parseDocument` instead.

@@ -71,6 +71,19 @@ */

*
* @param callback A callback that will be called once parsing has been completed.
* @param options Optional options for the parser and DOM builder.
* @param callback A callback that will be called once parsing has been completed, with the resulting document.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
*/
function createDocumentStream(callback, options, elementCallback) {
    // Once the handler reports completion, pass along any error together
    // with the root document the handler has built.
    var onComplete = function (error) { return callback(error, domHandler.root); };
    var domHandler = new domhandler_1.DomHandler(onComplete, options, elementCallback);
    // The same options object configures both the handler and the parser.
    return new Parser_js_1.Parser(domHandler, options);
}
exports.createDocumentStream = createDocumentStream;
/**
* Creates a parser instance, with an attached DOM handler.
*
* @param callback A callback that will be called once parsing has been completed, with an array of root nodes.
* @param options Optional options for the parser and DOM handler.
* @param elementCallback An optional callback that will be called every time a tag has been completed inside of the DOM.
* @deprecated Use `createDocumentStream` instead.
*/
function createDomStream(callback, options, elementCallback) {

@@ -77,0 +90,0 @@ var handler = new domhandler_1.DomHandler(callback, options, elementCallback);

@@ -95,2 +95,3 @@ import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";

private readonly stack;
/** Determines whether self-closing tags are recognized. */
private readonly foreignContext;

@@ -100,2 +101,4 @@ private readonly cbs;

private readonly lowerCaseAttributeNames;
/** We are parsing HTML. Inverse of the `xmlMode` option. */
private readonly htmlMode;
private readonly tokenizer;

@@ -112,3 +115,7 @@ private readonly buffers;

/** @internal */
ontextentity(cp: number): void;
ontextentity(cp: number, endIndex: number): void;
/**
* Checks if the current tag is a void element. Override this if you want
* to specify your own additional void elements.
*/
protected isVoidElement(name: string): boolean;

@@ -115,0 +122,0 @@ /** @internal */

@@ -144,3 +144,2 @@ "use strict";

this.stack = [];
this.foreignContext = [];
this.buffers = [];

@@ -153,6 +152,8 @@ this.bufferOffset = 0;

this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;
this.htmlMode = !this.options.xmlMode;
this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
this.lowerCaseAttributeNames =
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this);
this.foreignContext = [!this.htmlMode];
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);

@@ -170,15 +171,14 @@ }

/** @internal */
Parser.prototype.ontextentity = function (cp) {
Parser.prototype.ontextentity = function (cp, endIndex) {
var _a, _b;
/*
* Entities can be emitted on the character, or directly after.
* We use the section start here to get accurate indices.
*/
var index = this.tokenizer.getSectionStart();
this.endIndex = index - 1;
this.endIndex = endIndex - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, (0, decode_js_1.fromCodePoint)(cp));
this.startIndex = index;
this.startIndex = endIndex;
};
/**
* Checks if the current tag is a void element. Override this if you want
* to specify your own additional void elements.
*/
Parser.prototype.isVoidElement = function (name) {
return !this.options.xmlMode && voidElements.has(name);
return this.htmlMode && voidElements.has(name);
};

@@ -198,7 +198,6 @@ /** @internal */

this.tagname = name;
var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
var impliesClose = this.htmlMode && openImpliesClose.get(name);
if (impliesClose) {
while (this.stack.length > 0 &&
impliesClose.has(this.stack[this.stack.length - 1])) {
var element = this.stack.pop();
while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
var element = this.stack.shift();
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);

@@ -208,9 +207,11 @@ }

if (!this.isVoidElement(name)) {
this.stack.push(name);
if (foreignContextElements.has(name)) {
this.foreignContext.push(true);
this.stack.unshift(name);
if (this.htmlMode) {
if (foreignContextElements.has(name)) {
this.foreignContext.unshift(true);
}
else if (htmlIntegrationElements.has(name)) {
this.foreignContext.unshift(false);
}
}
else if (htmlIntegrationElements.has(name)) {
this.foreignContext.push(false);
}
}

@@ -242,3 +243,3 @@ (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);

Parser.prototype.onclosetag = function (start, endIndex) {
var _a, _b, _c, _d, _e, _f;
var _a, _b, _c, _d, _e, _f, _g, _h;
this.endIndex = endIndex;

@@ -249,20 +250,17 @@ var name = this.getSlice(start, endIndex);

}
if (foreignContextElements.has(name) ||
htmlIntegrationElements.has(name)) {
this.foreignContext.pop();
if (this.htmlMode &&
(foreignContextElements.has(name) ||
htmlIntegrationElements.has(name))) {
this.foreignContext.shift();
}
if (!this.isVoidElement(name)) {
var pos = this.stack.lastIndexOf(name);
var pos = this.stack.indexOf(name);
if (pos !== -1) {
if (this.cbs.onclosetag) {
var count = this.stack.length - pos;
while (count--) {
// We know the stack has sufficient elements.
this.cbs.onclosetag(this.stack.pop(), count !== 0);
}
for (var index = 0; index <= pos; index++) {
var element = this.stack.shift();
// We know the stack has sufficient elements.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
}
else
this.stack.length = pos;
}
else if (!this.options.xmlMode && name === "p") {
else if (this.htmlMode && name === "p") {
// Implicit open before close

@@ -273,7 +271,7 @@ this.emitOpenTag("p");

}
else if (!this.options.xmlMode && name === "br") {
else if (this.htmlMode && name === "br") {
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
(_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, "br");
(_f = (_e = this.cbs).onopentag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
(_h = (_g = this.cbs).onclosetag) === null || _h === void 0 ? void 0 : _h.call(_g, "br", false);
}

@@ -286,5 +284,3 @@ // Set `startIndex` for next node

this.endIndex = endIndex;
if (this.options.xmlMode ||
this.options.recognizeSelfClosing ||
this.foreignContext[this.foreignContext.length - 1]) {
if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
this.closeCurrentTag(false);

@@ -304,6 +300,6 @@ // Set `startIndex` for next node

// Self-closing tags will be on the top of the stack
if (this.stack[this.stack.length - 1] === name) {
if (this.stack[0] === name) {
// If the opening tag isn't implied, the closing tag has to be implied.
(_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
this.stack.pop();
this.stack.shift();
}

@@ -388,3 +384,3 @@ };

var value = this.getSlice(start, endIndex - offset);
if (this.options.xmlMode || this.options.recognizeCDATA) {
if (!this.htmlMode || this.options.recognizeCDATA) {
(_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);

@@ -407,4 +403,5 @@ (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);

this.endIndex = this.startIndex;
for (var index = this.stack.length; index > 0; this.cbs.onclosetag(this.stack[--index], true))
;
for (var index = 0; index < this.stack.length; index++) {
this.cbs.onclosetag(this.stack[index], true);
}
}

@@ -428,2 +425,4 @@ (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);

this.buffers.length = 0;
this.foreignContext.length = 0;
this.foreignContext.unshift(!this.htmlMode);
this.bufferOffset = 0;

@@ -430,0 +429,0 @@ this.writeIndex = 0;

@@ -22,3 +22,3 @@ export declare enum QuoteType {

ontext(start: number, endIndex: number): void;
ontextentity(codepoint: number): void;
ontextentity(codepoint: number, endIndex: number): void;
}

@@ -35,2 +35,4 @@ export default class Tokenizer {

private index;
/** The start of the last entity. */
private entityStart;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */

@@ -46,3 +48,3 @@ private baseState;

private readonly decodeEntities;
private readonly entityTrie;
private readonly entityDecoder;
constructor({ xmlMode, decodeEntities, }: {

@@ -57,10 +59,2 @@ xmlMode?: boolean;

resume(): void;
/**
* The current index within all of the written data.
*/
getIndex(): number;
/**
* The start of the current section.
*/
getSectionStart(): number;
private stateText;

@@ -117,15 +111,4 @@ private currentSequence;

private stateBeforeSpecialS;
private trieIndex;
private trieCurrent;
/** For named entities, the index of the value. For numeric entities, the code point. */
private entityResult;
private entityExcess;
private stateBeforeEntity;
private stateInNamedEntity;
private emitNamedEntity;
private stateBeforeNumericEntity;
private emitNumericEntity;
private stateInNumericEntity;
private stateInHexEntity;
private allowLegacyEntity;
private startEntity;
private stateInEntity;
/**

@@ -145,5 +128,4 @@ * Remove data that has already been consumed from the buffer.

private handleTrailingData;
private emitPartial;
private emitCodePoint;
}
//# sourceMappingURL=Tokenizer.d.ts.map

@@ -67,7 +67,3 @@ "use strict";

State[State["InSpecialTag"] = 24] = "InSpecialTag";
State[State["BeforeEntity"] = 25] = "BeforeEntity";
State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity";
State[State["InNamedEntity"] = 27] = "InNamedEntity";
State[State["InNumericEntity"] = 28] = "InNumericEntity";
State[State["InHexEntity"] = 29] = "InHexEntity";
State[State["InEntity"] = 25] = "InEntity";
})(State || (State = {}));

@@ -84,5 +80,2 @@ function isWhitespace(c) {

}
function isNumber(c) {
return c >= CharCodes.Zero && c <= CharCodes.Nine;
}
function isASCIIAlpha(c) {

@@ -92,6 +85,2 @@ return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||

}
function isHexDigit(c) {
return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) ||
(c >= CharCodes.LowerA && c <= CharCodes.LowerF));
}
var QuoteType;

@@ -121,2 +110,3 @@ (function (QuoteType) {

var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c;
var _this = this;
this.cbs = cbs;

@@ -131,2 +121,4 @@ /** The current state the tokenizer is in. */

this.index = 0;
/** The start of the last entity. */
this.entityStart = 0;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */

@@ -142,10 +134,5 @@ this.baseState = State.Text;

this.sequenceIndex = 0;
this.trieIndex = 0;
this.trieCurrent = 0;
/** For named entities, the index of the value. For numeric entities, the code point. */
this.entityResult = 0;
this.entityExcess = 0;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree;
this.entityDecoder = new decode_js_1.EntityDecoder(xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree, function (cp, consumed) { return _this.emitCodePoint(cp, consumed); });
}

@@ -180,14 +167,2 @@ Tokenizer.prototype.reset = function () {

};
/**
* The current index within all of the written data.
*/
Tokenizer.prototype.getIndex = function () {
return this.index;
};
/**
* The start of the current section.
*/
Tokenizer.prototype.getSectionStart = function () {
return this.sectionStart;
};
Tokenizer.prototype.stateText = function (c) {

@@ -203,3 +178,3 @@ if (c === CharCodes.Lt ||

else if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -251,3 +226,3 @@ };

if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -413,3 +388,2 @@ }

this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;

@@ -428,3 +402,2 @@ }

}
this.baseState = this.state;
this.sectionStart = this.index + 1;

@@ -444,3 +417,2 @@ }

this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;

@@ -503,4 +475,3 @@ this.isSpecial = false; // Reset special state, in case of self-closing special tags

else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -523,4 +494,3 @@ };

else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
this.startEntity();
}

@@ -586,146 +556,27 @@ };

};
Tokenizer.prototype.stateBeforeEntity = function (c) {
// Start excess with 1 to include the '&'
this.entityExcess = 1;
this.entityResult = 0;
if (c === CharCodes.Number) {
this.state = State.BeforeNumericEntity;
}
else if (c === CharCodes.Amp) {
// We have two `&` characters in a row. Stay in the current state.
}
else {
this.trieIndex = 0;
this.trieCurrent = this.entityTrie[0];
this.state = State.InNamedEntity;
this.stateInNamedEntity(c);
}
Tokenizer.prototype.startEntity = function () {
this.baseState = this.state;
this.state = State.InEntity;
this.entityStart = this.index;
this.entityDecoder.startEntity(this.xmlMode
? decode_js_1.DecodingMode.Strict
: this.baseState === State.Text ||
this.baseState === State.InSpecialTag
? decode_js_1.DecodingMode.Legacy
: decode_js_1.DecodingMode.Attribute);
};
Tokenizer.prototype.stateInNamedEntity = function (c) {
this.entityExcess += 1;
this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
this.index--;
return;
}
this.trieCurrent = this.entityTrie[this.trieIndex];
var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH;
// If the branch is a value, store it and continue
if (masked) {
// The mask is the number of bytes of the value, including the current byte.
var valueLength = (masked >> 14) - 1;
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
this.trieIndex += valueLength;
Tokenizer.prototype.stateInEntity = function () {
var length = this.entityDecoder.write(this.buffer, this.index - this.offset);
// If `length` is positive, we are done with the entity.
if (length >= 0) {
this.state = this.baseState;
if (length === 0) {
this.index = this.entityStart;
}
else {
// Add 1 as we have already incremented the excess
var entityStart = this.index - this.entityExcess + 1;
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
// If this is a surrogate pair, consume the next two bytes
this.entityResult = this.trieIndex;
this.trieIndex += valueLength;
this.entityExcess = 0;
this.sectionStart = this.index + 1;
if (valueLength === 0) {
this.emitNamedEntity();
}
}
}
};
Tokenizer.prototype.emitNamedEntity = function () {
this.state = this.baseState;
if (this.entityResult === 0) {
return;
}
var valueLength = (this.entityTrie[this.entityResult] & decode_js_1.BinTrieFlags.VALUE_LENGTH) >>
14;
switch (valueLength) {
case 1: {
this.emitCodePoint(this.entityTrie[this.entityResult] &
~decode_js_1.BinTrieFlags.VALUE_LENGTH);
break;
}
case 2: {
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
break;
}
case 3: {
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
}
}
};
Tokenizer.prototype.stateBeforeNumericEntity = function (c) {
if ((c | 0x20) === CharCodes.LowerX) {
this.entityExcess++;
this.state = State.InHexEntity;
}
else {
this.state = State.InNumericEntity;
this.stateInNumericEntity(c);
// Mark buffer as consumed.
this.index = this.offset + this.buffer.length - 1;
}
};
Tokenizer.prototype.emitNumericEntity = function (strict) {
var entityStart = this.index - this.entityExcess - 1;
var numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
if (numberStart !== this.index) {
// Emit leading data if any
if (entityStart > this.sectionStart) {
this.emitPartial(this.sectionStart, entityStart);
}
this.sectionStart = this.index + Number(strict);
this.emitCodePoint((0, decode_js_1.replaceCodePoint)(this.entityResult));
}
this.state = this.baseState;
};
Tokenizer.prototype.stateInNumericEntity = function (c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
};
Tokenizer.prototype.stateInHexEntity = function (c) {
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (isNumber(c)) {
this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
this.entityExcess++;
}
else if (isHexDigit(c)) {
this.entityResult =
this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.emitNumericEntity(false);
}
else {
this.state = this.baseState;
}
this.index--;
}
};
Tokenizer.prototype.allowLegacyEntity = function () {
return (!this.xmlMode &&
(this.baseState === State.Text ||
this.baseState === State.InSpecialTag));
};
/**

@@ -858,22 +709,6 @@ * Remove data that has already been consumed from the buffer.

}
case State.InNamedEntity: {
this.stateInNamedEntity(c);
case State.InEntity: {
this.stateInEntity();
break;
}
case State.BeforeEntity: {
this.stateBeforeEntity(c);
break;
}
case State.InHexEntity: {
this.stateInHexEntity(c);
break;
}
case State.InNumericEntity: {
this.stateInNumericEntity(c);
break;
}
default: {
// `this._state === State.BeforeNumericEntity`
this.stateBeforeNumericEntity(c);
}
}

@@ -885,9 +720,7 @@ this.index++;

Tokenizer.prototype.finish = function () {
if (this.state === State.InNamedEntity) {
this.emitNamedEntity();
if (this.state === State.InEntity) {
this.entityDecoder.end();
this.state = this.baseState;
}
// If there is remaining data, emit it in a reasonable way
if (this.sectionStart < this.index) {
this.handleTrailingData();
}
this.handleTrailingData();
this.cbs.onend();

@@ -898,2 +731,6 @@ };

var endIndex = this.buffer.length + this.offset;
// If there is no remaining data, we are done.
if (this.sectionStart >= endIndex) {
return;
}
if (this.state === State.InCommentLike) {

@@ -907,12 +744,2 @@ if (this.currentSequence === Sequences.CdataEnd) {

}
else if (this.state === State.InNumericEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InHexEntity &&
this.allowLegacyEntity()) {
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this.state === State.InTagName ||

@@ -936,18 +763,19 @@ this.state === State.BeforeAttributeName ||

};
Tokenizer.prototype.emitPartial = function (start, endIndex) {
Tokenizer.prototype.emitCodePoint = function (cp, consumed) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribdata(start, endIndex);
}
else {
this.cbs.ontext(start, endIndex);
}
};
Tokenizer.prototype.emitCodePoint = function (cp) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart);
}
this.sectionStart = this.entityStart + consumed;
this.index = this.sectionStart - 1;
this.cbs.onattribentity(cp);
}
else {
this.cbs.ontextentity(cp);
if (this.sectionStart < this.entityStart) {
this.cbs.ontext(this.sectionStart, this.entityStart);
}
this.sectionStart = this.entityStart + consumed;
this.index = this.sectionStart - 1;
this.cbs.ontextentity(cp, this.sectionStart);
}

@@ -954,0 +782,0 @@ };

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "8.0.2",
"version": "9.0.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -66,18 +66,18 @@ "funding": [

"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
"domutils": "^3.1.0",
"entities": "^4.5.0"
},
"devDependencies": {
"@types/jest": "^29.5.0",
"@types/node": "^18.15.5",
"@typescript-eslint/eslint-plugin": "^5.56.0",
"@typescript-eslint/parser": "^5.56.0",
"eslint": "^8.36.0",
"@types/jest": "^29.5.1",
"@types/node": "^20.1.1",
"@typescript-eslint/eslint-plugin": "^5.59.5",
"@typescript-eslint/parser": "^5.59.5",
"eslint": "^8.40.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-n": "^15.6.1",
"eslint-plugin-unicorn": "^46.0.0",
"eslint-plugin-n": "^15.7.0",
"eslint-plugin-unicorn": "^47.0.0",
"jest": "^29.5.0",
"prettier": "^2.8.6",
"ts-jest": "^29.0.5",
"typescript": "^4.9.5"
"prettier": "^2.8.8",
"ts-jest": "^29.1.0",
"typescript": "^5.0.4"
},

@@ -84,0 +84,0 @@ "jest": {

@@ -120,4 +120,6 @@ # htmlparser2

## Parsing RSS/RDF/Atom Feeds
## Parsing Feeds
`htmlparser2` makes it easy to parse RSS, RDF and Atom feeds, by providing a `parseFeed` method:
```javascript

@@ -127,5 +129,2 @@ const feed = htmlparser2.parseFeed(content, options);

Note: While the provided feed handler works for most feeds,
you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.
## Performance

@@ -132,0 +131,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc