Socket
Socket
Sign in Demo Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 7.2.0 to 8.0.0

lib/esm/index.d.ts

25

lib/index.d.ts

@@ -1,5 +0,5 @@

import { Parser, ParserOptions } from "./Parser";
export { Parser, ParserOptions };
import { DomHandler, DomHandlerOptions, Node, Element, Document } from "domhandler";
export { DomHandler, DomHandlerOptions };
import { Parser, ParserOptions } from "./Parser.js";
export { Parser, type ParserOptions };
import { DomHandler, DomHandlerOptions, ChildNode, Element, Document } from "domhandler";
export { DomHandler, type DomHandlerOptions };
declare type Options = ParserOptions & DomHandlerOptions;

@@ -23,3 +23,3 @@ /**

*/
export declare function parseDOM(data: string, options?: Options): Node[];
export declare function parseDOM(data: string, options?: Options): ChildNode[];
/**

@@ -32,10 +32,17 @@ * Creates a parser instance, with an attached DOM handler.

*/
export declare function createDomStream(cb: (error: Error | null, dom: Node[]) => void, options?: Options, elementCb?: (element: Element) => void): Parser;
export { default as Tokenizer, Callbacks as TokenizerCallbacks, } from "./Tokenizer";
export declare function createDomStream(cb: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCb?: (element: Element) => void): Parser;
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
import * as ElementType from "domelementtype";
export { ElementType };
export * from "./FeedHandler";
import { getFeed, Feed } from "domutils";
export { getFeed };
/**
* Parse a feed.
*
* @param feed The feed that should be parsed, as a string.
* @param options Optionally, options for parsing. When using this, you should set `xmlMode` to `true`.
*/
export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | null;
export * as DomUtils from "domutils";
export { DomHandler as DefaultHandler };
export { FeedHandler as RssHandler } from "./FeedHandler";
//# sourceMappingURL=index.d.ts.map
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {

@@ -21,5 +25,2 @@ if (k2 === undefined) k2 = k;

};
var __exportStar = (this && this.__exportStar) || function(m, exports) {
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
var __importDefault = (this && this.__importDefault) || function (mod) {

@@ -29,5 +30,5 @@ return (mod && mod.__esModule) ? mod : { "default": mod };

Object.defineProperty(exports, "__esModule", { value: true });
exports.RssHandler = exports.DefaultHandler = exports.DomUtils = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DomHandler = exports.Parser = void 0;
var Parser_1 = require("./Parser");
Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_1.Parser; } });
exports.DefaultHandler = exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DomHandler = exports.Parser = void 0;
var Parser_js_1 = require("./Parser.js");
Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_js_1.Parser; } });
var domhandler_1 = require("domhandler");

@@ -45,3 +46,3 @@ Object.defineProperty(exports, "DomHandler", { enumerable: true, get: function () { return domhandler_1.DomHandler; } });

var handler = new domhandler_1.DomHandler(undefined, options);
new Parser_1.Parser(handler, options).end(data);
new Parser_js_1.Parser(handler, options).end(data);
return handler.root;

@@ -73,9 +74,7 @@ }

var handler = new domhandler_1.DomHandler(cb, options, elementCb);
return new Parser_1.Parser(handler, options);
return new Parser_js_1.Parser(handler, options);
}
exports.createDomStream = createDomStream;
var Tokenizer_1 = require("./Tokenizer");
Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_1).default; } });
var ElementType = __importStar(require("domelementtype"));
exports.ElementType = ElementType;
var Tokenizer_js_1 = require("./Tokenizer.js");
Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_js_1).default; } });
/*

@@ -85,5 +84,18 @@ * All of the following exports exist for backwards-compatibility.

*/
__exportStar(require("./FeedHandler"), exports);
var ElementType = __importStar(require("domelementtype"));
exports.ElementType = ElementType;
var domutils_1 = require("domutils");
Object.defineProperty(exports, "getFeed", { enumerable: true, get: function () { return domutils_1.getFeed; } });
/**
 * Parse a feed.
 *
 * The input is run through `parseDOM`, and the resulting nodes are handed to
 * `getFeed` from `domutils`; whatever `getFeed` returns is returned as-is.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optionally, options for parsing. When omitted, `{ xmlMode: true }`
 *   is used. A supplied object replaces that default entirely (nothing is merged),
 *   so when using this, you should set `xmlMode` to `true` yourself.
 */
function parseFeed(feed, options) {
    if (options === undefined) {
        options = { xmlMode: true };
    }
    var dom = parseDOM(feed, options);
    return (0, domutils_1.getFeed)(dom);
}
exports.parseFeed = parseFeed;
exports.DomUtils = __importStar(require("domutils"));
var FeedHandler_1 = require("./FeedHandler");
Object.defineProperty(exports, "RssHandler", { enumerable: true, get: function () { return FeedHandler_1.FeedHandler; } });
//# sourceMappingURL=index.js.map

@@ -1,2 +0,2 @@

import Tokenizer from "./Tokenizer";
import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";
export interface ParserOptions {

@@ -79,3 +79,3 @@ /**

}
export declare class Parser {
export declare class Parser implements Callbacks {
private readonly options;

@@ -101,35 +101,43 @@ /** The start index of the last event. */

private readonly tokenizer;
private readonly buffers;
private bufferOffset;
/** The index of the last written buffer. Used when resuming after a `pause()`. */
private writeIndex;
/** Indicates whether the parser has finished running / `.end` has been called. */
private ended;
constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
/** @internal */
ontext(data: string): void;
ontext(start: number, endIndex: number): void;
/** @internal */
ontextentity(cp: number): void;
protected isVoidElement(name: string): boolean;
/** @internal */
onopentagname(name: string): void;
onopentagname(start: number, endIndex: number): void;
private emitOpenTag;
private endOpenTag;
/** @internal */
onopentagend(): void;
onopentagend(endIndex: number): void;
/** @internal */
onclosetag(name: string): void;
onclosetag(start: number, endIndex: number): void;
/** @internal */
onselfclosingtag(): void;
onselfclosingtag(endIndex: number): void;
private closeCurrentTag;
/** @internal */
onattribname(name: string): void;
onattribname(start: number, endIndex: number): void;
/** @internal */
onattribdata(value: string): void;
onattribdata(start: number, endIndex: number): void;
/** @internal */
onattribend(quote: string | undefined | null): void;
onattribentity(cp: number): void;
/** @internal */
onattribend(quote: QuoteType, endIndex: number): void;
private getInstructionName;
/** @internal */
ondeclaration(value: string): void;
ondeclaration(start: number, endIndex: number): void;
/** @internal */
onprocessinginstruction(value: string): void;
onprocessinginstruction(start: number, endIndex: number): void;
/** @internal */
oncomment(value: string): void;
oncomment(start: number, endIndex: number, offset: number): void;
/** @internal */
oncdata(value: string): void;
oncdata(start: number, endIndex: number, offset: number): void;
/** @internal */
onerror(err: Error): void;
/** @internal */
onend(): void;

@@ -147,2 +155,4 @@ /**

parseComplete(data: string): void;
private getSlice;
private shiftBuffer;
/**

@@ -149,0 +159,0 @@ * Parses a chunk of data and calls the corresponding callbacks.

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
var Tokenizer_1 = __importDefault(require("./Tokenizer"));
var Tokenizer_js_1 = __importStar(require("./Tokenizer.js"));
var decode_js_1 = require("entities/lib/decode.js");
var formTags = new Set([

@@ -124,2 +145,8 @@ "input",

this.foreignContext = [];
this.buffers = [];
this.bufferOffset = 0;
/** The index of the last written buffer. Used when resuming after a `pause()`. */
this.writeIndex = 0;
/** Indicates whether the parser has finished running / `.end` has been called. */
this.ended = false;
this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};

@@ -129,3 +156,3 @@ this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode;

(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode;
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_1.default)(this.options, this);
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this);
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);

@@ -135,7 +162,19 @@ }

/** @internal */
Parser.prototype.ontext = function (data) {
Parser.prototype.ontext = function (start, endIndex) {
var _a, _b;
var idx = this.tokenizer.getAbsoluteIndex();
var data = this.getSlice(start, endIndex);
this.endIndex = endIndex - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
this.startIndex = endIndex;
};
/** @internal */
Parser.prototype.ontextentity = function (cp) {
var _a, _b;
/*
* Entities can be emitted on the character, or directly after.
* We use the section start here to get accurate indices.
*/
var idx = this.tokenizer.getSectionStart();
this.endIndex = idx - 1;
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
(_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, (0, decode_js_1.fromCodePoint)(cp));
this.startIndex = idx;

@@ -147,4 +186,5 @@ };

/** @internal */
Parser.prototype.onopentagname = function (name) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
Parser.prototype.onopentagname = function (start, endIndex) {
this.endIndex = endIndex;
var name = this.getSlice(start, endIndex);
if (this.lowerCaseTagNames) {

@@ -183,3 +223,2 @@ name = name.toLowerCase();

this.startIndex = this.openTagStart;
this.endIndex = this.tokenizer.getAbsoluteIndex();
if (this.attribs) {

@@ -195,11 +234,13 @@ (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);

/** @internal */
Parser.prototype.onopentagend = function () {
Parser.prototype.onopentagend = function (endIndex) {
this.endIndex = endIndex;
this.endOpenTag(false);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.onclosetag = function (name) {
Parser.prototype.onclosetag = function (start, endIndex) {
var _a, _b, _c, _d, _e, _f;
this.endIndex = this.tokenizer.getAbsoluteIndex();
this.endIndex = endIndex;
var name = this.getSlice(start, endIndex);
if (this.lowerCaseTagNames) {

@@ -226,3 +267,4 @@ name = name.toLowerCase();

else if (!this.options.xmlMode && name === "p") {
this.emitOpenTag(name);
// Implicit open before close
this.emitOpenTag("p");
this.closeCurrentTag(true);

@@ -232,12 +274,13 @@ }

else if (!this.options.xmlMode && name === "br") {
// We can't go through `emitOpenTag` here, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name);
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, name, {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, name, false);
// We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
(_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, "br");
(_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, "br", {}, true);
(_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", false);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.onselfclosingtag = function () {
Parser.prototype.onselfclosingtag = function (endIndex) {
this.endIndex = endIndex;
if (this.options.xmlMode ||

@@ -248,7 +291,7 @@ this.options.recognizeSelfClosing ||

// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
}
else {
// Ignore the fact that the tag is self-closing.
this.onopentagend();
this.onopentagend(endIndex);
}

@@ -268,18 +311,28 @@ };

/** @internal */
Parser.prototype.onattribname = function (name) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
if (this.lowerCaseAttributeNames) {
name = name.toLowerCase();
}
this.attribname = name;
Parser.prototype.onattribname = function (start, endIndex) {
this.startIndex = start;
var name = this.getSlice(start, endIndex);
this.attribname = this.lowerCaseAttributeNames
? name.toLowerCase()
: name;
};
/** @internal */
Parser.prototype.onattribdata = function (value) {
this.attribvalue += value;
Parser.prototype.onattribdata = function (start, endIndex) {
this.attribvalue += this.getSlice(start, endIndex);
};
/** @internal */
Parser.prototype.onattribend = function (quote) {
Parser.prototype.onattribentity = function (cp) {
    // Convert the code point with `fromCodePoint` and append the result to
    // the attribute value that is currently being accumulated.
    var decoded = (0, decode_js_1.fromCodePoint)(cp);
    this.attribvalue += decoded;
};
/** @internal */
Parser.prototype.onattribend = function (quote, endIndex) {
var _a, _b;
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote);
this.endIndex = endIndex;
(_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote === Tokenizer_js_1.QuoteType.Double
? '"'
: quote === Tokenizer_js_1.QuoteType.Single
? "'"
: quote === Tokenizer_js_1.QuoteType.NoValue
? undefined
: null);
if (this.attribs &&

@@ -289,3 +342,2 @@ !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {

}
this.attribname = "";
this.attribvalue = "";

@@ -302,34 +354,37 @@ };

/** @internal */
Parser.prototype.ondeclaration = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
Parser.prototype.ondeclaration = function (start, endIndex) {
this.endIndex = endIndex;
var value = this.getSlice(start, endIndex);
if (this.cbs.onprocessinginstruction) {
var name_1 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("!" + name_1, "!" + value);
var name = this.getInstructionName(value);
this.cbs.onprocessinginstruction("!".concat(name), "!".concat(value));
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.onprocessinginstruction = function (value) {
this.endIndex = this.tokenizer.getAbsoluteIndex();
Parser.prototype.onprocessinginstruction = function (start, endIndex) {
this.endIndex = endIndex;
var value = this.getSlice(start, endIndex);
if (this.cbs.onprocessinginstruction) {
var name_2 = this.getInstructionName(value);
this.cbs.onprocessinginstruction("?" + name_2, "?" + value);
var name = this.getInstructionName(value);
this.cbs.onprocessinginstruction("?".concat(name), "?".concat(value));
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.oncomment = function (value) {
Parser.prototype.oncomment = function (start, endIndex, offset) {
var _a, _b, _c, _d;
this.endIndex = this.tokenizer.getAbsoluteIndex();
(_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, value);
this.endIndex = endIndex;
(_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, this.getSlice(start, endIndex - offset));
(_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.oncdata = function (value) {
Parser.prototype.oncdata = function (start, endIndex, offset) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
this.endIndex = this.tokenizer.getAbsoluteIndex();
this.endIndex = endIndex;
var value = this.getSlice(start, endIndex - offset);
if (this.options.xmlMode || this.options.recognizeCDATA) {

@@ -341,14 +396,9 @@ (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);

else {
(_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[" + value + "]]");
(_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[".concat(value, "]]"));
(_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
}
// Set `startIndex` for next node
this.startIndex = this.endIndex + 1;
this.startIndex = endIndex + 1;
};
/** @internal */
Parser.prototype.onerror = function (err) {
var _a, _b;
(_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, err);
};
/** @internal */
Parser.prototype.onend = function () {

@@ -374,6 +424,10 @@ var _a, _b;

this.attribs = null;
this.stack = [];
this.stack.length = 0;
this.startIndex = 0;
this.endIndex = 0;
(_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);
this.buffers.length = 0;
this.bufferOffset = 0;
this.writeIndex = 0;
this.ended = false;
};

@@ -390,2 +444,18 @@ /**

};
/**
 * Returns the text between two offsets, `start` (inclusive) and `end`
 * (exclusive). Offsets are translated into positions inside the first
 * buffered chunk by subtracting `this.bufferOffset`.
 *
 * Side effect: chunks lying entirely before `start`, and chunks that are
 * fully consumed while collecting the range, are dropped via `shiftBuffer()`.
 *
 * NOTE(review): assumes the requested range is covered by the chunks in
 * `this.buffers`; if the queue runs empty, reading `.length` throws.
 */
Parser.prototype.getSlice = function (start, end) {
    // Skip chunks that end at or before `start`.
    for (; start - this.bufferOffset >= this.buffers[0].length;) {
        this.shiftBuffer();
    }
    var from = start - this.bufferOffset;
    var to = end - this.bufferOffset;
    var collected = this.buffers[0].slice(from, to);
    // The range may continue into following chunks: keep taking each next
    // chunk's leading part until `end` falls inside the current chunk.
    for (;;) {
        if (!(end - this.bufferOffset > this.buffers[0].length)) {
            return collected;
        }
        this.shiftBuffer();
        collected += this.buffers[0].slice(0, end - this.bufferOffset);
    }
};
/**
 * Drops the first buffered chunk. `bufferOffset` grows by the dropped
 * chunk's length and `writeIndex` is decremented, since every remaining
 * chunk moves one slot towards the front of `this.buffers`.
 */
Parser.prototype.shiftBuffer = function () {
    var dropped = this.buffers.shift();
    this.bufferOffset += dropped.length;
    this.writeIndex -= 1;
};
/**

@@ -397,3 +467,12 @@ * Parses a chunk of data and calls the corresponding callbacks.

Parser.prototype.write = function (chunk) {
this.tokenizer.write(chunk);
var _a, _b;
if (this.ended) {
(_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".write() after done!"));
return;
}
this.buffers.push(chunk);
if (this.tokenizer.running) {
this.tokenizer.write(chunk);
this.writeIndex++;
}
};

@@ -406,3 +485,11 @@ /**

Parser.prototype.end = function (chunk) {
this.tokenizer.end(chunk);
var _a, _b;
if (this.ended) {
(_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, Error(".end() after done!"));
return;
}
if (chunk)
this.write(chunk);
this.ended = true;
this.tokenizer.end();
};

@@ -420,2 +507,8 @@ /**

this.tokenizer.resume();
while (this.tokenizer.running &&
this.writeIndex < this.buffers.length) {
this.tokenizer.write(this.buffers[this.writeIndex++]);
}
if (this.ended)
this.tokenizer.end();
};

@@ -443,1 +536,2 @@ /**

exports.Parser = Parser;
//# sourceMappingURL=Parser.js.map

@@ -1,48 +0,23 @@

/** All the states the tokenizer can be in. */
declare const enum State {
Text = 1,
BeforeTagName = 2,
InTagName = 3,
InSelfClosingTag = 4,
BeforeClosingTagName = 5,
InClosingTagName = 6,
AfterClosingTagName = 7,
BeforeAttributeName = 8,
InAttributeName = 9,
AfterAttributeName = 10,
BeforeAttributeValue = 11,
InAttributeValueDq = 12,
InAttributeValueSq = 13,
InAttributeValueNq = 14,
BeforeDeclaration = 15,
InDeclaration = 16,
InProcessingInstruction = 17,
BeforeComment = 18,
CDATASequence = 19,
InSpecialComment = 20,
InCommentLike = 21,
BeforeSpecialS = 22,
SpecialStartSequence = 23,
InSpecialTag = 24,
BeforeEntity = 25,
BeforeNumericEntity = 26,
InNamedEntity = 27,
InNumericEntity = 28,
InHexEntity = 29
export declare enum QuoteType {
NoValue = 0,
Unquoted = 1,
Single = 2,
Double = 3
}
export interface Callbacks {
onattribdata(value: string): void;
onattribend(quote: string | undefined | null): void;
onattribname(name: string): void;
oncdata(data: string): void;
onclosetag(name: string): void;
oncomment(data: string): void;
ondeclaration(content: string): void;
onattribdata(start: number, endIndex: number): void;
onattribentity(codepoint: number): void;
onattribend(quote: QuoteType, endIndex: number): void;
onattribname(start: number, endIndex: number): void;
oncdata(start: number, endIndex: number, endOffset: number): void;
onclosetag(start: number, endIndex: number): void;
oncomment(start: number, endIndex: number, endOffset: number): void;
ondeclaration(start: number, endIndex: number): void;
onend(): void;
onerror(error: Error, state?: State): void;
onopentagend(): void;
onopentagname(name: string): void;
onprocessinginstruction(instruction: string): void;
onselfclosingtag(): void;
ontext(value: string): void;
onopentagend(endIndex: number): void;
onopentagname(start: number, endIndex: number): void;
onprocessinginstruction(start: number, endIndex: number): void;
onselfclosingtag(endIndex: number): void;
ontext(start: number, endIndex: number): void;
ontextentity(codepoint: number): void;
}

@@ -52,14 +27,9 @@ export default class Tokenizer {

/** The current state the tokenizer is in. */
private _state;
private state;
/** The read buffer. */
private buffer;
/** The beginning of the section that is currently being read. */
sectionStart: number;
private sectionStart;
/** The index within the buffer that we are currently looking at. */
private _index;
/**
* Data that has already been processed will be removed from the buffer occasionally.
* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
*/
private bufferOffset;
private index;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */

@@ -70,5 +40,5 @@ private baseState;

/** Indicates whether the tokenizer has been paused. */
private running;
/** Indicates whether the tokenizer has finished running / `.end` has been called. */
private ended;
running: boolean;
/** The offset of the current buffer. */
private offset;
private readonly xmlMode;

@@ -83,13 +53,13 @@ private readonly decodeEntities;

write(chunk: string): void;
end(chunk?: string): void;
end(): void;
pause(): void;
resume(): void;
/**
* The start of the current section.
* The current index within all of the written data.
*/
getAbsoluteSectionStart(): number;
getIndex(): number;
/**
* The current index within all of the written data.
* The start of the current section.
*/
getAbsoluteIndex(): number;
getSectionStart(): number;
private stateText;

@@ -148,3 +118,4 @@ private currentSequence;

private trieCurrent;
private trieResult;
/** For named entities, the index of the value. For numeric entities, the code point. */
private entityResult;
private entityExcess;

@@ -155,3 +126,3 @@ private stateBeforeEntity;

private stateBeforeNumericEntity;
private decodeNumericEntity;
private emitNumericEntity;
private stateInNumericEntity;

@@ -174,6 +145,5 @@ private stateInHexEntity;

private handleTrailingData;
private getSection;
private emitPartial;
private emitCodePoint;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint"));
var decode_1 = require("entities/lib/decode");
exports.QuoteType = void 0;
var decode_js_1 = require("entities/lib/decode.js");
/**
 * Named character codes, stored as a two-way map: `CharCodes.Tab === 9`
 * and `CharCodes[9] === "Tab"`.
 */
var CharCodes;
(function (CharCodes) {
    // [name, code] pairs in their original declaration order.
    var table = [
        ["Tab", 9],
        ["NewLine", 10],
        ["FormFeed", 12],
        ["CarriageReturn", 13],
        ["Space", 32],
        ["ExclamationMark", 33],
        ["Num", 35],
        ["Amp", 38],
        ["SingleQuote", 39],
        ["DoubleQuote", 34],
        ["Dash", 45],
        ["Slash", 47],
        ["Zero", 48],
        ["Nine", 57],
        ["Semi", 59],
        ["Lt", 60],
        ["Eq", 61],
        ["Gt", 62],
        ["Questionmark", 63],
        ["UpperA", 65],
        ["LowerA", 97],
        ["UpperF", 70],
        ["LowerF", 102],
        ["UpperZ", 90],
        ["LowerZ", 122],
        ["LowerX", 120],
        ["OpeningSquareBracket", 91],
    ];
    table.forEach(function (pair) {
        var label = pair[0];
        var code = pair[1];
        // Forward (name -> code) entry first, then the reverse entry.
        CharCodes[label] = code;
        CharCodes[code] = label;
    });
})(CharCodes || (CharCodes = {}));
/**
 * All the states the tokenizer can be in.
 *
 * Stored as a two-way map (`State.Text === 1`, `State[1] === "Text"`);
 * the values are consecutive integers starting at 1.
 */
var State;
(function (State) {
    var names = [
        "Text",
        "BeforeTagName",
        "InTagName",
        "InSelfClosingTag",
        "BeforeClosingTagName",
        "InClosingTagName",
        "AfterClosingTagName",
        // Attributes
        "BeforeAttributeName",
        "InAttributeName",
        "AfterAttributeName",
        "BeforeAttributeValue",
        "InAttributeValueDq",
        "InAttributeValueSq",
        "InAttributeValueNq",
        // Declarations
        "BeforeDeclaration",
        "InDeclaration",
        // Processing instructions
        "InProcessingInstruction",
        // Comments & CDATA
        "BeforeComment",
        "CDATASequence",
        "InSpecialComment",
        "InCommentLike",
        // Special tags
        "BeforeSpecialS",
        "SpecialStartSequence",
        "InSpecialTag",
        "BeforeEntity",
        "BeforeNumericEntity",
        "InNamedEntity",
        "InNumericEntity",
        "InHexEntity",
    ];
    for (var i = 0; i < names.length; i++) {
        var value = i + 1;
        // Forward (name -> value) entry first, then the reverse entry.
        State[names[i]] = value;
        State[value] = names[i];
    }
})(State || (State = {}));
function isWhitespace(c) {
return (c === 32 /* Space */ ||
c === 10 /* NewLine */ ||
c === 9 /* Tab */ ||
c === 12 /* FormFeed */ ||
c === 13 /* CarriageReturn */);
return (c === CharCodes.Space ||
c === CharCodes.NewLine ||
c === CharCodes.Tab ||
c === CharCodes.FormFeed ||
c === CharCodes.CarriageReturn);
}
function isEndOfTagSection(c) {
return c === 47 /* Slash */ || c === 62 /* Gt */ || isWhitespace(c);
return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c);
}
function isNumber(c) {
return c >= 48 /* Zero */ && c <= 57 /* Nine */;
return c >= CharCodes.Zero && c <= CharCodes.Nine;
}
function isASCIIAlpha(c) {
return ((c >= 97 /* LowerA */ && c <= 122 /* LowerZ */) ||
(c >= 65 /* UpperA */ && c <= 90 /* UpperZ */));
return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
(c >= CharCodes.UpperA && c <= CharCodes.UpperZ));
}
/**
 * Reports whether `c` is the character code of a letter in `A`-`F` or `a`-`f`.
 *
 * Despite the name, the decimal digits `0`-`9` are NOT matched here; a
 * caller that wants the full hexadecimal alphabet has to test for digits
 * separately (e.g. with `isNumber`).
 */
function isHexDigit(c) {
    var isUpperHexLetter = c >= CharCodes.UpperA && c <= CharCodes.UpperF;
    if (isUpperHexLetter) {
        return true;
    }
    return c >= CharCodes.LowerA && c <= CharCodes.LowerF;
}
/**
 * How an attribute's value was quoted, stored as a two-way map
 * (`QuoteType.NoValue === 0`, `QuoteType[0] === "NoValue"`) and published
 * on `exports.QuoteType`.
 */
var QuoteType;
(function (QuoteType) {
    // The position in this list is the numeric value of each member.
    ["NoValue", "Unquoted", "Single", "Double"].forEach(function (label, value) {
        QuoteType[label] = value;
        QuoteType[value] = label;
    });
})(QuoteType = exports.QuoteType || (exports.QuoteType = {}));
/**

@@ -32,10 +108,8 @@ * Sequences used to match longer strings.

var Sequences = {
Cdata: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]),
CdataEnd: new Uint16Array([0x5d, 0x5d, 0x3e]),
CommentEnd: new Uint16Array([0x2d, 0x2d, 0x3e]),
ScriptEnd: new Uint16Array([
0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
]),
StyleEnd: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]),
TitleEnd: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]),
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]),
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]),
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]),
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]),
TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
};

@@ -47,3 +121,3 @@ var Tokenizer = /** @class */ (function () {

/** The current state the tokenizer is in. */
this._state = 1 /* Text */;
this.state = State.Text;
/** The read buffer. */

@@ -54,10 +128,5 @@ this.buffer = "";

/** The index within the buffer that we are currently looking at. */
this._index = 0;
/**
* Data that has already been processed will be removed from the buffer occasionally.
* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
*/
this.bufferOffset = 0;
this.index = 0;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
this.baseState = 1 /* Text */;
this.baseState = State.Text;
/** For special parsing behavior inside of script and style tags. */

@@ -67,36 +136,30 @@ this.isSpecial = false;

this.running = true;
/** Indicates whether the tokenizer has finished running / `.end` has been called. */
this.ended = false;
/** The offset of the current buffer. */
this.offset = 0;
this.sequenceIndex = 0;
this.trieIndex = 0;
this.trieCurrent = 0;
this.trieResult = null;
/** For named entities, the index of the value. For numeric entities, the code point. */
this.entityResult = 0;
this.entityExcess = 0;
this.xmlMode = xmlMode;
this.decodeEntities = decodeEntities;
this.entityTrie = xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
this.entityTrie = xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree;
}
Tokenizer.prototype.reset = function () {
this._state = 1 /* Text */;
this.state = State.Text;
this.buffer = "";
this.sectionStart = 0;
this._index = 0;
this.bufferOffset = 0;
this.baseState = 1 /* Text */;
this.index = 0;
this.baseState = State.Text;
this.currentSequence = undefined;
this.running = true;
this.ended = false;
this.offset = 0;
};
Tokenizer.prototype.write = function (chunk) {
if (this.ended)
return this.cbs.onerror(Error(".write() after done!"));
this.buffer += chunk;
this.offset += this.buffer.length;
this.buffer = chunk;
this.parse();
};
Tokenizer.prototype.end = function (chunk) {
if (this.ended)
return this.cbs.onerror(Error(".end() after done!"));
if (chunk)
this.write(chunk);
this.ended = true;
Tokenizer.prototype.end = function () {
if (this.running)

@@ -110,32 +173,29 @@ this.finish();

this.running = true;
if (this._index < this.buffer.length) {
if (this.index < this.buffer.length + this.offset) {
this.parse();
}
if (this.ended) {
this.finish();
}
};
/**
* The start of the current section.
* The current index within all of the written data.
*/
Tokenizer.prototype.getAbsoluteSectionStart = function () {
return this.sectionStart + this.bufferOffset;
Tokenizer.prototype.getIndex = function () {
return this.index;
};
/**
* The current index within all of the written data.
* The start of the current section.
*/
Tokenizer.prototype.getAbsoluteIndex = function () {
return this.bufferOffset + this._index;
Tokenizer.prototype.getSectionStart = function () {
return this.sectionStart;
};
Tokenizer.prototype.stateText = function (c) {
if (c === 60 /* Lt */ ||
(!this.decodeEntities && this.fastForwardTo(60 /* Lt */))) {
if (this._index > this.sectionStart) {
this.cbs.ontext(this.getSection());
if (c === CharCodes.Lt ||
(!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))) {
if (this.index > this.sectionStart) {
this.cbs.ontext(this.sectionStart, this.index);
}
this._state = 2 /* BeforeTagName */;
this.sectionStart = this._index;
this.state = State.BeforeTagName;
this.sectionStart = this.index;
}
else if (this.decodeEntities && c === 38 /* Amp */) {
this._state = 25 /* BeforeEntity */;
else if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
}

@@ -158,3 +218,3 @@ };

this.sequenceIndex = 0;
this._state = 3 /* InTagName */;
this.state = State.InTagName;
this.stateInTagName(c);

@@ -165,10 +225,10 @@ };

if (this.sequenceIndex === this.currentSequence.length) {
if (c === 62 /* Gt */ || isWhitespace(c)) {
var endOfText = this._index - this.currentSequence.length;
if (c === CharCodes.Gt || isWhitespace(c)) {
var endOfText = this.index - this.currentSequence.length;
if (this.sectionStart < endOfText) {
// Spoof the index so that reported locations match up.
var actualIndex = this._index;
this._index = endOfText;
this.cbs.ontext(this.getSection());
this._index = actualIndex;
var actualIndex = this.index;
this.index = endOfText;
this.cbs.ontext(this.sectionStart, endOfText);
this.index = actualIndex;
}

@@ -188,7 +248,7 @@ this.isSpecial = false;

// We have to parse entities in <title> tags.
if (this.decodeEntities && c === 38 /* Amp */) {
this._state = 25 /* BeforeEntity */;
if (this.decodeEntities && c === CharCodes.Amp) {
this.state = State.BeforeEntity;
}
}
else if (this.fastForwardTo(60 /* Lt */)) {
else if (this.fastForwardTo(CharCodes.Lt)) {
// Outside of <title> tags, we can fast-forward.

@@ -200,3 +260,3 @@ this.sequenceIndex = 1;

// If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
this.sequenceIndex = Number(c === 60 /* Lt */);
this.sequenceIndex = Number(c === CharCodes.Lt);
}

@@ -207,6 +267,6 @@ };

if (++this.sequenceIndex === Sequences.Cdata.length) {
this._state = 21 /* InCommentLike */;
this.state = State.InCommentLike;
this.currentSequence = Sequences.CdataEnd;
this.sequenceIndex = 0;
this.sectionStart = this._index + 1;
this.sectionStart = this.index + 1;
}

@@ -216,3 +276,3 @@ }

this.sequenceIndex = 0;
this._state = 16 /* InDeclaration */;
this.state = State.InDeclaration;
this.stateInDeclaration(c); // Reconsume the character

@@ -228,4 +288,4 @@ }

Tokenizer.prototype.fastForwardTo = function (c) {
while (++this._index < this.buffer.length) {
if (this.buffer.charCodeAt(this._index) === c) {
while (++this.index < this.buffer.length + this.offset) {
if (this.buffer.charCodeAt(this.index - this.offset) === c) {
return true;

@@ -240,3 +300,3 @@ }

*/
this._index = this.buffer.length - 1;
this.index = this.buffer.length + this.offset - 1;
return false;

@@ -255,13 +315,11 @@ };

if (++this.sequenceIndex === this.currentSequence.length) {
// Remove 2 trailing chars
var section = this.buffer.slice(this.sectionStart, this._index - 2);
if (this.currentSequence === Sequences.CdataEnd) {
this.cbs.oncdata(section);
this.cbs.oncdata(this.sectionStart, this.index, 2);
}
else {
this.cbs.oncomment(section);
this.cbs.oncomment(this.sectionStart, this.index, 2);
}
this.sequenceIndex = 0;
this.sectionStart = this._index + 1;
this._state = 1 /* Text */;
this.sectionStart = this.index + 1;
this.state = State.Text;
}

@@ -293,16 +351,16 @@ }

this.sequenceIndex = offset;
this._state = 23 /* SpecialStartSequence */;
this.state = State.SpecialStartSequence;
};
Tokenizer.prototype.stateBeforeTagName = function (c) {
if (c === 33 /* ExclamationMark */) {
this._state = 15 /* BeforeDeclaration */;
this.sectionStart = this._index + 1;
if (c === CharCodes.ExclamationMark) {
this.state = State.BeforeDeclaration;
this.sectionStart = this.index + 1;
}
else if (c === 63 /* Questionmark */) {
this._state = 17 /* InProcessingInstruction */;
this.sectionStart = this._index + 1;
else if (c === CharCodes.Questionmark) {
this.state = State.InProcessingInstruction;
this.sectionStart = this.index + 1;
}
else if (this.isTagStartChar(c)) {
var lower = c | 0x20;
this.sectionStart = this._index;
this.sectionStart = this.index;
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {

@@ -312,13 +370,13 @@ this.startSpecial(Sequences.TitleEnd, 3);

else {
this._state =
this.state =
!this.xmlMode && lower === Sequences.ScriptEnd[2]
? 22 /* BeforeSpecialS */
: 3 /* InTagName */;
? State.BeforeSpecialS
: State.InTagName;
}
}
else if (c === 47 /* Slash */) {
this._state = 5 /* BeforeClosingTagName */;
else if (c === CharCodes.Slash) {
this.state = State.BeforeClosingTagName;
}
else {
this._state = 1 /* Text */;
this.state = State.Text;
this.stateText(c);

@@ -329,5 +387,5 @@ }

if (isEndOfTagSection(c)) {
this.cbs.onopentagname(this.getSection());
this.cbs.onopentagname(this.sectionStart, this.index);
this.sectionStart = -1;
this._state = 8 /* BeforeAttributeName */;
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);

@@ -340,17 +398,17 @@ }

}
else if (c === 62 /* Gt */) {
this._state = 1 /* Text */;
else if (c === CharCodes.Gt) {
this.state = State.Text;
}
else {
this._state = this.isTagStartChar(c)
? 6 /* InClosingTagName */
: 20 /* InSpecialComment */;
this.sectionStart = this._index;
this.state = this.isTagStartChar(c)
? State.InClosingTagName
: State.InSpecialComment;
this.sectionStart = this.index;
}
};
Tokenizer.prototype.stateInClosingTagName = function (c) {
if (c === 62 /* Gt */ || isWhitespace(c)) {
this.cbs.onclosetag(this.getSection());
if (c === CharCodes.Gt || isWhitespace(c)) {
this.cbs.onclosetag(this.sectionStart, this.index);
this.sectionStart = -1;
this._state = 7 /* AfterClosingTagName */;
this.state = State.AfterClosingTagName;
this.stateAfterClosingTagName(c);

@@ -361,38 +419,38 @@ }

// Skip everything until ">"
if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) {
this._state = 1 /* Text */;
this.sectionStart = this._index + 1;
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.state = State.Text;
this.sectionStart = this.index + 1;
}
};
Tokenizer.prototype.stateBeforeAttributeName = function (c) {
if (c === 62 /* Gt */) {
this.cbs.onopentagend();
if (c === CharCodes.Gt) {
this.cbs.onopentagend(this.index);
if (this.isSpecial) {
this._state = 24 /* InSpecialTag */;
this.state = State.InSpecialTag;
this.sequenceIndex = 0;
}
else {
this._state = 1 /* Text */;
this.state = State.Text;
}
this.baseState = this._state;
this.sectionStart = this._index + 1;
this.baseState = this.state;
this.sectionStart = this.index + 1;
}
else if (c === 47 /* Slash */) {
this._state = 4 /* InSelfClosingTag */;
else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag;
}
else if (!isWhitespace(c)) {
this._state = 9 /* InAttributeName */;
this.sectionStart = this._index;
this.state = State.InAttributeName;
this.sectionStart = this.index;
}
};
Tokenizer.prototype.stateInSelfClosingTag = function (c) {
if (c === 62 /* Gt */) {
this.cbs.onselfclosingtag();
this._state = 1 /* Text */;
this.baseState = 1 /* Text */;
this.sectionStart = this._index + 1;
if (c === CharCodes.Gt) {
this.cbs.onselfclosingtag(this.index);
this.state = State.Text;
this.baseState = State.Text;
this.sectionStart = this.index + 1;
this.isSpecial = false; // Reset special state, in case of self-closing special tags
}
else if (!isWhitespace(c)) {
this._state = 8 /* BeforeAttributeName */;
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);

@@ -402,6 +460,6 @@ }

Tokenizer.prototype.stateInAttributeName = function (c) {
if (c === 61 /* Eq */ || isEndOfTagSection(c)) {
this.cbs.onattribname(this.getSection());
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.sectionStart, this.index);
this.sectionStart = -1;
this._state = 10 /* AfterAttributeName */;
this.state = State.AfterAttributeName;
this.stateAfterAttributeName(c);

@@ -411,28 +469,28 @@ }

Tokenizer.prototype.stateAfterAttributeName = function (c) {
if (c === 61 /* Eq */) {
this._state = 11 /* BeforeAttributeValue */;
if (c === CharCodes.Eq) {
this.state = State.BeforeAttributeValue;
}
else if (c === 47 /* Slash */ || c === 62 /* Gt */) {
this.cbs.onattribend(undefined);
this._state = 8 /* BeforeAttributeName */;
else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
else if (!isWhitespace(c)) {
this.cbs.onattribend(undefined);
this._state = 9 /* InAttributeName */;
this.sectionStart = this._index;
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.state = State.InAttributeName;
this.sectionStart = this.index;
}
};
Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
if (c === 34 /* DoubleQuote */) {
this._state = 12 /* InAttributeValueDq */;
this.sectionStart = this._index + 1;
if (c === CharCodes.DoubleQuote) {
this.state = State.InAttributeValueDq;
this.sectionStart = this.index + 1;
}
else if (c === 39 /* SingleQuote */) {
this._state = 13 /* InAttributeValueSq */;
this.sectionStart = this._index + 1;
else if (c === CharCodes.SingleQuote) {
this.state = State.InAttributeValueSq;
this.sectionStart = this.index + 1;
}
else if (!isWhitespace(c)) {
this.sectionStart = this._index;
this._state = 14 /* InAttributeValueNq */;
this.sectionStart = this.index;
this.state = State.InAttributeValueNq;
this.stateInAttributeValueNoQuotes(c); // Reconsume token

@@ -444,74 +502,76 @@ }

(!this.decodeEntities && this.fastForwardTo(quote))) {
this.cbs.onattribdata(this.getSection());
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = -1;
this.cbs.onattribend(String.fromCharCode(quote));
this._state = 8 /* BeforeAttributeName */;
this.cbs.onattribend(quote === CharCodes.DoubleQuote
? QuoteType.Double
: QuoteType.Single, this.index);
this.state = State.BeforeAttributeName;
}
else if (this.decodeEntities && c === 38 /* Amp */) {
this.baseState = this._state;
this._state = 25 /* BeforeEntity */;
else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
}
};
Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
this.handleInAttributeValue(c, 34 /* DoubleQuote */);
this.handleInAttributeValue(c, CharCodes.DoubleQuote);
};
Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
this.handleInAttributeValue(c, 39 /* SingleQuote */);
this.handleInAttributeValue(c, CharCodes.SingleQuote);
};
Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
if (isWhitespace(c) || c === 62 /* Gt */) {
this.cbs.onattribdata(this.getSection());
if (isWhitespace(c) || c === CharCodes.Gt) {
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = -1;
this.cbs.onattribend(null);
this._state = 8 /* BeforeAttributeName */;
this.cbs.onattribend(QuoteType.Unquoted, this.index);
this.state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
else if (this.decodeEntities && c === 38 /* Amp */) {
this.baseState = this._state;
this._state = 25 /* BeforeEntity */;
else if (this.decodeEntities && c === CharCodes.Amp) {
this.baseState = this.state;
this.state = State.BeforeEntity;
}
};
Tokenizer.prototype.stateBeforeDeclaration = function (c) {
if (c === 91 /* OpeningSquareBracket */) {
this._state = 19 /* CDATASequence */;
if (c === CharCodes.OpeningSquareBracket) {
this.state = State.CDATASequence;
this.sequenceIndex = 0;
}
else {
this._state =
c === 45 /* Dash */
? 18 /* BeforeComment */
: 16 /* InDeclaration */;
this.state =
c === CharCodes.Dash
? State.BeforeComment
: State.InDeclaration;
}
};
Tokenizer.prototype.stateInDeclaration = function (c) {
if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) {
this.cbs.ondeclaration(this.getSection());
this._state = 1 /* Text */;
this.sectionStart = this._index + 1;
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.ondeclaration(this.sectionStart, this.index);
this.state = State.Text;
this.sectionStart = this.index + 1;
}
};
Tokenizer.prototype.stateInProcessingInstruction = function (c) {
if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) {
this.cbs.onprocessinginstruction(this.getSection());
this._state = 1 /* Text */;
this.sectionStart = this._index + 1;
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.onprocessinginstruction(this.sectionStart, this.index);
this.state = State.Text;
this.sectionStart = this.index + 1;
}
};
Tokenizer.prototype.stateBeforeComment = function (c) {
if (c === 45 /* Dash */) {
this._state = 21 /* InCommentLike */;
if (c === CharCodes.Dash) {
this.state = State.InCommentLike;
this.currentSequence = Sequences.CommentEnd;
// Allow short comments (eg. <!-->)
this.sequenceIndex = 2;
this.sectionStart = this._index + 1;
this.sectionStart = this.index + 1;
}
else {
this._state = 16 /* InDeclaration */;
this.state = State.InDeclaration;
}
};
Tokenizer.prototype.stateInSpecialComment = function (c) {
if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) {
this.cbs.oncomment(this.getSection());
this._state = 1 /* Text */;
this.sectionStart = this._index + 1;
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.oncomment(this.sectionStart, this.index, 0);
this.state = State.Text;
this.sectionStart = this.index + 1;
}

@@ -528,3 +588,3 @@ };

else {
this._state = 3 /* InTagName */;
this.state = State.InTagName;
this.stateInTagName(c); // Consume the token again

@@ -536,6 +596,7 @@ }

this.entityExcess = 1;
if (c === 35 /* Num */) {
this._state = 26 /* BeforeNumericEntity */;
this.entityResult = 0;
if (c === CharCodes.Num) {
this.state = State.BeforeNumericEntity;
}
else if (c === 38 /* Amp */) {
else if (c === CharCodes.Amp) {
// We have two `&` characters in a row. Stay in the current state.

@@ -546,4 +607,3 @@ }

this.trieCurrent = this.entityTrie[0];
this.trieResult = null;
this._state = 27 /* InNamedEntity */;
this.state = State.InNamedEntity;
this.stateInNamedEntity(c);

@@ -554,29 +614,32 @@ }

this.entityExcess += 1;
this.trieIndex = (0, decode_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
if (this.trieIndex < 0) {
this.emitNamedEntity();
this._index--;
this.index--;
return;
}
this.trieCurrent = this.entityTrie[this.trieIndex];
var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH;
// If the branch is a value, store it and continue
if (this.trieCurrent & decode_1.BinTrieFlags.HAS_VALUE) {
if (masked) {
// The mask is the number of bytes of the value, including the current byte.
var valueLength = (masked >> 14) - 1;
// If we have a legacy entity while parsing strictly, just skip the number of bytes
if (!this.allowLegacyEntity() && c !== 59 /* Semi */) {
// No need to consider multi-byte values, as the legacy entity is always a single byte
this.trieIndex += 1;
if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
this.trieIndex += valueLength;
}
else {
// Add 1 as we have already incremented the excess
var entityStart = this._index - this.entityExcess + 1;
var entityStart = this.index - this.entityExcess + 1;
if (entityStart > this.sectionStart) {
this.emitPartial(this.buffer.substring(this.sectionStart, entityStart));
this.emitPartial(this.sectionStart, entityStart);
}
// If this is a surrogate pair, combine the higher bits from the node with the next byte
this.trieResult =
this.trieCurrent & decode_1.BinTrieFlags.MULTI_BYTE
? String.fromCharCode(this.entityTrie[++this.trieIndex], this.entityTrie[++this.trieIndex])
: String.fromCharCode(this.entityTrie[++this.trieIndex]);
// If this is a surrogate pair, consume the next two bytes
this.entityResult = this.trieIndex;
this.trieIndex += valueLength;
this.entityExcess = 0;
this.sectionStart = this._index + 1;
this.sectionStart = this.index + 1;
if (valueLength === 0) {
this.emitNamedEntity();
}
}

@@ -586,73 +649,90 @@ }

Tokenizer.prototype.emitNamedEntity = function () {
if (this.trieResult) {
this.emitPartial(this.trieResult);
this.state = this.baseState;
if (this.entityResult === 0) {
return;
}
this._state = this.baseState;
var valueLength = (this.entityTrie[this.entityResult] & decode_js_1.BinTrieFlags.VALUE_LENGTH) >>
14;
switch (valueLength) {
case 1:
this.emitCodePoint(this.entityTrie[this.entityResult] &
~decode_js_1.BinTrieFlags.VALUE_LENGTH);
break;
case 2:
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
break;
case 3: {
this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
}
}
};
Tokenizer.prototype.stateBeforeNumericEntity = function (c) {
if ((c | 0x20) === 120 /* LowerX */) {
if ((c | 0x20) === CharCodes.LowerX) {
this.entityExcess++;
this._state = 29 /* InHexEntity */;
this.state = State.InHexEntity;
}
else {
this._state = 28 /* InNumericEntity */;
this.state = State.InNumericEntity;
this.stateInNumericEntity(c);
}
};
Tokenizer.prototype.decodeNumericEntity = function (base, strict) {
var entityStart = this._index - this.entityExcess - 1;
var numberStart = entityStart + 2 + (base >> 4);
if (numberStart !== this._index) {
Tokenizer.prototype.emitNumericEntity = function (strict) {
var entityStart = this.index - this.entityExcess - 1;
var numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
if (numberStart !== this.index) {
// Emit leading data if any
if (entityStart > this.sectionStart) {
this.emitPartial(this.buffer.substring(this.sectionStart, entityStart));
this.emitPartial(this.sectionStart, entityStart);
}
// Parse entity
var entity = this.buffer.substring(numberStart, this._index);
var parsed = parseInt(entity, base);
this.emitPartial((0, decode_codepoint_1.default)(parsed));
this.sectionStart = this._index + Number(strict);
this.sectionStart = this.index + Number(strict);
this.emitCodePoint((0, decode_js_1.replaceCodePoint)(this.entityResult));
}
this._state = this.baseState;
this.state = this.baseState;
};
Tokenizer.prototype.stateInNumericEntity = function (c) {
if (c === 59 /* Semi */) {
this.decodeNumericEntity(10, true);
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if (!isNumber(c)) {
else if (isNumber(c)) {
this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.decodeNumericEntity(10, false);
this.emitNumericEntity(false);
}
else {
this._state = this.baseState;
this.state = this.baseState;
}
this._index--;
this.index--;
}
else {
this.entityExcess++;
}
};
Tokenizer.prototype.stateInHexEntity = function (c) {
if (c === 59 /* Semi */) {
this.decodeNumericEntity(16, true);
if (c === CharCodes.Semi) {
this.emitNumericEntity(true);
}
else if ((c < 97 /* LowerA */ || c > 102 /* LowerF */) &&
(c < 65 /* UpperA */ || c > 70 /* UpperF */) &&
!isNumber(c)) {
else if (isNumber(c)) {
this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
this.entityExcess++;
}
else if (isHexDigit(c)) {
this.entityResult =
this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
this.entityExcess++;
}
else {
if (this.allowLegacyEntity()) {
this.decodeNumericEntity(16, false);
this.emitNumericEntity(false);
}
else {
this._state = this.baseState;
this.state = this.baseState;
}
this._index--;
this.index--;
}
else {
this.entityExcess++;
}
};
Tokenizer.prototype.allowLegacyEntity = function () {
return (!this.xmlMode &&
(this.baseState === 1 /* Text */ ||
this.baseState === 24 /* InSpecialTag */));
(this.baseState === State.Text ||
this.baseState === State.InSpecialTag));
};

@@ -663,23 +743,19 @@ /**

Tokenizer.prototype.cleanup = function () {
// If we are inside of text, emit what we already have.
if (this.running &&
this.sectionStart !== this._index &&
(this._state === 1 /* Text */ ||
(this._state === 24 /* InSpecialTag */ &&
this.sequenceIndex === 0))) {
// TODO: We could emit attribute data here as well.
this.cbs.ontext(this.buffer.substr(this.sectionStart));
this.sectionStart = this._index;
// If we are inside of text or attributes, emit what we already have.
if (this.running && this.sectionStart !== this.index) {
if (this.state === State.Text ||
(this.state === State.InSpecialTag && this.sequenceIndex === 0)) {
this.cbs.ontext(this.sectionStart, this.index);
this.sectionStart = this.index;
}
else if (this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueNq) {
this.cbs.onattribdata(this.sectionStart, this.index);
this.sectionStart = this.index;
}
}
var start = this.sectionStart < 0 ? this._index : this.sectionStart;
this.buffer =
start === this.buffer.length ? "" : this.buffer.substr(start);
this._index -= start;
this.bufferOffset += start;
if (this.sectionStart > 0) {
this.sectionStart = 0;
}
};
Tokenizer.prototype.shouldContinue = function () {
return this._index < this.buffer.length && this.running;
return this.index < this.buffer.length + this.offset && this.running;
};

@@ -693,85 +769,85 @@ /**

while (this.shouldContinue()) {
var c = this.buffer.charCodeAt(this._index);
if (this._state === 1 /* Text */) {
var c = this.buffer.charCodeAt(this.index - this.offset);
if (this.state === State.Text) {
this.stateText(c);
}
else if (this._state === 23 /* SpecialStartSequence */) {
else if (this.state === State.SpecialStartSequence) {
this.stateSpecialStartSequence(c);
}
else if (this._state === 24 /* InSpecialTag */) {
else if (this.state === State.InSpecialTag) {
this.stateInSpecialTag(c);
}
else if (this._state === 19 /* CDATASequence */) {
else if (this.state === State.CDATASequence) {
this.stateCDATASequence(c);
}
else if (this._state === 12 /* InAttributeValueDq */) {
else if (this.state === State.InAttributeValueDq) {
this.stateInAttributeValueDoubleQuotes(c);
}
else if (this._state === 9 /* InAttributeName */) {
else if (this.state === State.InAttributeName) {
this.stateInAttributeName(c);
}
else if (this._state === 21 /* InCommentLike */) {
else if (this.state === State.InCommentLike) {
this.stateInCommentLike(c);
}
else if (this._state === 20 /* InSpecialComment */) {
else if (this.state === State.InSpecialComment) {
this.stateInSpecialComment(c);
}
else if (this._state === 8 /* BeforeAttributeName */) {
else if (this.state === State.BeforeAttributeName) {
this.stateBeforeAttributeName(c);
}
else if (this._state === 3 /* InTagName */) {
else if (this.state === State.InTagName) {
this.stateInTagName(c);
}
else if (this._state === 6 /* InClosingTagName */) {
else if (this.state === State.InClosingTagName) {
this.stateInClosingTagName(c);
}
else if (this._state === 2 /* BeforeTagName */) {
else if (this.state === State.BeforeTagName) {
this.stateBeforeTagName(c);
}
else if (this._state === 10 /* AfterAttributeName */) {
else if (this.state === State.AfterAttributeName) {
this.stateAfterAttributeName(c);
}
else if (this._state === 13 /* InAttributeValueSq */) {
else if (this.state === State.InAttributeValueSq) {
this.stateInAttributeValueSingleQuotes(c);
}
else if (this._state === 11 /* BeforeAttributeValue */) {
else if (this.state === State.BeforeAttributeValue) {
this.stateBeforeAttributeValue(c);
}
else if (this._state === 5 /* BeforeClosingTagName */) {
else if (this.state === State.BeforeClosingTagName) {
this.stateBeforeClosingTagName(c);
}
else if (this._state === 7 /* AfterClosingTagName */) {
else if (this.state === State.AfterClosingTagName) {
this.stateAfterClosingTagName(c);
}
else if (this._state === 22 /* BeforeSpecialS */) {
else if (this.state === State.BeforeSpecialS) {
this.stateBeforeSpecialS(c);
}
else if (this._state === 14 /* InAttributeValueNq */) {
else if (this.state === State.InAttributeValueNq) {
this.stateInAttributeValueNoQuotes(c);
}
else if (this._state === 4 /* InSelfClosingTag */) {
else if (this.state === State.InSelfClosingTag) {
this.stateInSelfClosingTag(c);
}
else if (this._state === 16 /* InDeclaration */) {
else if (this.state === State.InDeclaration) {
this.stateInDeclaration(c);
}
else if (this._state === 15 /* BeforeDeclaration */) {
else if (this.state === State.BeforeDeclaration) {
this.stateBeforeDeclaration(c);
}
else if (this._state === 18 /* BeforeComment */) {
else if (this.state === State.BeforeComment) {
this.stateBeforeComment(c);
}
else if (this._state === 17 /* InProcessingInstruction */) {
else if (this.state === State.InProcessingInstruction) {
this.stateInProcessingInstruction(c);
}
else if (this._state === 27 /* InNamedEntity */) {
else if (this.state === State.InNamedEntity) {
this.stateInNamedEntity(c);
}
else if (this._state === 25 /* BeforeEntity */) {
else if (this.state === State.BeforeEntity) {
this.stateBeforeEntity(c);
}
else if (this._state === 29 /* InHexEntity */) {
else if (this.state === State.InHexEntity) {
this.stateInHexEntity(c);
}
else if (this._state === 28 /* InNumericEntity */) {
else if (this.state === State.InNumericEntity) {
this.stateInNumericEntity(c);

@@ -783,3 +859,3 @@ }

}
this._index++;
this.index++;
}

@@ -789,7 +865,7 @@ this.cleanup();

Tokenizer.prototype.finish = function () {
if (this._state === 27 /* InNamedEntity */) {
if (this.state === State.InNamedEntity) {
this.emitNamedEntity();
}
// If there is remaining data, emit it in a reasonable way
if (this.sectionStart < this._index) {
if (this.sectionStart < this.index) {
this.handleTrailingData();

@@ -801,30 +877,30 @@ }

Tokenizer.prototype.handleTrailingData = function () {
var data = this.buffer.substr(this.sectionStart);
if (this._state === 21 /* InCommentLike */) {
var endIndex = this.buffer.length + this.offset;
if (this.state === State.InCommentLike) {
if (this.currentSequence === Sequences.CdataEnd) {
this.cbs.oncdata(data);
this.cbs.oncdata(this.sectionStart, endIndex, 0);
}
else {
this.cbs.oncomment(data);
this.cbs.oncomment(this.sectionStart, endIndex, 0);
}
}
else if (this._state === 28 /* InNumericEntity */ &&
else if (this.state === State.InNumericEntity &&
this.allowLegacyEntity()) {
this.decodeNumericEntity(10, false);
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this._state === 29 /* InHexEntity */ &&
else if (this.state === State.InHexEntity &&
this.allowLegacyEntity()) {
this.decodeNumericEntity(16, false);
this.emitNumericEntity(false);
// All trailing data will have been consumed
}
else if (this._state === 3 /* InTagName */ ||
this._state === 8 /* BeforeAttributeName */ ||
this._state === 11 /* BeforeAttributeValue */ ||
this._state === 10 /* AfterAttributeName */ ||
this._state === 9 /* InAttributeName */ ||
this._state === 13 /* InAttributeValueSq */ ||
this._state === 12 /* InAttributeValueDq */ ||
this._state === 14 /* InAttributeValueNq */ ||
this._state === 6 /* InClosingTagName */) {
else if (this.state === State.InTagName ||
this.state === State.BeforeAttributeName ||
this.state === State.BeforeAttributeValue ||
this.state === State.AfterAttributeName ||
this.state === State.InAttributeName ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueNq ||
this.state === State.InClosingTagName) {
/*

@@ -836,15 +912,21 @@ * If we are currently in an opening or closing tag, us not calling the

else {
this.cbs.ontext(data);
this.cbs.ontext(this.sectionStart, endIndex);
}
};
Tokenizer.prototype.getSection = function () {
return this.buffer.substring(this.sectionStart, this._index);
Tokenizer.prototype.emitPartial = function (start, endIndex) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribdata(start, endIndex);
}
else {
this.cbs.ontext(start, endIndex);
}
};
Tokenizer.prototype.emitPartial = function (value) {
if (this.baseState !== 1 /* Text */ &&
this.baseState !== 24 /* InSpecialTag */) {
this.cbs.onattribdata(value);
Tokenizer.prototype.emitCodePoint = function (cp) {
if (this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag) {
this.cbs.onattribentity(cp);
}
else {
this.cbs.ontext(value);
this.cbs.ontextentity(cp);
}

@@ -855,1 +937,2 @@ };

exports.default = Tokenizer;
//# sourceMappingURL=Tokenizer.js.map
/// <reference types="node" />
import { Handler, ParserOptions } from "./Parser";
import { Handler, ParserOptions } from "./Parser.js";
import { Writable } from "stream";

@@ -4,0 +4,0 @@ /**

@@ -19,3 +19,3 @@ "use strict";

exports.WritableStream = void 0;
var Parser_1 = require("./Parser");
var Parser_js_1 = require("./Parser.js");
/*

@@ -41,3 +41,3 @@ * NOTE: If either of these two imports produces a type error,

_this._decoder = new string_decoder_1.StringDecoder();
_this._parser = new Parser_1.Parser(cbs, options);
_this._parser = new Parser_js_1.Parser(cbs, options);
return _this;

@@ -56,1 +56,2 @@ }

exports.WritableStream = WritableStream;
//# sourceMappingURL=WritableStream.js.map
{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "7.2.0",
"version": "8.0.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -34,2 +34,7 @@ "funding": [

"types": "lib/index.d.ts",
"module": "lib/esm/index.js",
"exports": {
"require": "./lib/index.js",
"import": "./lib/esm/index.js"
},
"files": [

@@ -48,26 +53,32 @@ "lib/**/*"

"format:prettier:raw": "prettier '**/*.{ts,md,json,yml}'",
"build": "tsc",
"build": "npm run build:cjs && npm run build:esm",
"build:cjs": "tsc --sourceRoot https://raw.githubusercontent.com/fb55/htmlparser2/$(git rev-parse HEAD)/src/",
"build:esm": "npm run build:cjs -- --module esnext --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json",
"prepare": "npm run build"
},
"dependencies": {
"domelementtype": "^2.0.1",
"domhandler": "^4.2.2",
"domutils": "^2.8.0",
"entities": "^3.0.1"
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"entities": "^4.3.0"
},
"devDependencies": {
"@types/jest": "^27.0.2",
"@types/node": "^16.11.7",
"@typescript-eslint/eslint-plugin": "^5.3.1",
"@typescript-eslint/parser": "^5.3.1",
"eslint": "^8.2.0",
"eslint-config-prettier": "^8.1.0",
"jest": "^27.3.1",
"prettier": "^2.4.1",
"ts-jest": "^27.0.7",
"typescript": "^4.4.4"
"@types/jest": "^27.4.1",
"@types/node": "^17.0.25",
"@typescript-eslint/eslint-plugin": "^5.20.0",
"@typescript-eslint/parser": "^5.20.0",
"eslint": "^8.13.0",
"eslint-config-prettier": "^8.5.0",
"jest": "^27.5.1",
"prettier": "^2.6.2",
"ts-jest": "^27.1.4",
"typescript": "^4.6.3"
},
"jest": {
"preset": "ts-jest",
"testEnvironment": "node"
"testEnvironment": "node",
"coverageProvider": "v8",
"moduleNameMapper": {
"^(.*)\\.js$": "$1"
}
},

@@ -74,0 +85,0 @@ "prettier": {

@@ -10,2 +10,4 @@ # htmlparser2

_htmlparser2 is [the fastest HTML parser](#performance), and takes some shortcuts to get there. If you need strict HTML spec compliance, have a look at [parse5](https://github.com/inikulin/parse5)._
## Installation

@@ -12,0 +14,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc