Socket
Socket
Sign in · Demo · Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 9.0.0 to 9.1.0

9

lib/esm/index.d.ts

@@ -1,4 +0,5 @@

import { Parser, ParserOptions } from "./Parser.js";
export { Parser, type ParserOptions } from "./Parser.js";
import { DomHandlerOptions, ChildNode, Element, Document } from "domhandler";
import { Parser, type ParserOptions } from "./Parser.js";
export type { Handler, ParserOptions } from "./Parser.js";
export { Parser } from "./Parser.js";
import { type DomHandlerOptions, type ChildNode, type Element, type Document } from "domhandler";
export { DomHandler, DomHandler as DefaultHandler, type DomHandlerOptions, } from "domhandler";

@@ -41,3 +42,3 @@ export type Options = ParserOptions & DomHandlerOptions;

export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, QuoteType, } from "./Tokenizer.js";
export * as ElementType from "domelementtype";

@@ -44,0 +45,0 @@ import { type Feed } from "domutils";

@@ -55,3 +55,3 @@ import { Parser } from "./Parser.js";

}
export { default as Tokenizer, } from "./Tokenizer.js";
export { default as Tokenizer, QuoteType, } from "./Tokenizer.js";
/*

@@ -58,0 +58,0 @@ * All of the following exports exist for backwards-compatibility.

@@ -1,2 +0,2 @@

import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";
import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js";
export interface ParserOptions {

@@ -100,2 +100,3 @@ /**

private readonly lowerCaseAttributeNames;
private readonly recognizeSelfClosing;
/** We are parsing HTML. Inverse of the `xmlMode` option. */

@@ -102,0 +103,0 @@ private readonly htmlMode;

@@ -101,3 +101,3 @@ import Tokenizer, { QuoteType } from "./Tokenizer.js";

constructor(cbs, options = {}) {
var _a, _b, _c, _d, _e;
var _a, _b, _c, _d, _e, _f;
this.options = options;

@@ -129,5 +129,7 @@ /** The start index of the last event. */

(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this);
this.recognizeSelfClosing =
(_c = options.recognizeSelfClosing) !== null && _c !== void 0 ? _c : !this.htmlMode;
this.tokenizer = new ((_d = options.Tokenizer) !== null && _d !== void 0 ? _d : Tokenizer)(this.options, this);
this.foreignContext = [!this.htmlMode];
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
(_f = (_e = this.cbs).onparserinit) === null || _f === void 0 ? void 0 : _f.call(_e, this);
}

@@ -251,3 +253,3 @@ // Tokenizer event handlers

this.endIndex = endIndex;
if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
if (this.recognizeSelfClosing || this.foreignContext[0]) {
this.closeCurrentTag(false);

@@ -254,0 +256,0 @@ // Set `startIndex` for next node

@@ -107,2 +107,3 @@ export declare enum QuoteType {

private stateBeforeSpecialS;
private stateBeforeSpecialT;
private startEntity;

@@ -109,0 +110,0 @@ private stateInEntity;

@@ -62,5 +62,6 @@ import { EntityDecoder, DecodingMode, htmlDecodeTree, xmlDecodeTree, } from "entities/lib/decode.js";

State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS";
State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence";
State[State["InSpecialTag"] = 24] = "InSpecialTag";
State[State["InEntity"] = 25] = "InEntity";
State[State["BeforeSpecialT"] = 23] = "BeforeSpecialT";
State[State["SpecialStartSequence"] = 24] = "SpecialStartSequence";
State[State["InSpecialTag"] = 25] = "InSpecialTag";
State[State["InEntity"] = 26] = "InEntity";
})(State || (State = {}));

@@ -95,8 +96,11 @@ function isWhitespace(c) {

const Sequences = {
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]),
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]),
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]),
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]),
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]),
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]>
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->`
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `</script`
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style`
TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
TextareaEnd: new Uint8Array([
0x3c, 0x2f, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61,
]), // `</textarea`
};

@@ -324,10 +328,13 @@ export default class Tokenizer {

this.sectionStart = this.index;
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3);
if (this.xmlMode) {
this.state = State.InTagName;
}
else if (lower === Sequences.ScriptEnd[2]) {
this.state = State.BeforeSpecialS;
}
else if (lower === Sequences.TitleEnd[2]) {
this.state = State.BeforeSpecialT;
}
else {
this.state =
!this.xmlMode && lower === Sequences.ScriptEnd[2]
? State.BeforeSpecialS
: State.InTagName;
this.state = State.InTagName;
}

@@ -415,3 +422,3 @@ }

this.cbs.onattribname(this.sectionStart, this.index);
this.sectionStart = -1;
this.sectionStart = this.index;
this.state = State.AfterAttributeName;

@@ -426,3 +433,4 @@ this.stateAfterAttributeName(c);

else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
this.sectionStart = -1;
this.state = State.BeforeAttributeName;

@@ -432,3 +440,3 @@ this.stateBeforeAttributeName(c);

else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
this.state = State.InAttributeName;

@@ -460,3 +468,3 @@ this.sectionStart = this.index;

? QuoteType.Double
: QuoteType.Single, this.index);
: QuoteType.Single, this.index + 1);
this.state = State.BeforeAttributeName;

@@ -544,2 +552,15 @@ }

}
/**
 * Tokenizer handler for the `BeforeSpecialT` state.
 *
 * Per the visible tag-name hunk, this state is entered outside XML mode when
 * the previous character matched `Sequences.TitleEnd[2]` (`t`). This handler
 * inspects the next character and either hands off to `startSpecial` with the
 * `</title` or `</textarea` sequence, or falls back to ordinary tag-name
 * parsing.
 *
 * @param c Character code of the current input character.
 */
stateBeforeSpecialT(c) {
// OR-ing with 0x20 maps ASCII uppercase letters onto their lowercase codes.
const lower = c | 0x20;
// Index 3 of `</title` is `i` (0x69).
if (lower === Sequences.TitleEnd[3]) {
// NOTE(review): `startSpecial` is defined outside this view; the offset 4
// presumably marks how far into the sequence matching has progressed — confirm.
this.startSpecial(Sequences.TitleEnd, 4);
}
// Index 3 of `</textarea` is `e` (0x65).
else if (lower === Sequences.TextareaEnd[3]) {
this.startSpecial(Sequences.TextareaEnd, 4);
}
else {
// Neither sequence matches: switch to the regular tag-name state and let it
// process this same character.
this.state = State.InTagName;
this.stateInTagName(c); // Consume the token again
}
}
startEntity() {

@@ -673,2 +694,6 @@ this.baseState = this.state;

}
case State.BeforeSpecialT: {
this.stateBeforeSpecialT(c);
break;
}
case State.InAttributeValueNq: {

@@ -675,0 +700,0 @@ this.stateInAttributeValueNoQuotes(c);

/// <reference types="node" />
/// <reference types="node" />
import { Handler, ParserOptions } from "./Parser.js";
import { type Handler, type ParserOptions } from "./Parser.js";
import { Writable } from "node:stream";

@@ -5,0 +5,0 @@ /**

@@ -1,4 +0,5 @@

import { Parser, ParserOptions } from "./Parser.js";
export { Parser, type ParserOptions } from "./Parser.js";
import { DomHandlerOptions, ChildNode, Element, Document } from "domhandler";
import { Parser, type ParserOptions } from "./Parser.js";
export type { Handler, ParserOptions } from "./Parser.js";
export { Parser } from "./Parser.js";
import { type DomHandlerOptions, type ChildNode, type Element, type Document } from "domhandler";
export { DomHandler, DomHandler as DefaultHandler, type DomHandlerOptions, } from "domhandler";

@@ -41,3 +42,3 @@ export type Options = ParserOptions & DomHandlerOptions;

export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser;
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js";
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, QuoteType, } from "./Tokenizer.js";
export * as ElementType from "domelementtype";

@@ -44,0 +45,0 @@ import { type Feed } from "domutils";

@@ -29,3 +29,3 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.QuoteType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0;
var Parser_js_1 = require("./Parser.js");

@@ -93,2 +93,3 @@ var Parser_js_2 = require("./Parser.js");

Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_js_1).default; } });
Object.defineProperty(exports, "QuoteType", { enumerable: true, get: function () { return Tokenizer_js_1.QuoteType; } });
/*

@@ -95,0 +96,0 @@ * All of the following exports exist for backwards-compatibility.

@@ -1,2 +0,2 @@

import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js";
import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js";
export interface ParserOptions {

@@ -100,2 +100,3 @@ /**

private readonly lowerCaseAttributeNames;
private readonly recognizeSelfClosing;
/** We are parsing HTML. Inverse of the `xmlMode` option. */

@@ -102,0 +103,0 @@ private readonly htmlMode;

@@ -128,3 +128,3 @@ "use strict";

if (options === void 0) { options = {}; }
var _a, _b, _c, _d, _e;
var _a, _b, _c, _d, _e, _f;
this.options = options;

@@ -156,5 +156,7 @@ /** The start index of the last event. */

(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this);
this.recognizeSelfClosing =
(_c = options.recognizeSelfClosing) !== null && _c !== void 0 ? _c : !this.htmlMode;
this.tokenizer = new ((_d = options.Tokenizer) !== null && _d !== void 0 ? _d : Tokenizer_js_1.default)(this.options, this);
this.foreignContext = [!this.htmlMode];
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this);
(_f = (_e = this.cbs).onparserinit) === null || _f === void 0 ? void 0 : _f.call(_e, this);
}

@@ -278,3 +280,3 @@ // Tokenizer event handlers

this.endIndex = endIndex;
if (this.options.recognizeSelfClosing || this.foreignContext[0]) {
if (this.recognizeSelfClosing || this.foreignContext[0]) {
this.closeCurrentTag(false);

@@ -281,0 +283,0 @@ // Set `startIndex` for next node

@@ -107,2 +107,3 @@ export declare enum QuoteType {

private stateBeforeSpecialS;
private stateBeforeSpecialT;
private startEntity;

@@ -109,0 +110,0 @@ private stateInEntity;

@@ -65,5 +65,6 @@ "use strict";

State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS";
State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence";
State[State["InSpecialTag"] = 24] = "InSpecialTag";
State[State["InEntity"] = 25] = "InEntity";
State[State["BeforeSpecialT"] = 23] = "BeforeSpecialT";
State[State["SpecialStartSequence"] = 24] = "SpecialStartSequence";
State[State["InSpecialTag"] = 25] = "InSpecialTag";
State[State["InEntity"] = 26] = "InEntity";
})(State || (State = {}));

@@ -90,3 +91,3 @@ function isWhitespace(c) {

QuoteType[QuoteType["Double"] = 3] = "Double";
})(QuoteType = exports.QuoteType || (exports.QuoteType = {}));
})(QuoteType || (exports.QuoteType = QuoteType = {}));
/**

@@ -99,8 +100,11 @@ * Sequences used to match longer strings.

var Sequences = {
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]),
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]),
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]),
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]),
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]),
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]>
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->`
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `</script`
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style`
TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
TextareaEnd: new Uint8Array([
0x3c, 0x2f, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61,
]), // `</textarea`
};

@@ -330,10 +334,13 @@ var Tokenizer = /** @class */ (function () {

this.sectionStart = this.index;
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3);
if (this.xmlMode) {
this.state = State.InTagName;
}
else if (lower === Sequences.ScriptEnd[2]) {
this.state = State.BeforeSpecialS;
}
else if (lower === Sequences.TitleEnd[2]) {
this.state = State.BeforeSpecialT;
}
else {
this.state =
!this.xmlMode && lower === Sequences.ScriptEnd[2]
? State.BeforeSpecialS
: State.InTagName;
this.state = State.InTagName;
}

@@ -421,3 +428,3 @@ }

this.cbs.onattribname(this.sectionStart, this.index);
this.sectionStart = -1;
this.sectionStart = this.index;
this.state = State.AfterAttributeName;

@@ -432,3 +439,4 @@ this.stateAfterAttributeName(c);

else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
this.sectionStart = -1;
this.state = State.BeforeAttributeName;

@@ -438,3 +446,3 @@ this.stateBeforeAttributeName(c);

else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.index);
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
this.state = State.InAttributeName;

@@ -466,3 +474,3 @@ this.sectionStart = this.index;

? QuoteType.Double
: QuoteType.Single, this.index);
: QuoteType.Single, this.index + 1);
this.state = State.BeforeAttributeName;

@@ -550,2 +558,15 @@ }

};
/**
 * Tokenizer handler for the `BeforeSpecialT` state (CommonJS build).
 *
 * Per the visible tag-name hunk, this state is entered outside XML mode when
 * the previous character matched `Sequences.TitleEnd[2]` (`t`). This handler
 * inspects the next character and either hands off to `startSpecial` with the
 * `</title` or `</textarea` sequence, or falls back to ordinary tag-name
 * parsing.
 *
 * @param c Character code of the current input character.
 */
Tokenizer.prototype.stateBeforeSpecialT = function (c) {
// OR-ing with 0x20 maps ASCII uppercase letters onto their lowercase codes.
var lower = c | 0x20;
// Index 3 of `</title` is `i` (0x69).
if (lower === Sequences.TitleEnd[3]) {
// NOTE(review): `startSpecial` is defined outside this view; the offset 4
// presumably marks how far into the sequence matching has progressed — confirm.
this.startSpecial(Sequences.TitleEnd, 4);
}
// Index 3 of `</textarea` is `e` (0x65).
else if (lower === Sequences.TextareaEnd[3]) {
this.startSpecial(Sequences.TextareaEnd, 4);
}
else {
// Neither sequence matches: switch to the regular tag-name state and let it
// process this same character.
this.state = State.InTagName;
this.stateInTagName(c); // Consume the token again
}
};
Tokenizer.prototype.startEntity = function () {

@@ -679,2 +700,6 @@ this.baseState = this.state;

}
case State.BeforeSpecialT: {
this.stateBeforeSpecialT(c);
break;
}
case State.InAttributeValueNq: {

@@ -681,0 +706,0 @@ this.stateInAttributeValueNoQuotes(c);

/// <reference types="node" />
/// <reference types="node" />
import { Handler, ParserOptions } from "./Parser.js";
import { type Handler, type ParserOptions } from "./Parser.js";
import { Writable } from "node:stream";

@@ -5,0 +5,0 @@ /**

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML parser",
"version": "9.0.0",
"version": "9.1.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -70,14 +70,14 @@ "funding": [

"devDependencies": {
"@types/jest": "^29.5.1",
"@types/node": "^20.1.1",
"@typescript-eslint/eslint-plugin": "^5.59.5",
"@typescript-eslint/parser": "^5.59.5",
"eslint": "^8.40.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-n": "^15.7.0",
"eslint-plugin-unicorn": "^47.0.0",
"jest": "^29.5.0",
"prettier": "^2.8.8",
"ts-jest": "^29.1.0",
"typescript": "^5.0.4"
"@types/jest": "^29.5.11",
"@types/node": "^20.10.6",
"@typescript-eslint/eslint-plugin": "^6.17.0",
"@typescript-eslint/parser": "^6.17.0",
"eslint": "^8.56.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-n": "^16.6.1",
"eslint-plugin-unicorn": "^50.0.1",
"jest": "^29.7.0",
"prettier": "^3.1.1",
"ts-jest": "^29.1.1",
"typescript": "^5.3.3"
},

@@ -84,0 +84,0 @@ "jest": {

@@ -72,3 +72,3 @@ # htmlparser2

parser.write(
"Xyz <script type='text/javascript'>const foo = '<<bar>>';</script>"
"Xyz <script type='text/javascript'>const foo = '<<bar>>';</script>",
);

@@ -75,0 +75,0 @@ parser.end();

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc