htmlparser2
Advanced tools
Comparing version 9.0.0 to 9.1.0
@@ -1,4 +0,5 @@ | ||
import { Parser, ParserOptions } from "./Parser.js"; | ||
export { Parser, type ParserOptions } from "./Parser.js"; | ||
import { DomHandlerOptions, ChildNode, Element, Document } from "domhandler"; | ||
import { Parser, type ParserOptions } from "./Parser.js"; | ||
export type { Handler, ParserOptions } from "./Parser.js"; | ||
export { Parser } from "./Parser.js"; | ||
import { type DomHandlerOptions, type ChildNode, type Element, type Document } from "domhandler"; | ||
export { DomHandler, DomHandler as DefaultHandler, type DomHandlerOptions, } from "domhandler"; | ||
@@ -41,3 +42,3 @@ export type Options = ParserOptions & DomHandlerOptions; | ||
export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser; | ||
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js"; | ||
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, QuoteType, } from "./Tokenizer.js"; | ||
export * as ElementType from "domelementtype"; | ||
@@ -44,0 +45,0 @@ import { type Feed } from "domutils"; |
@@ -55,3 +55,3 @@ import { Parser } from "./Parser.js"; | ||
} | ||
export { default as Tokenizer, } from "./Tokenizer.js"; | ||
export { default as Tokenizer, QuoteType, } from "./Tokenizer.js"; | ||
/* | ||
@@ -58,0 +58,0 @@ * All of the following exports exist for backwards-compatibility. |
@@ -1,2 +0,2 @@ | ||
import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js"; | ||
import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js"; | ||
export interface ParserOptions { | ||
@@ -100,2 +100,3 @@ /** | ||
private readonly lowerCaseAttributeNames; | ||
private readonly recognizeSelfClosing; | ||
/** We are parsing HTML. Inverse of the `xmlMode` option. */ | ||
@@ -102,0 +103,0 @@ private readonly htmlMode; |
@@ -101,3 +101,3 @@ import Tokenizer, { QuoteType } from "./Tokenizer.js"; | ||
constructor(cbs, options = {}) { | ||
var _a, _b, _c, _d, _e; | ||
var _a, _b, _c, _d, _e, _f; | ||
this.options = options; | ||
@@ -129,5 +129,7 @@ /** The start index of the last event. */ | ||
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode; | ||
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer)(this.options, this); | ||
this.recognizeSelfClosing = | ||
(_c = options.recognizeSelfClosing) !== null && _c !== void 0 ? _c : !this.htmlMode; | ||
this.tokenizer = new ((_d = options.Tokenizer) !== null && _d !== void 0 ? _d : Tokenizer)(this.options, this); | ||
this.foreignContext = [!this.htmlMode]; | ||
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this); | ||
(_f = (_e = this.cbs).onparserinit) === null || _f === void 0 ? void 0 : _f.call(_e, this); | ||
} | ||
@@ -251,3 +253,3 @@ // Tokenizer event handlers | ||
this.endIndex = endIndex; | ||
if (this.options.recognizeSelfClosing || this.foreignContext[0]) { | ||
if (this.recognizeSelfClosing || this.foreignContext[0]) { | ||
this.closeCurrentTag(false); | ||
@@ -254,0 +256,0 @@ // Set `startIndex` for next node |
@@ -107,2 +107,3 @@ export declare enum QuoteType { | ||
private stateBeforeSpecialS; | ||
private stateBeforeSpecialT; | ||
private startEntity; | ||
@@ -109,0 +110,0 @@ private stateInEntity; |
@@ -62,5 +62,6 @@ import { EntityDecoder, DecodingMode, htmlDecodeTree, xmlDecodeTree, } from "entities/lib/decode.js"; | ||
State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS"; | ||
State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence"; | ||
State[State["InSpecialTag"] = 24] = "InSpecialTag"; | ||
State[State["InEntity"] = 25] = "InEntity"; | ||
State[State["BeforeSpecialT"] = 23] = "BeforeSpecialT"; | ||
State[State["SpecialStartSequence"] = 24] = "SpecialStartSequence"; | ||
State[State["InSpecialTag"] = 25] = "InSpecialTag"; | ||
State[State["InEntity"] = 26] = "InEntity"; | ||
})(State || (State = {})); | ||
@@ -95,8 +96,11 @@ function isWhitespace(c) { | ||
const Sequences = { | ||
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), | ||
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), | ||
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), | ||
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), | ||
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), | ||
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[ | ||
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]> | ||
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->` | ||
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `</script` | ||
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style` | ||
TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title` | ||
TextareaEnd: new Uint8Array([ | ||
0x3c, 0x2f, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, | ||
]), // `</textarea` | ||
}; | ||
@@ -324,10 +328,13 @@ export default class Tokenizer { | ||
this.sectionStart = this.index; | ||
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { | ||
this.startSpecial(Sequences.TitleEnd, 3); | ||
if (this.xmlMode) { | ||
this.state = State.InTagName; | ||
} | ||
else if (lower === Sequences.ScriptEnd[2]) { | ||
this.state = State.BeforeSpecialS; | ||
} | ||
else if (lower === Sequences.TitleEnd[2]) { | ||
this.state = State.BeforeSpecialT; | ||
} | ||
else { | ||
this.state = | ||
!this.xmlMode && lower === Sequences.ScriptEnd[2] | ||
? State.BeforeSpecialS | ||
: State.InTagName; | ||
this.state = State.InTagName; | ||
} | ||
@@ -415,3 +422,3 @@ } | ||
this.cbs.onattribname(this.sectionStart, this.index); | ||
this.sectionStart = -1; | ||
this.sectionStart = this.index; | ||
this.state = State.AfterAttributeName; | ||
@@ -426,3 +433,4 @@ this.stateAfterAttributeName(c); | ||
else if (c === CharCodes.Slash || c === CharCodes.Gt) { | ||
this.cbs.onattribend(QuoteType.NoValue, this.index); | ||
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart); | ||
this.sectionStart = -1; | ||
this.state = State.BeforeAttributeName; | ||
@@ -432,3 +440,3 @@ this.stateBeforeAttributeName(c); | ||
else if (!isWhitespace(c)) { | ||
this.cbs.onattribend(QuoteType.NoValue, this.index); | ||
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart); | ||
this.state = State.InAttributeName; | ||
@@ -460,3 +468,3 @@ this.sectionStart = this.index; | ||
? QuoteType.Double | ||
: QuoteType.Single, this.index); | ||
: QuoteType.Single, this.index + 1); | ||
this.state = State.BeforeAttributeName; | ||
@@ -544,2 +552,15 @@ } | ||
} | ||
stateBeforeSpecialT(c) { | ||
const lower = c | 0x20; | ||
if (lower === Sequences.TitleEnd[3]) { | ||
this.startSpecial(Sequences.TitleEnd, 4); | ||
} | ||
else if (lower === Sequences.TextareaEnd[3]) { | ||
this.startSpecial(Sequences.TextareaEnd, 4); | ||
} | ||
else { | ||
this.state = State.InTagName; | ||
this.stateInTagName(c); // Consume the token again | ||
} | ||
} | ||
startEntity() { | ||
@@ -673,2 +694,6 @@ this.baseState = this.state; | ||
} | ||
case State.BeforeSpecialT: { | ||
this.stateBeforeSpecialT(c); | ||
break; | ||
} | ||
case State.InAttributeValueNq: { | ||
@@ -675,0 +700,0 @@ this.stateInAttributeValueNoQuotes(c); |
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
import { Handler, ParserOptions } from "./Parser.js"; | ||
import { type Handler, type ParserOptions } from "./Parser.js"; | ||
import { Writable } from "node:stream"; | ||
@@ -5,0 +5,0 @@ /** |
@@ -1,4 +0,5 @@ | ||
import { Parser, ParserOptions } from "./Parser.js"; | ||
export { Parser, type ParserOptions } from "./Parser.js"; | ||
import { DomHandlerOptions, ChildNode, Element, Document } from "domhandler"; | ||
import { Parser, type ParserOptions } from "./Parser.js"; | ||
export type { Handler, ParserOptions } from "./Parser.js"; | ||
export { Parser } from "./Parser.js"; | ||
import { type DomHandlerOptions, type ChildNode, type Element, type Document } from "domhandler"; | ||
export { DomHandler, DomHandler as DefaultHandler, type DomHandlerOptions, } from "domhandler"; | ||
@@ -41,3 +42,3 @@ export type Options = ParserOptions & DomHandlerOptions; | ||
export declare function createDomStream(callback: (error: Error | null, dom: ChildNode[]) => void, options?: Options, elementCallback?: (element: Element) => void): Parser; | ||
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, } from "./Tokenizer.js"; | ||
export { default as Tokenizer, type Callbacks as TokenizerCallbacks, QuoteType, } from "./Tokenizer.js"; | ||
export * as ElementType from "domelementtype"; | ||
@@ -44,0 +45,0 @@ import { type Feed } from "domutils"; |
@@ -29,3 +29,3 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0; | ||
exports.DomUtils = exports.parseFeed = exports.getFeed = exports.ElementType = exports.QuoteType = exports.Tokenizer = exports.createDomStream = exports.createDocumentStream = exports.parseDOM = exports.parseDocument = exports.DefaultHandler = exports.DomHandler = exports.Parser = void 0; | ||
var Parser_js_1 = require("./Parser.js"); | ||
@@ -93,2 +93,3 @@ var Parser_js_2 = require("./Parser.js"); | ||
Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_js_1).default; } }); | ||
Object.defineProperty(exports, "QuoteType", { enumerable: true, get: function () { return Tokenizer_js_1.QuoteType; } }); | ||
/* | ||
@@ -95,0 +96,0 @@ * All of the following exports exist for backwards-compatibility. |
@@ -1,2 +0,2 @@ | ||
import Tokenizer, { Callbacks, QuoteType } from "./Tokenizer.js"; | ||
import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js"; | ||
export interface ParserOptions { | ||
@@ -100,2 +100,3 @@ /** | ||
private readonly lowerCaseAttributeNames; | ||
private readonly recognizeSelfClosing; | ||
/** We are parsing HTML. Inverse of the `xmlMode` option. */ | ||
@@ -102,0 +103,0 @@ private readonly htmlMode; |
@@ -128,3 +128,3 @@ "use strict"; | ||
if (options === void 0) { options = {}; } | ||
var _a, _b, _c, _d, _e; | ||
var _a, _b, _c, _d, _e, _f; | ||
this.options = options; | ||
@@ -156,5 +156,7 @@ /** The start index of the last event. */ | ||
(_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode; | ||
this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_js_1.default)(this.options, this); | ||
this.recognizeSelfClosing = | ||
(_c = options.recognizeSelfClosing) !== null && _c !== void 0 ? _c : !this.htmlMode; | ||
this.tokenizer = new ((_d = options.Tokenizer) !== null && _d !== void 0 ? _d : Tokenizer_js_1.default)(this.options, this); | ||
this.foreignContext = [!this.htmlMode]; | ||
(_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this); | ||
(_f = (_e = this.cbs).onparserinit) === null || _f === void 0 ? void 0 : _f.call(_e, this); | ||
} | ||
@@ -278,3 +280,3 @@ // Tokenizer event handlers | ||
this.endIndex = endIndex; | ||
if (this.options.recognizeSelfClosing || this.foreignContext[0]) { | ||
if (this.recognizeSelfClosing || this.foreignContext[0]) { | ||
this.closeCurrentTag(false); | ||
@@ -281,0 +283,0 @@ // Set `startIndex` for next node |
@@ -107,2 +107,3 @@ export declare enum QuoteType { | ||
private stateBeforeSpecialS; | ||
private stateBeforeSpecialT; | ||
private startEntity; | ||
@@ -109,0 +110,0 @@ private stateInEntity; |
@@ -65,5 +65,6 @@ "use strict"; | ||
State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS"; | ||
State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence"; | ||
State[State["InSpecialTag"] = 24] = "InSpecialTag"; | ||
State[State["InEntity"] = 25] = "InEntity"; | ||
State[State["BeforeSpecialT"] = 23] = "BeforeSpecialT"; | ||
State[State["SpecialStartSequence"] = 24] = "SpecialStartSequence"; | ||
State[State["InSpecialTag"] = 25] = "InSpecialTag"; | ||
State[State["InEntity"] = 26] = "InEntity"; | ||
})(State || (State = {})); | ||
@@ -90,3 +91,3 @@ function isWhitespace(c) { | ||
QuoteType[QuoteType["Double"] = 3] = "Double"; | ||
})(QuoteType = exports.QuoteType || (exports.QuoteType = {})); | ||
})(QuoteType || (exports.QuoteType = QuoteType = {})); | ||
/** | ||
@@ -99,8 +100,11 @@ * Sequences used to match longer strings. | ||
var Sequences = { | ||
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), | ||
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), | ||
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), | ||
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), | ||
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), | ||
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[ | ||
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]> | ||
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->` | ||
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `</script` | ||
StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style` | ||
TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title` | ||
TextareaEnd: new Uint8Array([ | ||
0x3c, 0x2f, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, | ||
]), // `</textarea` | ||
}; | ||
@@ -330,10 +334,13 @@ var Tokenizer = /** @class */ (function () { | ||
this.sectionStart = this.index; | ||
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { | ||
this.startSpecial(Sequences.TitleEnd, 3); | ||
if (this.xmlMode) { | ||
this.state = State.InTagName; | ||
} | ||
else if (lower === Sequences.ScriptEnd[2]) { | ||
this.state = State.BeforeSpecialS; | ||
} | ||
else if (lower === Sequences.TitleEnd[2]) { | ||
this.state = State.BeforeSpecialT; | ||
} | ||
else { | ||
this.state = | ||
!this.xmlMode && lower === Sequences.ScriptEnd[2] | ||
? State.BeforeSpecialS | ||
: State.InTagName; | ||
this.state = State.InTagName; | ||
} | ||
@@ -421,3 +428,3 @@ } | ||
this.cbs.onattribname(this.sectionStart, this.index); | ||
this.sectionStart = -1; | ||
this.sectionStart = this.index; | ||
this.state = State.AfterAttributeName; | ||
@@ -432,3 +439,4 @@ this.stateAfterAttributeName(c); | ||
else if (c === CharCodes.Slash || c === CharCodes.Gt) { | ||
this.cbs.onattribend(QuoteType.NoValue, this.index); | ||
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart); | ||
this.sectionStart = -1; | ||
this.state = State.BeforeAttributeName; | ||
@@ -438,3 +446,3 @@ this.stateBeforeAttributeName(c); | ||
else if (!isWhitespace(c)) { | ||
this.cbs.onattribend(QuoteType.NoValue, this.index); | ||
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart); | ||
this.state = State.InAttributeName; | ||
@@ -466,3 +474,3 @@ this.sectionStart = this.index; | ||
? QuoteType.Double | ||
: QuoteType.Single, this.index); | ||
: QuoteType.Single, this.index + 1); | ||
this.state = State.BeforeAttributeName; | ||
@@ -550,2 +558,15 @@ } | ||
}; | ||
Tokenizer.prototype.stateBeforeSpecialT = function (c) { | ||
var lower = c | 0x20; | ||
if (lower === Sequences.TitleEnd[3]) { | ||
this.startSpecial(Sequences.TitleEnd, 4); | ||
} | ||
else if (lower === Sequences.TextareaEnd[3]) { | ||
this.startSpecial(Sequences.TextareaEnd, 4); | ||
} | ||
else { | ||
this.state = State.InTagName; | ||
this.stateInTagName(c); // Consume the token again | ||
} | ||
}; | ||
Tokenizer.prototype.startEntity = function () { | ||
@@ -679,2 +700,6 @@ this.baseState = this.state; | ||
} | ||
case State.BeforeSpecialT: { | ||
this.stateBeforeSpecialT(c); | ||
break; | ||
} | ||
case State.InAttributeValueNq: { | ||
@@ -681,0 +706,0 @@ this.stateInAttributeValueNoQuotes(c); |
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
import { Handler, ParserOptions } from "./Parser.js"; | ||
import { type Handler, type ParserOptions } from "./Parser.js"; | ||
import { Writable } from "node:stream"; | ||
@@ -5,0 +5,0 @@ /** |
{ | ||
"name": "htmlparser2", | ||
"description": "Fast & forgiving HTML/XML parser", | ||
"version": "9.0.0", | ||
"version": "9.1.0", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
@@ -70,14 +70,14 @@ "funding": [ | ||
"devDependencies": { | ||
"@types/jest": "^29.5.1", | ||
"@types/node": "^20.1.1", | ||
"@typescript-eslint/eslint-plugin": "^5.59.5", | ||
"@typescript-eslint/parser": "^5.59.5", | ||
"eslint": "^8.40.0", | ||
"eslint-config-prettier": "^8.8.0", | ||
"eslint-plugin-n": "^15.7.0", | ||
"eslint-plugin-unicorn": "^47.0.0", | ||
"jest": "^29.5.0", | ||
"prettier": "^2.8.8", | ||
"ts-jest": "^29.1.0", | ||
"typescript": "^5.0.4" | ||
"@types/jest": "^29.5.11", | ||
"@types/node": "^20.10.6", | ||
"@typescript-eslint/eslint-plugin": "^6.17.0", | ||
"@typescript-eslint/parser": "^6.17.0", | ||
"eslint": "^8.56.0", | ||
"eslint-config-prettier": "^9.1.0", | ||
"eslint-plugin-n": "^16.6.1", | ||
"eslint-plugin-unicorn": "^50.0.1", | ||
"jest": "^29.7.0", | ||
"prettier": "^3.1.1", | ||
"ts-jest": "^29.1.1", | ||
"typescript": "^5.3.3" | ||
}, | ||
@@ -84,0 +84,0 @@ "jest": { |
@@ -72,3 +72,3 @@ # htmlparser2 | ||
parser.write( | ||
"Xyz <script type='text/javascript'>const foo = '<<bar>>';</script>" | ||
"Xyz <script type='text/javascript'>const foo = '<<bar>>';</script>", | ||
); | ||
@@ -75,0 +75,0 @@ parser.end(); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
253719
3633