Comparing version 4.4.0 to 4.5.0
@@ -0,4 +1,19 @@ | ||
/** | ||
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point. | ||
*/ | ||
export declare const fromCodePoint: (...codePoints: number[]) => string; | ||
/** | ||
* Replace the given code point with a replacement character if it is a | ||
* surrogate or is outside the valid range. Otherwise return the code | ||
* point unchanged. | ||
*/ | ||
export declare function replaceCodePoint(codePoint: number): number; | ||
/** | ||
* Replace the code point if relevant, then convert it to a string. | ||
* | ||
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead. | ||
* @param codePoint The code point to decode. | ||
* @returns The decoded code point. | ||
*/ | ||
export default function decodeCodePoint(codePoint: number): string; | ||
//# sourceMappingURL=decode_codepoint.d.ts.map |
@@ -8,2 +8,3 @@ "use strict"; | ||
[0, 65533], | ||
// C1 Unicode control character reference replacements | ||
[128, 8364], | ||
@@ -37,2 +38,5 @@ [130, 8218], | ||
]); | ||
/** | ||
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point. | ||
*/ | ||
exports.fromCodePoint = | ||
@@ -50,2 +54,7 @@ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins | ||
}; | ||
/** | ||
* Replace the given code point with a replacement character if it is a | ||
* surrogate or is outside the valid range. Otherwise return the code | ||
* point unchanged. | ||
*/ | ||
function replaceCodePoint(codePoint) { | ||
@@ -59,2 +68,9 @@ var _a; | ||
exports.replaceCodePoint = replaceCodePoint; | ||
/** | ||
* Replace the code point if relevant, then convert it to a string. | ||
* | ||
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead. | ||
* @param codePoint The code point to decode. | ||
* @returns The decoded code point. | ||
*/ | ||
function decodeCodePoint(codePoint) { | ||
@@ -61,0 +77,0 @@ return (0, exports.fromCodePoint)(replaceCodePoint(codePoint)); |
@@ -11,12 +11,183 @@ import htmlDecodeTree from "./generated/decode-data-html.js"; | ||
} | ||
export declare enum DecodingMode { | ||
/** Entities in text nodes that can end with any character. */ | ||
Legacy = 0, | ||
/** Only allow entities terminated with a semicolon. */ | ||
Strict = 1, | ||
/** Entities in attributes have limitations on ending characters. */ | ||
Attribute = 2 | ||
} | ||
/** | ||
* Producers for character reference errors as defined in the HTML spec. | ||
*/ | ||
export interface EntityErrorProducer { | ||
/**
 * Invoked by the decoder after it has emitted a character reference
 * (numeric or named) that was not terminated by a semicolon.
 */
missingSemicolonAfterCharacterReference(): void; | ||
/**
 * Invoked when a numeric character reference ends before any digit was read.
 *
 * @param consumedCharacters The decoder's count of characters consumed so far.
 */
absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void; | ||
/**
 * Invoked after a numeric character reference has been emitted.
 *
 * @param code The numeric value as parsed from the input (the decoder
 *     passes its raw result, not the replaced code point it emitted).
 */
validateNumericCharacterReference(code: number): void; | ||
} | ||
/** | ||
* Token decoder with support of writing partial entities. | ||
*/ | ||
export declare class EntityDecoder { | ||
/** The tree used to decode entities. */ | ||
private readonly decodeTree; | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For multi-byte named entities, this will be called multiple times, | ||
* with the second codepoint, and the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of bytes consumed by the decoder. | ||
*/ | ||
private readonly emitCodePoint; | ||
/** An object that is used to produce errors. */ | ||
private readonly errors?; | ||
constructor( | ||
/** The tree used to decode entities. */ | ||
decodeTree: Uint16Array, | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For multi-byte named entities, this will be called multiple times, | ||
* with the second codepoint, and the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of bytes consumed by the decoder. | ||
*/ | ||
emitCodePoint: (cp: number, consumed: number) => void, | ||
/** An object that is used to produce errors. */ | ||
errors?: EntityErrorProducer | undefined); | ||
/** The current state of the decoder. */ | ||
private state; | ||
/** Characters that were consumed while parsing an entity. */ | ||
private consumed; | ||
/** | ||
* The result of the entity. | ||
* | ||
* Either the result index of a named entity, or the codepoint of a | ||
* numeric entity. | ||
*/ | ||
private result; | ||
/** The current index in the decode tree. */ | ||
private treeIndex; | ||
/** The number of characters that were consumed in excess. */ | ||
private excess; | ||
/** The mode in which the decoder is operating. */ | ||
private decodeMode; | ||
/** Resets the instance to make it reusable. */ | ||
startEntity(decodeMode: DecodingMode): void; | ||
/** | ||
* Write an entity to the decoder. This can be called multiple times with partial entities. | ||
* If the entity is incomplete, the decoder will return -1. | ||
* | ||
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the | ||
* entity is incomplete, and resume when the next string is written. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
write(str: string, offset: number): number; | ||
/** | ||
* Switches between the numeric decimal and hexadecimal states. | ||
* | ||
* Equivalent to the `Numeric character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericStart; | ||
private addToNumericResult; | ||
/** | ||
* Parses a hexadecimal numeric entity. | ||
* | ||
* Equivalent to the `Hexadecimal character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericHex; | ||
/** | ||
* Parses a decimal numeric entity. | ||
* | ||
* Equivalent to the `Decimal character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericDecimal; | ||
/** | ||
* Validate and emit a numeric entity. | ||
* | ||
* Implements the logic from the `Hexadecimal character reference start | ||
* state` and `Numeric character reference end state` in the HTML spec. | ||
* | ||
* @param lastCp The last code point of the entity. Used to see if the | ||
* entity was terminated with a semicolon. | ||
* @param expectedLength The minimum number of characters that should be | ||
* consumed. Used to validate that at least one digit | ||
* was consumed. | ||
* @returns The number of characters that were consumed. | ||
*/ | ||
private emitNumericEntity; | ||
/** | ||
* Parses a named entity. | ||
* | ||
* Equivalent to the `Named character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNamedEntity; | ||
/** | ||
* Emit a named entity that was not terminated with a semicolon. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
private emitNotTerminatedNamedEntity; | ||
/** | ||
* Emit a named entity. | ||
* | ||
* @param result The index of the entity in the decode tree. | ||
* @param valueLength The number of bytes in the entity. | ||
* @param consumed The number of characters consumed. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
private emitNamedEntityData; | ||
/** | ||
* Signal to the parser that the end of the input was reached. | ||
* | ||
* Remaining data will be emitted and relevant errors will be produced. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
end(): number; | ||
} | ||
/** | ||
* Determines the branch of the current node that is taken given the current | ||
* character. This function is used to traverse the trie. | ||
* | ||
* @param decodeTree The trie. | ||
* @param current The current node. | ||
* @param nodeIdx The index right after the current node and its value. | ||
* @param char The current character. | ||
* @returns The index of the next node, or -1 if no branch is taken. | ||
*/ | ||
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number; | ||
/** | ||
* Decodes an HTML string, allowing for entities not terminated by a semi-colon. | ||
* Decodes an HTML string. | ||
* | ||
* @param str The string to decode. | ||
* @param mode The decoding mode. | ||
* @returns The decoded string. | ||
*/ | ||
export declare function decodeHTML(str: string): string; | ||
export declare function decodeHTML(str: string, mode?: DecodingMode): string; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an HTML string in an attribute. | ||
* | ||
@@ -26,5 +197,12 @@ * @param str The string to decode. | ||
*/ | ||
export declare function decodeHTMLAttribute(str: string): string; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
* @param str The string to decode. | ||
* @returns The decoded string. | ||
*/ | ||
export declare function decodeHTMLStrict(str: string): string; | ||
/** | ||
* Decodes an XML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an XML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
@@ -31,0 +209,0 @@ * @param str The string to decode. |
"use strict"; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
o[k2] = m[k]; | ||
})); | ||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { | ||
Object.defineProperty(o, "default", { enumerable: true, value: v }); | ||
}) : function(o, v) { | ||
o["default"] = v; | ||
}); | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); | ||
__setModuleDefault(result, mod); | ||
return result; | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
@@ -6,3 +29,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.BinTrieFlags = exports.fromCodePoint = exports.replaceCodePoint = exports.decodeCodePoint = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0; | ||
exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTMLAttribute = exports.decodeHTML = exports.determineBranch = exports.EntityDecoder = exports.DecodingMode = exports.BinTrieFlags = exports.fromCodePoint = exports.replaceCodePoint = exports.decodeCodePoint = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0; | ||
var decode_data_html_js_1 = __importDefault(require("./generated/decode-data-html.js")); | ||
@@ -12,3 +35,3 @@ exports.htmlDecodeTree = decode_data_html_js_1.default; | ||
exports.xmlDecodeTree = decode_data_xml_js_1.default; | ||
var decode_codepoint_js_1 = __importDefault(require("./decode_codepoint.js")); | ||
var decode_codepoint_js_1 = __importStar(require("./decode_codepoint.js")); | ||
exports.decodeCodePoint = decode_codepoint_js_1.default; | ||
@@ -22,2 +45,3 @@ var decode_codepoint_js_2 = require("./decode_codepoint.js"); | ||
CharCodes[CharCodes["SEMI"] = 59] = "SEMI"; | ||
CharCodes[CharCodes["EQUALS"] = 61] = "EQUALS"; | ||
CharCodes[CharCodes["ZERO"] = 48] = "ZERO"; | ||
@@ -28,5 +52,9 @@ CharCodes[CharCodes["NINE"] = 57] = "NINE"; | ||
CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X"; | ||
/** Bit that needs to be set to convert an upper case ASCII character to lower case */ | ||
CharCodes[CharCodes["To_LOWER_BIT"] = 32] = "To_LOWER_BIT"; | ||
CharCodes[CharCodes["LOWER_Z"] = 122] = "LOWER_Z"; | ||
CharCodes[CharCodes["UPPER_A"] = 65] = "UPPER_A"; | ||
CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F"; | ||
CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z"; | ||
})(CharCodes || (CharCodes = {})); | ||
/** Bit that needs to be set to convert an upper case ASCII character to lower case */ | ||
var TO_LOWER_BIT = 32; | ||
var BinTrieFlags; | ||
@@ -38,81 +66,398 @@ (function (BinTrieFlags) { | ||
})(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {})); | ||
function getDecoder(decodeTree) { | ||
return function decodeHTMLBinary(str, strict) { | ||
var ret = ""; | ||
var lastIdx = 0; | ||
var strIdx = 0; | ||
while ((strIdx = str.indexOf("&", strIdx)) >= 0) { | ||
ret += str.slice(lastIdx, strIdx); | ||
lastIdx = strIdx; | ||
// Skip the "&" | ||
strIdx += 1; | ||
// If we have a numeric entity, handle this separately. | ||
if (str.charCodeAt(strIdx) === CharCodes.NUM) { | ||
// Skip the leading "&#". For hex entities, also skip the leading "x". | ||
var start = strIdx + 1; | ||
var base = 10; | ||
var cp = str.charCodeAt(start); | ||
if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) { | ||
base = 16; | ||
strIdx += 1; | ||
start += 1; | ||
/**
 * Tells whether a char code is an ASCII decimal digit.
 *
 * @param code The char code to test.
 * @returns `true` when `code` lies between `CharCodes.ZERO` and `CharCodes.NINE` (inclusive).
 */
function isNumber(code) {
    if (code < CharCodes.ZERO) {
        return false;
    }
    return code <= CharCodes.NINE;
}
/**
 * Tells whether a char code is one of the hexadecimal letters.
 * Decimal digits are not matched here; callers combine this with `isNumber`.
 *
 * @param code The char code to test.
 * @returns `true` for the ranges `UPPER_A`..`UPPER_F` and `LOWER_A`..`LOWER_F`.
 */
function isHexadecimalCharacter(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) {
        return true;
    }
    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
/**
 * Tells whether a char code is an ASCII letter (either case) or an ASCII digit.
 *
 * @param code The char code to test.
 * @returns `true` for `A`-`Z`, `a`-`z` and `0`-`9`.
 */
function isAsciiAlphaNumeric(code) {
    var isUpperCaseLetter = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z;
    if (isUpperCaseLetter) {
        return true;
    }
    var isLowerCaseLetter = code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z;
    if (isLowerCaseLetter) {
        return true;
    }
    return isNumber(code);
}
/**
 * Tells whether the character following an unterminated entity makes that
 * entity invalid inside an attribute value: an equals sign or any ASCII
 * alphanumeric character.
 *
 * Such entities are left undecoded and must not raise a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @param code The char code that follows the entity.
 * @returns `true` when `code` is `=` or an ASCII letter/digit.
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric(code);
}
/**
 * States of the entity decoder, laid out as a two-way map:
 * name -> ordinal and ordinal -> name.
 */
var EntityDecoderState;
(function (states) {
    // Array position is the ordinal, so the order of this list is significant.
    [
        "EntityStart",
        "NumericStart",
        "NumericDecimal",
        "NumericHex",
        "NamedEntity",
    ].forEach(function (label, ordinal) {
        states[(states[label] = ordinal)] = label;
    });
})(EntityDecoderState || (EntityDecoderState = {}));
/**
 * Decoding modes, exported as a two-way map (name -> ordinal, ordinal -> name).
 */
var DecodingMode;
(function (modes) {
    // Array position is the ordinal, so the order of this list is significant.
    [
        /** Entities in text nodes that can end with any character. */
        "Legacy",
        /** Only allow entities terminated with a semicolon. */
        "Strict",
        /** Entities in attributes have limitations on ending characters. */
        "Attribute",
    ].forEach(function (label, ordinal) {
        modes[(modes[label] = ordinal)] = label;
    });
})(DecodingMode = exports.DecodingMode || (exports.DecodingMode = {}));
/** | ||
* Token decoder with support of writing partial entities. | ||
*/ | ||
var EntityDecoder = /** @class */ (function () { | ||
/**
 * Stores the decode tree, the emit callback and the optional error producer,
 * then puts every piece of per-entity state at its starting value
 * (the same values `startEntity` assigns, with the mode defaulting to `Strict`).
 */
function EntityDecoder( | ||
/** The tree used to decode entities. */ | ||
decodeTree, | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For named entities that decode to two codepoints, this is called once | ||
* per codepoint, each time with the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of characters consumed by the decoder. | ||
*/ | ||
emitCodePoint, | ||
/** An object that is used to produce errors. */ | ||
errors) { | ||
this.decodeTree = decodeTree; | ||
this.emitCodePoint = emitCodePoint; | ||
this.errors = errors; | ||
/** The current state of the decoder. */ | ||
this.state = EntityDecoderState.EntityStart; | ||
/** Characters that were consumed while parsing an entity. */ | ||
this.consumed = 1; | ||
/** | ||
* The result of the entity. | ||
* | ||
* Either the result index of a named entity, or the codepoint of a | ||
* numeric entity. | ||
*/ | ||
this.result = 0; | ||
/** The current index in the decode tree. */ | ||
this.treeIndex = 0; | ||
/** The number of characters that were consumed in excess. */ | ||
this.excess = 1; | ||
/** The mode in which the decoder is operating. */ | ||
this.decodeMode = DecodingMode.Strict; | ||
} | ||
/**
 * Puts the decoder back into its initial state so the same instance can
 * decode another entity.
 *
 * @param decodeMode The decoding mode to use for the next entity.
 */
EntityDecoder.prototype.startEntity = function (decodeMode) {
    // Counters that start at one.
    this.consumed = 1;
    this.excess = 1;
    // Positions and accumulated value start at zero.
    this.treeIndex = 0;
    this.result = 0;
    this.state = EntityDecoderState.EntityStart;
    this.decodeMode = decodeMode;
};
/** | ||
* Write an entity to the decoder. This can be called multiple times with partial entities. | ||
* If the entity is incomplete, the decoder will return -1. | ||
* | ||
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the | ||
* entity is incomplete, and resume when the next string is written. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
EntityDecoder.prototype.write = function (str, offset) { | ||
switch (this.state) { | ||
case EntityDecoderState.EntityStart: { | ||
if (str.charCodeAt(offset) === CharCodes.NUM) { | ||
this.state = EntityDecoderState.NumericStart; | ||
this.consumed += 1; | ||
return this.stateNumericStart(str, offset + 1); | ||
} | ||
do | ||
cp = str.charCodeAt(++strIdx); | ||
while ((cp >= CharCodes.ZERO && cp <= CharCodes.NINE) || | ||
(base === 16 && | ||
(cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A && | ||
(cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F)); | ||
if (start !== strIdx) { | ||
var entity = str.substring(start, strIdx); | ||
var parsed = parseInt(entity, base); | ||
if (str.charCodeAt(strIdx) === CharCodes.SEMI) { | ||
strIdx += 1; | ||
} | ||
else if (strict) { | ||
continue; | ||
} | ||
ret += (0, decode_codepoint_js_1.default)(parsed); | ||
lastIdx = strIdx; | ||
} | ||
continue; | ||
this.state = EntityDecoderState.NamedEntity; | ||
return this.stateNamedEntity(str, offset); | ||
} | ||
var resultIdx = 0; | ||
var excess = 1; | ||
var treeIdx = 0; | ||
var current = decodeTree[treeIdx]; | ||
for (; strIdx < str.length; strIdx++, excess++) { | ||
treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx)); | ||
if (treeIdx < 0) | ||
break; | ||
current = decodeTree[treeIdx]; | ||
var masked = current & BinTrieFlags.VALUE_LENGTH; | ||
// If the branch is a value, store it and continue | ||
if (masked) { | ||
// If we have a legacy entity while parsing strictly, just skip the number of bytes | ||
if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) { | ||
resultIdx = treeIdx; | ||
excess = 0; | ||
} | ||
// The mask is the number of bytes of the value, including the current byte. | ||
var valueLength = (masked >> 14) - 1; | ||
if (valueLength === 0) | ||
break; | ||
treeIdx += valueLength; | ||
case EntityDecoderState.NumericStart: { | ||
return this.stateNumericStart(str, offset); | ||
} | ||
case EntityDecoderState.NumericDecimal: { | ||
return this.stateNumericDecimal(str, offset); | ||
} | ||
case EntityDecoderState.NumericHex: { | ||
return this.stateNumericHex(str, offset); | ||
} | ||
case EntityDecoderState.NamedEntity: { | ||
return this.stateNamedEntity(str, offset); | ||
} | ||
} | ||
}; | ||
/**
 * Decides whether a numeric entity is decimal or hexadecimal and hands over
 * to the matching state.
 *
 * Equivalent to the `Numeric character reference state` in the HTML spec.
 *
 * @param str The string containing the entity (or a continuation of the entity).
 * @param offset The current offset.
 * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
 */
EntityDecoder.prototype.stateNumericStart = function (str, offset) {
    // Nothing left to inspect: wait for more input.
    if (offset >= str.length) {
        return -1;
    }
    // OR-ing in the lower-case bit lets a single comparison match "x" and "X".
    var hasHexPrefix = (str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X;
    if (!hasHexPrefix) {
        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(str, offset);
    }
    this.state = EntityDecoderState.NumericHex;
    // Account for the "x" itself.
    this.consumed += 1;
    return this.stateNumericHex(str, offset + 1);
};
/**
 * Folds a run of digits into the numeric result accumulated so far.
 *
 * The existing result is shifted left by the number of new digits (in the
 * given base) before the newly parsed value is added, so digits may arrive
 * split across several `write` calls. Does nothing for an empty run.
 *
 * @param str The string containing the digits.
 * @param start Index of the first digit.
 * @param end Index just past the last digit.
 * @param base The radix of the digits (10 or 16 at the visible call sites).
 */
EntityDecoder.prototype.addToNumericResult = function (str, start, end, base) {
    if (start === end) {
        return;
    }
    var digitCount = end - start;
    // `slice` replaces the deprecated `substr`; for this range both select
    // the same characters.
    this.result =
        this.result * Math.pow(base, digitCount) +
            parseInt(str.slice(start, end), base);
    this.consumed += digitCount;
};
/**
 * Consumes the digits of a hexadecimal numeric entity.
 *
 * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
 *
 * @param str The string containing the entity (or a continuation of the entity).
 * @param offset The current offset.
 * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
 */
EntityDecoder.prototype.stateNumericHex = function (str, offset) {
    var firstDigit = offset;
    var cursor = offset;
    var code;
    // Advance over every hexadecimal digit.
    for (; cursor < str.length; cursor++) {
        code = str.charCodeAt(cursor);
        if (!(isNumber(code) || isHexadecimalCharacter(code))) {
            break;
        }
    }
    this.addToNumericResult(str, firstDigit, cursor, 16);
    if (cursor < str.length) {
        // Stopped on a non-digit: the entity ends here.
        return this.emitNumericEntity(code, 3);
    }
    // Ran out of input; more digits may follow in the next chunk.
    return -1;
};
/**
 * Consumes the digits of a decimal numeric entity.
 *
 * Equivalent to the `Decimal character reference state` in the HTML spec.
 *
 * @param str The string containing the entity (or a continuation of the entity).
 * @param offset The current offset.
 * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
 */
EntityDecoder.prototype.stateNumericDecimal = function (str, offset) {
    var firstDigit = offset;
    var cursor = offset;
    var code;
    // Advance over every decimal digit.
    for (; cursor < str.length; cursor++) {
        code = str.charCodeAt(cursor);
        if (!isNumber(code)) {
            break;
        }
    }
    this.addToNumericResult(str, firstDigit, cursor, 10);
    if (cursor < str.length) {
        // Stopped on a non-digit: the entity ends here.
        return this.emitNumericEntity(code, 2);
    }
    // Ran out of input; more digits may follow in the next chunk.
    return -1;
};
/**
 * Validates a finished numeric entity and emits its code point.
 *
 * Implements the logic from the `Hexadecimal character reference start
 * state` and `Numeric character reference end state` in the HTML spec.
 *
 * @param lastCp The code point that ended the entity. Used to see if the
 *               entity was terminated with a semicolon.
 * @param expectedLength The number of characters consumed before any digit
 *                       (2 for decimal, 3 for hexadecimal at the visible
 *                       call sites). Consuming no more than this means no
 *                       digit was read.
 * @returns The number of characters that were consumed; 0 when nothing is
 *          emitted (no digits, or no semicolon in strict mode).
 */
EntityDecoder.prototype.emitNumericEntity = function (lastCp, expectedLength) {
    var hasDigits = this.consumed > expectedLength;
    if (!hasDigits) {
        var producer = this.errors;
        if (producer !== null && producer !== undefined) {
            producer.absenceOfDigitsInNumericCharacterReference(this.consumed);
        }
        return 0;
    }
    var endsWithSemicolon = lastCp === CharCodes.SEMI;
    // Strict mode refuses entities that lack the closing semicolon.
    if (!endsWithSemicolon && this.decodeMode === DecodingMode.Strict) {
        return 0;
    }
    if (endsWithSemicolon) {
        // The semicolon is part of the entity.
        this.consumed += 1;
    }
    this.emitCodePoint((0, decode_codepoint_js_1.replaceCodePoint)(this.result), this.consumed);
    if (this.errors) {
        if (!endsWithSemicolon) {
            this.errors.missingSemicolonAfterCharacterReference();
        }
        this.errors.validateNumericCharacterReference(this.result);
    }
    return this.consumed;
};
/** | ||
* Parses a named entity. | ||
* | ||
* Equivalent to the `Named character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
EntityDecoder.prototype.stateNamedEntity = function (str, offset) { | ||
var decodeTree = this.decodeTree; | ||
var current = decodeTree[this.treeIndex]; | ||
// The mask is the number of bytes of the value, including the current byte. | ||
var valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
for (; offset < str.length; offset++, this.excess++) { | ||
var char = str.charCodeAt(offset); | ||
this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char); | ||
if (this.treeIndex < 0) { | ||
return this.result === 0 || | ||
// If we are parsing an attribute | ||
(this.decodeMode === DecodingMode.Attribute && | ||
// We shouldn't have consumed any characters after the entity, | ||
(valueLength === 0 || | ||
// And there should be no invalid characters. | ||
isEntityInAttributeInvalidEnd(char))) | ||
? 0 | ||
: this.emitNotTerminatedNamedEntity(); | ||
} | ||
current = decodeTree[this.treeIndex]; | ||
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
// If the branch is a value, store it and continue | ||
if (valueLength !== 0) { | ||
// If the entity is terminated by a semicolon, we are done. | ||
if (char === CharCodes.SEMI) { | ||
return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess); | ||
} | ||
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it. | ||
if (this.decodeMode !== DecodingMode.Strict) { | ||
this.result = this.treeIndex; | ||
this.consumed += this.excess; | ||
this.excess = 0; | ||
} | ||
} | ||
if (resultIdx !== 0) { | ||
var valueLength = (decodeTree[resultIdx] & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
ret += | ||
valueLength === 1 | ||
? String.fromCharCode(decodeTree[resultIdx] & ~BinTrieFlags.VALUE_LENGTH) | ||
: valueLength === 2 | ||
? String.fromCharCode(decodeTree[resultIdx + 1]) | ||
: String.fromCharCode(decodeTree[resultIdx + 1], decodeTree[resultIdx + 2]); | ||
lastIdx = strIdx - excess + 1; | ||
} | ||
return -1; | ||
}; | ||
/**
 * Emits the named entity stored in `this.result`, which was not closed by a
 * semicolon, and reports the missing semicolon to the error producer (if any).
 *
 * @returns The number of characters consumed.
 */
EntityDecoder.prototype.emitNotTerminatedNamedEntity = function () {
    var entityIndex = this.result;
    // Bits above the 14th of the node hold the length of the stored value.
    var storedLength = (this.decodeTree[entityIndex] & BinTrieFlags.VALUE_LENGTH) >> 14;
    this.emitNamedEntityData(entityIndex, storedLength, this.consumed);
    var producer = this.errors;
    if (producer !== null && producer !== undefined) {
        producer.missingSemicolonAfterCharacterReference();
    }
    return this.consumed;
};
/**
 * Emits the code point(s) stored for a named entity.
 *
 * @param result The index of the entity in the decode tree.
 * @param valueLength The length marker of the stored value: 1 means the value
 *                    is packed into the node itself, otherwise it follows the
 *                    node; 3 means two code points are stored.
 * @param consumed The number of characters consumed.
 *
 * @returns The number of characters consumed.
 */
EntityDecoder.prototype.emitNamedEntityData = function (result, valueLength, consumed) {
    var tree = this.decodeTree;
    var firstCodePoint;
    if (valueLength === 1) {
        // The value shares the node with the flags; mask the flags away.
        firstCodePoint = tree[result] & ~BinTrieFlags.VALUE_LENGTH;
    }
    else {
        firstCodePoint = tree[result + 1];
    }
    this.emitCodePoint(firstCodePoint, consumed);
    if (valueLength === 3) {
        // A second code point is stored right after the first one.
        this.emitCodePoint(tree[result + 2], consumed);
    }
    return consumed;
};
/** | ||
* Signal to the parser that the end of the input was reached. | ||
* | ||
* Remaining data will be emitted and relevant errors will be produced. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
EntityDecoder.prototype.end = function () { | ||
var _a; | ||
switch (this.state) { | ||
case EntityDecoderState.NamedEntity: { | ||
// Emit a named entity if we have one. | ||
return this.result !== 0 && | ||
(this.decodeMode !== DecodingMode.Attribute || | ||
this.result === this.treeIndex) | ||
? this.emitNotTerminatedNamedEntity() | ||
: 0; | ||
} | ||
// Otherwise, emit a numeric entity if we have one. | ||
case EntityDecoderState.NumericDecimal: { | ||
return this.emitNumericEntity(0, 2); | ||
} | ||
case EntityDecoderState.NumericHex: { | ||
return this.emitNumericEntity(0, 3); | ||
} | ||
case EntityDecoderState.NumericStart: { | ||
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed); | ||
return 0; | ||
} | ||
case EntityDecoderState.EntityStart: { | ||
// Return 0 if we have no entity. | ||
return 0; | ||
} | ||
} | ||
return ret + str.slice(lastIdx); | ||
}; | ||
return EntityDecoder; | ||
}()); | ||
exports.EntityDecoder = EntityDecoder; | ||
/**
 * Builds a string decoder on top of a single, reused `EntityDecoder`.
 *
 * The output accumulates in a variable shared through the closure; it is
 * cleared at the end of every call.
 *
 * @param decodeTree The decode tree.
 * @returns A function that decodes entities in a string.
 */
function getDecoder(decodeTree) {
    var output = "";
    var decoder = new EntityDecoder(decodeTree, function (codePoint) {
        return (output += (0, decode_codepoint_js_1.fromCodePoint)(codePoint));
    });
    return function decodeWithTrie(str, decodeMode) {
        // Index up to which `str` has already been copied or decoded.
        var copiedUpTo = 0;
        var ampersandAt = str.indexOf("&", 0);
        while (ampersandAt >= 0) {
            // Copy the literal text in front of the "&".
            output += str.slice(copiedUpTo, ampersandAt);
            decoder.startEntity(decodeMode);
            // Start decoding right after the "&".
            var consumed = decoder.write(str, ampersandAt + 1);
            if (consumed < 0) {
                // Input ended inside the entity; flush what can be decoded.
                copiedUpTo = ampersandAt + decoder.end();
                break;
            }
            copiedUpTo = ampersandAt + consumed;
            // When nothing was consumed, step over the current "&" before searching again.
            var searchFrom = consumed === 0 ? copiedUpTo + 1 : copiedUpTo;
            ampersandAt = str.indexOf("&", searchFrom);
        }
        var decoded = output + str.slice(copiedUpTo);
        // Make sure we don't keep a reference to the final string.
        output = "";
        return decoded;
    };
}
/** | ||
* Determines the branch of the current node that is taken given the current | ||
* character. This function is used to traverse the trie. | ||
* | ||
* @param decodeTree The trie. | ||
* @param current The current node. | ||
* @param nodeIdx The index right after the current node and its value. | ||
* @param char The current character. | ||
* @returns The index of the next node, or -1 if no branch is taken. | ||
*/ | ||
function determineBranch(decodeTree, current, nodeIdx, char) { | ||
@@ -155,13 +500,15 @@ var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7; | ||
/** | ||
* Decodes an HTML string, allowing for entities not terminated by a semi-colon. | ||
* Decodes an HTML string. | ||
* | ||
* @param str The string to decode. | ||
* @param mode The decoding mode. | ||
* @returns The decoded string. | ||
*/ | ||
function decodeHTML(str) { | ||
return htmlDecoder(str, false); | ||
function decodeHTML(str, mode) { | ||
if (mode === void 0) { mode = DecodingMode.Legacy; } | ||
return htmlDecoder(str, mode); | ||
} | ||
exports.decodeHTML = decodeHTML; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an HTML string in an attribute. | ||
* | ||
@@ -171,8 +518,18 @@ * @param str The string to decode. | ||
*/ | ||
function decodeHTMLAttribute(str) { | ||
return htmlDecoder(str, DecodingMode.Attribute); | ||
} | ||
exports.decodeHTMLAttribute = decodeHTMLAttribute; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
* @param str The string to decode. | ||
* @returns The decoded string. | ||
*/ | ||
function decodeHTMLStrict(str) { | ||
return htmlDecoder(str, true); | ||
return htmlDecoder(str, DecodingMode.Strict); | ||
} | ||
exports.decodeHTMLStrict = decodeHTMLStrict; | ||
/** | ||
* Decodes an XML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an XML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
@@ -183,5 +540,5 @@ * @param str The string to decode. | ||
function decodeXML(str) { | ||
return xmlDecoder(str, true); | ||
return xmlDecoder(str, DecodingMode.Strict); | ||
} | ||
exports.decodeXML = decodeXML; | ||
//# sourceMappingURL=decode.js.map |
@@ -63,3 +63,3 @@ "use strict"; | ||
} | ||
// We might have a tree node without a value; skip and use a numeric entitiy. | ||
// We might have a tree node without a value; skip and use a numeric entity. | ||
if (next !== undefined) { | ||
@@ -66,0 +66,0 @@ ret += next; |
@@ -64,2 +64,12 @@ "use strict"; | ||
exports.escape = encodeXML; | ||
/** | ||
* Creates a function that escapes all characters matched by the given regular | ||
* expression using the given map of characters to escape to their entities. | ||
* | ||
* @param regex Regular expression to match characters to escape. | ||
* @param map Map of characters to escape to their entities. | ||
* | ||
* @returns Function that escapes all characters matched by the given regular | ||
* expression using the given map of characters to escape to their entities. | ||
*/ | ||
function getEscaper(regex, map) { | ||
@@ -74,3 +84,3 @@ return function escape(data) { | ||
} | ||
// We know that this chararcter will be in the map. | ||
// We know that this character will be in the map. | ||
result += map.get(match[0].charCodeAt(0)); | ||
@@ -77,0 +87,0 @@ // Every match will be of length 1 |
@@ -0,4 +1,19 @@ | ||
/** | ||
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point. | ||
*/ | ||
export declare const fromCodePoint: (...codePoints: number[]) => string; | ||
/** | ||
* Replace the given code point with a replacement character if it is a | ||
* surrogate or is outside the valid range. Otherwise return the code | ||
* point unchanged. | ||
*/ | ||
export declare function replaceCodePoint(codePoint: number): number; | ||
/** | ||
* Replace the code point if relevant, then convert it to a string. | ||
* | ||
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead. | ||
* @param codePoint The code point to decode. | ||
* @returns The decoded code point. | ||
*/ | ||
export default function decodeCodePoint(codePoint: number): string; | ||
//# sourceMappingURL=decode_codepoint.d.ts.map |
@@ -5,2 +5,3 @@ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134 | ||
[0, 65533], | ||
// C1 Unicode control character reference replacements | ||
[128, 8364], | ||
@@ -34,2 +35,5 @@ [130, 8218], | ||
]); | ||
/** | ||
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point. | ||
*/ | ||
export const fromCodePoint = | ||
@@ -47,2 +51,7 @@ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins | ||
}; | ||
/** | ||
* Replace the given code point with a replacement character if it is a | ||
* surrogate or is outside the valid range. Otherwise return the code | ||
* point unchanged. | ||
*/ | ||
export function replaceCodePoint(codePoint) { | ||
@@ -55,2 +64,9 @@ var _a; | ||
} | ||
/** | ||
* Replace the code point if relevant, then convert it to a string. | ||
* | ||
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead. | ||
* @param codePoint The code point to decode. | ||
* @returns The decoded code point. | ||
*/ | ||
export default function decodeCodePoint(codePoint) { | ||
@@ -57,0 +73,0 @@ return fromCodePoint(replaceCodePoint(codePoint)); |
@@ -11,12 +11,183 @@ import htmlDecodeTree from "./generated/decode-data-html.js"; | ||
} | ||
export declare enum DecodingMode { | ||
/** Entities in text nodes that can end with any character. */ | ||
Legacy = 0, | ||
/** Only allow entities terminated with a semicolon. */ | ||
Strict = 1, | ||
/** Entities in attributes have limitations on ending characters. */ | ||
Attribute = 2 | ||
} | ||
/** | ||
* Producers for character reference errors as defined in the HTML spec. | ||
*/ | ||
export interface EntityErrorProducer { | ||
missingSemicolonAfterCharacterReference(): void; | ||
absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void; | ||
validateNumericCharacterReference(code: number): void; | ||
} | ||
/** | ||
* Token decoder with support of writing partial entities. | ||
*/ | ||
export declare class EntityDecoder { | ||
/** The tree used to decode entities. */ | ||
private readonly decodeTree; | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For multi-byte named entities, this will be called multiple times, | ||
* with the second codepoint, and the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of bytes consumed by the decoder. | ||
*/ | ||
private readonly emitCodePoint; | ||
/** An object that is used to produce errors. */ | ||
private readonly errors?; | ||
constructor( | ||
/** The tree used to decode entities. */ | ||
decodeTree: Uint16Array, | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For multi-byte named entities, this will be called multiple times, | ||
* with the second codepoint, and the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of bytes consumed by the decoder. | ||
*/ | ||
emitCodePoint: (cp: number, consumed: number) => void, | ||
/** An object that is used to produce errors. */ | ||
errors?: EntityErrorProducer | undefined); | ||
/** The current state of the decoder. */ | ||
private state; | ||
/** Characters that were consumed while parsing an entity. */ | ||
private consumed; | ||
/** | ||
* The result of the entity. | ||
* | ||
     * Either the result index of a named entity, or the codepoint of a | |
* numeric entity. | ||
*/ | ||
private result; | ||
/** The current index in the decode tree. */ | ||
private treeIndex; | ||
/** The number of characters that were consumed in excess. */ | ||
private excess; | ||
/** The mode in which the decoder is operating. */ | ||
private decodeMode; | ||
/** Resets the instance to make it reusable. */ | ||
startEntity(decodeMode: DecodingMode): void; | ||
/** | ||
* Write an entity to the decoder. This can be called multiple times with partial entities. | ||
* If the entity is incomplete, the decoder will return -1. | ||
* | ||
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the | ||
* entity is incomplete, and resume when the next string is written. | ||
* | ||
     * @param str The string containing the entity (or a continuation of the entity). | |
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
write(str: string, offset: number): number; | ||
/** | ||
* Switches between the numeric decimal and hexadecimal states. | ||
* | ||
* Equivalent to the `Numeric character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericStart; | ||
private addToNumericResult; | ||
/** | ||
* Parses a hexadecimal numeric entity. | ||
* | ||
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec. | |
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericHex; | ||
/** | ||
* Parses a decimal numeric entity. | ||
* | ||
* Equivalent to the `Decimal character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNumericDecimal; | ||
/** | ||
* Validate and emit a numeric entity. | ||
* | ||
     * Implements the logic from the `Hexadecimal character reference start | |
* state` and `Numeric character reference end state` in the HTML spec. | ||
* | ||
* @param lastCp The last code point of the entity. Used to see if the | ||
* entity was terminated with a semicolon. | ||
* @param expectedLength The minimum number of characters that should be | ||
* consumed. Used to validate that at least one digit | ||
* was consumed. | ||
* @returns The number of characters that were consumed. | ||
*/ | ||
private emitNumericEntity; | ||
/** | ||
* Parses a named entity. | ||
* | ||
* Equivalent to the `Named character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
private stateNamedEntity; | ||
/** | ||
* Emit a named entity that was not terminated with a semicolon. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
private emitNotTerminatedNamedEntity; | ||
/** | ||
* Emit a named entity. | ||
* | ||
* @param result The index of the entity in the decode tree. | ||
* @param valueLength The number of bytes in the entity. | ||
* @param consumed The number of characters consumed. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
private emitNamedEntityData; | ||
/** | ||
* Signal to the parser that the end of the input was reached. | ||
* | ||
* Remaining data will be emitted and relevant errors will be produced. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
end(): number; | ||
} | ||
/** | ||
* Determines the branch of the current node that is taken given the current | ||
* character. This function is used to traverse the trie. | ||
* | ||
* @param decodeTree The trie. | ||
* @param current The current node. | ||
* @param nodeIdx The index right after the current node and its value. | ||
* @param char The current character. | ||
* @returns The index of the next node, or -1 if no branch is taken. | ||
*/ | ||
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number; | ||
/** | ||
* Decodes an HTML string, allowing for entities not terminated by a semi-colon. | ||
* Decodes an HTML string. | ||
* | ||
* @param str The string to decode. | ||
* @param mode The decoding mode. | ||
* @returns The decoded string. | ||
*/ | ||
export declare function decodeHTML(str: string): string; | ||
export declare function decodeHTML(str: string, mode?: DecodingMode): string; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an HTML string in an attribute. | ||
* | ||
@@ -26,5 +197,12 @@ * @param str The string to decode. | ||
*/ | ||
export declare function decodeHTMLAttribute(str: string): string; | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
* @param str The string to decode. | ||
* @returns The decoded string. | ||
*/ | ||
export declare function decodeHTMLStrict(str: string): string; | ||
/** | ||
* Decodes an XML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an XML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
@@ -31,0 +209,0 @@ * @param str The string to decode. |
import htmlDecodeTree from "./generated/decode-data-html.js"; | ||
import xmlDecodeTree from "./generated/decode-data-xml.js"; | ||
import decodeCodePoint from "./decode_codepoint.js"; | ||
import decodeCodePoint, { replaceCodePoint, fromCodePoint, } from "./decode_codepoint.js"; | ||
// Re-export for use by eg. htmlparser2 | ||
@@ -11,2 +11,3 @@ export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint }; | ||
CharCodes[CharCodes["SEMI"] = 59] = "SEMI"; | ||
CharCodes[CharCodes["EQUALS"] = 61] = "EQUALS"; | ||
CharCodes[CharCodes["ZERO"] = 48] = "ZERO"; | ||
@@ -17,5 +18,9 @@ CharCodes[CharCodes["NINE"] = 57] = "NINE"; | ||
CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X"; | ||
/** Bit that needs to be set to convert an upper case ASCII character to lower case */ | ||
CharCodes[CharCodes["To_LOWER_BIT"] = 32] = "To_LOWER_BIT"; | ||
CharCodes[CharCodes["LOWER_Z"] = 122] = "LOWER_Z"; | ||
CharCodes[CharCodes["UPPER_A"] = 65] = "UPPER_A"; | ||
CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F"; | ||
CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z"; | ||
})(CharCodes || (CharCodes = {})); | ||
/** Bit that needs to be set to convert an upper case ASCII character to lower case */ | ||
const TO_LOWER_BIT = 0b100000; | ||
export var BinTrieFlags; | ||
@@ -27,81 +32,396 @@ (function (BinTrieFlags) { | ||
})(BinTrieFlags || (BinTrieFlags = {})); | ||
function getDecoder(decodeTree) { | ||
return function decodeHTMLBinary(str, strict) { | ||
let ret = ""; | ||
let lastIdx = 0; | ||
let strIdx = 0; | ||
while ((strIdx = str.indexOf("&", strIdx)) >= 0) { | ||
ret += str.slice(lastIdx, strIdx); | ||
lastIdx = strIdx; | ||
// Skip the "&" | ||
strIdx += 1; | ||
// If we have a numeric entity, handle this separately. | ||
if (str.charCodeAt(strIdx) === CharCodes.NUM) { | ||
// Skip the leading "&#". For hex entities, also skip the leading "x". | ||
let start = strIdx + 1; | ||
let base = 10; | ||
let cp = str.charCodeAt(start); | ||
if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) { | ||
base = 16; | ||
strIdx += 1; | ||
start += 1; | ||
/**
 * Tells whether a character code is an ASCII decimal digit.
 *
 * @param code The character code to test.
 * @returns `true` if `code` lies between `CharCodes.ZERO` and `CharCodes.NINE`, inclusive.
 */
function isNumber(code) {
    return CharCodes.ZERO <= code && code <= CharCodes.NINE;
}
/**
 * Tells whether a character code is a hexadecimal letter (`A`-`F` or `a`-`f`).
 * Decimal digits are not covered here; callers combine this with `isNumber`.
 *
 * @param code The character code to test.
 * @returns `true` if `code` is an upper or lower case hexadecimal letter.
 */
function isHexadecimalCharacter(code) {
    const isUpperHex = CharCodes.UPPER_A <= code && code <= CharCodes.UPPER_F;
    const isLowerHex = CharCodes.LOWER_A <= code && code <= CharCodes.LOWER_F;
    return isUpperHex || isLowerHex;
}
/**
 * Tells whether a character code is an ASCII letter or an ASCII digit.
 *
 * @param code The character code to test.
 * @returns `true` for `A`-`Z`, `a`-`z` and `0`-`9`.
 */
function isAsciiAlphaNumeric(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
        return true;
    }
    if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
        return true;
    }
    return isNumber(code);
}
/**
 * Tells whether a character is an *invalid* end for an entity in an attribute,
 * i.e. whether it is `=` or an ASCII alphanumeric character.
 *
 * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @param code The character code that follows the entity.
 * @returns `true` if the character is `=` or an ASCII alphanumeric.
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric(code);
}
var EntityDecoderState; | ||
(function (EntityDecoderState) { | ||
EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart"; | ||
EntityDecoderState[EntityDecoderState["NumericStart"] = 1] = "NumericStart"; | ||
EntityDecoderState[EntityDecoderState["NumericDecimal"] = 2] = "NumericDecimal"; | ||
EntityDecoderState[EntityDecoderState["NumericHex"] = 3] = "NumericHex"; | ||
EntityDecoderState[EntityDecoderState["NamedEntity"] = 4] = "NamedEntity"; | ||
})(EntityDecoderState || (EntityDecoderState = {})); | ||
export var DecodingMode; | ||
(function (DecodingMode) { | ||
/** Entities in text nodes that can end with any character. */ | ||
DecodingMode[DecodingMode["Legacy"] = 0] = "Legacy"; | ||
/** Only allow entities terminated with a semicolon. */ | ||
DecodingMode[DecodingMode["Strict"] = 1] = "Strict"; | ||
/** Entities in attributes have limitations on ending characters. */ | ||
DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute"; | ||
})(DecodingMode || (DecodingMode = {})); | ||
/** | ||
* Token decoder with support of writing partial entities. | ||
*/ | ||
export class EntityDecoder { | ||
constructor( | ||
/** The tree used to decode entities. */ | ||
decodeTree, | ||
/** | ||
* The function that is called when a codepoint is decoded. | ||
* | ||
* For multi-byte named entities, this will be called multiple times, | ||
* with the second codepoint, and the same `consumed` value. | ||
* | ||
* @param codepoint The decoded codepoint. | ||
* @param consumed The number of bytes consumed by the decoder. | ||
*/ | ||
emitCodePoint, | ||
/** An object that is used to produce errors. */ | ||
errors) { | ||
this.decodeTree = decodeTree; | ||
this.emitCodePoint = emitCodePoint; | ||
this.errors = errors; | ||
/** The current state of the decoder. */ | ||
this.state = EntityDecoderState.EntityStart; | ||
/** Characters that were consumed while parsing an entity. */ | ||
this.consumed = 1; | ||
/** | ||
* The result of the entity. | ||
* | ||
         * Either the result index of a named entity, or the codepoint of a | |
* numeric entity. | ||
*/ | ||
this.result = 0; | ||
/** The current index in the decode tree. */ | ||
this.treeIndex = 0; | ||
/** The number of characters that were consumed in excess. */ | ||
this.excess = 1; | ||
/** The mode in which the decoder is operating. */ | ||
this.decodeMode = DecodingMode.Strict; | ||
} | ||
/** Resets the instance to make it reusable. */ | ||
startEntity(decodeMode) { | ||
this.decodeMode = decodeMode; | ||
this.state = EntityDecoderState.EntityStart; | ||
this.result = 0; | ||
this.treeIndex = 0; | ||
this.excess = 1; | ||
this.consumed = 1; | ||
} | ||
/** | ||
* Write an entity to the decoder. This can be called multiple times with partial entities. | ||
* If the entity is incomplete, the decoder will return -1. | ||
* | ||
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the | ||
* entity is incomplete, and resume when the next string is written. | ||
* | ||
     * @param str The string containing the entity (or a continuation of the entity). | |
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
write(str, offset) { | ||
switch (this.state) { | ||
case EntityDecoderState.EntityStart: { | ||
if (str.charCodeAt(offset) === CharCodes.NUM) { | ||
this.state = EntityDecoderState.NumericStart; | ||
this.consumed += 1; | ||
return this.stateNumericStart(str, offset + 1); | ||
} | ||
do | ||
cp = str.charCodeAt(++strIdx); | ||
while ((cp >= CharCodes.ZERO && cp <= CharCodes.NINE) || | ||
(base === 16 && | ||
(cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A && | ||
(cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F)); | ||
if (start !== strIdx) { | ||
const entity = str.substring(start, strIdx); | ||
const parsed = parseInt(entity, base); | ||
if (str.charCodeAt(strIdx) === CharCodes.SEMI) { | ||
strIdx += 1; | ||
} | ||
else if (strict) { | ||
continue; | ||
} | ||
ret += decodeCodePoint(parsed); | ||
lastIdx = strIdx; | ||
} | ||
continue; | ||
this.state = EntityDecoderState.NamedEntity; | ||
return this.stateNamedEntity(str, offset); | ||
} | ||
let resultIdx = 0; | ||
let excess = 1; | ||
let treeIdx = 0; | ||
let current = decodeTree[treeIdx]; | ||
for (; strIdx < str.length; strIdx++, excess++) { | ||
treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx)); | ||
if (treeIdx < 0) | ||
break; | ||
current = decodeTree[treeIdx]; | ||
const masked = current & BinTrieFlags.VALUE_LENGTH; | ||
// If the branch is a value, store it and continue | ||
if (masked) { | ||
// If we have a legacy entity while parsing strictly, just skip the number of bytes | ||
if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) { | ||
resultIdx = treeIdx; | ||
excess = 0; | ||
} | ||
// The mask is the number of bytes of the value, including the current byte. | ||
const valueLength = (masked >> 14) - 1; | ||
if (valueLength === 0) | ||
break; | ||
treeIdx += valueLength; | ||
case EntityDecoderState.NumericStart: { | ||
return this.stateNumericStart(str, offset); | ||
} | ||
case EntityDecoderState.NumericDecimal: { | ||
return this.stateNumericDecimal(str, offset); | ||
} | ||
case EntityDecoderState.NumericHex: { | ||
return this.stateNumericHex(str, offset); | ||
} | ||
case EntityDecoderState.NamedEntity: { | ||
return this.stateNamedEntity(str, offset); | ||
} | ||
} | ||
} | ||
/** | ||
* Switches between the numeric decimal and hexadecimal states. | ||
* | ||
* Equivalent to the `Numeric character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
stateNumericStart(str, offset) { | ||
if (offset >= str.length) { | ||
return -1; | ||
} | ||
if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) { | ||
this.state = EntityDecoderState.NumericHex; | ||
this.consumed += 1; | ||
return this.stateNumericHex(str, offset + 1); | ||
} | ||
this.state = EntityDecoderState.NumericDecimal; | ||
return this.stateNumericDecimal(str, offset); | ||
} | ||
addToNumericResult(str, start, end, base) { | ||
if (start !== end) { | ||
const digitCount = end - start; | ||
this.result = | ||
this.result * Math.pow(base, digitCount) + | ||
parseInt(str.substr(start, digitCount), base); | ||
this.consumed += digitCount; | ||
} | ||
} | ||
/** | ||
* Parses a hexadecimal numeric entity. | ||
* | ||
* Equivalent to the `Hexademical character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
stateNumericHex(str, offset) { | ||
const startIdx = offset; | ||
while (offset < str.length) { | ||
const char = str.charCodeAt(offset); | ||
if (isNumber(char) || isHexadecimalCharacter(char)) { | ||
offset += 1; | ||
} | ||
else { | ||
this.addToNumericResult(str, startIdx, offset, 16); | ||
return this.emitNumericEntity(char, 3); | ||
} | ||
} | ||
this.addToNumericResult(str, startIdx, offset, 16); | ||
return -1; | ||
} | ||
/** | ||
* Parses a decimal numeric entity. | ||
* | ||
* Equivalent to the `Decimal character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
stateNumericDecimal(str, offset) { | ||
const startIdx = offset; | ||
while (offset < str.length) { | ||
const char = str.charCodeAt(offset); | ||
if (isNumber(char)) { | ||
offset += 1; | ||
} | ||
else { | ||
this.addToNumericResult(str, startIdx, offset, 10); | ||
return this.emitNumericEntity(char, 2); | ||
} | ||
} | ||
this.addToNumericResult(str, startIdx, offset, 10); | ||
return -1; | ||
} | ||
/** | ||
* Validate and emit a numeric entity. | ||
* | ||
* Implements the logic from the `Hexademical character reference start | ||
* state` and `Numeric character reference end state` in the HTML spec. | ||
* | ||
* @param lastCp The last code point of the entity. Used to see if the | ||
* entity was terminated with a semicolon. | ||
* @param expectedLength The minimum number of characters that should be | ||
* consumed. Used to validate that at least one digit | ||
* was consumed. | ||
* @returns The number of characters that were consumed. | ||
*/ | ||
emitNumericEntity(lastCp, expectedLength) { | ||
var _a; | ||
// Ensure we consumed at least one digit. | ||
if (this.consumed <= expectedLength) { | ||
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed); | ||
return 0; | ||
} | ||
// Figure out if this is a legit end of the entity | ||
if (lastCp === CharCodes.SEMI) { | ||
this.consumed += 1; | ||
} | ||
else if (this.decodeMode === DecodingMode.Strict) { | ||
return 0; | ||
} | ||
this.emitCodePoint(replaceCodePoint(this.result), this.consumed); | ||
if (this.errors) { | ||
if (lastCp !== CharCodes.SEMI) { | ||
this.errors.missingSemicolonAfterCharacterReference(); | ||
} | ||
this.errors.validateNumericCharacterReference(this.result); | ||
} | ||
return this.consumed; | ||
} | ||
/** | ||
* Parses a named entity. | ||
* | ||
* Equivalent to the `Named character reference state` in the HTML spec. | ||
* | ||
* @param str The string containing the entity (or a continuation of the entity). | ||
* @param offset The current offset. | ||
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. | ||
*/ | ||
stateNamedEntity(str, offset) { | ||
const { decodeTree } = this; | ||
let current = decodeTree[this.treeIndex]; | ||
// The mask is the number of bytes of the value, including the current byte. | ||
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
for (; offset < str.length; offset++, this.excess++) { | ||
const char = str.charCodeAt(offset); | ||
this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char); | ||
if (this.treeIndex < 0) { | ||
return this.result === 0 || | ||
// If we are parsing an attribute | ||
(this.decodeMode === DecodingMode.Attribute && | ||
// We shouldn't have consumed any characters after the entity, | ||
(valueLength === 0 || | ||
// And there should be no invalid characters. | ||
isEntityInAttributeInvalidEnd(char))) | ||
? 0 | ||
: this.emitNotTerminatedNamedEntity(); | ||
} | ||
current = decodeTree[this.treeIndex]; | ||
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
// If the branch is a value, store it and continue | ||
if (valueLength !== 0) { | ||
// If the entity is terminated by a semicolon, we are done. | ||
if (char === CharCodes.SEMI) { | ||
return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess); | ||
} | ||
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it. | ||
if (this.decodeMode !== DecodingMode.Strict) { | ||
this.result = this.treeIndex; | ||
this.consumed += this.excess; | ||
this.excess = 0; | ||
} | ||
} | ||
if (resultIdx !== 0) { | ||
const valueLength = (decodeTree[resultIdx] & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
ret += | ||
valueLength === 1 | ||
? String.fromCharCode(decodeTree[resultIdx] & ~BinTrieFlags.VALUE_LENGTH) | ||
: valueLength === 2 | ||
? String.fromCharCode(decodeTree[resultIdx + 1]) | ||
: String.fromCharCode(decodeTree[resultIdx + 1], decodeTree[resultIdx + 2]); | ||
lastIdx = strIdx - excess + 1; | ||
} | ||
return -1; | ||
} | ||
/** | ||
* Emit a named entity that was not terminated with a semicolon. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
emitNotTerminatedNamedEntity() { | ||
var _a; | ||
const { result, decodeTree } = this; | ||
const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14; | ||
this.emitNamedEntityData(result, valueLength, this.consumed); | ||
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference(); | ||
return this.consumed; | ||
} | ||
/** | ||
* Emit a named entity. | ||
* | ||
* @param result The index of the entity in the decode tree. | ||
* @param valueLength The number of bytes in the entity. | ||
* @param consumed The number of characters consumed. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
emitNamedEntityData(result, valueLength, consumed) { | ||
const { decodeTree } = this; | ||
this.emitCodePoint(valueLength === 1 | ||
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH | ||
: decodeTree[result + 1], consumed); | ||
if (valueLength === 3) { | ||
// For multi-byte values, we need to emit the second byte. | ||
this.emitCodePoint(decodeTree[result + 2], consumed); | ||
} | ||
return consumed; | ||
} | ||
/** | ||
* Signal to the parser that the end of the input was reached. | ||
* | ||
* Remaining data will be emitted and relevant errors will be produced. | ||
* | ||
* @returns The number of characters consumed. | ||
*/ | ||
end() { | ||
var _a; | ||
switch (this.state) { | ||
case EntityDecoderState.NamedEntity: { | ||
// Emit a named entity if we have one. | ||
return this.result !== 0 && | ||
(this.decodeMode !== DecodingMode.Attribute || | ||
this.result === this.treeIndex) | ||
? this.emitNotTerminatedNamedEntity() | ||
: 0; | ||
} | ||
// Otherwise, emit a numeric entity if we have one. | ||
case EntityDecoderState.NumericDecimal: { | ||
return this.emitNumericEntity(0, 2); | ||
} | ||
case EntityDecoderState.NumericHex: { | ||
return this.emitNumericEntity(0, 3); | ||
} | ||
case EntityDecoderState.NumericStart: { | ||
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed); | ||
return 0; | ||
} | ||
case EntityDecoderState.EntityStart: { | ||
// Return 0 if we have no entity. | ||
return 0; | ||
} | ||
} | ||
return ret + str.slice(lastIdx); | ||
} | ||
} | ||
/**
 * Builds a string decoder on top of a single, reused `EntityDecoder`.
 *
 * The returned function copies the text between entities verbatim and lets
 * the entity decoder append the decoded code points for each `&…` sequence.
 *
 * @param decodeTree The decode tree handed to the `EntityDecoder`.
 * @returns A function `(str, decodeMode) => string` that decodes the entities in `str`.
 */
function getDecoder(decodeTree) {
    // Output buffer shared by the entity callback and the returned function.
    let output = "";
    const entityDecoder = new EntityDecoder(decodeTree, (codePoint) => {
        output += fromCodePoint(codePoint);
    });
    return function decodeWithTrie(str, decodeMode) {
        // Index up to which `str` has already been copied or decoded.
        let copiedUpTo = 0;
        let ampIndex = str.indexOf("&");
        while (ampIndex >= 0) {
            output += str.slice(copiedUpTo, ampIndex);
            entityDecoder.startEntity(decodeMode);
            // Start right after the "&"; the returned count includes the "&".
            const consumed = entityDecoder.write(str, ampIndex + 1);
            if (consumed < 0) {
                // The input ended inside the entity: flush what we have and stop.
                copiedUpTo = ampIndex + entityDecoder.end();
                break;
            }
            copiedUpTo = ampIndex + consumed;
            // Nothing consumed means no entity here: search past this "&".
            ampIndex = str.indexOf("&", consumed === 0 ? copiedUpTo + 1 : copiedUpTo);
        }
        const decoded = output + str.slice(copiedUpTo);
        // Reset the buffer so the closure does not retain the final string.
        output = "";
        return decoded;
    };
}
/** | ||
* Determines the branch of the current node that is taken given the current | ||
* character. This function is used to traverse the trie. | ||
* | ||
* @param decodeTree The trie. | ||
* @param current The current node. | ||
* @param nodeIdx The index right after the current node and its value. | ||
* @param char The current character. | ||
* @returns The index of the next node, or -1 if no branch is taken. | ||
*/ | ||
export function determineBranch(decodeTree, current, nodeIdx, char) { | ||
@@ -143,12 +463,13 @@ const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7; | ||
/** | ||
* Decodes an HTML string, allowing for entities not terminated by a semi-colon. | ||
* Decodes an HTML string. | ||
* | ||
* @param str The string to decode. | ||
* @param mode The decoding mode. | ||
* @returns The decoded string. | ||
*/ | ||
export function decodeHTML(str) { | ||
return htmlDecoder(str, false); | ||
export function decodeHTML(str, mode = DecodingMode.Legacy) { | ||
return htmlDecoder(str, mode); | ||
} | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an HTML string in an attribute. | ||
* | ||
@@ -158,7 +479,16 @@ * @param str The string to decode. | ||
*/ | ||
export function decodeHTMLAttribute(str) { | ||
return htmlDecoder(str, DecodingMode.Attribute); | ||
} | ||
/** | ||
* Decodes an HTML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
* @param str The string to decode. | ||
* @returns The decoded string. | ||
*/ | ||
export function decodeHTMLStrict(str) { | ||
return htmlDecoder(str, true); | ||
return htmlDecoder(str, DecodingMode.Strict); | ||
} | ||
/** | ||
* Decodes an XML string, requiring all entities to be terminated by a semi-colon. | ||
* Decodes an XML string, requiring all entities to be terminated by a semicolon. | ||
* | ||
@@ -169,4 +499,4 @@ * @param str The string to decode. | ||
export function decodeXML(str) { | ||
return xmlDecoder(str, true); | ||
return xmlDecoder(str, DecodingMode.Strict); | ||
} | ||
//# sourceMappingURL=decode.js.map |
@@ -55,3 +55,3 @@ import htmlTrie from "./generated/encode-html.js"; | ||
} | ||
// We might have a tree node without a value; skip and use a numeric entitiy. | ||
// We might have a tree node without a value; skip and use a numeric entity. | ||
if (next !== undefined) { | ||
@@ -58,0 +58,0 @@ ret += next; |
@@ -58,2 +58,12 @@ export const xmlReplacer = /["&'<>$\x80-\uFFFF]/g; | ||
export const escape = encodeXML; | ||
/** | ||
* Creates a function that escapes all characters matched by the given regular | ||
* expression using the given map of characters to escape to their entities. | ||
* | ||
* @param regex Regular expression to match characters to escape. | ||
* @param map Map of characters to escape to their entities. | ||
* | ||
* @returns Function that escapes all characters matched by the given regular | ||
* expression using the given map of characters to escape to their entities. | ||
*/ | ||
function getEscaper(regex, map) { | ||
@@ -68,3 +78,3 @@ return function escape(data) { | ||
} | ||
// We know that this chararcter will be in the map. | ||
// We know that this character will be in the map. | ||
result += map.get(match[0].charCodeAt(0)); | ||
@@ -71,0 +81,0 @@ // Every match will be of length 1 |
@@ -1,2 +0,2 @@ | ||
declare type EncodeTrieNode = string | { | ||
type EncodeTrieNode = string | { | ||
v?: string; | ||
@@ -3,0 +3,0 @@ n: number | Map<number, EncodeTrieNode>; |
@@ -0,1 +1,2 @@ | ||
import { DecodingMode } from "./decode.js"; | ||
/** The level of entities to support. */ | ||
@@ -8,9 +9,2 @@ export declare enum EntityLevel { | ||
} | ||
/** Determines whether some entities are allowed to be written without a trailing `;`. */ | ||
export declare enum DecodingMode { | ||
/** Support legacy HTML entities. */ | ||
Legacy = 0, | ||
/** Do not support legacy HTML entities. */ | ||
Strict = 1 | ||
} | ||
export declare enum EncodingMode { | ||
@@ -61,3 +55,3 @@ /** | ||
*/ | ||
mode?: DecodingMode; | ||
mode?: DecodingMode | undefined; | ||
} | ||
@@ -103,3 +97,3 @@ /** | ||
export { encodeHTML, encodeNonAsciiHTML, encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js"; | ||
export { decodeXML, decodeHTML, decodeHTMLStrict, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js"; | ||
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js"; | ||
//# sourceMappingURL=index.d.ts.map |
@@ -1,2 +0,2 @@ | ||
import { decodeXML, decodeHTML, decodeHTMLStrict } from "./decode.js"; | ||
import { decodeXML, decodeHTML, DecodingMode } from "./decode.js"; | ||
import { encodeHTML, encodeNonAsciiHTML } from "./encode.js"; | ||
@@ -12,10 +12,2 @@ import { encodeXML, escapeUTF8, escapeAttribute, escapeText, } from "./escape.js"; | ||
})(EntityLevel || (EntityLevel = {})); | ||
/** Determines whether some entities are allowed to be written without a trailing `;`. */ | ||
export var DecodingMode; | ||
(function (DecodingMode) { | ||
/** Support legacy HTML entities. */ | ||
DecodingMode[DecodingMode["Legacy"] = 0] = "Legacy"; | ||
/** Do not support legacy HTML entities. */ | ||
DecodingMode[DecodingMode["Strict"] = 1] = "Strict"; | ||
})(DecodingMode || (DecodingMode = {})); | ||
export var EncodingMode; | ||
@@ -57,8 +49,6 @@ (function (EncodingMode) { | ||
export function decode(data, options = EntityLevel.XML) { | ||
const opts = typeof options === "number" ? { level: options } : options; | ||
if (opts.level === EntityLevel.HTML) { | ||
if (opts.mode === DecodingMode.Strict) { | ||
return decodeHTMLStrict(data); | ||
} | ||
return decodeHTML(data); | ||
const level = typeof options === "number" ? options : options.level; | ||
if (level === EntityLevel.HTML) { | ||
const mode = typeof options === "object" ? options.mode : undefined; | ||
return decodeHTML(data, mode); | ||
} | ||
@@ -75,10 +65,6 @@ return decodeXML(data); | ||
export function decodeStrict(data, options = EntityLevel.XML) { | ||
var _a; | ||
const opts = typeof options === "number" ? { level: options } : options; | ||
if (opts.level === EntityLevel.HTML) { | ||
if (opts.mode === DecodingMode.Legacy) { | ||
return decodeHTML(data); | ||
} | ||
return decodeHTMLStrict(data); | ||
} | ||
return decodeXML(data); | ||
(_a = opts.mode) !== null && _a !== void 0 ? _a : (opts.mode = DecodingMode.Strict); | ||
return decode(data, opts); | ||
} | ||
@@ -113,5 +99,5 @@ /** | ||
encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js"; | ||
export { decodeXML, decodeHTML, decodeHTMLStrict, | ||
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute, | ||
// Legacy aliases (deprecated) | ||
decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js"; | ||
//# sourceMappingURL=index.js.map |
@@ -1,2 +0,2 @@ | ||
declare type EncodeTrieNode = string | { | ||
type EncodeTrieNode = string | { | ||
v?: string; | ||
@@ -3,0 +3,0 @@ n: number | Map<number, EncodeTrieNode>; |
@@ -0,1 +1,2 @@ | ||
import { DecodingMode } from "./decode.js"; | ||
/** The level of entities to support. */ | ||
@@ -8,9 +9,2 @@ export declare enum EntityLevel { | ||
} | ||
/** Determines whether some entities are allowed to be written without a trailing `;`. */ | ||
export declare enum DecodingMode { | ||
/** Support legacy HTML entities. */ | ||
Legacy = 0, | ||
/** Do not support legacy HTML entities. */ | ||
Strict = 1 | ||
} | ||
export declare enum EncodingMode { | ||
@@ -61,3 +55,3 @@ /** | ||
*/ | ||
mode?: DecodingMode; | ||
mode?: DecodingMode | undefined; | ||
} | ||
@@ -103,3 +97,3 @@ /** | ||
export { encodeHTML, encodeNonAsciiHTML, encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js"; | ||
export { decodeXML, decodeHTML, decodeHTMLStrict, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js"; | ||
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js"; | ||
//# sourceMappingURL=index.d.ts.map |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.decodeXMLStrict = exports.decodeHTML5Strict = exports.decodeHTML4Strict = exports.decodeHTML5 = exports.decodeHTML4 = exports.decodeHTMLStrict = exports.decodeHTML = exports.decodeXML = exports.encodeHTML5 = exports.encodeHTML4 = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.encodeXML = exports.encode = exports.decodeStrict = exports.decode = exports.EncodingMode = exports.DecodingMode = exports.EntityLevel = void 0; | ||
exports.decodeXMLStrict = exports.decodeHTML5Strict = exports.decodeHTML4Strict = exports.decodeHTML5 = exports.decodeHTML4 = exports.decodeHTMLAttribute = exports.decodeHTMLStrict = exports.decodeHTML = exports.decodeXML = exports.DecodingMode = exports.EntityDecoder = exports.encodeHTML5 = exports.encodeHTML4 = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.encodeXML = exports.encode = exports.decodeStrict = exports.decode = exports.EncodingMode = exports.EntityLevel = void 0; | ||
var decode_js_1 = require("./decode.js"); | ||
@@ -15,10 +15,2 @@ var encode_js_1 = require("./encode.js"); | ||
})(EntityLevel = exports.EntityLevel || (exports.EntityLevel = {})); | ||
/** Determines whether some entities are allowed to be written without a trailing `;`. */ | ||
var DecodingMode; | ||
(function (DecodingMode) { | ||
/** Support legacy HTML entities. */ | ||
DecodingMode[DecodingMode["Legacy"] = 0] = "Legacy"; | ||
/** Do not support legacy HTML entities. */ | ||
DecodingMode[DecodingMode["Strict"] = 1] = "Strict"; | ||
})(DecodingMode = exports.DecodingMode || (exports.DecodingMode = {})); | ||
var EncodingMode; | ||
@@ -61,8 +53,6 @@ (function (EncodingMode) { | ||
if (options === void 0) { options = EntityLevel.XML; } | ||
var opts = typeof options === "number" ? { level: options } : options; | ||
if (opts.level === EntityLevel.HTML) { | ||
if (opts.mode === DecodingMode.Strict) { | ||
return (0, decode_js_1.decodeHTMLStrict)(data); | ||
} | ||
return (0, decode_js_1.decodeHTML)(data); | ||
var level = typeof options === "number" ? options : options.level; | ||
if (level === EntityLevel.HTML) { | ||
var mode = typeof options === "object" ? options.mode : undefined; | ||
return (0, decode_js_1.decodeHTML)(data, mode); | ||
} | ||
@@ -80,11 +70,7 @@ return (0, decode_js_1.decodeXML)(data); | ||
function decodeStrict(data, options) { | ||
var _a; | ||
if (options === void 0) { options = EntityLevel.XML; } | ||
var opts = typeof options === "number" ? { level: options } : options; | ||
if (opts.level === EntityLevel.HTML) { | ||
if (opts.mode === DecodingMode.Legacy) { | ||
return (0, decode_js_1.decodeHTML)(data); | ||
} | ||
return (0, decode_js_1.decodeHTMLStrict)(data); | ||
} | ||
return (0, decode_js_1.decodeXML)(data); | ||
(_a = opts.mode) !== null && _a !== void 0 ? _a : (opts.mode = decode_js_1.DecodingMode.Strict); | ||
return decode(data, opts); | ||
} | ||
@@ -131,5 +117,8 @@ exports.decodeStrict = decodeStrict; | ||
var decode_js_2 = require("./decode.js"); | ||
Object.defineProperty(exports, "EntityDecoder", { enumerable: true, get: function () { return decode_js_2.EntityDecoder; } }); | ||
Object.defineProperty(exports, "DecodingMode", { enumerable: true, get: function () { return decode_js_2.DecodingMode; } }); | ||
Object.defineProperty(exports, "decodeXML", { enumerable: true, get: function () { return decode_js_2.decodeXML; } }); | ||
Object.defineProperty(exports, "decodeHTML", { enumerable: true, get: function () { return decode_js_2.decodeHTML; } }); | ||
Object.defineProperty(exports, "decodeHTMLStrict", { enumerable: true, get: function () { return decode_js_2.decodeHTMLStrict; } }); | ||
Object.defineProperty(exports, "decodeHTMLAttribute", { enumerable: true, get: function () { return decode_js_2.decodeHTMLAttribute; } }); | ||
// Legacy aliases (deprecated) | ||
@@ -136,0 +125,0 @@ Object.defineProperty(exports, "decodeHTML4", { enumerable: true, get: function () { return decode_js_2.decodeHTML; } }); |
{ | ||
"name": "entities", | ||
"version": "4.4.0", | ||
"version": "4.5.0", | ||
"description": "Encode & decode XML and HTML entities with ease & speed", | ||
@@ -44,13 +44,13 @@ "author": "Felix Boehm <me@feedic.com>", | ||
"@types/jest": "^28.1.8", | ||
"@types/node": "^18.7.14", | ||
"@typescript-eslint/eslint-plugin": "^5.36.1", | ||
"@typescript-eslint/parser": "^5.36.1", | ||
"eslint": "^8.23.0", | ||
"eslint-config-prettier": "^8.5.0", | ||
"@types/node": "^18.15.11", | ||
"@typescript-eslint/eslint-plugin": "^5.58.0", | ||
"@typescript-eslint/parser": "^5.58.0", | ||
"eslint": "^8.38.0", | ||
"eslint-config-prettier": "^8.8.0", | ||
"eslint-plugin-node": "^11.1.0", | ||
"jest": "^28.1.3", | ||
"prettier": "^2.7.1", | ||
"prettier": "^2.8.7", | ||
"ts-jest": "^28.0.8", | ||
"typedoc": "^0.23.12", | ||
"typescript": "^4.8.2" | ||
"typedoc": "^0.24.1", | ||
"typescript": "^5.0.4" | ||
}, | ||
@@ -57,0 +57,0 @@ "scripts": { |
@@ -1,2 +0,2 @@ | ||
# entities [![NPM version](http://img.shields.io/npm/v/entities.svg)](https://npmjs.org/package/entities) [![Downloads](https://img.shields.io/npm/dm/entities.svg)](https://npmjs.org/package/entities) [![Build Status](http://img.shields.io/travis/fb55/entities.svg)](http://travis-ci.org/fb55/entities) [![Coverage](http://img.shields.io/coveralls/fb55/entities.svg)](https://coveralls.io/r/fb55/entities) | ||
# entities [![NPM version](https://img.shields.io/npm/v/entities.svg)](https://npmjs.org/package/entities) [![Downloads](https://img.shields.io/npm/dm/entities.svg)](https://npmjs.org/package/entities) [![Node.js CI](https://github.com/fb55/entities/actions/workflows/nodejs-test.yml/badge.svg)](https://github.com/fb55/entities/actions/workflows/nodejs-test.yml) | ||
@@ -12,6 +12,6 @@ Encode & decode HTML & XML entities with ease & speed. | ||
process HTML entities. | ||
- ⚡️ Fast: `entities` is the fastes library for decoding HTML entities (as of | ||
April 2022); see [performance](#performance). | ||
- ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as | ||
of April 2022); see [performance](#performance). | ||
- 🎛 Configurable: Get an output tailored for your needs. You are fine with | ||
UTF8? That'll safe you some bytes. Prefer to only have ASCII characters? We | ||
UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We | ||
can do that as well! | ||
@@ -91,3 +91,3 @@ | ||
This libary wouldn't be possible without the work of these individuals. Thanks | ||
This library wouldn't be possible without the work of these individuals. Thanks | ||
to | ||
@@ -94,0 +94,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
412892
3177