@nodable/entities
Advanced tools
+8
-3
| { | ||
| "name": "@nodable/entities", | ||
| "version": "2.0.0", | ||
| "description": "Entity parser for XML, HTML, External entites with security controls", | ||
| "version": "2.1.0", | ||
| "description": "Entity parser for XML, HTML, External entites with security and NCR control", | ||
| "main": "./src/index.js", | ||
@@ -27,3 +27,8 @@ "type": "module", | ||
| "html", | ||
| "entity" | ||
| "entity", | ||
| "encode", | ||
| "decode", | ||
| "ncr", | ||
| "security", | ||
| "performance" | ||
| ], | ||
@@ -30,0 +35,0 @@ "author": "Amit Gupta (https://solothought.com)", |
+184
-26
@@ -88,2 +88,31 @@ // --------------------------------------------------------------------------- | ||
| // --------------------------------------------------------------------------- | ||
| // NCR (Numeric Character Reference) classification | ||
| // --------------------------------------------------------------------------- | ||
| // Severity order — higher number = stricter action. | ||
| // Used to enforce minimum action levels for specific codepoint ranges. | ||
| const NCR_LEVEL = Object.freeze({ allow: 0, leave: 1, remove: 2, throw: 3 }); | ||
| // XML 1.0 §2.2: allowed chars are #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] | ||
| // Restricted C0: U+0001–U+001F excluding U+0009, U+000A, U+000D | ||
| const XML10_ALLOWED_C0 = new Set([0x09, 0x0A, 0x0D]); | ||
| /** | ||
| * Parse the `ncr` constructor option into flat, hot-path-friendly fields. | ||
| * @param {object|undefined} ncr | ||
| * @returns {{ xmlVersion: number, onLevel: number, nullLevel: number }} | ||
| */ | ||
| function parseNCRConfig(ncr) { | ||
| if (!ncr) { | ||
| return { xmlVersion: 1.0, onLevel: NCR_LEVEL.allow, nullLevel: NCR_LEVEL.remove }; | ||
| } | ||
| const xmlVersion = ncr.xmlVersion === 1.1 ? 1.1 : 1.0; | ||
| const onLevel = NCR_LEVEL[ncr.onNCR] ?? NCR_LEVEL.allow; | ||
| const nullLevel = NCR_LEVEL[ncr.nullNCR] ?? NCR_LEVEL.remove; | ||
| // 'allow' and 'leave' are never safe for null — clamp to at least 'remove' | ||
| const clampedNull = Math.max(nullLevel, NCR_LEVEL.remove); | ||
| return { xmlVersion, onLevel, nullLevel: clampedNull }; | ||
| } | ||
| // --------------------------------------------------------------------------- | ||
| // EntityReplacer | ||
@@ -121,5 +150,6 @@ // --------------------------------------------------------------------------- | ||
| * @param {object|null} [options.namedEntities] — extra named entities merged into base map | ||
| * @param {number} [options.maxTotalExpansions=0] — 0 = unlimited | ||
| * @param {number} [options.maxExpandedLength=0] — 0 = unlimited | ||
| * @param {'external'|'base'|'all'|string[]} [options.applyLimitsTo='external'] | ||
| * @param {object} [options.limit] — security limits | ||
| * @param {number} [options.limit.maxTotalExpansions=0] — 0 = unlimited | ||
| * @param {number} [options.limit.maxExpandedLength=0] — 0 = unlimited | ||
| * @param {'external'|'base'|'all'|string[]} [options.limit.applyLimitsTo='external'] | ||
| * Which entity tiers count against the security limits: | ||
@@ -131,8 +161,22 @@ * - 'external' (default) — only input/runtime + persistent external entities | ||
| * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null] | ||
| * @param {string[]} [options.remove=[]] — entity names (e.g. ['nbsp', '#13']) to delete (replace with empty string) | ||
| * @param {string[]} [options.leave=[]] — entity names to keep as literal (unchanged in output) | ||
| * @param {object} [options.ncr] — Numeric Character Reference controls | ||
| * @param {1.0|1.1} [options.ncr.xmlVersion=1.0] | ||
| * XML version governing which codepoint ranges are restricted: | ||
| * - 1.0 — C0 controls U+0001–U+001F (except U+0009/000A/000D) are prohibited | ||
| * - 1.1 — C0 controls are allowed when written as NCRs; DEL and C1 controls (U+007F–U+009F) are decoded as-is | ||
| * @param {'allow'|'leave'|'remove'|'throw'} [options.ncr.onNCR='allow'] | ||
| * Base action for numeric references. Severity order: allow < leave < remove < throw. | ||
| * For codepoint ranges that carry a minimum level (surrogates → remove, XML 1.0 C0 → remove), | ||
| * the effective action is max(onNCR, rangeMinimum). | ||
| * @param {'remove'|'throw'} [options.ncr.nullNCR='remove'] | ||
| * Action for U+0000 (null). 'allow' and 'leave' are clamped to 'remove' since null is never safe. | ||
| */ | ||
| constructor(options = {}) { | ||
| this._maxTotalExpansions = options.maxTotalExpansions || 0; | ||
| this._maxExpandedLength = options.maxExpandedLength || 0; | ||
| this._limit = options.limit || {}; | ||
| this._maxTotalExpansions = this._limit.maxTotalExpansions || 0; | ||
| this._maxExpandedLength = this._limit.maxExpandedLength || 0; | ||
| this._postCheck = typeof options.postCheck === 'function' ? options.postCheck : r => r; | ||
| this._limitTiers = parseLimitTiers(options.applyLimitsTo ?? LIMIT_TIER_EXTERNAL); | ||
| this._limitTiers = parseLimitTiers(this._limit.applyLimitsTo ?? LIMIT_TIER_EXTERNAL); | ||
| this._numericAllowed = options.numericAllowed ?? true; | ||
@@ -154,2 +198,14 @@ // Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Immutable after construction. | ||
| this._expandedLength = 0; | ||
| // --- New: remove / leave sets --- | ||
| /** @type {Set<string>} */ | ||
| this._removeSet = new Set(options.remove && Array.isArray(options.remove) ? options.remove : []); | ||
| /** @type {Set<string>} */ | ||
| this._leaveSet = new Set(options.leave && Array.isArray(options.leave) ? options.leave : []); | ||
| // --- NCR config (parsed into flat fields for hot-path speed) --- | ||
| const ncrCfg = parseNCRConfig(options.ncr); | ||
| this._ncrXmlVersion = ncrCfg.xmlVersion; | ||
| this._ncrOnLevel = ncrCfg.onLevel; | ||
| this._ncrNullLevel = ncrCfg.nullLevel; | ||
| } | ||
@@ -219,2 +275,15 @@ | ||
| // ------------------------------------------------------------------------- | ||
| // XML version (can be set after construction, e.g. once parser reads <?xml?>) | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Update the XML version used for NCR classification. | ||
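| * Call this as soon as the document's `<?xml version="..."?>` declaration is parsed. | ||
| * @param {1.0|1.1|number} version — only the number 1.1 selects XML 1.1; any other value (including the string '1.1') falls back to 1.0 | ||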
| */ | ||
| setXmlVersion(version) { | ||
| this._ncrXmlVersion = version === 1.1 ? 1.1 : 1.0; | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Primary API | ||
@@ -267,5 +336,25 @@ // ------------------------------------------------------------------------- | ||
| if (token.charCodeAt(0) === 35 /* '#' */ && this._numericAllowed) { | ||
| // ---- Numeric reference — base tier ---- | ||
| replacement = this._resolveNumeric(token); | ||
| if (this._removeSet.has(token)) { | ||
| // Remove entity: replace with empty string | ||
| replacement = ''; | ||
| // If entity was unknown (replacement undefined), we still need a tier for limits. | ||
| // Treat as external tier because it's user-directed removal of an unknown reference. | ||
| if (tier === undefined) { | ||
| tier = LIMIT_TIER_EXTERNAL; | ||
| } | ||
| } else if (this._leaveSet.has(token)) { | ||
| // Do not replace — keep original &token; as literal | ||
| i++; | ||
| continue; | ||
| } else if (token.charCodeAt(0) === 35 /* '#' */) { | ||
| // ---- Numeric / NCR reference ---- | ||
| // NCR classification always runs first — prohibited codepoints must be | ||
| // caught regardless of numericAllowed. | ||
| const ncrResult = this._resolveNCR(token); | ||
| if (ncrResult === undefined) { | ||
| // 'leave' action — keep original &token; as-is | ||
| i++; | ||
| continue; | ||
| } | ||
| replacement = ncrResult; // '' for remove, char string for allow | ||
| tier = LIMIT_TIER_BASE; | ||
@@ -364,29 +453,98 @@ } else { | ||
| /** | ||
| * Resolve a numeric entity token (the part after '&', including '#'). | ||
| * Handles &#NNN; and &#xHH; (case-insensitive x). | ||
| * Classify a codepoint and return the minimum action level that must be applied. | ||
| * Returns -1 when no minimum is imposed (normal allow path). | ||
| * | ||
| * Ranges checked (in priority order): | ||
| * 1. U+0000 — null, governed by nullNCR (always ≥ remove) | ||
| * 2. U+D800–U+DFFF — surrogates, always prohibited (min: remove) | ||
| * 3. U+0001–U+001F \ {0x09,0x0A,0x0D} — XML 1.0 restricted C0 (min: remove) | ||
| * (skipped in XML 1.1 — C0 controls are allowed when written as NCRs) | ||
| * | ||
| * @param {number} cp — codepoint | ||
| * @returns {number} — minimum NCR_LEVEL value, or -1 for no restriction | ||
| */ | ||
| _classifyNCR(cp) { | ||
| // 1. Null | ||
| if (cp === 0) return this._ncrNullLevel; | ||
| // 2. Surrogates — always prohibited, minimum 'remove' | ||
| if (cp >= 0xD800 && cp <= 0xDFFF) return NCR_LEVEL.remove; | ||
| // 3. XML 1.0 restricted C0 controls | ||
| if (this._ncrXmlVersion === 1.0) { | ||
| if (cp >= 0x01 && cp <= 0x1F && !XML10_ALLOWED_C0.has(cp)) return NCR_LEVEL.remove; | ||
| } | ||
| return -1; // no restriction | ||
| } | ||
| /** | ||
| * Execute a resolved NCR action. | ||
| * | ||
| * @param {number} action — NCR_LEVEL value | ||
| * @param {string} token — raw token (e.g. '#38') for error messages | ||
| * @param {number} cp — codepoint, used only for error messages | ||
| * @returns {string|undefined} | ||
| * - decoded character string → 'allow' | ||
| * - '' → 'remove' | ||
| * - undefined → 'leave' (caller must skip past '&' only) | ||
| * - throws Error → 'throw' | ||
| */ | ||
| _applyNCRAction(action, token, cp) { | ||
| switch (action) { | ||
| case NCR_LEVEL.allow: return String.fromCodePoint(cp); | ||
| case NCR_LEVEL.remove: return ''; | ||
| case NCR_LEVEL.leave: return undefined; // signal: keep literal | ||
| case NCR_LEVEL.throw: | ||
| throw new Error( | ||
| `[EntityDecoder] Prohibited numeric character reference ` + | ||
| `&${token}; (U+${cp.toString(16).toUpperCase().padStart(4, '0')})` | ||
| ); | ||
| default: return String.fromCodePoint(cp); | ||
| } | ||
| } | ||
| /** | ||
| * Full NCR resolution pipeline for a numeric token. | ||
| * | ||
| * Steps: | ||
| * 1. Parse the codepoint (decimal or hex). | ||
| * 2. Validate the raw codepoint range (NaN, <0, >0x10FFFF). | ||
| * 3. Classify the codepoint to find the minimum required action level. | ||
| * 4. If numericAllowed is false and no minimum restriction applies → leave as-is. | ||
| * 5. Resolve effective action = max(onNCR, minimum). | ||
| * 6. Apply and return. | ||
| * | ||
| * @param {string} token — e.g. '#38', '#x26', '#X26' | ||
| * @returns {string|undefined} | ||
| * - string (incl. '') — replacement ('' = remove) | ||
| * - undefined — leave original &token; as-is | ||
| */ | ||
| _resolveNumeric(token) { | ||
| _resolveNCR(token) { | ||
| // Step 1: parse codepoint | ||
| const second = token.charCodeAt(1); | ||
| let codePoint; | ||
| if (second === 120 || second === 88) { | ||
| // &#xHH; or &#XHH; — hex | ||
| // token is like 'x0026' — slice off 'x', leading zeros handled by parseInt | ||
| codePoint = parseInt(token.slice(2), 16); | ||
| let cp; | ||
| if (second === 120 /* x */ || second === 88 /* X */) { | ||
| cp = parseInt(token.slice(2), 16); | ||
| } else { | ||
| // &#NNN; — decimal | ||
| // token is like '0038' | ||
| codePoint = parseInt(token.slice(1), 10); | ||
| cp = parseInt(token.slice(1), 10); | ||
| } | ||
| if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10FFFF) { | ||
| return undefined; // invalid — leave as-is | ||
| } | ||
| // Step 2: out-of-range → leave as-is unconditionally | ||
| if (Number.isNaN(cp) || cp < 0 || cp > 0x10FFFF) return undefined; | ||
| return String.fromCodePoint(codePoint); | ||
| // Step 3: classify to get minimum action level | ||
| const minimum = this._classifyNCR(cp); | ||
| // Step 4: if numericAllowed is false and no hard minimum → leave | ||
| if (!this._numericAllowed && minimum < NCR_LEVEL.remove) return undefined; | ||
| // Step 5: effective action = max(configured onNCR, range minimum) | ||
| const effective = minimum === -1 | ||
| ? this._ncrOnLevel | ||
| : Math.max(this._ncrOnLevel, minimum); | ||
| // Step 6: apply | ||
| return this._applyNCRAction(effective, token, cp); | ||
| } | ||
| } |
+55
-10
@@ -95,13 +95,4 @@ // --------------------------------------------------------------------------- | ||
| export interface EntityDecoderOptions { | ||
| export interface EntityDecoderLimitOptions { | ||
| /** | ||
| * Extra named entities merged into the **base map** (trusted, counts as `'base'` tier). | ||
| * These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`). | ||
| * Values containing `&` are silently skipped to prevent recursive expansion. | ||
| * | ||
| * @default null | ||
| */ | ||
| namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null; | ||
| /** | ||
| * Maximum number of entity references expanded **per document**. | ||
@@ -131,4 +122,36 @@ * `0` means unlimited. | ||
| applyLimitsTo?: ApplyLimitsTo; | ||
| } | ||
| export interface EntityDecoderNCROptions { | ||
| /** | ||
| * XML version used for NCR classification. | ||
| * @default 1.0 | ||
| */ | ||
| xmlVersion?: 1.0 | 1.1; | ||
| /** | ||
| * Base action for all numeric references. | ||
| * @default 'allow' | ||
| */ | ||
| onNCR?: 'allow' | 'leave' | 'remove' | 'throw'; | ||
| /** | ||
| * Action for null NCR (U+0000). | ||
| * @default 'remove' | ||
| */ | ||
| nullNCR?: 'remove' | 'throw'; | ||
| } | ||
| export interface EntityDecoderOptions { | ||
| /** | ||
| * Extra named entities merged into the **base map** (trusted, counts as `'base'` tier). | ||
| * These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`). | ||
| * Values containing `&` are silently skipped to prevent recursive expansion. | ||
| * | ||
| * @default null | ||
| */ | ||
| namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null; | ||
| /** | ||
| * Hook called once on the fully decoded string (after all replacements). | ||
@@ -151,2 +174,24 @@ * | ||
| numericAllowed?: boolean; | ||
| /** | ||
| * Array of entity names or numeric references to leave unexpanded. | ||
| * @default [] | ||
| */ | ||
| leave?: string[]; | ||
| /** | ||
| * Array of entity names or numeric references to remove. | ||
| * @default [] | ||
| */ | ||
| remove?: string[]; | ||
| /** | ||
| * Security limits for entity expansion. | ||
| */ | ||
| limit?: EntityDecoderLimitOptions; | ||
| /** | ||
| * Numeric Character Reference (NCR) policy. | ||
| */ | ||
| ncr?: EntityDecoderNCROptions; | ||
| } | ||
@@ -153,0 +198,0 @@ |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
60255
15.85%2106
9.4%0
-100%