🚀. Socket Launch Week Day 3:Socket Firewall Now Blocks Malicious VS Code and Open VSX Extensions.Learn more
Sign In

@nodable/entities

Package Overview
Dependencies
Maintainers
1
Versions
7
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@nodable/entities - npm Package Compare versions

Comparing version
2.0.0
to
2.1.0
+8
-3
package.json
{
"name": "@nodable/entities",
"version": "2.0.0",
"description": "Entity parser for XML, HTML, External entites with security controls",
"version": "2.1.0",
"description": "Entity parser for XML, HTML, External entites with security and NCR control",
"main": "./src/index.js",

@@ -27,3 +27,8 @@ "type": "module",

"html",
"entity"
"entity",
"encode",
"decode",
"ncr",
"security",
"performance"
],

@@ -30,0 +35,0 @@ "author": "Amit Gupta (https://solothought.com)",

@@ -88,2 +88,31 @@ // ---------------------------------------------------------------------------

// ---------------------------------------------------------------------------
// NCR (Numeric Character Reference) classification
// ---------------------------------------------------------------------------

// Action severity scale: a larger value is a stricter action. Comparing two
// levels with Math.max lets a codepoint range enforce a minimum action.
const NCR_LEVEL = Object.freeze({
  allow: 0,
  leave: 1,
  remove: 2,
  throw: 3,
});

// XML 1.0 §2.2 Char production:
//   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
// Of the C0 range U+0001–U+001F only these three codepoints are permitted.
const XML10_ALLOWED_C0 = new Set([
  0x09, // TAB
  0x0A, // LF
  0x0D, // CR
]);
/**
 * Parse the `ncr` constructor option into flat, hot-path-friendly fields.
 *
 * - `xmlVersion` is 1.1 only when `ncr.xmlVersion === 1.1`; anything else is 1.0.
 * - `onNCR` falls back to 'allow' and `nullNCR` to 'remove' when the supplied
 *   value is missing or is not one of the NCR_LEVEL names.
 * - `nullLevel` is never lower than 'remove'.
 *
 * @param {object|undefined} ncr
 * @returns {{ xmlVersion: number, onLevel: number, nullLevel: number }}
 */
function parseNCRConfig(ncr) {
  if (!ncr) {
    return { xmlVersion: 1.0, onLevel: NCR_LEVEL.allow, nullLevel: NCR_LEVEL.remove };
  }

  // Only own keys of NCR_LEVEL are valid action names. A bare NCR_LEVEL[name]
  // would also resolve inherited keys such as 'toString' or 'constructor' and
  // hand back a function/object, which later becomes NaN and bypasses the policy.
  const levelOf = (name, fallback) =>
    Object.prototype.hasOwnProperty.call(NCR_LEVEL, name) ? NCR_LEVEL[name] : fallback;

  const xmlVersion = ncr.xmlVersion === 1.1 ? 1.1 : 1.0;
  const onLevel = levelOf(ncr.onNCR, NCR_LEVEL.allow);
  // 'allow' and 'leave' are not meaningful for null — clamp to at least 'remove'
  const nullLevel = Math.max(levelOf(ncr.nullNCR, NCR_LEVEL.remove), NCR_LEVEL.remove);

  return { xmlVersion, onLevel, nullLevel };
}
// ---------------------------------------------------------------------------
// EntityReplacer

@@ -121,5 +150,6 @@ // ---------------------------------------------------------------------------

* @param {object|null} [options.namedEntities] — extra named entities merged into base map
* @param {number} [options.maxTotalExpansions=0] — 0 = unlimited
* @param {number} [options.maxExpandedLength=0] — 0 = unlimited
* @param {'external'|'base'|'all'|string[]} [options.applyLimitsTo='external']
* @param {object} [options.limit] — security limits
* @param {number} [options.limit.maxTotalExpansions=0] — 0 = unlimited
* @param {number} [options.limit.maxExpandedLength=0] — 0 = unlimited
* @param {'external'|'base'|'all'|string[]} [options.limit.applyLimitsTo='external']
* Which entity tiers count against the security limits:

@@ -131,8 +161,22 @@ * - 'external' (default) — only input/runtime + persistent external entities

* @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
* @param {string[]} [options.remove=[]] — entity names (e.g. ['nbsp', '#13']) to delete (replace with empty string)
* @param {string[]} [options.leave=[]] — entity names to keep as literal (unchanged in output)
* @param {object} [options.ncr] — Numeric Character Reference controls
* @param {1.0|1.1} [options.ncr.xmlVersion=1.0]
* XML version governing which codepoint ranges are restricted:
* - 1.0 — C0 controls U+0001–U+001F (except U+0009/000A/000D) are prohibited
* - 1.1 — C0 controls are allowed when written as NCRs; DEL and C1 controls (U+007F–U+009F) are decoded as-is
* @param {'allow'|'leave'|'remove'|'throw'} [options.ncr.onNCR='allow']
* Base action for numeric references. Severity order: allow < leave < remove < throw.
* For codepoint ranges that carry a minimum level (surrogates → remove, XML 1.0 C0 → remove),
* the effective action is max(onNCR, rangeMinimum).
* @param {'remove'|'throw'} [options.ncr.nullNCR='remove']
* Action for U+0000 (null). 'allow' and 'leave' are clamped to 'remove' since null is never safe.
*/
constructor(options = {}) {
this._maxTotalExpansions = options.maxTotalExpansions || 0;
this._maxExpandedLength = options.maxExpandedLength || 0;
this._limit = options.limit || {};
this._maxTotalExpansions = this._limit.maxTotalExpansions || 0;
this._maxExpandedLength = this._limit.maxExpandedLength || 0;
this._postCheck = typeof options.postCheck === 'function' ? options.postCheck : r => r;
this._limitTiers = parseLimitTiers(options.applyLimitsTo ?? LIMIT_TIER_EXTERNAL);
this._limitTiers = parseLimitTiers(this._limit.applyLimitsTo ?? LIMIT_TIER_EXTERNAL);
this._numericAllowed = options.numericAllowed ?? true;

@@ -154,2 +198,14 @@ // Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Immutable after construction.

this._expandedLength = 0;
// --- New: remove / leave sets ---
/** @type {Set<string>} */
this._removeSet = new Set(options.remove && Array.isArray(options.remove) ? options.remove : []);
/** @type {Set<string>} */
this._leaveSet = new Set(options.leave && Array.isArray(options.leave) ? options.leave : []);
// --- NCR config (parsed into flat fields for hot-path speed) ---
const ncrCfg = parseNCRConfig(options.ncr);
this._ncrXmlVersion = ncrCfg.xmlVersion;
this._ncrOnLevel = ncrCfg.onLevel;
this._ncrNullLevel = ncrCfg.nullLevel;
}

@@ -219,2 +275,15 @@

// -------------------------------------------------------------------------
// XML version (can be set after construction, e.g. once parser reads <?xml?>)
// -------------------------------------------------------------------------
/**
* Update the XML version used for NCR classification.
* Call this as soon as the document's `<?xml version="...">` declaration is parsed.
* @param {1.0|1.1|number} version
*/
setXmlVersion(version) {
this._ncrXmlVersion = version === 1.1 ? 1.1 : 1.0;
}
// -------------------------------------------------------------------------
// Primary API

@@ -267,5 +336,25 @@ // -------------------------------------------------------------------------

if (token.charCodeAt(0) === 35 /* '#' */ && this._numericAllowed) {
// ---- Numeric reference — base tier ----
replacement = this._resolveNumeric(token);
if (this._removeSet.has(token)) {
// Remove entity: replace with empty string
replacement = '';
// If entity was unknown (replacement undefined), we still need a tier for limits.
// Treat as external tier because it's user-directed removal of an unknown reference.
if (tier === undefined) {
tier = LIMIT_TIER_EXTERNAL;
}
} else if (this._leaveSet.has(token)) {
// Do not replace — keep original &token; as literal
i++;
continue;
} else if (token.charCodeAt(0) === 35 /* '#' */) {
// ---- Numeric / NCR reference ----
// NCR classification always runs first — prohibited codepoints must be
// caught regardless of numericAllowed.
const ncrResult = this._resolveNCR(token);
if (ncrResult === undefined) {
// 'leave' action — keep original &token; as-is
i++;
continue;
}
replacement = ncrResult; // '' for remove, char string for allow
tier = LIMIT_TIER_BASE;

@@ -364,29 +453,98 @@ } else {

/**
* Resolve a numeric entity token (the part after '&', including '#').
* Handles &#NNN; and &#xHH; (case-insensitive x).
* Classify a codepoint and return the minimum action level that must be applied.
* Returns -1 when no minimum is imposed (normal allow path).
*
* Ranges checked (in priority order):
* 1. U+0000 — null, governed by nullNCR (always ≥ remove)
* 2. U+D800–U+DFFF — surrogates, always prohibited (min: remove)
* 3. U+0001–U+001F \ {0x09,0x0A,0x0D} — XML 1.0 restricted C0 (min: remove)
* (skipped in XML 1.1 — C0 controls are allowed when written as NCRs)
*
* @param {number} cp — codepoint
* @returns {number} — minimum NCR_LEVEL value, or -1 for no restriction
*/
_classifyNCR(cp) {
// 1. Null
if (cp === 0) return this._ncrNullLevel;
// 2. Surrogates — always prohibited, minimum 'remove'
if (cp >= 0xD800 && cp <= 0xDFFF) return NCR_LEVEL.remove;
// 3. XML 1.0 restricted C0 controls
if (this._ncrXmlVersion === 1.0) {
if (cp >= 0x01 && cp <= 0x1F && !XML10_ALLOWED_C0.has(cp)) return NCR_LEVEL.remove;
}
return -1; // no restriction
}
/**
* Execute a resolved NCR action.
*
* @param {number} action — NCR_LEVEL value
* @param {string} token — raw token (e.g. '#38') for error messages
* @param {number} cp — codepoint, used only for error messages
* @returns {string|undefined}
* - decoded character string → 'allow'
* - '' → 'remove'
* - undefined → 'leave' (caller must skip past '&' only)
* - throws Error → 'throw'
*/
_applyNCRAction(action, token, cp) {
switch (action) {
case NCR_LEVEL.allow: return String.fromCodePoint(cp);
case NCR_LEVEL.remove: return '';
case NCR_LEVEL.leave: return undefined; // signal: keep literal
case NCR_LEVEL.throw:
throw new Error(
`[EntityDecoder] Prohibited numeric character reference ` +
`&${token}; (U+${cp.toString(16).toUpperCase().padStart(4, '0')})`
);
default: return String.fromCodePoint(cp);
}
}
/**
* Full NCR resolution pipeline for a numeric token.
*
* Steps (numbered as in the body below):
* 1. Parse the codepoint — hex when the character after '#' is 'x' or 'X', decimal otherwise.
* 2. Validate the raw codepoint range (NaN, <0, >0x10FFFF) → leave as-is.
* 3. Classify the codepoint to find the minimum required action level.
* 4. If numericAllowed is false and no minimum restriction applies → leave as-is.
* 5. Resolve effective action = max(onNCR, minimum).
* 6. Apply and return.
*
* NOTE(review): parseInt stops at the first character that is not a digit of the
* given radix, so a token such as '#38abc' parses as 38. Confirm the caller only
* passes well-formed tokens, or validate the digits here.
*
* @param {string} token — e.g. '#38', '#x26', '#X26'
* @returns {string|undefined}
* - string (incl. '') — replacement ('' = remove)
* - undefined — leave original &token; as-is
* @throws {Error} when the effective action is 'throw' (raised by _applyNCRAction)
*/
_resolveNumeric(token) {
_resolveNCR(token) {
// Step 1: parse codepoint
const second = token.charCodeAt(1);
let codePoint;
if (second === 120 || second === 88) {
// &#xHH; or &#XHH; — hex
// token is like 'x0026' — slice off 'x', leading zeros handled by parseInt
codePoint = parseInt(token.slice(2), 16);
let cp;
if (second === 120 /* x */ || second === 88 /* X */) {
cp = parseInt(token.slice(2), 16);
} else {
// &#NNN; — decimal
// token is like '0038'
codePoint = parseInt(token.slice(1), 10);
cp = parseInt(token.slice(1), 10);
}
if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10FFFF) {
return undefined; // invalid — leave as-is
}
// Step 2: out-of-range → leave as-is unconditionally
if (Number.isNaN(cp) || cp < 0 || cp > 0x10FFFF) return undefined;
return String.fromCodePoint(codePoint);
// Step 3: classify to get minimum action level
const minimum = this._classifyNCR(cp);
// Step 4: if numericAllowed is false and no hard minimum → leave
// (minimum is either -1 or at least NCR_LEVEL.remove, so this test means "no minimum")
if (!this._numericAllowed && minimum < NCR_LEVEL.remove) return undefined;
// Step 5: effective action = max(configured onNCR, range minimum)
const effective = minimum === -1
? this._ncrOnLevel
: Math.max(this._ncrOnLevel, minimum);
// Step 6: apply
return this._applyNCRAction(effective, token, cp);
}
}

@@ -95,13 +95,4 @@ // ---------------------------------------------------------------------------

export interface EntityDecoderOptions {
export interface EntityDecoderLimitOptions {
/**
* Extra named entities merged into the **base map** (trusted, counts as `'base'` tier).
* These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`).
* Values containing `&` are silently skipped to prevent recursive expansion.
*
* @default null
*/
namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null;
/**
* Maximum number of entity references expanded **per document**.

@@ -131,4 +122,36 @@ * `0` means unlimited.

applyLimitsTo?: ApplyLimitsTo;
}
/**
* Numeric Character Reference (NCR) policy: how `&#NNN;` / `&#xHH;` references
* are handled, including codepoints that carry a built-in minimum action.
*/
export interface EntityDecoderNCROptions {
/**
* XML version used for NCR classification.
* Under 1.0, C0 controls U+0001–U+001F (except U+0009, U+000A, U+000D) carry a
* minimum action of 'remove'; under 1.1 that restriction is skipped.
* Any value other than 1.1 is treated as 1.0.
* @default 1.0
*/
xmlVersion?: 1.0 | 1.1;
/**
* Base action for all numeric references.
* Severity order: allow < leave < remove < throw. For codepoints with a built-in
* minimum (null, surrogates, XML 1.0 restricted C0) the effective action is the
* stricter of this value and that minimum.
* @default 'allow'
*/
onNCR?: 'allow' | 'leave' | 'remove' | 'throw';
/**
* Action for null NCR (U+0000).
* Never weaker than 'remove': lower levels are clamped up to 'remove'.
* @default 'remove'
*/
nullNCR?: 'remove' | 'throw';
}
export interface EntityDecoderOptions {
/**
* Extra named entities merged into the **base map** (trusted, counts as `'base'` tier).
* These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`).
* Values containing `&` are silently skipped to prevent recursive expansion.
*
* @default null
*/
namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null;
/**
* Hook called once on the fully decoded string (after all replacements).

@@ -151,2 +174,24 @@ *

numericAllowed?: boolean;
/**
* Array of entity names or numeric references to leave unexpanded.
* @default []
*/
leave?: string[];
/**
* Array of entity names or numeric references to remove.
* @default []
*/
remove?: string[];
/**
* Security limits for entity expansion.
*/
limit?: EntityDecoderLimitOptions;
/**
* Numeric Character Reference (NCR) policy.
*/
ncr?: EntityDecoderNCROptions;
}

@@ -153,0 +198,0 @@