@nodable/entities
Advanced tools
+1177
// ---------------------------------------------------------------------------
// Complete HTML5 named entity reference
// Organized by logical categories for easy maintenance and selective importing
// ---------------------------------------------------------------------------
/**
 * Basic Latin punctuation plus the Latin-1 symbol range (U+00A0–U+00FF, non-letters).
 *
 * Keys are entity names without the surrounding `&` / `;`; values are the
 * replacement text.
 *
 * `percent` is kept for backward compatibility next to the standard `percnt` name.
 *
 * @type {Record<string, string>}
 */
export const BASIC_LATIN = {
  amp: '&',
  AMP: '&',
  lt: '<',
  LT: '<',
  gt: '>',
  GT: '>',
  quot: '"',
  QUOT: '"',
  apos: "'",
  lsquo: '‘',
  rsquo: '’',
  ldquo: '“',
  rdquo: '”',
  lsquor: '‚',
  rsquor: '’',
  ldquor: '„',
  bdquo: '„',
  comma: ',',
  period: '.',
  colon: ':',
  semi: ';',
  excl: '!',
  quest: '?',
  num: '#',
  dollar: '$',
  percnt: '%',
  percent: '%',
  ast: '*',
  commat: '@',
  lowbar: '_',
  verbar: '|',
  vert: '|',
  sol: '/',
  bsol: '\\',
  lbrace: '{',
  rbrace: '}',
  lbrack: '[',
  rbrack: ']',
  lpar: '(',
  rpar: ')',
  nbsp: '\u00a0',
  iexcl: '¡',
  cent: '¢',
  pound: '£',
  curren: '¤',
  yen: '¥',
  brvbar: '¦',
  sect: '§',
  uml: '¨',
  copy: '©',
  COPY: '©',
  ordf: 'ª',
  laquo: '«',
  not: '¬',
  shy: '\u00ad',
  reg: '®',
  REG: '®',
  macr: '¯',
  deg: '°',
  plusmn: '±',
  sup2: '²',
  sup3: '³',
  acute: '´',
  micro: 'µ',
  para: '¶',
  middot: '·',
  cedil: '¸',
  sup1: '¹',
  ordm: 'º',
  raquo: '»',
  frac14: '¼',
  frac12: '½',
  half: '½',
  frac34: '¾',
  iquest: '¿',
  times: '×',
  div: '÷',
  divide: '÷',
};
/**
 * Accented Latin letters from the Latin-1 Supplement range, plus `Yuml`.
 *
 * Each name is case-sensitive: the capitalised key maps to the upper-case
 * letter and the lower-case key to the lower-case letter.
 *
 * @type {Record<string, string>}
 */
export const LATIN_ACCENTS = {
  Agrave: 'À',
  agrave: 'à',
  Aacute: 'Á',
  aacute: 'á',
  Acirc: 'Â',
  acirc: 'â',
  Atilde: 'Ã',
  atilde: 'ã',
  Auml: 'Ä',
  auml: 'ä',
  Aring: 'Å',
  aring: 'å',
  AElig: 'Æ',
  aelig: 'æ',
  Ccedil: 'Ç',
  ccedil: 'ç',
  Egrave: 'È',
  egrave: 'è',
  Eacute: 'É',
  eacute: 'é',
  Ecirc: 'Ê',
  ecirc: 'ê',
  Euml: 'Ë',
  euml: 'ë',
  Igrave: 'Ì',
  igrave: 'ì',
  Iacute: 'Í',
  iacute: 'í',
  Icirc: 'Î',
  icirc: 'î',
  Iuml: 'Ï',
  iuml: 'ï',
  ETH: 'Ð',
  eth: 'ð',
  Ntilde: 'Ñ',
  ntilde: 'ñ',
  Ograve: 'Ò',
  ograve: 'ò',
  Oacute: 'Ó',
  oacute: 'ó',
  Ocirc: 'Ô',
  ocirc: 'ô',
  Otilde: 'Õ',
  otilde: 'õ',
  Ouml: 'Ö',
  ouml: 'ö',
  Oslash: 'Ø',
  oslash: 'ø',
  Ugrave: 'Ù',
  ugrave: 'ù',
  Uacute: 'Ú',
  uacute: 'ú',
  Ucirc: 'Û',
  ucirc: 'û',
  Uuml: 'Ü',
  uuml: 'ü',
  Yacute: 'Ý',
  yacute: 'ý',
  THORN: 'Þ',
  thorn: 'þ',
  szlig: 'ß',
  yuml: 'ÿ',
  Yuml: 'Ÿ',
};
/**
 * Latin Extended-A letters (macron, breve, ogonek, caron, stroke, …).
 *
 * Every value is a single code point. The IJ ligatures are written as escapes
 * so they cannot be silently normalised into the two ASCII letters "IJ"/"ij".
 *
 * @type {Record<string, string>}
 */
export const LATIN_EXTENDED = {
  Amacr: 'Ā',
  amacr: 'ā',
  Abreve: 'Ă',
  abreve: 'ă',
  Aogon: 'Ą',
  aogon: 'ą',
  Cacute: 'Ć',
  cacute: 'ć',
  Ccirc: 'Ĉ',
  ccirc: 'ĉ',
  Cdot: 'Ċ',
  cdot: 'ċ',
  Ccaron: 'Č',
  ccaron: 'č',
  Dcaron: 'Ď',
  dcaron: 'ď',
  Dstrok: 'Đ',
  dstrok: 'đ',
  Emacr: 'Ē',
  emacr: 'ē',
  Ecaron: 'Ě',
  ecaron: 'ě',
  Edot: 'Ė',
  edot: 'ė',
  Eogon: 'Ę',
  eogon: 'ę',
  Gcirc: 'Ĝ',
  gcirc: 'ĝ',
  Gbreve: 'Ğ',
  gbreve: 'ğ',
  Gdot: 'Ġ',
  gdot: 'ġ',
  Gcedil: 'Ģ',
  Hcirc: 'Ĥ',
  hcirc: 'ĥ',
  Hstrok: 'Ħ',
  hstrok: 'ħ',
  Itilde: 'Ĩ',
  itilde: 'ĩ',
  Imacr: 'Ī',
  imacr: 'ī',
  Iogon: 'Į',
  iogon: 'į',
  Idot: 'İ',
  IJlig: '\u0132', // Ĳ — LATIN CAPITAL LIGATURE IJ (one code point)
  ijlig: '\u0133', // ĳ — LATIN SMALL LIGATURE IJ (one code point)
  Jcirc: 'Ĵ',
  jcirc: 'ĵ',
  Kcedil: 'Ķ',
  kcedil: 'ķ',
  kgreen: 'ĸ',
  Lacute: 'Ĺ',
  lacute: 'ĺ',
  Lcedil: 'Ļ',
  lcedil: 'ļ',
  Lcaron: 'Ľ',
  lcaron: 'ľ',
  Lmidot: 'Ŀ',
  lmidot: 'ŀ',
  Lstrok: 'Ł',
  lstrok: 'ł',
  Nacute: 'Ń',
  nacute: 'ń',
  Ncaron: 'Ň',
  ncaron: 'ň',
  Ncedil: 'Ņ',
  ncedil: 'ņ',
  ENG: 'Ŋ',
  eng: 'ŋ',
  Omacr: 'Ō',
  omacr: 'ō',
  Odblac: 'Ő',
  odblac: 'ő',
  OElig: 'Œ',
  oelig: 'œ',
  Racute: 'Ŕ',
  racute: 'ŕ',
  Rcaron: 'Ř',
  rcaron: 'ř',
  Rcedil: 'Ŗ',
  rcedil: 'ŗ',
  Sacute: 'Ś',
  sacute: 'ś',
  Scirc: 'Ŝ',
  scirc: 'ŝ',
  Scedil: 'Ş',
  scedil: 'ş',
  Scaron: 'Š',
  scaron: 'š',
  Tcedil: 'Ţ',
  tcedil: 'ţ',
  Tcaron: 'Ť',
  tcaron: 'ť',
  Tstrok: 'Ŧ',
  tstrok: 'ŧ',
  Utilde: 'Ũ',
  utilde: 'ũ',
  Umacr: 'Ū',
  umacr: 'ū',
  Ubreve: 'Ŭ',
  ubreve: 'ŭ',
  Uring: 'Ů',
  uring: 'ů',
  Udblac: 'Ű',
  udblac: 'ű',
  Uogon: 'Ų',
  uogon: 'ų',
  Wcirc: 'Ŵ',
  wcirc: 'ŵ',
  Ycirc: 'Ŷ',
  ycirc: 'ŷ',
  Zacute: 'Ź',
  zacute: 'ź',
  Zdot: 'Ż',
  zdot: 'ż',
  Zcaron: 'Ž',
  zcaron: 'ž',
};
/**
 * Greek alphabet (upper and lower case) together with the variant letter
 * forms (`epsiv`, `thetasym`, `piv`, …) and their `var*` aliases.
 *
 * @type {Record<string, string>}
 */
export const GREEK = {
  Alpha: 'Α',
  alpha: 'α',
  Beta: 'Β',
  beta: 'β',
  Gamma: 'Γ',
  gamma: 'γ',
  Delta: 'Δ',
  delta: 'δ',
  Epsilon: 'Ε',
  epsilon: 'ε',
  epsiv: 'ϵ',
  varepsilon: 'ϵ',
  Zeta: 'Ζ',
  zeta: 'ζ',
  Eta: 'Η',
  eta: 'η',
  Theta: 'Θ',
  theta: 'θ',
  thetasym: 'ϑ',
  vartheta: 'ϑ',
  Iota: 'Ι',
  iota: 'ι',
  Kappa: 'Κ',
  kappa: 'κ',
  kappav: 'ϰ',
  varkappa: 'ϰ',
  Lambda: 'Λ',
  lambda: 'λ',
  Mu: 'Μ',
  mu: 'μ',
  Nu: 'Ν',
  nu: 'ν',
  Xi: 'Ξ',
  xi: 'ξ',
  Omicron: 'Ο',
  omicron: 'ο',
  Pi: 'Π',
  pi: 'π',
  piv: 'ϖ',
  varpi: 'ϖ',
  Rho: 'Ρ',
  rho: 'ρ',
  rhov: 'ϱ',
  varrho: 'ϱ',
  Sigma: 'Σ',
  sigma: 'σ',
  sigmaf: 'ς',
  sigmav: 'ς',
  varsigma: 'ς',
  Tau: 'Τ',
  tau: 'τ',
  Upsilon: 'Υ',
  upsilon: 'υ',
  upsi: 'υ',
  Upsi: 'ϒ',
  upsih: 'ϒ',
  Phi: 'Φ',
  phi: 'φ',
  phiv: 'ϕ',
  varphi: 'ϕ',
  Chi: 'Χ',
  chi: 'χ',
  Psi: 'Ψ',
  psi: 'ψ',
  Omega: 'Ω',
  omega: 'ω',
  ohm: 'Ω',
  Gammad: 'Ϝ',
  gammad: 'ϝ',
  digamma: 'ϝ',
};
/**
 * Cyrillic letters: the Russian alphabet followed by the additional
 * Serbian / Macedonian / Ukrainian / Belarusian letters.
 *
 * NOTE: `Afr` / `afr` are mathematical Fraktur letters, not Cyrillic. They are
 * retained here only so existing consumers of this map keep working.
 *
 * @type {Record<string, string>}
 */
export const CYRILLIC = {
  Afr: '𝔄',
  afr: '𝔞',
  Acy: 'А',
  acy: 'а',
  Bcy: 'Б',
  bcy: 'б',
  Vcy: 'В',
  vcy: 'в',
  Gcy: 'Г',
  gcy: 'г',
  Dcy: 'Д',
  dcy: 'д',
  IEcy: 'Е',
  iecy: 'е',
  IOcy: 'Ё',
  iocy: 'ё',
  ZHcy: 'Ж',
  zhcy: 'ж',
  Zcy: 'З',
  zcy: 'з',
  Icy: 'И',
  icy: 'и',
  Jcy: 'Й',
  jcy: 'й',
  Kcy: 'К',
  kcy: 'к',
  Lcy: 'Л',
  lcy: 'л',
  Mcy: 'М',
  mcy: 'м',
  Ncy: 'Н',
  ncy: 'н',
  Ocy: 'О',
  ocy: 'о',
  Pcy: 'П',
  pcy: 'п',
  Rcy: 'Р',
  rcy: 'р',
  Scy: 'С',
  scy: 'с',
  Tcy: 'Т',
  tcy: 'т',
  Ucy: 'У',
  ucy: 'у',
  Fcy: 'Ф',
  fcy: 'ф',
  KHcy: 'Х',
  khcy: 'х',
  TScy: 'Ц',
  tscy: 'ц',
  CHcy: 'Ч',
  chcy: 'ч',
  SHcy: 'Ш',
  shcy: 'ш',
  SHCHcy: 'Щ',
  shchcy: 'щ',
  HARDcy: 'Ъ',
  hardcy: 'ъ',
  Ycy: 'Ы',
  ycy: 'ы',
  SOFTcy: 'Ь',
  softcy: 'ь',
  Ecy: 'Э',
  ecy: 'э',
  YUcy: 'Ю',
  yucy: 'ю',
  YAcy: 'Я',
  yacy: 'я',
  DJcy: 'Ђ',
  djcy: 'ђ',
  GJcy: 'Ѓ',
  gjcy: 'ѓ',
  Jukcy: 'Є',
  jukcy: 'є',
  DScy: 'Ѕ',
  dscy: 'ѕ',
  Iukcy: 'І',
  iukcy: 'і',
  YIcy: 'Ї',
  yicy: 'ї',
  Jsercy: 'Ј',
  jsercy: 'ј',
  LJcy: 'Љ',
  ljcy: 'љ',
  NJcy: 'Њ',
  njcy: 'њ',
  TSHcy: 'Ћ',
  tshcy: 'ћ',
  KJcy: 'Ќ',
  kjcy: 'ќ',
  Ubrcy: 'Ў',
  ubrcy: 'ў',
  DZcy: 'Џ',
  dzcy: 'џ',
};
/**
 * Everyday mathematical operators and relations (arithmetic, equality,
 * similarity, ordering) plus a few closely related typographic marks.
 *
 * Each key appears exactly once; repeated entries that carried the same value
 * were removed, so lookups are unchanged.
 *
 * @type {Record<string, string>}
 */
export const MATH = {
  plus: '+',
  minus: '−',
  mnplus: '∓',
  mp: '∓',
  pm: '±',
  times: '×',
  div: '÷',
  divide: '÷',
  sdot: '⋅',
  star: '☆',
  starf: '★',
  bigstar: '★',
  lowast: '∗',
  ast: '*',
  midast: '*',
  compfn: '∘',
  smallcircle: '∘',
  bullet: '•',
  bull: '•',
  nbsp: '\u00a0',
  hellip: '…',
  mldr: '…',
  prime: '′',
  Prime: '″',
  tprime: '‴',
  bprime: '‵',
  backprime: '‵',
  minusd: '∸',
  dotminus: '∸',
  plusdo: '∔',
  dotplus: '∔',
  plusmn: '±',
  minusplus: '∓',
  setminus: '∖',
  smallsetminus: '∖',
  Backslash: '∖',
  setmn: '∖',
  ssetmn: '∖',
  lowbar: '_',
  verbar: '|',
  vert: '|',
  VerticalLine: '|',
  colon: ':',
  Colon: '∷',
  Proportion: '∷',
  ratio: '∶',
  equals: '=',
  ne: '≠',
  nequiv: '≢',
  equiv: '≡',
  Congruent: '≡',
  sim: '∼',
  thicksim: '∼',
  thksim: '∼',
  sime: '≃',
  simeq: '≃',
  TildeEqual: '≃',
  asymp: '≈',
  approx: '≈',
  thickapprox: '≈',
  thkap: '≈',
  TildeTilde: '≈',
  ncong: '≇',
  cong: '≅',
  TildeFullEqual: '≅',
  asympeq: '≍',
  CupCap: '≍',
  bump: '≎',
  Bumpeq: '≎',
  HumpDownHump: '≎',
  bumpe: '≏',
  bumpeq: '≏',
  HumpEqual: '≏',
  le: '≤',
  LessEqual: '≤',
  ge: '≥',
  GreaterEqual: '≥',
  lesseqgtr: '⋚',
  lesseqqgtr: '⪋',
  greater: '>',
  less: '<',
};
/**
 * Advanced mathematical notation: Hebrew letter-like symbols, quantifiers,
 * set membership, big operators, integrals, logic connectives and negated
 * relations.
 *
 * Values made of a base character plus a combining mark are written as
 * escapes so the combining mark stays visible in source.
 *
 * @type {Record<string, string>}
 */
export const MATH_ADVANCED = {
  alefsym: 'ℵ',
  aleph: 'ℵ',
  beth: 'ℶ',
  gimel: 'ℷ',
  daleth: 'ℸ',
  forall: '∀',
  ForAll: '∀',
  part: '∂',
  PartialD: '∂',
  exist: '∃',
  Exists: '∃',
  nexist: '∄',
  nexists: '∄',
  empty: '∅',
  emptyset: '∅',
  emptyv: '∅',
  varnothing: '∅',
  nabla: '∇',
  Del: '∇',
  isin: '∈',
  isinv: '∈',
  in: '∈',
  Element: '∈',
  notin: '∉',
  notinva: '∉',
  ni: '∋',
  niv: '∋',
  SuchThat: '∋',
  ReverseElement: '∋',
  notni: '∌',
  notniva: '∌',
  prod: '∏',
  Product: '∏',
  coprod: '∐',
  Coproduct: '∐',
  sum: '∑',
  Sum: '∑',
  minus: '−',
  mp: '∓',
  plusdo: '∔',
  dotplus: '∔',
  setminus: '∖',
  lowast: '∗',
  radic: '√',
  Sqrt: '√',
  prop: '∝',
  propto: '∝',
  Proportional: '∝',
  varpropto: '∝',
  infin: '∞',
  infintie: '⧝',
  ang: '∠',
  angle: '∠',
  angmsd: '∡',
  measuredangle: '∡',
  angsph: '∢',
  mid: '∣',
  VerticalBar: '∣',
  nmid: '∤',
  nsmid: '∤',
  npar: '∦',
  parallel: '∥',
  spar: '∥',
  nparallel: '∦',
  nspar: '∦',
  and: '∧',
  wedge: '∧',
  or: '∨',
  vee: '∨',
  cap: '∩',
  cup: '∪',
  int: '∫',
  Integral: '∫',
  conint: '∮',
  ContourIntegral: '∮',
  Conint: '∯',
  DoubleContourIntegral: '∯',
  Cconint: '∰',
  there4: '∴',
  therefore: '∴',
  Therefore: '∴',
  becaus: '∵',
  because: '∵',
  Because: '∵',
  ratio: '∶',
  Proportion: '∷',
  minusd: '∸',
  dotminus: '∸',
  mDDot: '∺',
  homtht: '∻',
  sim: '∼',
  bsimg: '∽',
  backsim: '∽',
  ac: '∾',
  mstpos: '∾',
  acd: '∿',
  VerticalTilde: '≀',
  wr: '≀',
  wreath: '≀',
  nsime: '≄',
  nsimeq: '≄',
  ncong: '≇',
  simne: '≆',
  ncongdot: '\u2a6d\u0338', // ⩭ + combining long solidus overlay
  ngsim: '≵',
  nsim: '≁',
  napprox: '≉',
  nap: '≉',
  ngeq: '≱',
  nge: '≱',
  nleq: '≰',
  nle: '≰',
  ngtr: '≯',
  ngt: '≯',
  nless: '≮',
  nlt: '≮',
  nprec: '⊀',
  npr: '⊀',
  nsucc: '⊁',
  nsc: '⊁',
};
/**
 * Arrows: single, double and triple arrows, harpoons, hooked / looped /
 * tailed arrows and their negated forms.
 *
 * Each key appears exactly once; repeated entries that carried the same value
 * were removed, so lookups are unchanged.
 *
 * @type {Record<string, string>}
 */
export const ARROWS = {
  larr: '←',
  leftarrow: '←',
  LeftArrow: '←',
  uarr: '↑',
  uparrow: '↑',
  UpArrow: '↑',
  rarr: '→',
  rightarrow: '→',
  RightArrow: '→',
  darr: '↓',
  downarrow: '↓',
  DownArrow: '↓',
  harr: '↔',
  leftrightarrow: '↔',
  LeftRightArrow: '↔',
  varr: '↕',
  updownarrow: '↕',
  UpDownArrow: '↕',
  nwarr: '↖',
  nwarrow: '↖',
  UpperLeftArrow: '↖',
  nearr: '↗',
  nearrow: '↗',
  UpperRightArrow: '↗',
  searr: '↘',
  searrow: '↘',
  LowerRightArrow: '↘',
  swarr: '↙',
  swarrow: '↙',
  LowerLeftArrow: '↙',
  lArr: '⇐',
  Leftarrow: '⇐',
  uArr: '⇑',
  Uparrow: '⇑',
  rArr: '⇒',
  Rightarrow: '⇒',
  dArr: '⇓',
  Downarrow: '⇓',
  hArr: '⇔',
  Leftrightarrow: '⇔',
  iff: '⇔',
  vArr: '⇕',
  Updownarrow: '⇕',
  lAarr: '⇚',
  Lleftarrow: '⇚',
  rAarr: '⇛',
  Rrightarrow: '⇛',
  lrarr: '⇆',
  leftrightarrows: '⇆',
  rlarr: '⇄',
  rightleftarrows: '⇄',
  lrhar: '⇋',
  leftrightharpoons: '⇋',
  ReverseEquilibrium: '⇋',
  rlhar: '⇌',
  rightleftharpoons: '⇌',
  Equilibrium: '⇌',
  udarr: '⇅',
  UpArrowDownArrow: '⇅',
  duarr: '⇵',
  DownArrowUpArrow: '⇵',
  llarr: '⇇',
  leftleftarrows: '⇇',
  rrarr: '⇉',
  rightrightarrows: '⇉',
  ddarr: '⇊',
  downdownarrows: '⇊',
  har: '↽',
  lhard: '↽',
  leftharpoondown: '↽',
  lharu: '↼',
  leftharpoonup: '↼',
  rhard: '⇁',
  rightharpoondown: '⇁',
  rharu: '⇀',
  rightharpoonup: '⇀',
  lsh: '↰',
  Lsh: '↰',
  rsh: '↱',
  Rsh: '↱',
  ldsh: '↲',
  rdsh: '↳',
  hookleftarrow: '↩',
  hookrightarrow: '↪',
  mapstoleft: '↤',
  mapstoup: '↥',
  map: '↦',
  mapsto: '↦',
  mapstodown: '↧',
  crarr: '↵',
  nleftarrow: '↚',
  nleftrightarrow: '↮',
  nrightarrow: '↛',
  nrarr: '↛',
  larrtl: '↢',
  rarrtl: '↣',
  leftarrowtail: '↢',
  rightarrowtail: '↣',
  twoheadleftarrow: '↞',
  twoheadrightarrow: '↠',
  Larr: '↞',
  Rarr: '↠',
  larrhk: '↩',
  rarrhk: '↪',
  larrlp: '↫',
  looparrowleft: '↫',
  rarrlp: '↬',
  looparrowright: '↬',
  harrw: '↭',
  leftrightsquigarrow: '↭',
  nrarrw: '\u219d\u0338', // ↝ + combining long solidus overlay
  rarrw: '↝',
  rightsquigarrow: '↝',
  larrbfs: '⤟',
  rarrbfs: '⤠',
  nvHarr: '⤄',
  nvlArr: '⤂',
  nvrArr: '⤃',
  larrfs: '⤝',
  rarrfs: '⤞',
  Map: '⤅',
  larrsim: '⥳',
  rarrsim: '⥴',
  harrcir: '⥈',
  Uarrocir: '⥉',
  lurdshar: '⥊',
  ldrdhar: '⥧',
  ldrushar: '⥋',
  rdldhar: '⥩',
  lrhard: '⥭',
  uharr: '↾',
  uharl: '↿',
  dharr: '⇂',
  dharl: '⇃',
  Uarr: '↟',
  Darr: '↡',
  zigrarr: '⇝',
  nwArr: '⇖',
  neArr: '⇗',
  seArr: '⇘',
  swArr: '⇙',
  nharr: '↮',
  nhArr: '⇎',
  nlarr: '↚',
  nlArr: '⇍',
  nrArr: '⇏',
  larrb: '⇤',
  LeftArrowBar: '⇤',
  rarrb: '⇥',
  RightArrowBar: '⇥',
};
/**
 * Geometric shapes, block elements and box-drawing characters.
 *
 * Box-drawing names follow the convention that a lower-case direction letter
 * means a single line and an upper-case letter a double line, e.g. `boxdL` is
 * "down single, left double" (U+2555) and `boxDl` is "down double, left
 * single" (U+2556).
 *
 * @type {Record<string, string>}
 */
export const SHAPES = {
  square: '□',
  Square: '□',
  squ: '□',
  squf: '▪',
  squarf: '▪',
  blacksquar: '▪',
  blacksquare: '▪',
  FilledVerySmallSquare: '▪',
  blk34: '▓',
  blk12: '▒',
  blk14: '░',
  block: '█',
  srect: '▭',
  rect: '▭',
  sdot: '⋅',
  sdotb: '⊡',
  dotsquare: '⊡',
  triangle: '▵',
  tri: '▵',
  trine: '▵',
  utri: '▵',
  triangledown: '▿',
  dtri: '▿',
  tridown: '▿',
  triangleleft: '◃',
  ltri: '◃',
  triangleright: '▹',
  rtri: '▹',
  blacktriangle: '▴',
  utrif: '▴',
  blacktriangledown: '▾',
  dtrif: '▾',
  blacktriangleleft: '◂',
  ltrif: '◂',
  blacktriangleright: '▸',
  rtrif: '▸',
  loz: '◊',
  lozenge: '◊',
  blacklozenge: '⧫',
  lozf: '⧫',
  bigcirc: '◯',
  xcirc: '◯',
  circ: 'ˆ',
  Circle: '○',
  cir: '○',
  o: '○',
  bullet: '•',
  bull: '•',
  hellip: '…',
  mldr: '…',
  nldr: '‥',
  boxh: '─',
  HorizontalLine: '─',
  boxv: '│',
  boxdr: '┌',
  boxdl: '┐',
  boxur: '└',
  boxul: '┘',
  boxvr: '├',
  boxvl: '┤',
  boxhd: '┬',
  boxhu: '┴',
  boxvh: '┼',
  boxH: '═',
  boxV: '║',
  boxdR: '╒',
  boxDr: '╓',
  boxDR: '╔',
  boxdL: '\u2555', // ╕ down single, left double
  boxDl: '\u2556', // ╖ down double, left single
  boxDL: '╗',
  boxuR: '╘',
  boxUr: '╙',
  boxUR: '╚',
  boxUl: '╜',
  boxuL: '╛',
  boxUL: '╝',
  boxvR: '╞',
  boxVr: '╟',
  boxVR: '╠',
  boxVl: '╢',
  boxvL: '╡',
  boxVL: '╣',
  boxHd: '╤',
  boxhD: '╥',
  boxHD: '╦',
  boxHu: '╧',
  boxhU: '╨',
  boxHU: '╩',
  boxvH: '╪',
  boxVh: '╫',
  boxVH: '╬',
};
/**
 * Punctuation marks, Latin-1 symbols and spacing diacritics.
 *
 * Many entries repeat names that also exist in the basic-Latin category with
 * the same value, so this map can be used on its own.
 *
 * @type {Record<string, string>}
 */
export const PUNCTUATION = {
  excl: '!',
  iexcl: '¡',
  brvbar: '¦',
  sect: '§',
  uml: '¨',
  copy: '©',
  ordf: 'ª',
  laquo: '«',
  not: '¬',
  shy: '\u00ad',
  reg: '®',
  macr: '¯',
  deg: '°',
  plusmn: '±',
  sup2: '²',
  sup3: '³',
  acute: '´',
  micro: 'µ',
  para: '¶',
  middot: '·',
  cedil: '¸',
  sup1: '¹',
  ordm: 'º',
  raquo: '»',
  frac14: '¼',
  frac12: '½',
  frac34: '¾',
  iquest: '¿',
  nbsp: '\u00a0',
  comma: ',',
  period: '.',
  colon: ':',
  semi: ';',
  vert: '|',
  Verbar: '‖',
  verbar: '|',
  dblac: '˝',
  circ: 'ˆ',
  caron: 'ˇ',
  breve: '˘',
  dot: '˙',
  ring: '˚',
  ogon: '˛',
  tilde: '˜',
  DiacriticalGrave: '`',
  DiacriticalAcute: '´',
  DiacriticalTilde: '˜',
  DiacriticalDot: '˙',
  DiacriticalDoubleAcute: '˝',
  grave: '`',
};
/**
 * Currency symbols.
 *
 * NOTE(review): `inr`, `af`, `birr`, `peso`, `rub`, `won` and `yuan` do not
 * appear to be WHATWG entity names — presumably project conveniences; confirm
 * before relying on them for standards-conformant decoding. `cedil` is not a
 * currency sign; it is kept so existing consumers of this map keep working.
 *
 * @type {Record<string, string>}
 */
export const CURRENCY = {
  cent: '¢',
  pound: '£',
  curren: '¤',
  yen: '¥',
  euro: '€',
  dollar: '$',
  fnof: 'ƒ',
  inr: '₹',
  af: '\u060b', // ؋ Afghani sign (escaped to avoid bidi rendering surprises)
  birr: 'ብር',
  peso: '₱',
  rub: '₽',
  won: '₩',
  yuan: '¥',
  cedil: '¸',
};
/**
 * Vulgar fractions and the fraction slash.
 *
 * @type {Record<string, string>}
 */
export const FRACTIONS = {
  frac12: '½',
  half: '½',
  frac13: '⅓',
  frac14: '¼',
  frac15: '⅕',
  frac16: '⅙',
  frac18: '⅛',
  frac23: '⅔',
  frac25: '⅖',
  frac34: '¾',
  frac35: '⅗',
  frac38: '⅜',
  frac45: '⅘',
  frac56: '⅚',
  frac58: '⅝',
  frac78: '⅞',
  frasl: '⁄',
};
/**
 * Miscellaneous technical and logic symbols: corners, circled and boxed
 * operators, turnstiles and relations with a combining vertical stroke.
 *
 * The circled ring operator is registered as `ocir` (and `circledcirc`).
 * It must NOT be registered as `ocirc`: that name is the Latin letter "ô",
 * and a same-named entry here would override it when maps are merged.
 *
 * Values made of a base character plus U+20D2 (combining long vertical line
 * overlay) are written as escapes so the combining mark stays visible.
 *
 * @type {Record<string, string>}
 */
export const MISC_SYMBOLS = {
  trade: '™',
  TRADE: '™',
  telrec: '⌕',
  target: '⌖',
  ulcorn: '⌜',
  ulcorner: '⌜',
  urcorn: '⌝',
  urcorner: '⌝',
  dlcorn: '⌞',
  llcorner: '⌞',
  drcorn: '⌟',
  lrcorner: '⌟',
  intercal: '⊺',
  intcal: '⊺',
  oplus: '⊕',
  CirclePlus: '⊕',
  ominus: '⊖',
  CircleMinus: '⊖',
  otimes: '⊗',
  CircleTimes: '⊗',
  osol: '⊘',
  odot: '⊙',
  CircleDot: '⊙',
  oast: '⊛',
  circledast: '⊛',
  odash: '⊝',
  circleddash: '⊝',
  ocir: '\u229a', // ⊚ circled ring operator
  circledcirc: '\u229a',
  boxplus: '⊞',
  plusb: '⊞',
  boxminus: '⊟',
  minusb: '⊟',
  boxtimes: '⊠',
  timesb: '⊠',
  boxdot: '⊡',
  sdotb: '⊡',
  veebar: '⊻',
  vee: '∨',
  barvee: '⊽',
  and: '∧',
  wedge: '∧',
  Cap: '⋒',
  Cup: '⋓',
  Fork: '⋔',
  pitchfork: '⋔',
  epar: '⋕',
  ltlarr: '⥶',
  nvap: '\u224d\u20d2',
  nvsim: '\u223c\u20d2',
  nvge: '\u2265\u20d2',
  nvle: '\u2264\u20d2',
  nvlt: '<\u20d2',
  nvgt: '>\u20d2',
  nvltrie: '\u22b4\u20d2',
  nvrtrie: '\u22b5\u20d2',
  vdash: '\u22a2', // ⊢ right tack
  dashv: '⊣',
  vDash: '⊨',
  Vdash: '\u22a9', // ⊩ forces
  Vvdash: '⊪',
  nvdash: '⊬',
  nvDash: '⊭',
  nVdash: '⊮',
  nVDash: '⊯',
};
/**
 * Every category map merged into one flat lookup table (use this when you
 * need everything rather than a selective import).
 *
 * The categories are spread in the order listed, so when the same entity name
 * occurs in more than one category the value from the LATER category wins.
 *
 * @type {Record<string, string>}
 */
export const ALL_ENTITIES = {
  ...BASIC_LATIN,
  ...LATIN_ACCENTS,
  ...LATIN_EXTENDED,
  ...GREEK,
  ...CYRILLIC,
  ...MATH,
  ...MATH_ADVANCED,
  ...ARROWS,
  ...SHAPES,
  ...PUNCTUATION,
  ...CURRENCY,
  ...FRACTIONS,
  ...MISC_SYMBOLS,
};
/**
 * The five predefined XML entities.
 *
 * @type {Record<string, string>}
 */
export const XML = {
  amp: '&',
  apos: "'",
  gt: '>',
  lt: '<',
  quot: '"',
};
/**
 * A small set of frequently used HTML entities (typographic quotes, dashes,
 * legal marks, common fractions). Values are written as escapes so the exact
 * code point is visible in source.
 *
 * @type {Record<string, string>}
 */
export const COMMON_HTML = {
  nbsp: '\u00a0',
  copy: '\u00a9',
  reg: '\u00ae',
  trade: '\u2122',
  mdash: '\u2014',
  ndash: '\u2013',
  hellip: '\u2026',
  laquo: '\u00ab',
  raquo: '\u00bb',
  lsquo: '\u2018',
  rsquo: '\u2019',
  ldquo: '\u201c',
  rdquo: '\u201d',
  bull: '\u2022',
  para: '\u00b6',
  sect: '\u00a7',
  deg: '\u00b0',
  frac12: '\u00bd',
  frac14: '\u00bc',
  frac34: '\u00be',
};
// ---------------------------------------------------------------------------
// Note: numeric entities (&#NNN; / &#xHH;) are handled by the scanner directly
// via String.fromCodePoint() without any map lookup.
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Built-in named entity map (name → replacement string)
// No regex, no {regex,val} objects — just flat key/value pairs.
// ---------------------------------------------------------------------------
| import { XML as DEFAULT_XML_ENTITIES } from "./entities.js" | ||
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Characters that are rejected anywhere inside a registered entity name. */
const SPECIAL_CHARS = new Set('!?\\/[]$%{}^&*()<>|+');

/**
 * Validate that an entity name contains no dangerous characters.
 *
 * A leading '#' is rejected separately from the per-character check; '#' at
 * any other position is accepted.
 *
 * @param {string} name
 * @returns {string} the name, unchanged
 * @throws {Error} when the name starts with '#' or contains a character from SPECIAL_CHARS
 */
function validateEntityName(name) {
  if (name[0] === '#') {
    throw new Error(`[EntityReplacer] Invalid character '#' in entity name: "${name}"`);
  }
  for (const ch of name) {
    if (SPECIAL_CHARS.has(ch)) {
      throw new Error(`[EntityReplacer] Invalid character '${ch}' in entity name: "${name}"`);
    }
  }
  return name;
}
/**
 * Merge one or more entity maps into a flat name→string map.
 *
 * Accepts either:
 *   - plain string values:               { amp: '&' }
 *   - legacy {regex,val} / {regx,val}:   { lt: { regex: /.../, val: '<' } }
 *
 * Only string replacements are kept: entries whose value (or legacy `val`) is
 * not a string — functions, numbers, … — are dropped. Falsy maps are ignored.
 * Later maps override earlier ones on the same key.
 *
 * Values are NOT filtered for '&' here (the base map legitimately contains
 * `amp: '&'`); callers that need that restriction must apply it themselves.
 *
 * The result has a null prototype, so inherited names such as `constructor`
 * or `__proto__` never resolve as entities.
 *
 * @param {...object} maps
 * @returns {Record<string, string>}
 */
function mergeEntityMaps(...maps) {
  const out = Object.create(null);
  for (const map of maps) {
    if (!map) continue;
    for (const [key, raw] of Object.entries(map)) {
      if (typeof raw === 'string') {
        out[key] = raw;
      } else if (raw && typeof raw === 'object' && typeof raw.val === 'string') {
        // Legacy {regex,val} or {regx,val} — only the string val is used;
        // function vals are not supported by the scanner and are skipped.
        out[key] = raw.val;
      }
    }
  }
  return out;
}
// ---------------------------------------------------------------------------
// applyLimitsTo helpers
// ---------------------------------------------------------------------------
const LIMIT_TIER_EXTERNAL = 'external'; // input/runtime + persistent external maps
const LIMIT_TIER_BASE = 'base'; // DEFAULT_XML_ENTITIES + namedEntities (system) maps
const LIMIT_TIER_ALL = 'all'; // every entity regardless of tier

/**
 * Resolve the `applyLimitsTo` option into a normalised Set of tier strings.
 *
 * Accepted values: 'external' | 'base' | 'all' | string[]
 *   - a recognised string yields a one-element Set;
 *   - an array is converted to a Set as-is (its members are not validated);
 *   - anything else, including a falsy value, falls back to 'external'.
 *
 * @param {string|string[]|undefined} raw
 * @returns {Set<string>}
 */
function parseLimitTiers(raw) {
  if (Array.isArray(raw)) return new Set(raw);
  if (raw === LIMIT_TIER_ALL || raw === LIMIT_TIER_BASE) return new Set([raw]);
  // Covers 'external', falsy input and unrecognised values: the safe default
  // is to count only externally supplied entities.
  return new Set([LIMIT_TIER_EXTERNAL]);
}
// ---------------------------------------------------------------------------
// EntityDecoder
// ---------------------------------------------------------------------------
/**
 * Single-pass, zero-regex entity decoder for XML/HTML content.
 *
 * Algorithm: scan the string once for '&', read to ';', resolve via map
 * or direct codepoint conversion, build output chunks, join once at the end.
 *
 * Entity lookup priority (highest → lowest):
 *   1. input / runtime (DOCTYPE entities for current document)
 *   2. persistent external (survive across documents)
 *   3. base named map (DEFAULT_XML_ENTITIES + user-supplied namedEntities)
 *
 * Both input and external resolve as the 'external' tier for limit purposes.
 * Base map entities resolve as the 'base' tier.
 *
 * Numeric / hex references (&#NNN; / &#xHH;) are resolved directly via
 * String.fromCodePoint() — no map needed. They count as 'base' tier.
 *
 * @example
 * const decoder = new EntityDecoder({ namedEntities: COMMON_HTML });
 * decoder.setExternalEntities({ brand: 'Acme' });
 *
 * const instance = decoder.reset();
 * instance.addInputEntities({ version: '1.0' });
 * instance.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 */
| export default class EntityDecoder { | ||
| /** | ||
| * @param {object} [options] | ||
| * @param {object|null} [options.namedEntities] — extra named entities merged into base map | ||
| * @param {number} [options.maxTotalExpansions=0] — 0 = unlimited | ||
| * @param {number} [options.maxExpandedLength=0] — 0 = unlimited | ||
| * @param {'external'|'base'|'all'|string[]} [options.applyLimitsTo='external'] | ||
| * Which entity tiers count against the security limits: | ||
| * - 'external' (default) — only input/runtime + persistent external entities | ||
| * - 'base' — only DEFAULT_XML_ENTITIES + namedEntities | ||
| * - 'all' — every entity regardless of tier | ||
| * - string[] — explicit combination, e.g. ['external', 'base'] | ||
| * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null] | ||
| */ | ||
| constructor(options = {}) { | ||
| this._maxTotalExpansions = options.maxTotalExpansions || 0; | ||
| this._maxExpandedLength = options.maxExpandedLength || 0; | ||
| this._postCheck = typeof options.postCheck === 'function' ? options.postCheck : r => r; | ||
| this._limitTiers = parseLimitTiers(options.applyLimitsTo ?? LIMIT_TIER_EXTERNAL); | ||
| this._numericAllowed = options.numericAllowed ?? true; | ||
| // Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Immutable after construction. | ||
| this._baseMap = mergeEntityMaps(DEFAULT_XML_ENTITIES, options.namedEntities || null); | ||
| // Persistent external entities — survive across documents. | ||
| // Stored as a separate map so reset() never touches them. | ||
| /** @type {Record<string, string>} */ | ||
| this._externalMap = Object.create(null); | ||
| // Input / runtime entities — current document only, wiped on reset(). | ||
| /** @type {Record<string, string>} */ | ||
| this._inputMap = Object.create(null); | ||
| // Per-document counters | ||
| this._totalExpansions = 0; | ||
| this._expandedLength = 0; | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Persistent external entity registration | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Replace the full set of persistent external entities. | ||
| * All keys are validated — throws on invalid characters. | ||
| * @param {Record<string, string | { regex?: RegExp, val: string }>} map | ||
| */ | ||
| setExternalEntities(map) { | ||
| if (map) { | ||
| for (const key of Object.keys(map)) { | ||
| validateEntityName(key); | ||
| } | ||
| } | ||
| this._externalMap = mergeEntityMaps(map); | ||
| } | ||
| /** | ||
| * Add a single persistent external entity. | ||
| * @param {string} key | ||
| * @param {string} value | ||
| */ | ||
| addExternalEntity(key, value) { | ||
| validateEntityName(key); | ||
| if (typeof value === 'string' && value.indexOf('&') === -1) { | ||
| this._externalMap[key] = value; | ||
| } | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Input / runtime entity registration (per document) | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Inject DOCTYPE entities for the current document. | ||
| * Also resets per-document expansion counters. | ||
| * @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string }>} map | ||
| */ | ||
| addInputEntities(map) { | ||
| this._totalExpansions = 0; | ||
| this._expandedLength = 0; | ||
| this._inputMap = mergeEntityMaps(map); | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Per-document reset | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Wipe input/runtime entities and reset counters. | ||
| * Call this before processing each new document. | ||
| * @returns {this} | ||
| */ | ||
| reset() { | ||
| this._inputMap = Object.create(null); | ||
| this._totalExpansions = 0; | ||
| this._expandedLength = 0; | ||
| return this; | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Primary API | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Replace all entity references in `str` in a single pass. | ||
| * | ||
| * @param {string} str | ||
| * @returns {string} | ||
| */ | ||
| decode(str) { | ||
| if (typeof str !== 'string' || str.length === 0) return str; | ||
| //TODO: check if needed | ||
| //if (str.indexOf('&') === -1) return str; // fast path — no entities at all | ||
| const original = str; | ||
| const chunks = []; | ||
| const len = str.length; | ||
| let last = 0; // start of next unprocessed literal chunk | ||
| let i = 0; | ||
| const limitExpansions = this._maxTotalExpansions > 0; | ||
| const limitLength = this._maxExpandedLength > 0; | ||
| const checkLimits = limitExpansions || limitLength; | ||
| while (i < len) { | ||
| // Scan forward to next '&' | ||
| if (str.charCodeAt(i) !== 38 /* '&' */) { i++; continue; } | ||
| // --- Found '&' at position i --- | ||
| // Scan forward to ';' | ||
| let j = i + 1; | ||
| while (j < len && str.charCodeAt(j) !== 59 /* ';' */ && (j - i) <= 32) j++; | ||
| if (j >= len || str.charCodeAt(j) !== 59) { | ||
| // No closing ';' within window — treat '&' as literal | ||
| i++; | ||
| continue; | ||
| } | ||
| // Raw token between '&' and ';' (exclusive) | ||
| const token = str.slice(i + 1, j); | ||
| if (token.length === 0) { i++; continue; } | ||
| let replacement; | ||
| let tier; // which limit tier this entity belongs to | ||
| if (token.charCodeAt(0) === 35 /* '#' */ && this._numericAllowed) { | ||
| // ---- Numeric reference — base tier ---- | ||
| replacement = this._resolveNumeric(token); | ||
| tier = LIMIT_TIER_BASE; | ||
| } else { | ||
| // ---- Named reference ---- | ||
| const resolved = this._resolveName(token); | ||
| replacement = resolved?.value; | ||
| tier = resolved?.tier; | ||
| } | ||
| if (replacement === undefined) { | ||
| // Unknown entity — leave as-is, advance past '&' only | ||
| i++; | ||
| continue; | ||
| } | ||
| // Flush literal chunk before this entity | ||
| if (i > last) chunks.push(str.slice(last, i)); | ||
| chunks.push(replacement); | ||
| last = j + 1; // skip past ';' | ||
| i = last; | ||
| // Apply expansion limits only if this tier is being tracked | ||
| if (checkLimits && this._tierCounts(tier)) { | ||
| if (limitExpansions) { | ||
| this._totalExpansions++; | ||
| if (this._totalExpansions > this._maxTotalExpansions) { | ||
| throw new Error( | ||
| `[EntityReplacer] Entity expansion count limit exceeded: ` + | ||
| `${this._totalExpansions} > ${this._maxTotalExpansions}` | ||
| ); | ||
| } | ||
| } | ||
| if (limitLength) { | ||
| // delta: replacement.length minus the raw &token; length (token.length + 2 for '&' and ';') | ||
| const delta = replacement.length - (token.length + 2); | ||
| if (delta > 0) { | ||
| this._expandedLength += delta; | ||
| if (this._expandedLength > this._maxExpandedLength) { | ||
| throw new Error( | ||
| `[EntityReplacer] Expanded content length limit exceeded: ` + | ||
| `${this._expandedLength} > ${this._maxExpandedLength}` | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| // Flush trailing literal | ||
| if (last < len) chunks.push(str.slice(last)); | ||
| // If nothing was replaced, chunks is empty — return original | ||
| const result = chunks.length === 0 ? str : chunks.join(''); | ||
| return this._postCheck(result, original); | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Private: limit tier check | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Returns true if a resolved entity of the given tier should count | ||
| * against the expansion/length limits. | ||
| * @param {string} tier — LIMIT_TIER_EXTERNAL | LIMIT_TIER_BASE | ||
| * @returns {boolean} | ||
| */ | ||
| _tierCounts(tier) { | ||
| if (this._limitTiers.has(LIMIT_TIER_ALL)) return true; | ||
| return this._limitTiers.has(tier); | ||
| } | ||
| // ------------------------------------------------------------------------- | ||
| // Private: entity resolution | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Resolve a named entity token (without & and ;). | ||
| * Priority: inputMap > externalMap > baseMap | ||
| * Returns the resolved value tagged with its limit tier. | ||
| * | ||
| * @param {string} name | ||
| * @returns {{ value: string, tier: string }|undefined} | ||
| */ | ||
| _resolveName(name) { | ||
| // input and external both count as 'external' tier for limit purposes — | ||
| // they are injected at runtime and are the untrusted surface. | ||
| if (name in this._inputMap) return { value: this._inputMap[name], tier: LIMIT_TIER_EXTERNAL }; | ||
| if (name in this._externalMap) return { value: this._externalMap[name], tier: LIMIT_TIER_EXTERNAL }; | ||
| if (name in this._baseMap) return { value: this._baseMap[name], tier: LIMIT_TIER_BASE }; | ||
| return undefined; | ||
| } | ||
| /** | ||
| * Resolve a numeric entity token (the part after '&', including '#'). | ||
| * Handles &#NNN; and &#xHH; (case-insensitive x). | ||
| * | ||
| * @param {string} token — e.g. '#38', '#x26', '#X26' | ||
| * @returns {string|undefined} | ||
| */ | ||
| _resolveNumeric(token) { | ||
| const second = token.charCodeAt(1); | ||
| let codePoint; | ||
| if (second === 120 || second === 88) { | ||
| // &#xHH; or &#XHH; — hex | ||
| // token is like 'x0026' — slice off 'x', leading zeros handled by parseInt | ||
| codePoint = parseInt(token.slice(2), 16); | ||
| } else { | ||
| // &#NNN; — decimal | ||
| // token is like '0038' | ||
| codePoint = parseInt(token.slice(1), 10); | ||
| } | ||
| if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10FFFF) { | ||
| return undefined; // invalid — leave as-is | ||
| } | ||
| return String.fromCodePoint(codePoint); | ||
| } | ||
| } |
| // EntityEncoder.js | ||
| import { trie1, trie2, trie3 } from './entityTries.js'; | ||
// The five XML-unsafe ASCII characters and the entity reference that
// replaces each one. Both lookup tables below are filled from this single
// list so they cannot drift apart.
const XML_UNSAFE_PAIRS = [
  [38, '&amp;'],  // &
  [60, '&lt;'],   // <
  [62, '&gt;'],   // >
  [34, '&quot;'], // "
  [39, '&apos;'], // '
];

// Replacement strings indexed by char code — direct array access, no hashing
const XML_UNSAFE_REPLACEMENT = new Array(128);

// Typed bitmask for O(1) "is this ASCII code XML-unsafe?" check
const IS_XML_UNSAFE = new Uint8Array(128);

for (const [code, reference] of XML_UNSAFE_PAIRS) {
  XML_UNSAFE_REPLACEMENT[code] = reference;
  IS_XML_UNSAFE[code] = 1;
}

// Fast pre-scan: bail out immediately if nothing needs encoding
// (no XML-unsafe character and no code unit at or above U+0080).
const NEEDS_PROCESSING = /[&<>"'\u0080-\uFFFF]/;
/**
 * Encoder that replaces characters with XML/HTML entity references.
 *
 * ASCII characters are handled through the module-level XML-unsafe tables;
 * non-ASCII text is matched against the integer-keyed tries (longest match
 * first: three code units, then two, then one).
 */
export default class EntityEncoder {
  /**
   * @param {object} [options]
   * @param {boolean} [options.encodeXmlSafe=true] encode `&`, `<`, `>`, `"`, `'`
   * @param {boolean} [options.encodeAllNamed=true] encode non-ASCII text
   *   through the named-entity tries; when false, non-ASCII text is copied
   *   through unchanged
   * @param {number} [options.maxReplacements=0] cumulative cap on
   *   replacements across `encode()` calls; 0 means unlimited
   */
  constructor(options = {}) {
    this.encodeXmlSafe = options.encodeXmlSafe !== false;
    this.encodeAllNamed = options.encodeAllNamed !== false;
    this.maxReplacements = options.maxReplacements || 0;
    this.replacementsCount = 0;
  }

  /**
   * Encode `str`. Non-string and empty inputs are returned untouched.
   *
   * Once the cumulative replacement cap is reached, the remainder of the
   * current string (and every later string, until `reset()`) is copied
   * through unencoded.
   *
   * @param {string} str
   * @returns {string}
   */
  encode(str) {
    if (typeof str !== 'string' || str.length === 0) return str;
    if (!NEEDS_PROCESSING.test(str)) return str;

    const maxRep = this.maxReplacements;
    if (maxRep > 0 && this.replacementsCount >= maxRep) return str;

    // Hoist to locals — avoids `this` property lookup inside the hot loop
    const encodeXmlSafe = this.encodeXmlSafe;
    const encodeAllNamed = this.encodeAllNamed;
    if (!encodeXmlSafe && !encodeAllNamed) return str; // nothing is enabled

    const len = str.length;
    let result = '';
    let last = 0; // start of the literal run not yet copied to `result`
    let i = 0;
    let limitReached = false;

    // ── Main loop ──────────────────────────────────────────────────────────
    // Runs while i <= len - 3, so i + 1 and i + 2 are always inside the
    // string and the three-unit lookup needs no bounds check. The last two
    // characters are handled by the tail loop below.
    const mainEnd = len - 3;
    while (i <= mainEnd) {
      const c0 = str.charCodeAt(i);

      // ── ASCII branch ───────────────────────────────────────────────────
      if (c0 < 128) {
        if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
          result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
          last = ++i;
          if (maxRep > 0 && ++this.replacementsCount >= maxRep) {
            limitReached = true;
            break;
          }
        } else {
          // Bulk-skip: advance to the next interesting position without
          // paying the outer loop overhead on every safe character.
          i++;
          while (i <= mainEnd) {
            const c = str.charCodeAt(i);
            if (c >= 128 || (encodeXmlSafe && IS_XML_UNSAFE[c] === 1)) break;
            i++;
          }
        }
        continue;
      }

      // ── Non-ASCII branch ───────────────────────────────────────────────
      // Named-entity encoding is switched off as a whole: no lookup of any
      // length may fire, not just the single-unit one.
      if (!encodeAllNamed) {
        i++;
        continue;
      }

      let matchedEntity = null;
      let advance = 1;

      // Try 3-unit match first (longest wins)
      const mid3 = trie3.get(c0);
      if (mid3 !== undefined) {
        const inner3 = mid3.get(str.charCodeAt(i + 1));
        if (inner3 !== undefined) {
          const candidate = inner3.get(str.charCodeAt(i + 2));
          if (candidate !== undefined) { matchedEntity = candidate; advance = 3; }
        }
      }

      // Try 2-unit match
      if (matchedEntity === null) {
        const inner2 = trie2.get(c0);
        if (inner2 !== undefined) {
          const candidate = inner2.get(str.charCodeAt(i + 1));
          if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
        }
      }

      // Try 1-unit match
      if (matchedEntity === null) {
        const candidate = trie1.get(c0);
        if (candidate !== undefined) matchedEntity = candidate;
      }

      if (matchedEntity === null) {
        i++;
        continue;
      }

      result += str.substring(last, i) + matchedEntity;
      i += advance;
      last = i;
      if (maxRep > 0 && ++this.replacementsCount >= maxRep) {
        limitReached = true;
        break;
      }
    }

    // ── Tail: the last 1-2 characters (no 3-unit match can fit) ────────────
    while (i < len && !limitReached) {
      const c0 = str.charCodeAt(i);

      if (c0 < 128) {
        if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
          result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
          last = ++i;
          if (maxRep > 0 && ++this.replacementsCount >= maxRep) limitReached = true;
        } else {
          i++;
        }
        continue;
      }

      if (!encodeAllNamed) {
        i++;
        continue;
      }

      let matchedEntity = null;
      let advance = 1;

      // A 2-unit match needs one more character after this one.
      if (i + 1 < len) {
        const inner2 = trie2.get(c0);
        if (inner2 !== undefined) {
          const candidate = inner2.get(str.charCodeAt(i + 1));
          if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
        }
      }

      if (matchedEntity === null) {
        const candidate = trie1.get(c0);
        if (candidate !== undefined) matchedEntity = candidate;
      }

      if (matchedEntity === null) {
        i++;
        continue;
      }

      result += str.substring(last, i) + matchedEntity;
      i += advance;
      last = i;
      if (maxRep > 0 && ++this.replacementsCount >= maxRep) limitReached = true;
    }

    // ── Flush any remaining literal suffix ─────────────────────────────────
    if (last < len) result += str.substring(last);
    return result;
  }

  /** Zero the cumulative replacement counter used by `maxReplacements`. */
  reset() {
    this.replacementsCount = 0;
  }
}
| // entityTries.js | ||
| // Builds integer-keyed tries so the encoder never allocates a string object | ||
| // during lookup — every key is a plain charCode number. | ||
| // | ||
| // trie1: Map<code0, entity> | ||
| // trie2: Map<code0, Map<code1, entity>> | ||
| // trie3: Map<code0, Map<code1, Map<code2, entity>>> | ||
| import { ALL_ENTITIES } from './entities.js'; | ||
// Reverse map: character sequence → "&name;".
// When several names share one character sequence, the name listed last in
// ALL_ENTITIES is the one kept.
const CHAR_TO_ENTITY = new Map(
  Object.entries(ALL_ENTITIES).map(([name, chars]) => [chars, `&${name};`])
);

export const trie1 = new Map(); // code0 → entity string
export const trie2 = new Map(); // code0 → Map → entity string
export const trie3 = new Map(); // code0 → Map → Map → entity string

/** Return the Map stored under `code` in `parent`, creating it on first use. */
const childOf = (parent, code) => {
  let child = parent.get(code);
  if (child === undefined) {
    child = new Map();
    parent.set(code, child);
  }
  return child;
};

// File each sequence into the trie that matches its length in UTF-16 code
// units. The three tries are independent, so a short sequence and a longer
// one that begins with the same code unit never overwrite each other.
// Sequences that are empty or longer than three code units are not indexed.
CHAR_TO_ENTITY.forEach((entity, chars) => {
  switch (chars.length) {
    case 1:
      trie1.set(chars.charCodeAt(0), entity);
      break;
    case 2:
      childOf(trie2, chars.charCodeAt(0)).set(chars.charCodeAt(1), entity);
      break;
    case 3:
      childOf(childOf(trie3, chars.charCodeAt(0)), chars.charCodeAt(1))
        .set(chars.charCodeAt(2), entity);
      break;
    default:
      break;
  }
});
+2
-2
| { | ||
| "name": "@nodable/entities", | ||
| "version": "1.1.0", | ||
| "description": "Replace XML, HTML, External entities with security controls", | ||
| "version": "2.0.0", | ||
| "description": "Entity parser for XML, HTML, External entities with security controls", | ||
| "main": "./src/index.js", | ||
@@ -6,0 +6,0 @@ "type": "module", |
+23
-417
@@ -1,435 +0,41 @@ | ||
| # `@nodable/entities` | ||
| # @nodable/entities | ||
| Standalone, zero-dependency XML/HTML entity replacement with: | ||
| Fast, zero-dependency XML/HTML entity encoder and decoder for Node.js. | ||
| - **5 entity categories** processed in a fixed, predictable order | ||
| - **Persistent vs. input entity separation** — no state leaks between documents | ||
| - **`reset()`** — clean per-document reset without cloning | ||
| - **Composable named entity groups** (HTML, currency, math, arrows, numeric refs) | ||
| - **Security limits** — cap total expansions and expanded length per document | ||
| - **Granular limit targeting** — apply limits to any subset of categories | ||
| - **`postCheck` hook** — inspect or sanitize the fully resolved string | ||
| ## Install | ||
| --- | ||
| ## Installation | ||
| ```sh | ||
| ```bash | ||
| npm install @nodable/entities | ||
| ``` | ||
| --- | ||
| ## Quick start | ||
| ## Quick Start | ||
| ```js | ||
| import EntityReplacer from '@nodable/entities'; | ||
| import { EntityEncoder, EntityDecoder, ALL_ENTITIES } from '@nodable/entities'; | ||
| const replacer = new EntityReplacer({ default: true }); | ||
| // Encode: plain text → entity references | ||
| const enc = new EntityEncoder(); | ||
| enc.encode('Hello © 2024 & <stuff>'); | ||
| // → 'Hello &copy; 2024 &amp; &lt;stuff&gt;' | ||
| replacer.replace('5 &lt; 10 &amp;&amp; x &gt; 0'); | ||
| // → '5 < 10 && x > 0' | ||
| // Decode: entity references → plain text | ||
| const dec = new EntityDecoder({ namedEntities: ALL_ENTITIES }); | ||
| dec.decode('Hello &copy; 2024 &amp; &lt;stuff&gt;'); | ||
| // → 'Hello © 2024 & <stuff>' | ||
| ``` | ||
| With named entity groups: | ||
| ## Performance | ||
| ```js | ||
| import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities'; | ||
| | | encode | decode | | ||
| |---|---|---| | ||
| | `entities` (npm) | 3.65 M req/s | 1.76 M req/s | | ||
| | `@nodable/entities` | 3.33 M req/s | **5.19 M req/s** | | ||
| const replacer = new EntityReplacer({ | ||
| default: true, | ||
| system: { ...COMMON_HTML, ...CURRENCY_ENTITIES }, | ||
| }); | ||
| ## Documentation | ||
| replacer.replace('© 2024 — Price: £9.99'); | ||
| // → '© 2024 — Price: £9.99' | ||
| ``` | ||
| - [EntityEncoder](docs/EntityEncoder.md) — options, API, recipes | ||
| - [EntityDecoder](docs/EntityDecoder.md) — options, API, security limits, entity sets | ||
| --- | ||
| ## Entity Categories | ||
| Entities are processed in this fixed order — not configurable: | ||
| ``` | ||
| persistent input/runtime → external → system → default → amp | ||
| ``` | ||
| ### `persistent external` — Caller-supplied configuration entities | ||
| Entities set at configuration time that survive across all documents. Never wiped by `reset()`. Set via `setExternalEntities()` or `addExternalEntity()` / `addEntity()`. | ||
| ```js | ||
| const replacer = new EntityReplacer({ default: true }); | ||
| replacer.setExternalEntities({ brand: 'Acme Corp', product: 'Widget Pro' }); | ||
| replacer.replace('&brand; makes &product;'); | ||
| // → 'Acme Corp makes Widget Pro' | ||
| ``` | ||
| ### `input / runtime` — Per-document DOCTYPE entities | ||
| Entities injected by the parser from the document's DOCTYPE block. Stored separately from persistent entities and **wiped on every `reset()` call** so they cannot leak between documents. | ||
| Set via `addInputEntities()`. Never call this manually — `BaseOutputBuilder` calls it automatically. | ||
| ### `system` — Named entity groups | ||
| Opt-in. Trusted programmer-supplied groups. Compose freely: | ||
| ```js | ||
| import { | ||
| COMMON_HTML, | ||
| CURRENCY_ENTITIES, | ||
| MATH_ENTITIES, | ||
| ARROW_ENTITIES, | ||
| NUMERIC_ENTITIES, | ||
| } from '@nodable/entities'; | ||
| const replacer = new EntityReplacer({ | ||
| system: { ...COMMON_HTML, ...MATH_ENTITIES }, | ||
| }); | ||
| ``` | ||
| | Group | Contents | | ||
| |---------------------|----------| | ||
| | `COMMON_HTML` | ` ` `©` `®` `™` `—` `–` `…` `«` `»` `‘` `’` `“` `”` `•` `¶` `§` `°` `½` `¼` `¾` | | ||
| | `CURRENCY_ENTITIES` | `¢` `£` `¥` `€` `&inr;` `¤` `ƒ` | | ||
| | `MATH_ENTITIES` | `×` `÷` `±` `−` `²` `³` `‰` `∞` `∑` `∏` `√` `≠` `≤` `≥` | | ||
| | `ARROW_ENTITIES` | `←` `↑` `→` `↓` `↔` `⇐` `⇑` `⇒` `⇓` `⇔` | | ||
| | `NUMERIC_ENTITIES` | `&#NNN;` decimal and `&#xHH;` hex refs — any valid Unicode code point | | ||
| ### `default` — Built-in XML entities | ||
| Always on unless explicitly disabled. | ||
| | Entity | Output | | ||
| |----------|--------| | ||
| | `&lt;` | `<` | | ||
| | `&gt;` | `>` | | ||
| | `&quot;` | `"` | | ||
| | `&apos;` | `'` | | ||
| ### `amp` — Final pass | ||
| `&amp;` → `&` | ||
| Processed **after all other categories** to prevent double-expansion: | ||
| - `&amp;lt;` → `&lt;` ✓ (not `<`) | ||
| - `&amp;amp;` → `&amp;` ✓ (not `&`) | ||
| --- | ||
| ## Constructor API | ||
| ```js | ||
| const replacer = new EntityReplacer({ | ||
| // Category toggles | ||
| default: true, // true (default) | false | custom EntityTable object | ||
| amp: true, // true (default) | false | null | ||
| system: false, // false (default) | true for COMMON_HTML | EntityTable object | ||
| // Security limits — 0 = unlimited | ||
| maxTotalExpansions: 0, | ||
| maxExpandedLength: 0, | ||
| // Which categories count against the limits | ||
| applyLimitsTo: 'external', // 'external' (default) | 'all' | ['external', 'system'] | ... | ||
| // Post-processing hook — fires once on the fully resolved string | ||
| postCheck: resolved => resolved, // (resolved: string, original: string) => string | ||
| }); | ||
| ``` | ||
| --- | ||
| ## EntityReplacer Instance Methods | ||
| ### `replace(str)` | ||
| Replace all entity references in `str`. Returns `str` unchanged (same reference) if no `&` is present — fast path. | ||
| ```js | ||
| replacer.replace('Tom & Jerry <cartoons>'); | ||
| // → 'Tom & Jerry <cartoons>' | ||
| ``` | ||
| ### `setExternalEntities(map)` | ||
| Replace the full set of **persistent** external entities. These survive across all documents and are not cleared by `reset()`. | ||
| ```js | ||
| replacer.setExternalEntities({ brand: 'Acme', year: '2025' }); | ||
| ``` | ||
| Calling this a second time replaces the entire persistent map. Values containing `&` are silently skipped. | ||
| ### `addExternalEntity(key, value)` | ||
| Append a single persistent external entity without disturbing the rest. | ||
| ```js | ||
| replacer.addExternalEntity('brand', 'Acme'); | ||
| replacer.addExternalEntity('year', '2025'); | ||
| ``` | ||
| ### `addInputEntities(map)` | ||
| Inject **input/runtime** (DOCTYPE) entities for the current document. These are stored separately from persistent entities and wiped on the next `reset()` call. Also resets per-document expansion counters. | ||
| ```js | ||
| // Called automatically by BaseOutputBuilder — no manual wiring needed. | ||
| replacer.addInputEntities(doctypeEntityMap); | ||
| ``` | ||
| Values containing `&` are silently skipped. Accepts pre-built `{ regex, val }` or `{ regx, val }` objects as produced by `DocTypeReader`. | ||
| ### `reset()` | ||
| Reset all per-document state and return `this`. | ||
| **Clears:** | ||
| - input/runtime entities (DOCTYPE) | ||
| - `_totalExpansions` counter | ||
| - `_expandedLength` counter | ||
| **Preserves:** | ||
| - persistent external entities set via `setExternalEntities()` / `addExternalEntity()` | ||
| - all constructor config | ||
| The builder factory calls this when creating a new builder instance, ensuring each document starts clean whether or not it has a DOCTYPE. | ||
| ```js | ||
| // In a builder factory: | ||
| reset() { | ||
| const builder = new MyBuilder(this.config); | ||
| builder.entityParser = this.entityVP.reset(); | ||
| return builder; | ||
| } | ||
| ``` | ||
| --- | ||
| ## Document-to-Document Safety | ||
| A key design goal is that entities from one document never bleed into the next. Here's how the two categories work together: | ||
| ``` | ||
| Document 1 parse: | ||
| factory.reset() → evp.reset() [clears input, resets counters] | ||
| builder sees DOCTYPE → evp.addInputEntities({ version: '1.0' }) | ||
| builder processes values → evp.parse('&brand; v&version;') → 'Acme v1.0' | ||
| Document 2 parse (no DOCTYPE): | ||
| factory.reset() → evp.reset() [clears &version;, resets counters] | ||
| no DOCTYPE → addInputEntities() not called | ||
| builder processes values → evp.parse('&brand; v&version;') → 'Acme v&version;' | ||
| ↑ persistent &brand; works | ||
| ↑ &version; is gone — correct | ||
| ``` | ||
| --- | ||
| ## Security Controls | ||
| ### Expansion count limit | ||
| Caps the number of entity references that may be expanded per document. | ||
| ```js | ||
| const replacer = new EntityReplacer({ maxTotalExpansions: 1000 }); | ||
| ``` | ||
| Throws `Error` if exceeded: | ||
| > `[EntityReplacer] Entity expansion count limit exceeded: 1001 > 1000` | ||
| ### Expanded length limit | ||
| Caps the total number of characters *added* by entity expansion per document. | ||
| ```js | ||
| const replacer = new EntityReplacer({ maxExpandedLength: 65536 }); | ||
| ``` | ||
| Throws `Error` if exceeded: | ||
| > `[EntityReplacer] Expanded content length limit exceeded: 65537 > 65536` | ||
| ### `applyLimitsTo` | ||
| Controls which categories count against the limits. | ||
| ```js | ||
| // Default — only untrusted injected entities (safest) | ||
| applyLimitsTo: 'external' | ||
| // All categories | ||
| applyLimitsTo: 'all' | ||
| // Specific combination | ||
| applyLimitsTo: ['external', 'system'] | ||
| applyLimitsTo: ['external', 'default'] | ||
| ``` | ||
| --- | ||
| ## `postCheck` Hook | ||
| Fires **once** on the fully resolved string, after all categories have been processed. Not called if the string is unchanged (no `&` present or no matches found). | ||
| ```js | ||
| // Signature | ||
| postCheck: (resolved: string, original: string) => string | ||
| ``` | ||
| - `resolved` — string after all entity replacements | ||
| - `original` — the original input string before any replacement | ||
| - Must **return a string** | ||
| - To reject expansion: `return original` | ||
| - To sanitize: return a modified version of `resolved` | ||
| Examples: | ||
| ```js | ||
| // Reject if expansion produces any HTML tags | ||
| postCheck: (resolved, original) => | ||
| /<[a-z]/i.test(resolved) ? original : resolved | ||
| // Strip all tag-like content from the result | ||
| postCheck: (resolved) => | ||
| resolved.replace(/<[^>]*>/g, '') | ||
| ``` | ||
| --- | ||
| ## Integration with — flex-xml-parser adapter | ||
| ### Setup | ||
| ```js | ||
| import EntityReplacer, { COMMON_HTML } from '@nodable/entities'; | ||
| const evp = new EntityReplacer({ | ||
| system: COMMON_HTML, | ||
| maxTotalExpansions: 500, | ||
| }); | ||
| // Persistent entities — survive across all documents: | ||
| evp.setExternalEntities({ brand: 'Acme', product: 'Widget' }); | ||
| // Register with the builder factory: | ||
| myBuilder.registerValueParser('entity', evp); | ||
| const parser = new XMLParser({ OutputBuilder: myBuilder }); | ||
| parser.parse(xml); | ||
| ``` | ||
| ### Constructor options | ||
| All `EntityReplacerOptions` are accepted, plus one extra: | ||
| ```js | ||
| new EntityReplacer({ | ||
| // All EntityReplacer options... | ||
| default: true, | ||
| system: COMMON_HTML, | ||
| maxTotalExpansions: 1000, | ||
| postCheck: (resolved, original) => resolved, | ||
| // Extra: initial persistent entity map (same as calling setExternalEntities after construction) | ||
| entities: { copy: '©', trade: '™', brand: 'Acme Corp' }, | ||
| }) | ||
| ``` | ||
| ### `reset()` — called by builder factory | ||
| Reset per-document state (input entities + counters) and return `this`. The builder factory calls this each time it creates a new builder instance. | ||
| ```js | ||
| // In your CompactObjBuilderFactory.reset(): | ||
| reset() { | ||
| const builder = new CompactObjBuilder(this._config); | ||
| // Reset EVP for the new document: | ||
| builder.entityParser = this._entityVP.reset(); | ||
| return builder; | ||
| } | ||
| ``` | ||
| --- | ||
| ## Custom Entity Tables | ||
| Pass any plain object as `default` or `system` to replace the built-in set: | ||
| ```js | ||
| const myEntities = { | ||
| br: { regex: /&br;/g, val: '\n' }, | ||
| tab: { regex: /&tab;/g, val: '\t' }, | ||
| }; | ||
| const replacer = new EntityReplacer({ default: myEntities }); | ||
| replacer.replace('line1&br;line2&tab;indented'); | ||
| // → 'line1\nline2\tindented' | ||
| ``` | ||
| Extend the built-in tables via spreading: | ||
| ```js | ||
| import { DEFAULT_XML_ENTITIES } from '@nodable/entities'; | ||
| const replacer = new EntityReplacer({ | ||
| default: { ...DEFAULT_XML_ENTITIES, br: { regex: /&br;/g, val: '\n' } }, | ||
| }); | ||
| ``` | ||
| --- | ||
| ## Comparison with `entities` npm package | ||
| | Feature | `entities` pkg | `@nodable/entities` | | ||
| |------------------------------------------------|-------------------|---------------------| | ||
| | XML entity decoding | ✅ | ✅ | | ||
| | HTML entity decoding | ✅ full ~2000 | ✅ grouped, composable | | ||
| | Numeric refs with leading zeros | ✅ | ✅ | | ||
| | DOCTYPE / external entity injection | ❌ | ✅ | | ||
| | Persistent vs. input entity separation | ❌ | ✅ | | ||
| | Per-document reset via `reset()` | ❌ | ✅ | | ||
| | Expansion count limit | ❌ | ✅ | | ||
| | Expanded length limit | ❌ | ✅ | | ||
| | `applyLimitsTo` granularity | ❌ | ✅ | | ||
| | `postCheck` hook | ❌ | ✅ | | ||
| | Encoding / HTML escaping | ✅ | ❌ out of scope | | ||
| | Zero dependencies | ✅ | ✅ | | ||
| --- | ||
| ## TypeScript | ||
| Full TypeScript declarations are included via `index.d.ts`. No `@types/` package needed. | ||
| ```ts | ||
| import EntityReplacer, { | ||
| COMMON_HTML, | ||
| EntityTable, | ||
| EntityReplacerOptions, | ||
| } from '@nodable/entities'; | ||
| // EntityReplacer | ||
| const opts: EntityReplacerOptions = { | ||
| default: true, | ||
| system: COMMON_HTML, | ||
| maxTotalExpansions: 500, | ||
| postCheck: (resolved, original) => | ||
| /<script/i.test(resolved) ? original : resolved, | ||
| }; | ||
| const replacer = new EntityReplacer(opts); | ||
| replacer.setExternalEntities({ brand: 'Acme' }); | ||
| replacer.reset(); // reset for new document | ||
| replacer.addInputEntities({ version: '1.0' }); // from DOCTYPE | ||
| ``` | ||
| ## Note | ||
| This library silently skips numeric entities that are out of range. For example, `&#x110000;` is left as-is. | ||
| ## License | ||
| MIT | ||
| MIT |
+128
-215
@@ -5,68 +5,104 @@ // --------------------------------------------------------------------------- | ||
| /** A function-based entity replacement value (used for numeric refs). */ | ||
| export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string; | ||
| // --------------------------------------------------------------------------- | ||
| // Entity table shape | ||
| // Encoder options | ||
| // --------------------------------------------------------------------------- | ||
| /** A function-based entity replacement value (used for numeric refs). */ | ||
| export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string; | ||
| export interface EntityEncoderOptions { | ||
| /** | ||
| * Whether to encode XML unsafe characters: `&`, `<`, `>`, `"`, `'`. | ||
| * @default true | ||
| */ | ||
| encodeXmlSafe?: boolean; | ||
| /** A single entity entry: a regex and its replacement value. */ | ||
| export interface EntityEntry { | ||
| regex: RegExp; | ||
| val: string | EntityValFn; | ||
| /** | ||
| * Whether to encode non‑ASCII characters (e.g. `é` → `é`) using the | ||
| * built‑in named entity trie. | ||
| * @default true | ||
| */ | ||
| encodeAllNamed?: boolean; | ||
| /** | ||
| * Maximum number of replacements performed **cumulatively** across all | ||
| * `encode()` calls. `0` means unlimited. | ||
| * | ||
| * Use `reset()` to reset the internal counter. | ||
| * @default 0 | ||
| */ | ||
| maxReplacements?: number; | ||
| } | ||
| /** A map of entity name → EntityEntry. */ | ||
| export type EntityTable = Record<string, EntityEntry>; | ||
| // --------------------------------------------------------------------------- | ||
| // Constructor options | ||
| // EntityEncoder class | ||
| // --------------------------------------------------------------------------- | ||
| /** | ||
| * Controls which entity categories count toward the expansion limits. | ||
| * High‑performance encoder that replaces characters with XML/HTML entities. | ||
| * | ||
| * - `'external'` — only untrusted / injected entities (default, safest) | ||
| * - `'all'` — shorthand for all categories | ||
| * - `string[]` — any combination of `'external'`, `'system'`, `'default'` | ||
| * - Escapes XML unsafe characters (`&`, `<`, `>`, `"`, `'`) when `encodeXmlSafe` is true. | ||
| * - Replaces non‑ASCII characters (e.g. `é`, `©`) with named entities using | ||
| * a compact trie‑based lookup when `encodeAllNamed` is true. | ||
| * - Supports a cumulative replacement limit (`maxReplacements`) that persists | ||
| * across multiple `encode()` calls until `reset()` is called. | ||
| * | ||
| * @example | ||
| * const encoder = new EntityEncoder({ encodeXmlSafe: true, encodeAllNamed: true }); | ||
| * encoder.encode('<foo>'); // "<foo>" | ||
| * encoder.encode('© 2025'); // "© 2025" | ||
| * | ||
| * // With limit | ||
| * const limited = new EntityEncoder({ maxReplacements: 2 }); | ||
| * limited.encode('<>&'); // "<>&" (third replacement omitted) | ||
| * limited.reset(); // reset counter | ||
| */ | ||
| export type ApplyLimitsTo = 'external' | 'all' | Array<'external' | 'system' | 'default'>; | ||
| export class EntityEncoder { | ||
| constructor(options?: EntityEncoderOptions); | ||
| /** | ||
| * Options accepted by the `EntityReplacer` constructor. | ||
| */ | ||
| export interface EntityReplacerOptions { | ||
| /** | ||
| * Built-in XML entities: `<` `>` `"` `'` | ||
| * Encode a string by replacing XML‑unsafe characters and (optionally) | ||
| * non‑ASCII characters with named entities. | ||
| * | ||
| * - `true` — use built-in table (default) | ||
| * - `false` — disable | ||
| * - `object` — use a custom table instead of the built-in set | ||
| * @default true | ||
| * If `maxReplacements` is set and the cumulative limit has been reached, | ||
| * the input string is returned unchanged. | ||
| * | ||
| * @returns Encoded string (may be identical to input if no replacements needed | ||
| * or the limit has been exhausted). | ||
| */ | ||
| default?: boolean | EntityTable | null; | ||
| encode(str: string): string; | ||
| /** | ||
| * `&` → `&` final pass (always processed last to prevent double-expansion). | ||
| * @default true | ||
| * Reset the internal replacement counter. | ||
| * Does **not** change `encodeXmlSafe`, `encodeAllNamed`, or `maxReplacements`. | ||
| */ | ||
| amp?: boolean | null; | ||
| reset(): void; | ||
| } | ||
| // --------------------------------------------------------------------------- | ||
| // Constructor options for EntityDecoder (existing) | ||
| // --------------------------------------------------------------------------- | ||
| /** | ||
| * Controls which entity categories count toward the expansion limits. | ||
| * | ||
| * - `'external'` — only untrusted / injected entities (default) | ||
| * - `'base'` — only built‑in XML entities + user‑supplied `namedEntities` | ||
| * - `'all'` — all entities regardless of tier | ||
| * - `string[]` — explicit combination, e.g. `['external', 'base']` | ||
| */ | ||
| export type ApplyLimitsTo = 'external' | 'base' | 'all' | Array<'external' | 'base'>; | ||
| export interface EntityDecoderOptions { | ||
| /** | ||
| * Named entity groups (system-level, trusted). | ||
| * Extra named entities merged into the **base map** (trusted, counts as `'base'` tier). | ||
| * These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`). | ||
| * Values containing `&` are silently skipped to prevent recursive expansion. | ||
| * | ||
| * - `false` — disabled (default) | ||
| * - `true` — enables `COMMON_HTML` built-in group | ||
| * - `object` — use the supplied table (compose freely with exported groups) | ||
| * | ||
| * @example | ||
| * import { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities'; | ||
| * new EntityReplacer({ system: { ...COMMON_HTML, ...CURRENCY_ENTITIES } }); | ||
| * | ||
| * @default false | ||
| * @default null | ||
| */ | ||
| system?: boolean | EntityTable | null; | ||
| namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null; | ||
| /** | ||
| * Maximum number of entity references expanded per document. | ||
| * Maximum number of entity references expanded **per document**. | ||
| * `0` means unlimited. | ||
@@ -78,3 +114,3 @@ * @default 0 | ||
| /** | ||
| * Maximum number of characters *added* by entity expansion per document. | ||
| * Maximum number of characters **added** by entity expansion per document. | ||
| * `0` means unlimited. | ||
@@ -86,3 +122,9 @@ * @default 0 | ||
| /** | ||
| * Which entity categories count toward the expansion limits. | ||
| * Which entity tiers count toward the expansion limits. | ||
| * | ||
| * - `'external'` (default) – only input/runtime + persistent external entities | ||
| * - `'base'` – only built‑in XML + `namedEntities` | ||
| * - `'all'` – every entity regardless of tier | ||
| * - `string[]` – explicit combination, e.g. `['external', 'base']` | ||
| * | ||
| * @default 'external' | ||
@@ -93,3 +135,3 @@ */ | ||
| /** | ||
| * Hook called once on the fully resolved string (after all categories). | ||
| * Hook called once on the fully decoded string (after all replacements). | ||
| * | ||
@@ -105,59 +147,39 @@ * - Receives `(resolved, original)` and **must return a string**. | ||
| postCheck?: ((resolved: string, original: string) => string) | null; | ||
| /** | ||
| * Whether numeric character references (`&#NNN;`, `&#xHH;`) are allowed. | ||
| * @default true | ||
| */ | ||
| numericAllowed?: boolean; | ||
| } | ||
| // --------------------------------------------------------------------------- | ||
| // EntityReplacer class | ||
| // EntityDecoder class (default export) | ||
| // --------------------------------------------------------------------------- | ||
| /** | ||
| * Standalone, zero-dependency XML/HTML entity replacer. | ||
| * Single‑pass, zero‑regex entity decoder for XML/HTML content. | ||
| * | ||
| * ## Entity categories and replacement order | ||
| * ## Entity lookup priority (highest → lowest) | ||
| * 1. **input / runtime** – injected via `addInputEntities()` (DOCTYPE per document) | ||
| * 2. **persistent external** – set via `setExternalEntities()` / `addExternalEntity()` | ||
| * 3. **base map** – built‑in XML entities + user‑supplied `namedEntities` | ||
| * | ||
| * Entities are processed in this fixed order per `replace()` call: | ||
| * 1. **persistent external** — set via `setExternalEntities()` / `addExternalEntity()` | ||
| * 2. **input / runtime** — injected via `addInputEntities()` (DOCTYPE per-document) | ||
| * 3. **system** — named entity groups (e.g. `COMMON_HTML`) | ||
| * 4. **default** — built-in XML entities (`lt`, `gt`, `apos`, `quot`) | ||
| * 5. **amp** — `&` → `&` (always last) | ||
| * 6. **postCheck** — optional hook on the fully resolved string | ||
| * Numeric references (`&#NNN;`, `&#xHH;`) are resolved directly and count as the `'base'` tier. | ||
| * | ||
| * ## Lifecycle with `@nodable/flexible-xml-parser` | ||
| * @example | ||
| * const decoder = new EntityDecoder({ | ||
| * namedEntities: COMMON_HTML, | ||
| * maxTotalExpansions: 100 | ||
| * }); | ||
| * decoder.setExternalEntities({ brand: 'Acme' }); | ||
| * | ||
| * Construct once, then let the builder factory drive the lifecycle: | ||
| * decoder.addInputEntities({ version: '1.0' }); | ||
| * decoder.decode('&brand; v&version; <'); // 'Acme v1.0 <' | ||
| * | ||
| * ```ts | ||
| * const replacer = new EntityReplacer({ default: true, system: COMMON_HTML }); | ||
| * replacer.setExternalEntities({ brand: 'Acme' }); // persistent — survives all docs | ||
| * | ||
| * // Builder factory calls getInstance() when creating a new builder instance: | ||
| * const instance = replacer.getInstance(); | ||
| * | ||
| * // Builder calls addInputEntities() if the document has a DOCTYPE block: | ||
| * instance.addInputEntities(doctypeEntities); | ||
| * | ||
| * // Builder calls replace() (indirectly via ValueParser) for each text node: | ||
| * instance.replace('&brand; v&version; <'); // 'Acme v1.0 <' | ||
| * ``` | ||
| * decoder.reset(); // clears input entities + counters, keeps external entities | ||
| */ | ||
| export default class EntityReplacer { | ||
| constructor(options?: EntityReplacerOptions); | ||
| export default class EntityDecoder { | ||
| constructor(options?: EntityDecoderOptions); | ||
| // ------------------------------------------------------------------------- | ||
| // Persistent external entities (survive across documents) | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Replace the full set of persistent external entities. | ||
| * | ||
| * These entities survive across all documents — they are **not** wiped by | ||
| * `getInstance()`. Use them for caller-supplied entities that are fixed at | ||
| * configuration time (e.g. brand names, product codes). | ||
| * | ||
| * Calling this a second time replaces the previous persistent entity map. | ||
| * | ||
| * Values containing `&` are silently skipped to prevent recursive expansion. | ||
| * | ||
| * @param map Entity name → replacement string, or pre-built `{ regex, val }` object. | ||
| */ | ||
| setExternalEntities( | ||
@@ -167,28 +189,4 @@ map: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | ||
| /** | ||
| * Append a single persistent external entity without disturbing the rest. | ||
| * | ||
| * @param key Bare entity name without `&` / `;` — e.g. `'copy'` | ||
| * @param value Replacement string — must not contain `&` | ||
| * @throws if `key` contains regex-special characters | ||
| */ | ||
| addExternalEntity(key: string, value: string): void; | ||
| // ------------------------------------------------------------------------- | ||
| // Input / runtime entities (per document, cleared by getInstance) | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Inject DOCTYPE (input/runtime) entities for the **current document only**. | ||
| * | ||
| * These are stored separately from persistent entities. They are wiped on | ||
| * the next `getInstance()` call so they never leak into subsequent documents. | ||
| * | ||
| * Also resets the per-document expansion counters. | ||
| * | ||
| * Accepts both plain string values and `{ regx, val }` / `{ regex, val }` | ||
| * objects as produced by `DocTypeReader`. | ||
| * | ||
| * @param map Raw entity map from the DOCTYPE reader. | ||
| */ | ||
| addInputEntities( | ||
@@ -203,111 +201,26 @@ map: Record< | ||
| // ------------------------------------------------------------------------- | ||
| // Builder factory integration | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Reset all per-document state and return `this`. | ||
| * | ||
| * Clears: | ||
| * - input / runtime entities (DOCTYPE) | ||
| * - `_totalExpansions` counter | ||
| * - `_expandedLength` counter | ||
| * | ||
| * Does **not** clear persistent external entities set via | ||
| * `setExternalEntities()` / `addExternalEntity()`. | ||
| * | ||
| * The builder factory calls this when creating a new builder instance, | ||
| * ensuring each document starts clean regardless of whether it has a DOCTYPE. | ||
| * | ||
| */ | ||
| reset(): this; | ||
| // ------------------------------------------------------------------------- | ||
| // Primary API | ||
| // ------------------------------------------------------------------------- | ||
| /** | ||
| * Replace all entity references in `str`. | ||
| * Returns `str` unchanged if it contains no `&` character (fast path). | ||
| */ | ||
| replace(str: string): string; | ||
| /** | ||
| * wrapper on replace() | ||
| */ | ||
| parse(str: string): string; | ||
| decode(str: string): string; | ||
| } | ||
| // --------------------------------------------------------------------------- | ||
| // EntitiesValueParser | ||
| // Named entity group exports (for use with `namedEntities` option) | ||
| // --------------------------------------------------------------------------- | ||
| /** | ||
| * Raw DOCTYPE entity map shape as produced by `DocTypeReader`. | ||
| * Values are either plain strings or `{ regx, val }` objects | ||
| * (note: `regx`, not `regex` — matches the reader's output field name). | ||
| */ | ||
| export type DocTypeEntityMap = Record< | ||
| string, | ||
| | string | ||
| | { regx: RegExp; val: string | EntityValFn } | ||
| | { regex: RegExp; val: string | EntityValFn } | ||
| >; | ||
| /** | ||
| * ValueParser context object passed by `@nodable/flexible-xml-parser`. | ||
| * All fields are optional; `parse()` accepts but ignores this argument. | ||
| */ | ||
| export interface ValueParserContext { | ||
| elementName?: string; | ||
| elementValue?: string; | ||
| elementType?: string; | ||
| matcher?: unknown; | ||
| isLeafNode?: boolean; | ||
| } | ||
| // --------------------------------------------------------------------------- | ||
| // Named entity group exports | ||
| // --------------------------------------------------------------------------- | ||
| /** | ||
| * ~20 most commonly needed HTML named entities. | ||
| * Includes: ` ` `©` `®` `™` `—` `–` | ||
| * `…` `«` `»` `‘` `’` `“` `”` | ||
| * `•` `¶` `§` `°` `½` `¼` `¾` | ||
| */ | ||
| export const COMMON_HTML: EntityTable; | ||
| /** | ||
| * Currency symbol entities. | ||
| * Includes: `¢` `£` `¥` `€` `&inr;` `¤` `ƒ` | ||
| */ | ||
| export const CURRENCY_ENTITIES: EntityTable; | ||
| /** | ||
| * Mathematical operator entities. | ||
| * Includes: `×` `÷` `±` `−` `²` `³` | ||
| * `‰` `∞` `∑` `∏` `√` `≠` `≤` `≥` | ||
| */ | ||
| export const MATH_ENTITIES: EntityTable; | ||
| /** | ||
| * Arrow entities. | ||
| * Includes: `←` `↑` `→` `↓` `↔` | ||
| * and their double-stroke variants `⇐` `⇑` `⇒` `⇓` `⇔` | ||
| */ | ||
| export const ARROW_ENTITIES: EntityTable; | ||
| /** | ||
| * Numeric character reference entities. | ||
| * Handles any valid decimal `&#NNN;` and hex `&#xHH;` code point reference. | ||
| */ | ||
| export const NUMERIC_ENTITIES: EntityTable; | ||
| /** The built-in XML entity table (`lt`, `gt`, `apos`, `quot`). */ | ||
| export const DEFAULT_XML_ENTITIES: EntityTable; | ||
| /** The `&` entity entry used in the final expansion pass. */ | ||
| export const AMP_ENTITY: EntityEntry; | ||
| export const COMMON_HTML: Record<string, string>; | ||
| export const ALL_ENTITIES: Record<string, string>; | ||
| export const XML: Record<string, string>; | ||
| export const BASIC_LATIN: Record<string, string>; | ||
| export const LATIN_ACCENTS: Record<string, string>; | ||
| export const LATIN_EXTENDED: Record<string, string>; | ||
| export const GREEK: Record<string, string>; | ||
| export const CYRILLIC: Record<string, string>; | ||
| export const MATH: Record<string, string>; | ||
| export const MATH_ADVANCED: Record<string, string>; | ||
| export const ARROWS: Record<string, string>; | ||
| export const SHAPES: Record<string, string>; | ||
| export const PUNCTUATION: Record<string, string>; | ||
| export const CURRENCY: Record<string, string>; | ||
| export const FRACTIONS: Record<string, string>; | ||
| export const MISC_SYMBOLS: Record<string, string>; |
+20
-17
@@ -6,22 +6,25 @@ /** | ||
| * | ||
| * @example | ||
| * import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities'; | ||
| * | ||
| * const replacer = new EntityReplacer({ | ||
| * default: true, | ||
| * system: { ...COMMON_HTML, ...CURRENCY_ENTITIES }, | ||
| * }); | ||
| * | ||
| * replacer.replace('Price: £9.99 — © 2024'); | ||
| * // → 'Price: £9.99 — © 2024' | ||
| */ | ||
| export { default } from './EntityReplacer.js'; | ||
| export { DEFAULT_XML_ENTITIES, AMP_ENTITY } from './EntityReplacer.js'; | ||
| export { default as EntityDecoder } from './EntityDecoder.js'; | ||
| export { | ||
| COMMON_HTML, | ||
| CURRENCY_ENTITIES, | ||
| MATH_ENTITIES, | ||
| ARROW_ENTITIES, | ||
| NUMERIC_ENTITIES, | ||
| } from './groups.js'; | ||
| XML, | ||
| ALL_ENTITIES, | ||
| ARROWS, | ||
| BASIC_LATIN, | ||
| CURRENCY, | ||
| MATH, | ||
| MATH_ADVANCED, | ||
| CYRILLIC, | ||
| FRACTIONS, | ||
| GREEK, | ||
| LATIN_ACCENTS, | ||
| LATIN_EXTENDED, | ||
| MISC_SYMBOLS, | ||
| PUNCTUATION, | ||
| SHAPES, | ||
| } from './entities.js'; | ||
| export { default as EntityEncoder } from './EntityEncoder.js'; |
| // --------------------------------------------------------------------------- | ||
| // Built-in entity tables | ||
| // --------------------------------------------------------------------------- | ||
/**
 * Built-in XML entity table (`apos`, `gt`, `lt`, `quot`).
 *
 * Every entry has the shape `{ regex: RegExp, val: string }`. Each regex is
 * global and matches the named reference as well as its decimal and
 * hexadecimal numeric forms, with any number of leading zeros.
 *
 * `&amp;` is deliberately not part of this table; it lives in AMP_ENTITY so
 * that it can be expanded in a separate, final pass.
 */
const DEFAULT_XML_ENTITIES = {
  apos: {
    regex: /&(apos|#0*39|#x0*27);/g,
    val: "'",
  },
  gt: {
    regex: /&(gt|#0*62|#x0*3[Ee]);/g,
    val: '>',
  },
  lt: {
    regex: /&(lt|#0*60|#x0*3[Cc]);/g,
    val: '<',
  },
  quot: {
    regex: /&(quot|#0*34|#x0*22);/g,
    val: '"',
  },
};

/**
 * The `&amp;` entry (named, decimal and hex forms). Kept apart from the table
 * above so it can be applied last, which avoids double expansion of text
 * such as `&amp;lt;`.
 */
const AMP_ENTITY = {
  regex: /&(amp|#0*38|#x0*26);/g,
  val: '&',
};
| // --------------------------------------------------------------------------- | ||
| // Helpers | ||
| // --------------------------------------------------------------------------- | ||
// Deny-list of characters that may not appear in an entity name.
const SPECIAL_CHARS = new Set('!?\\\\/[]$%{}^&*()<>|+');

/**
 * Reject entity names that contain any character from SPECIAL_CHARS.
 *
 * The name is scanned left to right and the first offending character is the
 * one reported in the error message.
 *
 * @param {string} name - bare entity name (no `&` / `;`)
 * @returns {string} `name`, unmodified, when it passes the check
 * @throws {Error} if `name` contains a denied character
 */
function validateEntityName(name) {
  const offending = [...name].find((ch) => SPECIAL_CHARS.has(ch));
  if (offending !== undefined) {
    throw new Error(`[EntityReplacer] Invalid character '${offending}' in entity name: "${name}"`);
  }
  return name;
}
/**
 * Escape `str` so it can be embedded literally in a `RegExp` source string.
 *
 * Every regex metacharacter is backslash-escaped, so the result is safe to
 * pass to `new RegExp(...)` even when the input was not validated beforehand.
 * `-` and `:` are escaped as well: they are not metacharacters outside a
 * character class, but escaping them keeps the output identical to what this
 * helper has always produced for such names.
 *
 * NOTE(review): `\:` and `\-` are identity escapes, which are only accepted by
 * non-unicode patterns — do not combine the result with the `u`/`v` flags.
 *
 * @param {string} str - text to embed literally in a pattern
 * @returns {string} `str` with metacharacters backslash-escaped
 */
function escapeForRegex(str) {
  return str.replace(/[.\-+*:?^$()[\]{}|\\]/g, '\\$&');
}
/**
 * Turn a table-valued constructor option into an entity table or `null`.
 *
 * - `true`       → `builtIn`
 * - `undefined`  → `builtIn` when `enabledByDefault`, otherwise `null`
 * - `false`/`null` → `null`
 * - any object   → that object, used as-is
 * - anything else → `null`
 *
 * @param {*} option - raw option value supplied by the caller
 * @param {object|null} builtIn - table selected by `true` / by default
 * @param {boolean} [enabledByDefault=false] - whether an omitted option enables `builtIn`
 * @returns {object|null}
 */
function resolveTable(option, builtIn, enabledByDefault = false) {
  switch (option) {
    case true:
      return builtIn;
    case undefined:
      return enabledByDefault ? builtIn : null;
    case false:
    case null:
      return null;
    default:
      return typeof option === 'object' ? option : null;
  }
}
/**
 * Normalise the `applyLimitsTo` option.
 *
 * @param {*} spec - `'all'`, a single category name, or an array of names
 * @returns {'all'|Set<string>} the literal string `'all'` when `spec` is
 *   exactly `'all'`; otherwise a Set holding the given category name(s).
 *   Any value that is neither a string nor an array yields `Set {'external'}`.
 */
function resolveApplyLimitsTo(spec) {
  if (Array.isArray(spec)) return new Set(spec);
  if (typeof spec !== 'string') return new Set(['external']);
  return spec === 'all' ? 'all' : new Set([spec]);
}
/**
 * Build an entries array from a raw map of name → string | { regex, val }.
 *
 * - String values become `{ regex: /&name;/g, val }`. The name is validated
 *   first, and values containing `&` are skipped because expanding them could
 *   introduce new entity references (recursive expansion risk).
 * - Object values with a defined `val` are taken as pre-built entries. Both
 *   the `regex` spelling and DocTypeReader's `regx` spelling are accepted.
 *   If neither is present, a regex is derived from the entry name instead of
 *   storing `undefined` (which `String.prototype.replace` would treat as the
 *   literal text "undefined").
 * - Anything else is ignored.
 *
 * A `null`/`undefined` map yields an empty array.
 *
 * @param {object|null|undefined} map
 * @returns {Array<[string, {regex: RegExp, val: string | Function}]>}
 * @throws {Error} if a name that needs a derived regex contains invalid characters
 */
function buildEntries(map) {
  if (map == null) return [];

  // Validates the name, then builds the canonical `&name;` matcher for it.
  const regexForName = (name) => {
    validateEntityName(name);
    return new RegExp('&' + escapeForRegex(name) + ';', 'g');
  };

  const entries = [];
  for (const [key, raw] of Object.entries(map)) {
    if (typeof raw === 'string') {
      if (raw.includes('&')) continue; // skip — would cause recursive expansion
      entries.push([key, { regex: regexForName(key), val: raw }]);
      continue;
    }
    if (typeof raw !== 'object' || raw === null || raw.val === undefined) continue;

    // Pre-built { regex, val } or DocTypeReader's { regx, val }.
    const supplied = raw.regex ?? raw.regx;
    entries.push([key, { regex: supplied ?? regexForName(key), val: raw.val }]);
  }
  return entries;
}
| // --------------------------------------------------------------------------- | ||
| // EntityReplacer | ||
| // --------------------------------------------------------------------------- | ||
/**
 * Standalone, zero-dependency entity replacer for XML/HTML content.
 *
 * Entity categories:
 * - **persistent external** — configured once, survive across documents.
 *   Set via `setExternalEntities()` or built up via `addExternalEntity()`.
 * - **input / runtime** — DOCTYPE entities for the *current* document only.
 *   Injected via `addInputEntities()` and wiped by `reset()` so they never
 *   leak between documents.
 * - **default** — the built-in XML table (lt / gt / apos / quot) or a custom one.
 * - **system** — a caller-supplied table of named entities.
 *
 * Replacement order inside `replace()` (fixed):
 *   1. persistent external
 *   2. input / runtime (DOCTYPE)
 *   3. default (lt / gt / apos / quot)
 *   4. system (named entity groups)
 *   5. amp (`&amp;` final pass)
 *   6. postCheck hook
 *
 * Replacement values are always inserted literally: `$`-sequences such as
 * `$$` or `$&` inside a value are not interpreted as replacement patterns.
 *
 * @example
 * const replacer = new EntityReplacer({ default: true, system: COMMON_HTML });
 * replacer.setExternalEntities({ brand: 'Acme' });
 *
 * // Before each document:
 * replacer.reset();
 * // If DOCTYPE entities are present:
 * replacer.addInputEntities({ version: '1.0' });
 * replacer.replace('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 */
export default class EntityReplacer {
  /**
   * @param {object} [options]
   * @param {boolean|object|null} [options.default=true]  `true`/omitted → built-in
   *   XML table, `false`/`null` → disabled, object → custom table.
   * @param {boolean|null} [options.amp=true]  `false`/`null` disables the final `&amp;` pass.
   * @param {boolean|object|null} [options.system=false]  object → table to use.
   *   No built-in table is wired in for this option here, so `true` selects no table.
   * @param {number} [options.maxTotalExpansions=0]  `0` means unlimited.
   * @param {number} [options.maxExpandedLength=0]  `0` means unlimited.
   * @param {'external'|'all'|string[]} [options.applyLimitsTo='external']
   * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
   */
  constructor(options = {}) {
    // Configuration resolved once at construction time.
    this._defaultTable = resolveTable(options.default, DEFAULT_XML_ENTITIES, true);
    this._systemTable = resolveTable(options.system, null, false);
    this._ampEnabled = options.amp !== false && options.amp !== null;
    this._maxTotalExpansions = options.maxTotalExpansions || 0;
    this._maxExpandedLength = options.maxExpandedLength || 0;
    this._applyLimitsTo = resolveApplyLimitsTo(options.applyLimitsTo ?? 'external');
    this._postCheck = typeof options.postCheck === 'function' ? options.postCheck : (r) => r;

    // Pre-computed per-category flags: does this category count toward the limits?
    const limits = this._applyLimitsTo;
    const isLimited = (category) =>
      limits === 'all' || (limits instanceof Set && limits.has(category));
    this._limitExternal = isLimited('external');
    this._limitSystem = isLimited('system');
    this._limitDefault = isLimited('default');

    // Entry arrays derived from the configured tables.
    this._defaultEntries = this._defaultTable ? Object.entries(this._defaultTable) : [];
    this._systemEntries = this._systemTable ? Object.entries(this._systemTable) : [];

    // Persistent external entities — survive across documents.
    /** @type {Array<[string, {regex: RegExp, val: string}]>} */
    this._persistentEntries = [];

    // Input / runtime entities — current document only, cleared by reset().
    /** @type {Array<[string, {regex: RegExp, val: string}]>} */
    this._inputEntries = [];

    // Per-document counters — cleared by reset() and addInputEntities().
    this._totalExpansions = 0;
    this._expandedLength = 0;
  }

  // -------------------------------------------------------------------------
  // Persistent external entity registration (survives across documents)
  // -------------------------------------------------------------------------

  /**
   * Replace the full set of persistent external entities.
   * These are not touched by `reset()`.
   *
   * @param {Record<string, string | { regex: RegExp, val: string | Function }>} map
   */
  setExternalEntities(map) {
    this._persistentEntries = buildEntries(map);
  }

  /**
   * Append a single persistent external entity without disturbing existing ones.
   * The entity is silently ignored when `value` is not a string or contains `&`.
   *
   * @param {string} key — bare entity name, e.g. `'copy'`
   * @param {string} value — replacement string, e.g. `'©'`
   * @throws {Error} if `key` contains invalid characters
   */
  addExternalEntity(key, value) {
    validateEntityName(key);
    if (typeof value !== 'string' || value.includes('&')) return;
    this._persistentEntries.push([key, {
      regex: new RegExp('&' + escapeForRegex(key) + ';', 'g'),
      val: value,
    }]);
  }

  // -------------------------------------------------------------------------
  // Input / runtime entity registration (per document)
  // -------------------------------------------------------------------------

  /**
   * Set the DOCTYPE (input/runtime) entities for the current document.
   *
   * The given map replaces any previously injected input entities; persistent
   * external entities are kept separately and are unaffected. The per-document
   * expansion counters are reset as well.
   *
   * @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string | Function }>} map
   */
  addInputEntities(map) {
    this._totalExpansions = 0;
    this._expandedLength = 0;
    this._inputEntries = buildEntries(map);
  }

  // -------------------------------------------------------------------------
  // reset — per-document lifecycle
  // -------------------------------------------------------------------------

  /**
   * Clear all per-document state (input entities and both expansion counters).
   * Persistent external entities are kept.
   *
   * @returns {this} the same instance, so calls can be chained
   */
  reset() {
    this._inputEntries = [];
    this._totalExpansions = 0;
    this._expandedLength = 0;
    return this;
  }

  // -------------------------------------------------------------------------
  // Primary API
  // -------------------------------------------------------------------------

  /**
   * Replace all entity references in `str`.
   *
   * Processing order:
   *   1. persistent external
   *   2. input / runtime (DOCTYPE)
   *   3. default (lt/gt/apos/quot)
   *   4. system
   *   5. amp
   *   6. postCheck hook
   *
   * Non-string or empty input, and strings without `&`, are returned as-is
   * without invoking the postCheck hook.
   *
   * @param {string} str
   * @returns {string}
   * @throws {Error} when a configured expansion limit is exceeded
   */
  replace(str) {
    if (typeof str !== 'string' || str.length === 0) return str;
    if (!str.includes('&')) return str; // fast path

    const original = str;

    // Steps 1-4: each category is skipped when empty or when no `&` is left.
    const passes = [
      [this._persistentEntries, this._limitExternal],
      [this._inputEntries, this._limitExternal],
      [this._defaultEntries, this._limitDefault],
      [this._systemEntries, this._limitSystem],
    ];
    for (const [entries, track] of passes) {
      if (entries.length > 0 && str.includes('&')) {
        str = this._applyEntries(str, entries, track);
      }
    }

    // Step 5: &amp; — always last so `&amp;lt;` ends up as `&lt;`, not `<`.
    if (this._ampEnabled && str.includes('&')) {
      str = str.replace(AMP_ENTITY.regex, AMP_ENTITY.val);
    }

    // Step 6: caller-supplied hook sees both the resolved and the original text.
    return this._postCheck(str, original);
  }

  /**
   * Alias for `replace()`.
   *
   * @param {string} val
   * @returns {string}
   */
  parse(val) {
    return this.replace(val);
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Apply every entry of one category to `str`, updating and enforcing the
   * expansion limits when `track` is set and a limit is configured.
   *
   * @param {string} str
   * @param {Array<[string, {regex: RegExp, val: string | Function}]>} entries
   * @param {boolean} track - whether this category counts toward the limits
   * @returns {string}
   * @throws {Error} when the expansion count or expanded length limit is exceeded
   */
  _applyEntries(str, entries, track) {
    const limitExpansions = track && this._maxTotalExpansions > 0;
    const limitLength = track && this._maxExpandedLength > 0;

    for (const [, entity] of entries) {
      if (!str.includes('&')) break;

      const before = str.length;
      let count = 0;
      // A replacer callback is used on every path so string values are
      // inserted literally (no `$$` / `$&` pattern interpretation).
      str = str.replace(entity.regex, (...args) => {
        count++;
        return typeof entity.val === 'function' ? entity.val(...args) : entity.val;
      });

      if (limitExpansions && count > 0) {
        this._totalExpansions += count;
        if (this._totalExpansions > this._maxTotalExpansions) {
          throw new Error(
            `[EntityReplacer] Entity expansion count limit exceeded: ` +
            `${this._totalExpansions} > ${this._maxTotalExpansions}`
          );
        }
      }

      if (limitLength) {
        // Only growth counts; replacements that shrink the text are ignored.
        const delta = str.length - before;
        if (delta > 0) {
          this._expandedLength += delta;
          if (this._expandedLength > this._maxExpandedLength) {
            throw new Error(
              `[EntityReplacer] Expanded content length limit exceeded: ` +
              `${this._expandedLength} > ${this._maxExpandedLength}`
            );
          }
        }
      }
    }
    return str;
  }
}
| // Re-export the built-in tables for advanced users who want to extend them | ||
| export { DEFAULT_XML_ENTITIES, AMP_ENTITY }; |
-110
| // --------------------------------------------------------------------------- | ||
| // Named entity groups — importable separately and freely composable. | ||
| // All groups are plain objects; no magic, no classes. | ||
| // --------------------------------------------------------------------------- | ||
/**
 * ~20 most commonly needed HTML named entities.
 *
 * Each entry pairs a global regex with its replacement character. Every regex
 * accepts three spellings of the same reference: the name (`&copy;`), the
 * decimal form with optional leading zeros (`&#169;`, `&#0169;`) and the hex
 * form with optional leading zeros and either letter case (`&#xA9;`, `&#xa9;`).
 *
 * @type {Record<string, { regex: RegExp, val: string | ((m: string, s: string) => string) }>}
 */
export const COMMON_HTML = {
  nbsp: { regex: /&(nbsp|#0*160|#x0*[Aa]0);/g, val: '\u00a0' },
  copy: { regex: /&(copy|#0*169|#x0*[Aa]9);/g, val: '\u00a9' },
  reg: { regex: /&(reg|#0*174|#x0*[Aa][Ee]);/g, val: '\u00ae' },
  trade: { regex: /&(trade|#0*8482|#x0*2122);/g, val: '\u2122' },
  mdash: { regex: /&(mdash|#0*8212|#x0*2014);/g, val: '\u2014' },
  ndash: { regex: /&(ndash|#0*8211|#x0*2013);/g, val: '\u2013' },
  hellip: { regex: /&(hellip|#0*8230|#x0*2026);/g, val: '\u2026' },
  laquo: { regex: /&(laquo|#0*171|#x0*[Aa][Bb]);/g, val: '\u00ab' },
  raquo: { regex: /&(raquo|#0*187|#x0*[Bb][Bb]);/g, val: '\u00bb' },
  lsquo: { regex: /&(lsquo|#0*8216|#x0*2018);/g, val: '\u2018' },
  rsquo: { regex: /&(rsquo|#0*8217|#x0*2019);/g, val: '\u2019' },
  ldquo: { regex: /&(ldquo|#0*8220|#x0*201[Cc]);/g, val: '\u201c' },
  rdquo: { regex: /&(rdquo|#0*8221|#x0*201[Dd]);/g, val: '\u201d' },
  bull: { regex: /&(bull|#0*8226|#x0*2022);/g, val: '\u2022' },
  para: { regex: /&(para|#0*182|#x0*[Bb]6);/g, val: '\u00b6' },
  sect: { regex: /&(sect|#0*167|#x0*[Aa]7);/g, val: '\u00a7' },
  deg: { regex: /&(deg|#0*176|#x0*[Bb]0);/g, val: '\u00b0' },
  frac12: { regex: /&(frac12|#0*189|#x0*[Bb][Dd]);/g, val: '\u00bd' },
  frac14: { regex: /&(frac14|#0*188|#x0*[Bb][Cc]);/g, val: '\u00bc' },
  frac34: { regex: /&(frac34|#0*190|#x0*[Bb][Ee]);/g, val: '\u00be' },
  // Hex form included so this entry accepts the same spellings as the rest.
  inr: { regex: /&(inr|#0*8377|#x0*20[Bb]9);/g, val: '\u20b9' },
};
/**
 * Currency symbol entities.
 *
 * Every regex matches the entity name, its decimal reference and its hex
 * reference (leading zeros allowed; hex letters in either case).
 */
export const CURRENCY_ENTITIES = {
  // ¢
  cent: {
    regex: /&(cent|#0*162|#x0*[Aa]2);/g,
    val: '\u00a2',
  },
  // £
  pound: {
    regex: /&(pound|#0*163|#x0*[Aa]3);/g,
    val: '\u00a3',
  },
  // ¥
  yen: {
    regex: /&(yen|#0*165|#x0*[Aa]5);/g,
    val: '\u00a5',
  },
  // €
  euro: {
    regex: /&(euro|#0*8364|#x0*20[Aa][Cc]);/g,
    val: '\u20ac',
  },
  // ₹
  inr: {
    regex: /&(inr|#0*8377|#x0*20[Bb]9);/g,
    val: '\u20b9',
  },
  // ¤
  curren: {
    regex: /&(curren|#0*164|#x0*[Aa]4);/g,
    val: '\u00a4',
  },
  // ƒ
  fnof: {
    regex: /&(fnof|#0*402|#x0*192);/g,
    val: '\u0192',
  },
};
/**
 * Mathematical operator entities.
 *
 * Every regex matches the entity name, its decimal reference and its hex
 * reference (leading zeros allowed; hex letters in either case).
 */
export const MATH_ENTITIES = {
  // ×
  times: {
    regex: /&(times|#0*215|#x0*[Dd]7);/g,
    val: '\u00d7',
  },
  // ÷
  divide: {
    regex: /&(divide|#0*247|#x0*[Ff]7);/g,
    val: '\u00f7',
  },
  // ±
  plusmn: {
    regex: /&(plusmn|#0*177|#x0*[Bb]1);/g,
    val: '\u00b1',
  },
  // −
  minus: {
    regex: /&(minus|#0*8722|#x0*2212);/g,
    val: '\u2212',
  },
  // ²
  sup2: {
    regex: /&(sup2|#0*178|#x0*[Bb]2);/g,
    val: '\u00b2',
  },
  // ³
  sup3: {
    regex: /&(sup3|#0*179|#x0*[Bb]3);/g,
    val: '\u00b3',
  },
  // ¹
  sup1: {
    regex: /&(sup1|#0*185|#x0*[Bb]9);/g,
    val: '\u00b9',
  },
  // ½
  frac12: {
    regex: /&(frac12|#0*189|#x0*[Bb][Dd]);/g,
    val: '\u00bd',
  },
  // ¼
  frac14: {
    regex: /&(frac14|#0*188|#x0*[Bb][Cc]);/g,
    val: '\u00bc',
  },
  // ¾
  frac34: {
    regex: /&(frac34|#0*190|#x0*[Bb][Ee]);/g,
    val: '\u00be',
  },
  // ‰
  permil: {
    regex: /&(permil|#0*8240|#x0*2030);/g,
    val: '\u2030',
  },
  // ∞
  infin: {
    regex: /&(infin|#0*8734|#x0*221[Ee]);/g,
    val: '\u221e',
  },
  // ∑
  sum: {
    regex: /&(sum|#0*8721|#x0*2211);/g,
    val: '\u2211',
  },
  // ∏
  prod: {
    regex: /&(prod|#0*8719|#x0*220[Ff]);/g,
    val: '\u220f',
  },
  // √
  radic: {
    regex: /&(radic|#0*8730|#x0*221[Aa]);/g,
    val: '\u221a',
  },
  // ≠
  ne: {
    regex: /&(ne|#0*8800|#x0*2260);/g,
    val: '\u2260',
  },
  // ≤
  le: {
    regex: /&(le|#0*8804|#x0*2264);/g,
    val: '\u2264',
  },
  // ≥
  ge: {
    regex: /&(ge|#0*8805|#x0*2265);/g,
    val: '\u2265',
  },
};
/**
 * Arrow entities.
 *
 * Lower-case names (`larr`, `rarr`, …) map to single-stroke arrows and the
 * capital-`A` names (`lArr`, `rArr`, …) to double-stroke arrows. Every regex
 * matches the entity name, its decimal reference and its hex reference.
 */
export const ARROW_ENTITIES = {
  // ←
  larr: {
    regex: /&(larr|#0*8592|#x0*2190);/g,
    val: '\u2190',
  },
  // ↑
  uarr: {
    regex: /&(uarr|#0*8593|#x0*2191);/g,
    val: '\u2191',
  },
  // →
  rarr: {
    regex: /&(rarr|#0*8594|#x0*2192);/g,
    val: '\u2192',
  },
  // ↓
  darr: {
    regex: /&(darr|#0*8595|#x0*2193);/g,
    val: '\u2193',
  },
  // ↔
  harr: {
    regex: /&(harr|#0*8596|#x0*2194);/g,
    val: '\u2194',
  },
  // ⇐
  lArr: {
    regex: /&(lArr|#0*8656|#x0*21[Dd]0);/g,
    val: '\u21d0',
  },
  // ⇑
  uArr: {
    regex: /&(uArr|#0*8657|#x0*21[Dd]1);/g,
    val: '\u21d1',
  },
  // ⇒
  rArr: {
    regex: /&(rArr|#0*8658|#x0*21[Dd]2);/g,
    val: '\u21d2',
  },
  // ⇓
  dArr: {
    regex: /&(dArr|#0*8659|#x0*21[Dd]3);/g,
    val: '\u21d3',
  },
  // ⇔
  hArr: {
    regex: /&(hArr|#0*8660|#x0*21[Dd]4);/g,
    val: '\u21d4',
  },
};
/**
 * Numeric character references — decimal &#NNN; and hex &#xHH;
 * These are function-replacers; they expand any valid code point.
 *
 * Both patterns skip leading zeros and capture the remaining digits
 * (up to 7 decimal or 6 hex digits), which are passed to `fromCodePoint`.
 */
export const NUMERIC_ENTITIES = {
  num_dec: {
    regex: /&#0*([0-9]{1,7});/g,
    val: (_, s) => fromCodePoint(s, 10, "&#"),
  },
  num_hex: {
    regex: /&#x0*([0-9a-fA-F]{1,6});/g,
    val: (_, s) => fromCodePoint(s, 16, "&#x"),
  },
};

/**
 * Converts the digits of a numeric character reference into its character.
 *
 * @param {string} str - Digits captured from the reference (no prefix, no `;`).
 * @param {number} base - Radix used to parse `str` (10 or 16 in this file).
 * @param {string} prefix - Reference prefix (`"&#"` or `"&#x"`) used to
 *   rebuild the reference text when the value cannot be expanded.
 * @returns {string} The character for the parsed code point when it lies in
 *   0..0x10FFFF; otherwise `prefix + str + ";"`, i.e. the reference is left
 *   as text (without any leading zeros the regex skipped).
 */
function fromCodePoint(str, base, prefix) {
  const codePoint = Number.parseInt(str, base);
  // NaN fails both comparisons and therefore also takes the fallback path.
  if (codePoint >= 0 && codePoint <= 0x10FFFF) {
    return String.fromCodePoint(codePoint);
  } else {
    return prefix + str + ";";
  }
}
Major refactor
Supply chain risk: This package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
52010
17.98%8
33.33%1925
161.55%41
-90.6%1
Infinity%