🚀. Socket Launch Week Day 2:Introducing Manifest Alerts.Learn more
Sign In

@nodable/entities

Package Overview
Dependencies
Maintainers
1
Versions
7
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@nodable/entities - npm Package Compare versions

Comparing version
1.1.0
to
2.0.0
+1177
src/entities.js
// ---------------------------------------------------------------------------
// Complete HTML5 named entity reference
// Organized by logical categories for easy maintenance and selective importing
// ---------------------------------------------------------------------------
/**
 * Basic Latin & Special Characters.
 *
 * Maps entity names (without the leading '&' and trailing ';') to their
 * replacement text: the XML predefined entities, typographic quotes, ASCII
 * punctuation and the Latin-1 symbol range.
 *
 * `percnt` is the standard HTML name for '%'; the older `percent` spelling is
 * kept as an alias so existing lookups keep working.
 *
 * @type {Record<string, string>}
 */
export const BASIC_LATIN = {
  // XML predefined entities (plus upper-case aliases)
  amp: '&', AMP: '&',
  lt: '<', LT: '<',
  gt: '>', GT: '>',
  quot: '"', QUOT: '"',
  apos: "'",

  // Typographic quotes
  lsquo: '‘', rsquo: '’',
  ldquo: '“', rdquo: '”',
  lsquor: '‚', rsquor: '’',
  ldquor: '„', bdquo: '„',

  // ASCII punctuation
  comma: ',',
  period: '.',
  colon: ':',
  semi: ';',
  excl: '!',
  quest: '?',
  num: '#',
  dollar: '$',
  percnt: '%',
  percent: '%',
  ast: '*',
  commat: '@',
  lowbar: '_',
  verbar: '|', vert: '|',
  sol: '/',
  bsol: '\\',
  lbrace: '{', rbrace: '}',
  lbrack: '[', rbrack: ']',
  lpar: '(', rpar: ')',

  // Latin-1 symbols (invisible characters are written as escapes)
  nbsp: '\u00a0',
  iexcl: '¡',
  cent: '¢',
  pound: '£',
  curren: '¤',
  yen: '¥',
  brvbar: '¦',
  sect: '§',
  uml: '¨',
  copy: '©', COPY: '©',
  ordf: 'ª',
  laquo: '«',
  not: '¬',
  shy: '\u00ad',
  reg: '®', REG: '®',
  macr: '¯',
  deg: '°',
  plusmn: '±',
  sup2: '²',
  sup3: '³',
  acute: '´',
  micro: 'µ',
  para: '¶',
  middot: '·',
  cedil: '¸',
  sup1: '¹',
  ordm: 'º',
  raquo: '»',
  frac14: '¼',
  frac12: '½', half: '½',
  frac34: '¾',
  iquest: '¿',
  times: '×',
  div: '÷', divide: '÷',
};
/**
 * Latin-1 accented letters, ligatures and related characters.
 *
 * Each row lists the upper-case entity followed by its lower-case
 * counterpart; the few unpaired names sit at the end.
 *
 * @type {Record<string, string>}
 */
export const LATIN_ACCENTS = {
  // A
  Agrave: 'À', agrave: 'à',
  Aacute: 'Á', aacute: 'á',
  Acirc: 'Â', acirc: 'â',
  Atilde: 'Ã', atilde: 'ã',
  Auml: 'Ä', auml: 'ä',
  Aring: 'Å', aring: 'å',
  AElig: 'Æ', aelig: 'æ',
  // C
  Ccedil: 'Ç', ccedil: 'ç',
  // E
  Egrave: 'È', egrave: 'è',
  Eacute: 'É', eacute: 'é',
  Ecirc: 'Ê', ecirc: 'ê',
  Euml: 'Ë', euml: 'ë',
  // I
  Igrave: 'Ì', igrave: 'ì',
  Iacute: 'Í', iacute: 'í',
  Icirc: 'Î', icirc: 'î',
  Iuml: 'Ï', iuml: 'ï',
  // Eth, N
  ETH: 'Ð', eth: 'ð',
  Ntilde: 'Ñ', ntilde: 'ñ',
  // O
  Ograve: 'Ò', ograve: 'ò',
  Oacute: 'Ó', oacute: 'ó',
  Ocirc: 'Ô', ocirc: 'ô',
  Otilde: 'Õ', otilde: 'õ',
  Ouml: 'Ö', ouml: 'ö',
  Oslash: 'Ø', oslash: 'ø',
  // U
  Ugrave: 'Ù', ugrave: 'ù',
  Uacute: 'Ú', uacute: 'ú',
  Ucirc: 'Û', ucirc: 'û',
  Uuml: 'Ü', uuml: 'ü',
  // Y, Thorn
  Yacute: 'Ý', yacute: 'ý',
  THORN: 'Þ', thorn: 'þ',
  // Unpaired
  szlig: 'ß',
  yuml: 'ÿ',
  Yuml: 'Ÿ',
};
/**
 * Latin Extended-A letters (letters with diacritics, ligatures, strokes).
 *
 * Each row lists the upper-case entity followed by its lower-case
 * counterpart where both exist.
 *
 * `IJlig` / `ijlig` are written as escapes on purpose: they must resolve to
 * the single ligature code points U+0132 / U+0133, which are visually
 * indistinguishable from the two-letter ASCII sequences "IJ" / "ij".
 *
 * @type {Record<string, string>}
 */
export const LATIN_EXTENDED = {
  // A
  Amacr: 'Ā', amacr: 'ā',
  Abreve: 'Ă', abreve: 'ă',
  Aogon: 'Ą', aogon: 'ą',
  // C
  Cacute: 'Ć', cacute: 'ć',
  Ccirc: 'Ĉ', ccirc: 'ĉ',
  Cdot: 'Ċ', cdot: 'ċ',
  Ccaron: 'Č', ccaron: 'č',
  // D
  Dcaron: 'Ď', dcaron: 'ď',
  Dstrok: 'Đ', dstrok: 'đ',
  // E
  Emacr: 'Ē', emacr: 'ē',
  Ecaron: 'Ě', ecaron: 'ě',
  Edot: 'Ė', edot: 'ė',
  Eogon: 'Ę', eogon: 'ę',
  // G
  Gcirc: 'Ĝ', gcirc: 'ĝ',
  Gbreve: 'Ğ', gbreve: 'ğ',
  Gdot: 'Ġ', gdot: 'ġ',
  Gcedil: 'Ģ',
  // H
  Hcirc: 'Ĥ', hcirc: 'ĥ',
  Hstrok: 'Ħ', hstrok: 'ħ',
  // I
  Itilde: 'Ĩ', itilde: 'ĩ',
  Imacr: 'Ī', imacr: 'ī',
  Iogon: 'Į', iogon: 'į',
  Idot: 'İ',
  IJlig: '\u0132', ijlig: '\u0133',
  // J
  Jcirc: 'Ĵ', jcirc: 'ĵ',
  // K
  Kcedil: 'Ķ', kcedil: 'ķ',
  kgreen: 'ĸ',
  // L
  Lacute: 'Ĺ', lacute: 'ĺ',
  Lcedil: 'Ļ', lcedil: 'ļ',
  Lcaron: 'Ľ', lcaron: 'ľ',
  Lmidot: 'Ŀ', lmidot: 'ŀ',
  Lstrok: 'Ł', lstrok: 'ł',
  // N
  Nacute: 'Ń', nacute: 'ń',
  Ncaron: 'Ň', ncaron: 'ň',
  Ncedil: 'Ņ', ncedil: 'ņ',
  ENG: 'Ŋ', eng: 'ŋ',
  // O
  Omacr: 'Ō', omacr: 'ō',
  Odblac: 'Ő', odblac: 'ő',
  OElig: 'Œ', oelig: 'œ',
  // R
  Racute: 'Ŕ', racute: 'ŕ',
  Rcaron: 'Ř', rcaron: 'ř',
  Rcedil: 'Ŗ', rcedil: 'ŗ',
  // S
  Sacute: 'Ś', sacute: 'ś',
  Scirc: 'Ŝ', scirc: 'ŝ',
  Scedil: 'Ş', scedil: 'ş',
  Scaron: 'Š', scaron: 'š',
  // T
  Tcedil: 'Ţ', tcedil: 'ţ',
  Tcaron: 'Ť', tcaron: 'ť',
  Tstrok: 'Ŧ', tstrok: 'ŧ',
  // U
  Utilde: 'Ũ', utilde: 'ũ',
  Umacr: 'Ū', umacr: 'ū',
  Ubreve: 'Ŭ', ubreve: 'ŭ',
  Uring: 'Ů', uring: 'ů',
  Udblac: 'Ű', udblac: 'ű',
  Uogon: 'Ų', uogon: 'ų',
  // W, Y
  Wcirc: 'Ŵ', wcirc: 'ŵ',
  Ycirc: 'Ŷ', ycirc: 'ŷ',
  // Z
  Zacute: 'Ź', zacute: 'ź',
  Zdot: 'Ż', zdot: 'ż',
  Zcaron: 'Ž', zcaron: 'ž',
};
/**
 * Greek letters and their variant ("var…") glyph forms.
 *
 * Rows follow alphabet order: upper-case entity, lower-case entity, then any
 * variant-form aliases for that letter.
 *
 * @type {Record<string, string>}
 */
export const GREEK = {
  Alpha: 'Α', alpha: 'α',
  Beta: 'Β', beta: 'β',
  Gamma: 'Γ', gamma: 'γ',
  Delta: 'Δ', delta: 'δ',
  Epsilon: 'Ε', epsilon: 'ε',
  epsiv: 'ϵ', varepsilon: 'ϵ',
  Zeta: 'Ζ', zeta: 'ζ',
  Eta: 'Η', eta: 'η',
  Theta: 'Θ', theta: 'θ',
  thetasym: 'ϑ', vartheta: 'ϑ',
  Iota: 'Ι', iota: 'ι',
  Kappa: 'Κ', kappa: 'κ',
  kappav: 'ϰ', varkappa: 'ϰ',
  Lambda: 'Λ', lambda: 'λ',
  Mu: 'Μ', mu: 'μ',
  Nu: 'Ν', nu: 'ν',
  Xi: 'Ξ', xi: 'ξ',
  Omicron: 'Ο', omicron: 'ο',
  Pi: 'Π', pi: 'π',
  piv: 'ϖ', varpi: 'ϖ',
  Rho: 'Ρ', rho: 'ρ',
  rhov: 'ϱ', varrho: 'ϱ',
  Sigma: 'Σ', sigma: 'σ',
  sigmaf: 'ς', sigmav: 'ς', varsigma: 'ς',
  Tau: 'Τ', tau: 'τ',
  Upsilon: 'Υ', upsilon: 'υ',
  upsi: 'υ', Upsi: 'ϒ', upsih: 'ϒ',
  Phi: 'Φ', phi: 'φ',
  phiv: 'ϕ', varphi: 'ϕ',
  Chi: 'Χ', chi: 'χ',
  Psi: 'Ψ', psi: 'ψ',
  Omega: 'Ω', omega: 'ω',
  ohm: 'Ω',
  // Archaic digamma
  Gammad: 'Ϝ', gammad: 'ϝ', digamma: 'ϝ',
};
/**
 * Cyrillic letters (Russian alphabet followed by additional Slavic letters).
 *
 * Each row lists the upper-case entity followed by its lower-case
 * counterpart.
 *
 * NOTE(review): the first row (`Afr` / `afr`) holds mathematical Fraktur
 * letters rather than Cyrillic ones; it is kept here because this map is an
 * exported name.
 *
 * @type {Record<string, string>}
 */
export const CYRILLIC = {
  Afr: '𝔄', afr: '𝔞',

  // Russian alphabet
  Acy: 'А', acy: 'а',
  Bcy: 'Б', bcy: 'б',
  Vcy: 'В', vcy: 'в',
  Gcy: 'Г', gcy: 'г',
  Dcy: 'Д', dcy: 'д',
  IEcy: 'Е', iecy: 'е',
  IOcy: 'Ё', iocy: 'ё',
  ZHcy: 'Ж', zhcy: 'ж',
  Zcy: 'З', zcy: 'з',
  Icy: 'И', icy: 'и',
  Jcy: 'Й', jcy: 'й',
  Kcy: 'К', kcy: 'к',
  Lcy: 'Л', lcy: 'л',
  Mcy: 'М', mcy: 'м',
  Ncy: 'Н', ncy: 'н',
  Ocy: 'О', ocy: 'о',
  Pcy: 'П', pcy: 'п',
  Rcy: 'Р', rcy: 'р',
  Scy: 'С', scy: 'с',
  Tcy: 'Т', tcy: 'т',
  Ucy: 'У', ucy: 'у',
  Fcy: 'Ф', fcy: 'ф',
  KHcy: 'Х', khcy: 'х',
  TScy: 'Ц', tscy: 'ц',
  CHcy: 'Ч', chcy: 'ч',
  SHcy: 'Ш', shcy: 'ш',
  SHCHcy: 'Щ', shchcy: 'щ',
  HARDcy: 'Ъ', hardcy: 'ъ',
  Ycy: 'Ы', ycy: 'ы',
  SOFTcy: 'Ь', softcy: 'ь',
  Ecy: 'Э', ecy: 'э',
  YUcy: 'Ю', yucy: 'ю',
  YAcy: 'Я', yacy: 'я',

  // Additional Slavic letters
  DJcy: 'Ђ', djcy: 'ђ',
  GJcy: 'Ѓ', gjcy: 'ѓ',
  Jukcy: 'Є', jukcy: 'є',
  DScy: 'Ѕ', dscy: 'ѕ',
  Iukcy: 'І', iukcy: 'і',
  YIcy: 'Ї', yicy: 'ї',
  Jsercy: 'Ј', jsercy: 'ј',
  LJcy: 'Љ', ljcy: 'љ',
  NJcy: 'Њ', njcy: 'њ',
  TSHcy: 'Ћ', tshcy: 'ћ',
  KJcy: 'Ќ', kjcy: 'ќ',
  Ubrcy: 'Ў', ubrcy: 'ў',
  DZcy: 'Џ', dzcy: 'џ',
};
/**
 * Mathematical operators and relations.
 *
 * Aliases for the same character are grouped on one row. Every name appears
 * exactly once: an object literal silently keeps only the last value of a
 * repeated key, so repeats are a maintenance hazard even when the values
 * happen to agree.
 *
 * Some names here are also defined in other category maps with the same
 * value (e.g. `times`, `nbsp`, `verbar`).
 *
 * @type {Record<string, string>}
 */
export const MATH = {
  // Basic arithmetic
  plus: '+',
  minus: '−',
  mnplus: '∓', mp: '∓',
  pm: '±',
  times: '×',
  div: '÷', divide: '÷',
  sdot: '⋅',

  // Stars, asterisks and small operators
  star: '☆',
  starf: '★', bigstar: '★',
  lowast: '∗',
  ast: '*', midast: '*',
  compfn: '∘', smallcircle: '∘',
  bullet: '•', bull: '•',
  nbsp: '\u00a0',
  hellip: '…', mldr: '…',

  // Primes
  prime: '′',
  Prime: '″',
  tprime: '‴',
  bprime: '‵', backprime: '‵',

  // Dotted and signed plus/minus
  minusd: '∸', dotminus: '∸',
  plusdo: '∔', dotplus: '∔',
  plusmn: '±',
  minusplus: '∓',

  // Set difference
  setminus: '∖', smallsetminus: '∖', Backslash: '∖', setmn: '∖', ssetmn: '∖',

  // Bars, colons and ratios
  lowbar: '_',
  verbar: '|', vert: '|', VerticalLine: '|',
  colon: ':',
  Colon: '∷', Proportion: '∷',
  ratio: '∶',

  // Equality, equivalence and similarity
  equals: '=',
  ne: '≠',
  nequiv: '≢',
  equiv: '≡', Congruent: '≡',
  sim: '∼', thicksim: '∼', thksim: '∼',
  sime: '≃', simeq: '≃', TildeEqual: '≃',
  asymp: '≈', approx: '≈', thickapprox: '≈', thkap: '≈', TildeTilde: '≈',
  ncong: '≇',
  cong: '≅', TildeFullEqual: '≅',
  asympeq: '≍', CupCap: '≍',
  bump: '≎', Bumpeq: '≎', HumpDownHump: '≎',
  bumpe: '≏', bumpeq: '≏', HumpEqual: '≏',

  // Ordering
  le: '≤', LessEqual: '≤',
  ge: '≥', GreaterEqual: '≥',
  lesseqgtr: '⋚',
  lesseqqgtr: '⪋',
  greater: '>',
  less: '<',
};
/**
 * Advanced mathematical operators: set theory, logic, calculus, geometry
 * and negated relations.
 *
 * Aliases for the same character are grouped on one row, and every name
 * appears exactly once. Multi-code-point values (base character plus a
 * combining overlay) are written as escapes so the combining mark cannot be
 * lost or reordered by an editor.
 *
 * Some names here are also defined in other category maps with the same
 * value (e.g. `minus`, `sim`, `ratio`).
 *
 * @type {Record<string, string>}
 */
export const MATH_ADVANCED = {
  // Hebrew letter-like symbols
  alefsym: 'ℵ', aleph: 'ℵ',
  beth: 'ℶ',
  gimel: 'ℷ',
  daleth: 'ℸ',

  // Quantifiers and differentials
  forall: '∀', ForAll: '∀',
  part: '∂', PartialD: '∂',
  exist: '∃', Exists: '∃',
  nexist: '∄', nexists: '∄',

  // Sets and membership
  empty: '∅', emptyset: '∅', emptyv: '∅', varnothing: '∅',
  nabla: '∇', Del: '∇',
  isin: '∈', isinv: '∈', in: '∈', Element: '∈',
  notin: '∉', notinva: '∉',
  ni: '∋', niv: '∋', SuchThat: '∋', ReverseElement: '∋',
  notni: '∌', notniva: '∌',

  // Big operators
  prod: '∏', Product: '∏',
  coprod: '∐', Coproduct: '∐',
  sum: '∑', Sum: '∑',

  // Arithmetic operators
  minus: '−',
  mp: '∓',
  plusdo: '∔', dotplus: '∔',
  setminus: '∖',
  lowast: '∗',
  radic: '√', Sqrt: '√',

  // Proportionality and infinity
  prop: '∝', propto: '∝', Proportional: '∝', varpropto: '∝',
  infin: '∞',
  infintie: '⧝',

  // Angles, bars and parallels
  ang: '∠', angle: '∠',
  angmsd: '∡', measuredangle: '∡',
  angsph: '∢',
  mid: '∣', VerticalBar: '∣',
  nmid: '∤', nsmid: '∤',
  npar: '∦',
  parallel: '∥', spar: '∥',
  nparallel: '∦', nspar: '∦',

  // Logic and set operations
  and: '∧', wedge: '∧',
  or: '∨', vee: '∨',
  cap: '∩',
  cup: '∪',

  // Integrals
  int: '∫', Integral: '∫',
  conint: '∮', ContourIntegral: '∮',
  Conint: '∯', DoubleContourIntegral: '∯',
  Cconint: '∰',

  // Therefore / because
  there4: '∴', therefore: '∴', Therefore: '∴',
  becaus: '∵', because: '∵', Because: '∵',

  // Ratios and dotted operators
  ratio: '∶',
  Proportion: '∷',
  minusd: '∸', dotminus: '∸',
  mDDot: '∺',
  homtht: '∻',

  // Tilde operators
  sim: '∼',
  bsimg: '∽', backsim: '∽',
  ac: '∾', mstpos: '∾',
  acd: '∿',
  VerticalTilde: '≀', wr: '≀', wreath: '≀',

  // Negated relations
  nsime: '≄', nsimeq: '≄',
  ncong: '≇',
  simne: '≆',
  ncongdot: '\u2a6d\u0338', // ⩭ + combining long solidus overlay
  ngsim: '≵',
  nsim: '≁',
  napprox: '≉', nap: '≉',
  ngeq: '≱', nge: '≱',
  nleq: '≰', nle: '≰',
  ngtr: '≯', ngt: '≯',
  nless: '≮', nlt: '≮',
  nprec: '⊀', npr: '⊀',
  nsucc: '⊁', nsc: '⊁',
};
/**
 * Arrows and harpoons.
 *
 * Aliases for the same character are grouped on one row, and every name
 * appears exactly once. Entity names are case-sensitive: e.g. `larr`,
 * `lArr` and `Larr` are three different arrows.
 *
 * @type {Record<string, string>}
 */
export const ARROWS = {
  // Single arrows
  larr: '←', leftarrow: '←', LeftArrow: '←',
  uarr: '↑', uparrow: '↑', UpArrow: '↑',
  rarr: '→', rightarrow: '→', RightArrow: '→',
  darr: '↓', downarrow: '↓', DownArrow: '↓',
  harr: '↔', leftrightarrow: '↔', LeftRightArrow: '↔',
  varr: '↕', updownarrow: '↕', UpDownArrow: '↕',

  // Diagonal arrows
  nwarr: '↖', nwarrow: '↖', UpperLeftArrow: '↖',
  nearr: '↗', nearrow: '↗', UpperRightArrow: '↗',
  searr: '↘', searrow: '↘', LowerRightArrow: '↘',
  swarr: '↙', swarrow: '↙', LowerLeftArrow: '↙',

  // Double and triple arrows
  lArr: '⇐', Leftarrow: '⇐',
  uArr: '⇑', Uparrow: '⇑',
  rArr: '⇒', Rightarrow: '⇒',
  dArr: '⇓', Downarrow: '⇓',
  hArr: '⇔', Leftrightarrow: '⇔', iff: '⇔',
  vArr: '⇕', Updownarrow: '⇕',
  lAarr: '⇚', Lleftarrow: '⇚',
  rAarr: '⇛', Rrightarrow: '⇛',

  // Paired arrows and harpoons
  lrarr: '⇆', leftrightarrows: '⇆',
  rlarr: '⇄', rightleftarrows: '⇄',
  lrhar: '⇋', leftrightharpoons: '⇋', ReverseEquilibrium: '⇋',
  rlhar: '⇌', rightleftharpoons: '⇌', Equilibrium: '⇌',
  udarr: '⇅', UpArrowDownArrow: '⇅',
  duarr: '⇵', DownArrowUpArrow: '⇵',
  llarr: '⇇', leftleftarrows: '⇇',
  rrarr: '⇉', rightrightarrows: '⇉',
  ddarr: '⇊', downdownarrows: '⇊',

  // Harpoons
  har: '↽', lhard: '↽', leftharpoondown: '↽',
  lharu: '↼', leftharpoonup: '↼',
  rhard: '⇁', rightharpoondown: '⇁',
  rharu: '⇀', rightharpoonup: '⇀',

  // Corner, hook and map arrows
  lsh: '↰', Lsh: '↰',
  rsh: '↱', Rsh: '↱',
  ldsh: '↲',
  rdsh: '↳',
  hookleftarrow: '↩',
  hookrightarrow: '↪',
  mapstoleft: '↤',
  mapstoup: '↥',
  map: '↦', mapsto: '↦',
  mapstodown: '↧',
  crarr: '↵',

  // Negated single arrows
  nleftarrow: '↚',
  nleftrightarrow: '↮',
  nrightarrow: '↛', nrarr: '↛',

  // Tailed, two-headed, hooked and looped arrows
  larrtl: '↢',
  rarrtl: '↣',
  leftarrowtail: '↢',
  rightarrowtail: '↣',
  twoheadleftarrow: '↞',
  twoheadrightarrow: '↠',
  Larr: '↞',
  Rarr: '↠',
  larrhk: '↩',
  rarrhk: '↪',
  larrlp: '↫', looparrowleft: '↫',
  rarrlp: '↬', looparrowright: '↬',

  // Wavy arrows
  harrw: '↭', leftrightsquigarrow: '↭',
  nrarrw: '\u219d\u0338', // ↝ + combining long solidus overlay
  rarrw: '↝', rightsquigarrow: '↝',

  // Supplemental arrows
  larrbfs: '⤟',
  rarrbfs: '⤠',
  nvHarr: '⤄',
  nvlArr: '⤂',
  nvrArr: '⤃',
  larrfs: '⤝',
  rarrfs: '⤞',
  Map: '⤅',
  larrsim: '⥳',
  rarrsim: '⥴',
  harrcir: '⥈',
  Uarrocir: '⥉',
  lurdshar: '⥊',
  ldrdhar: '⥧',
  ldrushar: '⥋',
  rdldhar: '⥩',
  lrhard: '⥭',

  // Vertical harpoons and two-headed vertical arrows
  uharr: '↾',
  uharl: '↿',
  dharr: '⇂',
  dharl: '⇃',
  Uarr: '↟',
  Darr: '↡',
  zigrarr: '⇝',

  // Double diagonal arrows
  nwArr: '⇖',
  neArr: '⇗',
  seArr: '⇘',
  swArr: '⇙',

  // Negated arrows
  nharr: '↮',
  nhArr: '⇎',
  nlarr: '↚',
  nlArr: '⇍',
  nrArr: '⇏',

  // Arrows to bar
  larrb: '⇤', LeftArrowBar: '⇤',
  rarrb: '⇥', RightArrowBar: '⇥',
};
/**
 * Geometric shapes, block elements and box-drawing characters.
 *
 * Box-drawing names encode line weight by letter case: a lower-case letter
 * means a single line in that direction, an upper-case letter a double line
 * (d/D = down, u/U = up, l/L = left, r/R = right, h/H = horizontal,
 * v/V = vertical). So `boxdL` is "down single, left double" (U+2555) and
 * `boxDl` is "down double, left single" (U+2556). Those two are written as
 * escapes because the glyphs are easy to confuse.
 *
 * @type {Record<string, string>}
 */
export const SHAPES = {
  // Squares and rectangles
  square: '□', Square: '□', squ: '□',
  squf: '▪', squarf: '▪', blacksquar: '▪', blacksquare: '▪', FilledVerySmallSquare: '▪',
  blk34: '▓',
  blk12: '▒',
  blk14: '░',
  block: '█',
  srect: '▭', rect: '▭',
  sdot: '⋅',
  sdotb: '⊡', dotsquare: '⊡',

  // Triangles
  triangle: '▵', tri: '▵', trine: '▵', utri: '▵',
  triangledown: '▿', dtri: '▿', tridown: '▿',
  triangleleft: '◃', ltri: '◃',
  triangleright: '▹', rtri: '▹',
  blacktriangle: '▴', utrif: '▴',
  blacktriangledown: '▾', dtrif: '▾',
  blacktriangleleft: '◂', ltrif: '◂',
  blacktriangleright: '▸', rtrif: '▸',

  // Lozenges and circles
  loz: '◊', lozenge: '◊',
  blacklozenge: '⧫', lozf: '⧫',
  bigcirc: '◯', xcirc: '◯',
  circ: 'ˆ',
  Circle: '○', cir: '○', o: '○',

  // Bullets and leaders
  bullet: '•', bull: '•',
  hellip: '…', mldr: '…',
  nldr: '‥',

  // Box drawing: single lines
  boxh: '─', HorizontalLine: '─',
  boxv: '│',
  boxdr: '┌',
  boxdl: '┐',
  boxur: '└',
  boxul: '┘',
  boxvr: '├',
  boxvl: '┤',
  boxhd: '┬',
  boxhu: '┴',
  boxvh: '┼',

  // Box drawing: double and mixed lines
  boxH: '═',
  boxV: '║',
  boxdR: '╒',
  boxDr: '╓',
  boxDR: '╔',
  boxDl: '\u2556', // ╖ down double, left single
  boxdL: '\u2555', // ╕ down single, left double
  boxDL: '╗',
  boxuR: '╘',
  boxUr: '╙',
  boxUR: '╚',
  boxUl: '╜',
  boxuL: '╛',
  boxUL: '╝',
  boxvR: '╞',
  boxVr: '╟',
  boxVR: '╠',
  boxVl: '╢',
  boxvL: '╡',
  boxVL: '╣',
  boxHd: '╤',
  boxhD: '╥',
  boxHD: '╦',
  boxHu: '╧',
  boxhU: '╨',
  boxHU: '╩',
  boxvH: '╪',
  boxVh: '╫',
  boxVH: '╬',
};
/**
 * Punctuation marks and spacing diacritics.
 *
 * Every name appears exactly once. Many of the Latin-1 names are also
 * defined in other category maps with the same value (e.g. `copy`, `nbsp`).
 *
 * @type {Record<string, string>}
 */
export const PUNCTUATION = {
  // Latin-1 punctuation and symbols (invisible characters are escapes)
  excl: '!',
  iexcl: '¡',
  brvbar: '¦',
  sect: '§',
  uml: '¨',
  copy: '©',
  ordf: 'ª',
  laquo: '«',
  not: '¬',
  shy: '\u00ad',
  reg: '®',
  macr: '¯',
  deg: '°',
  plusmn: '±',
  sup2: '²',
  sup3: '³',
  acute: '´',
  micro: 'µ',
  para: '¶',
  middot: '·',
  cedil: '¸',
  sup1: '¹',
  ordm: 'º',
  raquo: '»',
  frac14: '¼',
  frac12: '½',
  frac34: '¾',
  iquest: '¿',
  nbsp: '\u00a0',

  // ASCII punctuation
  comma: ',',
  period: '.',
  colon: ':',
  semi: ';',
  vert: '|',
  Verbar: '‖',
  verbar: '|',

  // Spacing diacritics
  dblac: '˝',
  circ: 'ˆ',
  caron: 'ˇ',
  breve: '˘',
  dot: '˙',
  ring: '˚',
  ogon: '˛',
  tilde: '˜',
  DiacriticalGrave: '`',
  DiacriticalAcute: '´',
  DiacriticalTilde: '˜',
  DiacriticalDot: '˙',
  DiacriticalDoubleAcute: '˝',
  grave: '`',
};
/**
 * Currency symbols.
 *
 * Every name appears exactly once.
 *
 * NOTE(review): `inr`, `af`, `birr`, `peso`, `rub`, `won` and `yuan` look
 * like project-specific names rather than standard HTML entity names, and
 * `cedil` (a diacritic, not a currency sign) is also defined in other maps
 * with the same value. All are kept because this map is an exported name.
 *
 * @type {Record<string, string>}
 */
export const CURRENCY = {
  cent: '¢',
  pound: '£',
  curren: '¤',
  yen: '¥',
  euro: '€',
  dollar: '$',
  fnof: 'ƒ',
  inr: '₹',
  af: '؋',
  birr: 'ብር',
  peso: '₱',
  rub: '₽',
  won: '₩',
  yuan: '¥',
  cedil: '¸',
};
/**
 * Vulgar fractions and the fraction slash.
 *
 * Names follow the pattern `frac<numerator><denominator>`; `half` is an
 * alias for `frac12`.
 *
 * @type {Record<string, string>}
 */
export const FRACTIONS = {
  // Halves and thirds
  frac12: '½', half: '½',
  frac13: '⅓',
  // Quarters to eighths (unit fractions)
  frac14: '¼',
  frac15: '⅕',
  frac16: '⅙',
  frac18: '⅛',
  // Non-unit fractions
  frac23: '⅔',
  frac25: '⅖',
  frac34: '¾',
  frac35: '⅗',
  frac38: '⅜',
  frac45: '⅘',
  frac56: '⅚',
  frac58: '⅝',
  frac78: '⅞',
  // Fraction slash
  frasl: '⁄',
};
/**
 * Miscellaneous technical, circled/boxed-operator and turnstile symbols.
 *
 * Aliases for the same character are grouped on one row, and every name
 * appears exactly once.
 *
 * The circled ring operator (U+229A) is named `ocir`, not `ocirc`:
 * `ocirc` is the Latin letter 'ô' in LATIN_ACCENTS, and because this map is
 * spread last into ALL_ENTITIES a conflicting `ocirc` here would override
 * the letter there.
 *
 * Multi-code-point values (base character plus U+20D2 combining long
 * vertical line overlay) are written as escapes so the combining mark
 * cannot be lost or reordered by an editor.
 *
 * @type {Record<string, string>}
 */
export const MISC_SYMBOLS = {
  // Trademark and technical symbols
  trade: '™', TRADE: '™',
  telrec: '⌕',
  target: '⌖',

  // Corner brackets
  ulcorn: '⌜', ulcorner: '⌜',
  urcorn: '⌝', urcorner: '⌝',
  dlcorn: '⌞', llcorner: '⌞',
  drcorn: '⌟', lrcorner: '⌟',
  intercal: '⊺', intcal: '⊺',

  // Circled operators
  oplus: '⊕', CirclePlus: '⊕',
  ominus: '⊖', CircleMinus: '⊖',
  otimes: '⊗', CircleTimes: '⊗',
  osol: '⊘',
  odot: '⊙', CircleDot: '⊙',
  oast: '⊛', circledast: '⊛',
  odash: '⊝', circleddash: '⊝',
  ocir: '⊚', circledcirc: '⊚',

  // Boxed operators
  boxplus: '⊞', plusb: '⊞',
  boxminus: '⊟', minusb: '⊟',
  boxtimes: '⊠', timesb: '⊠',
  boxdot: '⊡', sdotb: '⊡',

  // Logic and lattice operators
  veebar: '⊻',
  vee: '∨',
  barvee: '⊽',
  and: '∧', wedge: '∧',
  Cap: '⋒',
  Cup: '⋓',
  Fork: '⋔', pitchfork: '⋔',
  epar: '⋕',
  ltlarr: '⥶',

  // Relations with a vertical-line overlay
  nvap: '\u224d\u20d2',
  nvsim: '\u223c\u20d2',
  nvge: '\u2265\u20d2',
  nvle: '\u2264\u20d2',
  nvlt: '<\u20d2',
  nvgt: '>\u20d2',
  nvltrie: '\u22b4\u20d2',
  nvrtrie: '\u22b5\u20d2',

  // Turnstiles
  Vdash: '⊩',
  dashv: '⊣',
  vDash: '⊨',
  Vvdash: '⊪',
  nvdash: '⊬',
  nvDash: '⊭',
  nVdash: '⊮',
  nVDash: '⊯',
};
/**
 * Every category map merged into a single lookup table (use this when you
 * need the full set rather than selected categories).
 *
 * Maps are copied in the order listed, so when the same entity name occurs
 * in more than one category the value from the later map wins.
 *
 * @type {Record<string, string>}
 */
export const ALL_ENTITIES = Object.assign(
  {},
  BASIC_LATIN,
  LATIN_ACCENTS,
  LATIN_EXTENDED,
  GREEK,
  CYRILLIC,
  MATH,
  MATH_ADVANCED,
  ARROWS,
  SHAPES,
  PUNCTUATION,
  CURRENCY,
  FRACTIONS,
  MISC_SYMBOLS,
);
/**
 * The five predefined XML entities.
 * @type {Record<string, string>}
 */
export const XML = {
  amp: '&',
  apos: '\'',
  gt: '>',
  lt: '<',
  quot: '"',
};
/**
 * A small set of frequently used HTML entities.
 *
 * Values are written as escapes so that invisible or look-alike characters
 * stay unambiguous; the trailing comment shows the rendered glyph.
 *
 * @type {Record<string, string>}
 */
export const COMMON_HTML = {
  // Spacing and legal marks
  nbsp: '\u00a0', // no-break space
  copy: '\u00a9', // ©
  reg: '\u00ae', // ®
  trade: '\u2122', // ™

  // Dashes and ellipsis
  mdash: '\u2014', // —
  ndash: '\u2013', // –
  hellip: '\u2026', // …

  // Quotation marks
  laquo: '\u00ab', // «
  raquo: '\u00bb', // »
  lsquo: '\u2018', // ‘
  rsquo: '\u2019', // ’
  ldquo: '\u201c', // “
  rdquo: '\u201d', // ”

  // Text symbols
  bull: '\u2022', // •
  para: '\u00b6', // ¶
  sect: '\u00a7', // §
  deg: '\u00b0', // °

  // Fractions
  frac12: '\u00bd', // ½
  frac14: '\u00bc', // ¼
  frac34: '\u00be', // ¾
};
// ---------------------------------------------------------------------------
// Note: NUMERIC_ENTITIES (&#NNN; / &#xHH;) are handled by the scanner directly
// via String.fromCodePoint() without any map lookup.
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Built-in named entity map (name → replacement string)
// No regex, no {regex,val} objects — just flat key/value pairs.
// ---------------------------------------------------------------------------
import { XML as DEFAULT_XML_ENTITIES } from "./entities.js"
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Characters that are rejected anywhere inside an entity name.
const SPECIAL_CHARS = new Set([
  '!', '?', '\\', '/', '[', ']', '$', '%', '{', '}',
  '^', '&', '*', '(', ')', '<', '>', '|', '+',
]);

/**
 * Check an entity name against the blocked-character list.
 *
 * A leading '#' is rejected (a '#' elsewhere in the name is accepted), as is
 * any character from SPECIAL_CHARS at any position. The first offending
 * character, scanning left to right, is the one reported.
 *
 * @param {string} name
 * @returns {string} the same name, when it passes
 * @throws {Error} when the name starts with '#' or contains a blocked character
 */
function validateEntityName(name) {
  if (name[0] === '#') {
    throw new Error(`[EntityReplacer] Invalid character '#' in entity name: "${name}"`);
  }

  const blocked = [...name].find((candidate) => SPECIAL_CHARS.has(candidate));
  if (blocked !== undefined) {
    throw new Error(`[EntityReplacer] Invalid character '${blocked}' in entity name: "${name}"`);
  }

  return name;
}
/**
 * Flatten any number of entity maps into one prototype-less name→string map.
 *
 * Each entry may be:
 * - a plain string: { amp: '&' }
 * - a legacy wrapper whose `val` is a string: { lt: { regex: /.../, val: '<' } }
 *   (only `val` is read; `regex` / `regx` are ignored)
 *
 * Anything else — numbers, null, wrappers whose `val` is a function or other
 * non-string — is dropped. Falsy maps are skipped, and later maps override
 * earlier ones on a name collision.
 *
 * Values are copied verbatim: this function does not filter on '&', so a
 * caller that needs such a guard has to apply it itself.
 *
 * @param {...object} maps
 * @returns {Record<string, string>}
 */
function mergeEntityMaps(...maps) {
  const merged = Object.create(null);

  for (const source of maps) {
    if (!source) continue;

    for (const [name, entry] of Object.entries(source)) {
      if (typeof entry === 'string') {
        merged[name] = entry;
        continue;
      }
      if (entry === null || typeof entry !== 'object') continue;

      // Legacy {regex,val} / {regx,val} wrapper: keep string vals only.
      const legacyValue = entry.val;
      if (typeof legacyValue === 'string') {
        merged[name] = legacyValue;
      }
    }
  }

  return merged;
}
// ---------------------------------------------------------------------------
// applyLimitsTo helpers
// ---------------------------------------------------------------------------
// Limit tiers: which class of entity an expansion is attributed to.
const LIMIT_TIER_EXTERNAL = 'external'; // input/runtime + persistent external maps
const LIMIT_TIER_BASE = 'base'; // DEFAULT_XML_ENTITIES + namedEntities (system) maps
const LIMIT_TIER_ALL = 'all'; // every entity regardless of tier

/**
 * Normalise the `applyLimitsTo` option into a Set of tier names.
 *
 * - an array is turned into a Set as-is (its members are not validated, and
 *   an empty array yields an empty Set)
 * - 'all' and 'base' each yield a one-element Set
 * - everything else — 'external', a missing value, or an unrecognised
 *   value — yields the 'external' tier
 *
 * @param {string|string[]|undefined} raw
 * @returns {Set<string>}
 */
function parseLimitTiers(raw) {
  if (Array.isArray(raw)) {
    return new Set(raw);
  }

  switch (raw) {
    case LIMIT_TIER_ALL:
      return new Set([LIMIT_TIER_ALL]);
    case LIMIT_TIER_BASE:
      return new Set([LIMIT_TIER_BASE]);
    default:
      return new Set([LIMIT_TIER_EXTERNAL]);
  }
}
// ---------------------------------------------------------------------------
// EntityDecoder
// ---------------------------------------------------------------------------
/**
 * Single-pass, zero-regex entity decoder for XML/HTML content.
 *
 * Algorithm: jump from '&' to '&', read up to the closing ';', resolve the
 * token via a map or direct code-point conversion, collect output chunks and
 * join them once at the end. Replacement text is never re-scanned, so an
 * entity value cannot trigger further expansion within the same call.
 *
 * Entity lookup priority (highest → lowest):
 *   1. input / runtime (DOCTYPE entities for the current document)
 *   2. persistent external (survive across documents)
 *   3. base named map (DEFAULT_XML_ENTITIES + user-supplied namedEntities)
 *
 * Both input and external resolve as the 'external' tier for limit purposes.
 * Base map entities resolve as the 'base' tier.
 *
 * Numeric / hex references (&#NNN; / &#xHH;) are resolved directly via
 * String.fromCodePoint() — no map needed. They count as 'base' tier.
 *
 * Error messages use the historical "[EntityReplacer]" prefix.
 *
 * @example
 * const decoder = new EntityDecoder({ namedEntities: COMMON_HTML });
 * decoder.setExternalEntities({ brand: 'Acme' });
 *
 * const instance = decoder.reset();
 * instance.addInputEntities({ version: '1.0' });
 * instance.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 */
export default class EntityDecoder {
  /**
   * @param {object} [options]
   * @param {object|null} [options.namedEntities] — extra named entities merged into base map
   * @param {number} [options.maxTotalExpansions=0] — 0 = unlimited
   * @param {number} [options.maxExpandedLength=0] — 0 = unlimited
   * @param {'external'|'base'|'all'|string[]} [options.applyLimitsTo='external']
   *   Which entity tiers count against the security limits:
   *   - 'external' (default) — only input/runtime + persistent external entities
   *   - 'base' — only DEFAULT_XML_ENTITIES + namedEntities (and numeric references)
   *   - 'all' — every entity regardless of tier
   *   - string[] — explicit combination, e.g. ['external', 'base']
   * @param {boolean} [options.numericAllowed=true]
   *   When false, tokens starting with '#' are not treated as numeric
   *   references; they go through the named lookup instead.
   * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
   *   Called with the decoded string and the original input at the end of
   *   every decode() that reaches the scan; its return value is what decode()
   *   returns. Defaults to returning the decoded string unchanged.
   */
  constructor(options = {}) {
    // Tolerate an explicit `null` as well as an omitted argument.
    const opts = options ?? {};

    this._maxTotalExpansions = opts.maxTotalExpansions || 0;
    this._maxExpandedLength = opts.maxExpandedLength || 0;
    this._postCheck = typeof opts.postCheck === 'function' ? opts.postCheck : (r) => r;
    this._limitTiers = parseLimitTiers(opts.applyLimitsTo ?? LIMIT_TIER_EXTERNAL);
    this._numericAllowed = opts.numericAllowed ?? true;

    // Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Not modified after construction.
    this._baseMap = mergeEntityMaps(DEFAULT_XML_ENTITIES, opts.namedEntities || null);

    // Persistent external entities — survive across documents.
    // Stored as a separate map so reset() never touches them.
    /** @type {Record<string, string>} */
    this._externalMap = Object.create(null);

    // Input / runtime entities — current document only, wiped on reset().
    /** @type {Record<string, string>} */
    this._inputMap = Object.create(null);

    // Per-document counters
    this._totalExpansions = 0;
    this._expandedLength = 0;
  }

  // -------------------------------------------------------------------------
  // Persistent external entity registration
  // -------------------------------------------------------------------------

  /**
   * Replace the full set of persistent external entities.
   * All keys are validated before anything is stored — throws on invalid
   * characters and leaves the previous set untouched in that case.
   * @param {Record<string, string | { regex?: RegExp, val: string }>} map
   * @throws {Error} when a key fails validateEntityName
   */
  setExternalEntities(map) {
    if (map) {
      for (const key of Object.keys(map)) {
        validateEntityName(key);
      }
    }
    this._externalMap = mergeEntityMaps(map);
  }

  /**
   * Add a single persistent external entity.
   * The value is stored only when it is a string that does not contain '&';
   * any other value is silently ignored.
   * @param {string} key
   * @param {string} value
   * @throws {Error} when the key fails validateEntityName
   */
  addExternalEntity(key, value) {
    validateEntityName(key);
    if (typeof value === 'string' && !value.includes('&')) {
      this._externalMap[key] = value;
    }
  }

  // -------------------------------------------------------------------------
  // Input / runtime entity registration (per document)
  // -------------------------------------------------------------------------

  /**
   * Inject DOCTYPE entities for the current document, replacing any
   * previously injected input entities.
   * Also resets per-document expansion counters.
   * @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string }>} map
   */
  addInputEntities(map) {
    this._totalExpansions = 0;
    this._expandedLength = 0;
    this._inputMap = mergeEntityMaps(map);
  }

  // -------------------------------------------------------------------------
  // Per-document reset
  // -------------------------------------------------------------------------

  /**
   * Wipe input/runtime entities and reset counters.
   * Call this before processing each new document.
   * @returns {this}
   */
  reset() {
    this._inputMap = Object.create(null);
    this._totalExpansions = 0;
    this._expandedLength = 0;
    return this;
  }

  // -------------------------------------------------------------------------
  // Primary API
  // -------------------------------------------------------------------------

  /**
   * Replace all entity references in `str` in a single pass.
   *
   * References that cannot be resolved (unknown name, invalid number, no
   * ';' within the scan window) are left in the output untouched.
   * Non-string and empty inputs are returned as-is without calling postCheck.
   *
   * @param {string} str
   * @returns {string}
   * @throws {Error} when a configured expansion-count or expanded-length
   *   limit is exceeded
   */
  decode(str) {
    if (typeof str !== 'string' || str.length === 0) return str;

    // How far past '&' the scan looks for the closing ';'.
    const MAX_SCAN = 32;

    const original = str;
    const chunks = [];
    const len = str.length;
    let last = 0; // start of next unprocessed literal chunk

    const limitExpansions = this._maxTotalExpansions > 0;
    const limitLength = this._maxExpandedLength > 0;
    const checkLimits = limitExpansions || limitLength;

    // No early return when there is no '&': postCheck must still run.
    let i = str.indexOf('&');
    while (i !== -1) {
      // Scan forward to ';'
      let j = i + 1;
      while (j < len && str.charCodeAt(j) !== 59 /* ';' */ && (j - i) <= MAX_SCAN) j++;

      if (j >= len || str.charCodeAt(j) !== 59) {
        // No closing ';' within window — treat '&' as literal
        i = str.indexOf('&', i + 1);
        continue;
      }

      // Raw token between '&' and ';' (exclusive)
      const token = str.slice(i + 1, j);
      if (token.length === 0) {
        i = str.indexOf('&', i + 1);
        continue;
      }

      let replacement;
      let tier; // which limit tier this entity belongs to

      if (token.charCodeAt(0) === 35 /* '#' */ && this._numericAllowed) {
        // ---- Numeric reference — base tier ----
        replacement = this._resolveNumeric(token);
        tier = LIMIT_TIER_BASE;
      } else {
        // ---- Named reference ----
        const resolved = this._resolveName(token);
        replacement = resolved?.value;
        tier = resolved?.tier;
      }

      if (replacement === undefined) {
        // Unknown entity — leave as-is, advance past '&' only
        i = str.indexOf('&', i + 1);
        continue;
      }

      // Flush literal chunk before this entity
      if (i > last) chunks.push(str.slice(last, i));
      chunks.push(replacement);
      last = j + 1; // skip past ';'
      i = str.indexOf('&', last);

      // Apply expansion limits only if this tier is being tracked
      if (checkLimits && this._tierCounts(tier)) {
        if (limitExpansions) {
          this._totalExpansions++;
          if (this._totalExpansions > this._maxTotalExpansions) {
            throw new Error(
              `[EntityReplacer] Entity expansion count limit exceeded: ` +
              `${this._totalExpansions} > ${this._maxTotalExpansions}`
            );
          }
        }
        if (limitLength) {
          // delta: replacement.length minus the raw &token; length
          // (token.length + 2 for '&' and ';'); only growth is counted.
          const delta = replacement.length - (token.length + 2);
          if (delta > 0) {
            this._expandedLength += delta;
            if (this._expandedLength > this._maxExpandedLength) {
              throw new Error(
                `[EntityReplacer] Expanded content length limit exceeded: ` +
                `${this._expandedLength} > ${this._maxExpandedLength}`
              );
            }
          }
        }
      }
    }

    // Flush trailing literal
    if (last < len) chunks.push(str.slice(last));

    // If nothing was replaced, chunks holds just the original text
    const result = chunks.length === 0 ? str : chunks.join('');
    return this._postCheck(result, original);
  }

  // -------------------------------------------------------------------------
  // Private: limit tier check
  // -------------------------------------------------------------------------

  /**
   * Returns true if a resolved entity of the given tier should count
   * against the expansion/length limits.
   * @param {string} tier — LIMIT_TIER_EXTERNAL | LIMIT_TIER_BASE
   * @returns {boolean}
   */
  _tierCounts(tier) {
    if (this._limitTiers.has(LIMIT_TIER_ALL)) return true;
    return this._limitTiers.has(tier);
  }

  // -------------------------------------------------------------------------
  // Private: entity resolution
  // -------------------------------------------------------------------------

  /**
   * Resolve a named entity token (without & and ;).
   * Priority: inputMap > externalMap > baseMap
   * Returns the resolved value tagged with its limit tier.
   *
   * @param {string} name
   * @returns {{ value: string, tier: string }|undefined}
   */
  _resolveName(name) {
    // input and external both count as 'external' tier for limit purposes —
    // they are injected at runtime and are the untrusted surface.
    // The maps are prototype-less, so `in` only ever sees registered names.
    if (name in this._inputMap) return { value: this._inputMap[name], tier: LIMIT_TIER_EXTERNAL };
    if (name in this._externalMap) return { value: this._externalMap[name], tier: LIMIT_TIER_EXTERNAL };
    if (name in this._baseMap) return { value: this._baseMap[name], tier: LIMIT_TIER_BASE };
    return undefined;
  }

  /**
   * Resolve a numeric entity token (the part after '&', including '#').
   * Handles &#NNN; and &#xHH; (case-insensitive x).
   *
   * The digits are validated strictly before conversion: parseInt on its own
   * stops at the first invalid character and accepts signs and leading
   * whitespace, which would let malformed references such as `&#65abc;` or
   * `&#+65;` decode.
   *
   * @param {string} token — e.g. '#38', '#x26', '#X26'
   * @returns {string|undefined} the character, or undefined when the token
   *   is not a well-formed reference to a code point in 0..0x10FFFF
   */
  _resolveNumeric(token) {
    const second = token.charCodeAt(1);
    const isHex = second === 120 /* 'x' */ || second === 88 /* 'X' */;
    const start = isHex ? 2 : 1; // skip '#x' or '#'

    if (start >= token.length) return undefined; // '#' or '#x' with no digits

    for (let k = start; k < token.length; k++) {
      const c = token.charCodeAt(k);
      const isDigit = c >= 48 && c <= 57; // 0-9
      const isHexLetter = isHex && ((c >= 65 && c <= 70) || (c >= 97 && c <= 102)); // A-F, a-f
      if (!isDigit && !isHexLetter) return undefined;
    }

    // Leading zeros are handled by parseInt.
    const codePoint = Number.parseInt(token.slice(start), isHex ? 16 : 10);
    if (Number.isNaN(codePoint) || codePoint > 0x10FFFF) {
      return undefined; // out of range — leave as-is
    }
    return String.fromCodePoint(codePoint);
  }
}
// EntityEncoder.js
import { trie1, trie2, trie3 } from './entityTries.js';
// Replacement strings indexed by char code — direct array access, no hashing
const XML_UNSAFE_REPLACEMENT = new Array(128);
XML_UNSAFE_REPLACEMENT[38] = '&amp;'; // &
XML_UNSAFE_REPLACEMENT[60] = '&lt;'; // <
XML_UNSAFE_REPLACEMENT[62] = '&gt;'; // >
XML_UNSAFE_REPLACEMENT[34] = '&quot;'; // "
XML_UNSAFE_REPLACEMENT[39] = '&apos;'; // '
// Typed bitmask for O(1) "is this ASCII code XML-unsafe?" check
const IS_XML_UNSAFE = new Uint8Array(128);
IS_XML_UNSAFE[38] = 1;
IS_XML_UNSAFE[60] = 1;
IS_XML_UNSAFE[62] = 1;
IS_XML_UNSAFE[34] = 1;
IS_XML_UNSAFE[39] = 1;
// Fast pre-scan: bail out immediately if nothing needs encoding
const NEEDS_PROCESSING = /[&<>"'\u0080-\uFFFF]/;
export default class EntityEncoder {
constructor(options = {}) {
this.encodeXmlSafe = options.encodeXmlSafe !== false;
this.encodeAllNamed = options.encodeAllNamed !== false;
this.maxReplacements = options.maxReplacements || 0;
this.replacementsCount = 0;
}
encode(str) {
if (typeof str !== 'string' || str.length === 0) return str;
if (!NEEDS_PROCESSING.test(str)) return str;
const maxRep = this.maxReplacements;
if (maxRep > 0 && this.replacementsCount >= maxRep) return str;
// Hoist to locals — avoids `this` property lookup inside the hot loop
const encodeXmlSafe = this.encodeXmlSafe;
const encodeAllNamed = this.encodeAllNamed;
const len = str.length;
let result = '';
let last = 0;
let i = 0;
let limitReached = false;
// ── Main loop: runs to len-2 so trie3 never needs a bounds check ────────
// The last 2 characters are handled by the tail block below.
const mainEnd = len - 2; // i <= mainEnd guarantees i+1 and i+2 are valid
while (i <= mainEnd && !limitReached) {
const c0 = str.charCodeAt(i);
// ── ASCII branch ───────────────────────────────────────────────────
if (c0 < 128) {
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
last = ++i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
// Bulk-skip: advance to the next interesting position without
// touching the outer loop overhead on every safe character
i++;
while (i <= mainEnd && !limitReached) {
const c = str.charCodeAt(i);
if (c >= 128 || (encodeXmlSafe && IS_XML_UNSAFE[c] === 1)) break;
i++;
}
}
continue;
}
// ── Non-ASCII: integer-keyed trie lookup ───────────────────────────
// No bounds checks needed for c1/c2 because i <= mainEnd guarantees
// i+1 and i+2 are both within the string.
let matchedEntity = null;
let advance = 1;
// Try 3-char match first (longest wins)
const mid3 = trie3.get(c0);
if (mid3 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const inner3 = mid3.get(c1);
if (inner3 !== undefined) {
const c2 = str.charCodeAt(i + 2);
const candidate = inner3.get(c2);
if (candidate !== undefined) { matchedEntity = candidate; advance = 3; }
}
}
// Try 2-char match
if (matchedEntity === null) {
const inner2 = trie2.get(c0);
if (inner2 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const candidate = inner2.get(c1);
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
}
}
// Try 1-char match
if (matchedEntity === null && encodeAllNamed) {
const candidate = trie1.get(c0);
if (candidate !== undefined) { matchedEntity = candidate; }
}
if (matchedEntity !== null) {
result += str.substring(last, i) + matchedEntity;
i += advance;
last = i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
}
// ── Tail: handle the last 1-2 characters (no 3-char match possible) ────
while (i < len && !limitReached) {
const c0 = str.charCodeAt(i);
if (c0 < 128) {
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
last = ++i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
continue;
}
// Non-ASCII tail — only 2-char and 1-char matches are possible here
let matchedEntity = null;
let advance = 1;
if (i + 1 < len) {
const inner2 = trie2.get(c0);
if (inner2 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const candidate = inner2.get(c1);
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
}
}
if (matchedEntity === null && encodeAllNamed) {
const candidate = trie1.get(c0);
if (candidate !== undefined) { matchedEntity = candidate; }
}
if (matchedEntity !== null) {
result += str.substring(last, i) + matchedEntity;
i += advance;
last = i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
}
// ── Flush any remaining literal suffix ────────────────────────────────
if (last < len) result += str.substring(last);
return result;
}
/**
 * Reset the cumulative replacement counter to zero.
 *
 * `encode()` increments `replacementsCount` on every replacement while its
 * replacement limit is positive, and stops replacing once the counter reaches
 * that limit. Calling this method restores the full budget for later
 * `encode()` calls. No other instance field is modified here.
 */
reset() {
this.replacementsCount = 0;
}
}
// entityTries.js
// Builds integer-keyed tries so the encoder never allocates a string object
// during lookup — every key is a plain charCode number.
//
// trie1: Map<code0, entity>
// trie2: Map<code0, Map<code1, entity>>
// trie3: Map<code0, Map<code1, Map<code2, entity>>>
import { ALL_ENTITIES } from './entities.js';
// Reverse lookup table: character sequence → "&name;".
// When several entity names share one character sequence, the name visited
// last by Object.entries() replaces the earlier ones.
const CHAR_TO_ENTITY = new Map();
Object.entries(ALL_ENTITIES).forEach(([name, chars]) => {
  CHAR_TO_ENTITY.set(chars, `&${name};`);
});

export const trie1 = new Map(); // code0 → entity string
export const trie2 = new Map(); // code0 → Map(code1 → entity string)
export const trie3 = new Map(); // code0 → Map(code1 → Map(code2 → entity string))

// Distribute every reverse mapping into the trie that matches the length
// (in UTF-16 code units) of its character sequence.
CHAR_TO_ENTITY.forEach((entity, chars) => {
  switch (chars.length) {
    case 1: {
      // A single-unit entry never conflicts with longer ones: trie1 lives in
      // its own map and the encoder only consults it after trie3 and trie2
      // have both missed.
      trie1.set(chars.charCodeAt(0), entity);
      break;
    }
    case 2: {
      const first = chars.charCodeAt(0);
      if (!trie2.has(first)) trie2.set(first, new Map());
      trie2.get(first).set(chars.charCodeAt(1), entity);
      break;
    }
    case 3: {
      const first = chars.charCodeAt(0);
      const second = chars.charCodeAt(1);
      if (!trie3.has(first)) trie3.set(first, new Map());
      const level2 = trie3.get(first);
      if (!level2.has(second)) level2.set(second, new Map());
      level2.get(second).set(chars.charCodeAt(2), entity);
      break;
    }
    default:
      // Sequences of any other length are not indexed. The original author
      // notes that no HTML5 named entity expands to more than 3 code units.
      break;
  }
});
+2
-2
{
"name": "@nodable/entities",
"version": "1.1.0",
"description": "Replace XML, HTML, External entites with security controls",
"version": "2.0.0",
"description": "Entity parser for XML, HTML, External entites with security controls",
"main": "./src/index.js",

@@ -6,0 +6,0 @@ "type": "module",

+23
-417

@@ -1,435 +0,41 @@

# `@nodable/entities`
# @nodable/entities
Standalone, zero-dependency XML/HTML entity replacement with:
Fast, zero-dependency XML/HTML entity encoder and decoder for Node.js.
- **5 entity categories** processed in a fixed, predictable order
- **Persistent vs. input entity separation** — no state leaks between documents
- **`reset()`** — clean per-document reset without cloning
- **Composable named entity groups** (HTML, currency, math, arrows, numeric refs)
- **Security limits** — cap total expansions and expanded length per document
- **Granular limit targeting** — apply limits to any subset of categories
- **`postCheck` hook** — inspect or sanitize the fully resolved string
## Install
---
## Installation
```sh
```bash
npm install @nodable/entities
```
---
## Quick start
## Quick Start
```js
import EntityReplacer from '@nodable/entities';
import { EntityEncoder, EntityDecoder, ALL_ENTITIES } from '@nodable/entities';
const replacer = new EntityReplacer({ default: true });
// Encode: plain text → entity references
const enc = new EntityEncoder();
enc.encode('Hello © 2024 & <stuff>');
// → 'Hello &copy; 2024 &amp; &lt;stuff&gt;'
replacer.replace('5 &lt; 10 &amp;&amp; x &gt; 0');
// → '5 < 10 && x > 0'
// Decode: entity references → plain text
const dec = new EntityDecoder({ namedEntities: ALL_ENTITIES });
dec.decode('Hello &copy; 2024 &amp; &lt;stuff&gt;');
// → 'Hello © 2024 & <stuff>'
```
With named entity groups:
## Performance
```js
import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
| | encode | decode |
|---|---|---|
| `entities` (npm) | 3.65 M req/s | 1.76 M req/s |
| `@nodable/entities` | 3.33 M req/s | **5.19 M req/s** |
const replacer = new EntityReplacer({
default: true,
system: { ...COMMON_HTML, ...CURRENCY_ENTITIES },
});
## Documentation
replacer.replace('&copy; 2024 &mdash; Price: &pound;9.99');
// → '© 2024 — Price: £9.99'
```
- [EntityEncoder](docs/EntityEncoder.md) — options, API, recipes
- [EntityDecoder](docs/EntityDecoder.md) — options, API, security limits, entity sets
---
## Entity Categories
Entities are processed in this fixed order — not configurable:
```
persistent input/runtime → external → system → default → amp
```
### `persistent external` — Caller-supplied configuration entities
Entities set at configuration time that survive across all documents. Never wiped by `reset()`. Set via `setExternalEntities()` or `addExternalEntity()` / `addEntity()`.
```js
const replacer = new EntityReplacer({ default: true });
replacer.setExternalEntities({ brand: 'Acme Corp', product: 'Widget Pro' });
replacer.replace('&brand; makes &product;');
// → 'Acme Corp makes Widget Pro'
```
### `input / runtime` — Per-document DOCTYPE entities
Entities injected by the parser from the document's DOCTYPE block. Stored separately from persistent entities and **wiped on every `reset()` call** so they cannot leak between documents.
Set via `addInputEntities()`. Never call this manually — `BaseOutputBuilder` calls it automatically.
### `system` — Named entity groups
Opt-in. Trusted programmer-supplied groups. Compose freely:
```js
import {
COMMON_HTML,
CURRENCY_ENTITIES,
MATH_ENTITIES,
ARROW_ENTITIES,
NUMERIC_ENTITIES,
} from '@nodable/entities';
const replacer = new EntityReplacer({
system: { ...COMMON_HTML, ...MATH_ENTITIES },
});
```
| Group | Contents |
|---------------------|----------|
| `COMMON_HTML` | `&nbsp;` `&copy;` `&reg;` `&trade;` `&mdash;` `&ndash;` `&hellip;` `&laquo;` `&raquo;` `&lsquo;` `&rsquo;` `&ldquo;` `&rdquo;` `&bull;` `&para;` `&sect;` `&deg;` `&frac12;` `&frac14;` `&frac34;` |
| `CURRENCY_ENTITIES` | `&cent;` `&pound;` `&yen;` `&euro;` `&inr;` `&curren;` `&fnof;` |
| `MATH_ENTITIES` | `&times;` `&divide;` `&plusmn;` `&minus;` `&sup2;` `&sup3;` `&permil;` `&infin;` `&sum;` `&prod;` `&radic;` `&ne;` `&le;` `&ge;` |
| `ARROW_ENTITIES` | `&larr;` `&uarr;` `&rarr;` `&darr;` `&harr;` `&lArr;` `&uArr;` `&rArr;` `&dArr;` `&hArr;` |
| `NUMERIC_ENTITIES` | `&#NNN;` decimal and `&#xHH;` hex refs — any valid Unicode code point |
### `default` — Built-in XML entities
Always on unless explicitly disabled.
| Entity | Output |
|----------|--------|
| `&lt;` | `<` |
| `&gt;` | `>` |
| `&quot;` | `"` |
| `&apos;` | `'` |
### `amp` — Final pass
`&amp;` → `&`
Processed **after all other categories** to prevent double-expansion:
- `&amp;lt;` → `&lt;` ✓ (not `<`)
- `&amp;amp;` → `&amp;` ✓ (not `&`)
---
## Constructor API
```js
const replacer = new EntityReplacer({
// Category toggles
default: true, // true (default) | false | custom EntityTable object
amp: true, // true (default) | false | null
system: false, // false (default) | true for COMMON_HTML | EntityTable object
// Security limits — 0 = unlimited
maxTotalExpansions: 0,
maxExpandedLength: 0,
// Which categories count against the limits
applyLimitsTo: 'external', // 'external' (default) | 'all' | ['external', 'system'] | ...
// Post-processing hook — fires once on the fully resolved string
postCheck: resolved => resolved, // (resolved: string, original: string) => string
});
```
---
## EntityReplacer Instance Methods
### `replace(str)`
Replace all entity references in `str`. Returns `str` unchanged (same reference) if no `&` is present — fast path.
```js
replacer.replace('Tom &amp; Jerry &lt;cartoons&gt;');
// → 'Tom & Jerry <cartoons>'
```
### `setExternalEntities(map)`
Replace the full set of **persistent** external entities. These survive across all documents and are not cleared by `reset()`.
```js
replacer.setExternalEntities({ brand: 'Acme', year: '2025' });
```
Calling this a second time replaces the entire persistent map. Values containing `&` are silently skipped.
### `addExternalEntity(key, value)`
Append a single persistent external entity without disturbing the rest.
```js
replacer.addExternalEntity('brand', 'Acme');
replacer.addExternalEntity('year', '2025');
```
### `addInputEntities(map)`
Inject **input/runtime** (DOCTYPE) entities for the current document. These are stored separately from persistent entities and wiped on the next `reset()` call. Also resets per-document expansion counters.
```js
// Called automatically by BaseOutputBuilder — no manual wiring needed.
replacer.addInputEntities(doctypeEntityMap);
```
Values containing `&` are silently skipped. Accepts pre-built `{ regex, val }` or `{ regx, val }` objects as produced by `DocTypeReader`.
### `reset()`
Reset all per-document state and return `this`.
**Clears:**
- input/runtime entities (DOCTYPE)
- `_totalExpansions` counter
- `_expandedLength` counter
**Preserves:**
- persistent external entities set via `setExternalEntities()` / `addExternalEntity()`
- all constructor config
The builder factory calls this when creating a new builder instance, ensuring each document starts clean whether or not it has a DOCTYPE.
```js
// In a builder factory:
reset() {
const builder = new MyBuilder(this.config);
builder.entityParser = this.entityVP.reset();
return builder;
}
```
---
## Document-to-Document Safety
A key design goal is that entities from one document never bleed into the next. Here's how the two categories work together:
```
Document 1 parse:
factory.reset() → evp.reset() [clears input, resets counters]
builder sees DOCTYPE → evp.addInputEntities({ version: '1.0' })
builder processes values → evp.parse('&brand; v&version;') → 'Acme v1.0'
Document 2 parse (no DOCTYPE):
factory.reset() → evp.reset() [clears &version;, resets counters]
no DOCTYPE → addInputEntities() not called
builder processes values → evp.parse('&brand; v&version;') → 'Acme v&version;'
↑ persistent &brand; works
↑ &version; is gone — correct
```
---
## Security Controls
### Expansion count limit
Caps the number of entity references that may be expanded per document.
```js
const replacer = new EntityReplacer({ maxTotalExpansions: 1000 });
```
Throws `Error` if exceeded:
> `[EntityReplacer] Entity expansion count limit exceeded: 1001 > 1000`
### Expanded length limit
Caps the total number of characters *added* by entity expansion per document.
```js
const replacer = new EntityReplacer({ maxExpandedLength: 65536 });
```
Throws `Error` if exceeded:
> `[EntityReplacer] Expanded content length limit exceeded: 65537 > 65536`
### `applyLimitsTo`
Controls which categories count against the limits.
```js
// Default — only untrusted injected entities (safest)
applyLimitsTo: 'external'
// All categories
applyLimitsTo: 'all'
// Specific combination
applyLimitsTo: ['external', 'system']
applyLimitsTo: ['external', 'default']
```
---
## `postCheck` Hook
Fires **once** on the fully resolved string, after all categories have been processed. Not called if the string is unchanged (no `&` present or no matches found).
```js
// Signature
postCheck: (resolved: string, original: string) => string
```
- `resolved` — string after all entity replacements
- `original` — the original input string before any replacement
- Must **return a string**
- To reject expansion: `return original`
- To sanitize: return a modified version of `resolved`
Examples:
```js
// Reject if expansion produces any HTML tags
postCheck: (resolved, original) =>
/<[a-z]/i.test(resolved) ? original : resolved
// Strip all tag-like content from the result
postCheck: (resolved) =>
resolved.replace(/<[^>]*>/g, '')
```
---
## Integration with the flex-xml-parser adapter
### Setup
```js
import EntityReplacer, { COMMON_HTML } from '@nodable/entities';
const evp = new EntityReplacer({
system: COMMON_HTML,
maxTotalExpansions: 500,
});
// Persistent entities — survive across all documents:
evp.setExternalEntities({ brand: 'Acme', product: 'Widget' });
// Register with the builder factory:
myBuilder.registerValueParser('entity', evp);
const parser = new XMLParser({ OutputBuilder: myBuilder });
parser.parse(xml);
```
### Constructor options
All `EntityReplacerOptions` are accepted, plus one extra:
```js
new EntityReplacer({
// All EntityReplacer options...
default: true,
system: COMMON_HTML,
maxTotalExpansions: 1000,
postCheck: (resolved, original) => resolved,
// Extra: initial persistent entity map (same as calling setExternalEntities after construction)
entities: { copy: '©', trade: '™', brand: 'Acme Corp' },
})
```
### `reset()` — called by builder factory
Reset per-document state (input entities + counters) and return `this`. The builder factory calls this each time it creates a new builder instance.
```js
// In your CompactObjBuilderFactory.reset():
reset() {
const builder = new CompactObjBuilder(this._config);
// Reset EVP for the new document:
builder.entityParser = this._entityVP.reset();
return builder;
}
```
---
## Custom Entity Tables
Pass any plain object as `default` or `system` to replace the built-in set:
```js
const myEntities = {
br: { regex: /&br;/g, val: '\n' },
tab: { regex: /&tab;/g, val: '\t' },
};
const replacer = new EntityReplacer({ default: myEntities });
replacer.replace('line1&br;line2&tab;indented');
// → 'line1\nline2\tindented'
```
Extend the built-in tables via spreading:
```js
import { DEFAULT_XML_ENTITIES } from '@nodable/entities';
const replacer = new EntityReplacer({
default: { ...DEFAULT_XML_ENTITIES, br: { regex: /&br;/g, val: '\n' } },
});
```
---
## Comparison with `entities` npm package
| Feature | `entities` pkg | `@nodable/entities` |
|------------------------------------------------|-------------------|---------------------|
| XML entity decoding | ✅ | ✅ |
| HTML entity decoding | ✅ full ~2000 | ✅ grouped, composable |
| Numeric refs with leading zeros | ✅ | ✅ |
| DOCTYPE / external entity injection | ❌ | ✅ |
| Persistent vs. input entity separation | ❌ | ✅ |
| Per-document reset via `reset()` | ❌ | ✅ |
| Expansion count limit | ❌ | ✅ |
| Expanded length limit | ❌ | ✅ |
| `applyLimitsTo` granularity | ❌ | ✅ |
| `postCheck` hook | ❌ | ✅ |
| Encoding / HTML escaping | ✅ | ❌ out of scope |
| Zero dependencies | ✅ | ✅ |
---
## TypeScript
Full TypeScript declarations are included via `index.d.ts`. No `@types/` package needed.
```ts
import EntityReplacer, {
COMMON_HTML,
EntityTable,
EntityReplacerOptions,
} from '@nodable/entities';
// EntityReplacer
const opts: EntityReplacerOptions = {
default: true,
system: COMMON_HTML,
maxTotalExpansions: 500,
postCheck: (resolved, original) =>
/<script/i.test(resolved) ? original : resolved,
};
const replacer = new EntityReplacer(opts);
replacer.setExternalEntities({ brand: 'Acme' });
replacer.reset(); // reset for new document
replacer.addInputEntities({ version: '1.0' }); // from DOCTYPE
```
## Note
This library silently skips numeric entities that are out of range. For example, `&#1114112;` is skipped.
## License
MIT
MIT

@@ -5,68 +5,104 @@ // ---------------------------------------------------------------------------

/** A function-based entity replacement value (used for numeric refs). */
export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string;
// ---------------------------------------------------------------------------
// Entity table shape
// Encoder options
// ---------------------------------------------------------------------------
/** A function-based entity replacement value (used for numeric refs). */
export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string;
export interface EntityEncoderOptions {
/**
* Whether to encode XML unsafe characters: `&`, `<`, `>`, `"`, `'`.
* @default true
*/
encodeXmlSafe?: boolean;
/** A single entity entry: a regex and its replacement value. */
export interface EntityEntry {
regex: RegExp;
val: string | EntityValFn;
/**
* Whether to encode non‑ASCII characters (e.g. `é` → `&eacute;`) using the
* built‑in named entity trie.
* @default true
*/
encodeAllNamed?: boolean;
/**
* Maximum number of replacements performed **cumulatively** across all
* `encode()` calls. `0` means unlimited.
*
* Use `reset()` to reset the internal counter.
* @default 0
*/
maxReplacements?: number;
}
/** A map of entity name → EntityEntry. */
export type EntityTable = Record<string, EntityEntry>;
// ---------------------------------------------------------------------------
// Constructor options
// EntityEncoder class
// ---------------------------------------------------------------------------
/**
* Controls which entity categories count toward the expansion limits.
* High‑performance encoder that replaces characters with XML/HTML entities.
*
* - `'external'` — only untrusted / injected entities (default, safest)
* - `'all'` — shorthand for all categories
* - `string[]` — any combination of `'external'`, `'system'`, `'default'`
* - Escapes XML unsafe characters (`&`, `<`, `>`, `"`, `'`) when `encodeXmlSafe` is true.
* - Replaces non‑ASCII characters (e.g. `é`, `©`) with named entities using
* a compact trie‑based lookup when `encodeAllNamed` is true.
* - Supports a cumulative replacement limit (`maxReplacements`) that persists
* across multiple `encode()` calls until `reset()` is called.
*
* @example
* const encoder = new EntityEncoder({ encodeXmlSafe: true, encodeAllNamed: true });
* encoder.encode('<foo>'); // "&lt;foo&gt;"
* encoder.encode('© 2025'); // "&copy; 2025"
*
* // With limit
* const limited = new EntityEncoder({ maxReplacements: 2 });
* limited.encode('<>&'); // "&lt;&gt;&" (third replacement omitted)
* limited.reset(); // reset counter
*/
export type ApplyLimitsTo = 'external' | 'all' | Array<'external' | 'system' | 'default'>;
export class EntityEncoder {
constructor(options?: EntityEncoderOptions);
/**
* Options accepted by the `EntityReplacer` constructor.
*/
export interface EntityReplacerOptions {
/**
* Built-in XML entities: `&lt;` `&gt;` `&quot;` `&apos;`
* Encode a string by replacing XML‑unsafe characters and (optionally)
* non‑ASCII characters with named entities.
*
* - `true` — use built-in table (default)
* - `false` — disable
* - `object` — use a custom table instead of the built-in set
* @default true
* If `maxReplacements` is set and the cumulative limit has been reached,
* the input string is returned unchanged.
*
* @returns Encoded string (may be identical to input if no replacements needed
* or the limit has been exhausted).
*/
default?: boolean | EntityTable | null;
encode(str: string): string;
/**
* `&amp;` → `&` final pass (always processed last to prevent double-expansion).
* @default true
* Reset the internal replacement counter.
* Does **not** change `encodeXmlSafe`, `encodeAllNamed`, or `maxReplacements`.
*/
amp?: boolean | null;
reset(): void;
}
// ---------------------------------------------------------------------------
// Constructor options for EntityDecoder (existing)
// ---------------------------------------------------------------------------
/**
* Controls which entity categories count toward the expansion limits.
*
* - `'external'` — only untrusted / injected entities (default)
* - `'base'` — only built‑in XML entities + user‑supplied `namedEntities`
* - `'all'` — all entities regardless of tier
* - `string[]` — explicit combination, e.g. `['external', 'base']`
*/
export type ApplyLimitsTo = 'external' | 'base' | 'all' | Array<'external' | 'base'>;
export interface EntityDecoderOptions {
/**
* Named entity groups (system-level, trusted).
* Extra named entities merged into the **base map** (trusted, counts as `'base'` tier).
* These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`).
* Values containing `&` are silently skipped to prevent recursive expansion.
*
* - `false` — disabled (default)
* - `true` — enables `COMMON_HTML` built-in group
* - `object` — use the supplied table (compose freely with exported groups)
*
* @example
* import { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
* new EntityReplacer({ system: { ...COMMON_HTML, ...CURRENCY_ENTITIES } });
*
* @default false
* @default null
*/
system?: boolean | EntityTable | null;
namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null;
/**
* Maximum number of entity references expanded per document.
* Maximum number of entity references expanded **per document**.
* `0` means unlimited.

@@ -78,3 +114,3 @@ * @default 0

/**
* Maximum number of characters *added* by entity expansion per document.
* Maximum number of characters **added** by entity expansion per document.
* `0` means unlimited.

@@ -86,3 +122,9 @@ * @default 0

/**
* Which entity categories count toward the expansion limits.
* Which entity tiers count toward the expansion limits.
*
* - `'external'` (default) – only input/runtime + persistent external entities
* - `'base'` – only built‑in XML + `namedEntities`
* - `'all'` – every entity regardless of tier
* - `string[]` – explicit combination, e.g. `['external', 'base']`
*
* @default 'external'

@@ -93,3 +135,3 @@ */

/**
* Hook called once on the fully resolved string (after all categories).
* Hook called once on the fully decoded string (after all replacements).
*

@@ -105,59 +147,39 @@ * - Receives `(resolved, original)` and **must return a string**.

postCheck?: ((resolved: string, original: string) => string) | null;
/**
* Whether numeric character references (`&#NNN;`, `&#xHH;`) are allowed.
* @default true
*/
numericAllowed?: boolean;
}
// ---------------------------------------------------------------------------
// EntityReplacer class
// EntityDecoder class (default export)
// ---------------------------------------------------------------------------
/**
* Standalone, zero-dependency XML/HTML entity replacer.
* Single‑pass, zero‑regex entity decoder for XML/HTML content.
*
* ## Entity categories and replacement order
* ## Entity lookup priority (highest → lowest)
* 1. **input / runtime** – injected via `addInputEntities()` (DOCTYPE per document)
* 2. **persistent external** – set via `setExternalEntities()` / `addExternalEntity()`
* 3. **base map** – built‑in XML entities + user‑supplied `namedEntities`
*
* Entities are processed in this fixed order per `replace()` call:
* 1. **persistent external** — set via `setExternalEntities()` / `addExternalEntity()`
* 2. **input / runtime** — injected via `addInputEntities()` (DOCTYPE per-document)
* 3. **system** — named entity groups (e.g. `COMMON_HTML`)
* 4. **default** — built-in XML entities (`lt`, `gt`, `apos`, `quot`)
* 5. **amp** — `&amp;` → `&` (always last)
* 6. **postCheck** — optional hook on the fully resolved string
* Numeric references (`&#NNN;`, `&#xHH;`) are resolved directly and count as the `'base'` tier.
*
* ## Lifecycle with `@nodable/flexible-xml-parser`
* @example
* const decoder = new EntityDecoder({
* namedEntities: COMMON_HTML,
* maxTotalExpansions: 100
* });
* decoder.setExternalEntities({ brand: 'Acme' });
*
* Construct once, then let the builder factory drive the lifecycle:
* decoder.addInputEntities({ version: '1.0' });
* decoder.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
*
* ```ts
* const replacer = new EntityReplacer({ default: true, system: COMMON_HTML });
* replacer.setExternalEntities({ brand: 'Acme' }); // persistent — survives all docs
*
* // Builder factory calls getInstance() when creating a new builder instance:
* const instance = replacer.getInstance();
*
* // Builder calls addInputEntities() if the document has a DOCTYPE block:
* instance.addInputEntities(doctypeEntities);
*
* // Builder calls replace() (indirectly via ValueParser) for each text node:
* instance.replace('&brand; v&version; &lt;'); // 'Acme v1.0 <'
* ```
* decoder.reset(); // clears input entities + counters, keeps external entities
*/
export default class EntityReplacer {
constructor(options?: EntityReplacerOptions);
export default class EntityDecoder {
constructor(options?: EntityDecoderOptions);
// -------------------------------------------------------------------------
// Persistent external entities (survive across documents)
// -------------------------------------------------------------------------
/**
* Replace the full set of persistent external entities.
*
* These entities survive across all documents — they are **not** wiped by
* `getInstance()`. Use them for caller-supplied entities that are fixed at
* configuration time (e.g. brand names, product codes).
*
* Calling this a second time replaces the previous persistent entity map.
*
* Values containing `&` are silently skipped to prevent recursive expansion.
*
* @param map Entity name → replacement string, or pre-built `{ regex, val }` object.
*/
setExternalEntities(

@@ -167,28 +189,4 @@ map: Record<string, string | { regex: RegExp; val: string | EntityValFn }>

/**
* Append a single persistent external entity without disturbing the rest.
*
* @param key Bare entity name without `&` / `;` — e.g. `'copy'`
* @param value Replacement string — must not contain `&`
* @throws if `key` contains regex-special characters
*/
addExternalEntity(key: string, value: string): void;
// -------------------------------------------------------------------------
// Input / runtime entities (per document, cleared by getInstance)
// -------------------------------------------------------------------------
/**
* Inject DOCTYPE (input/runtime) entities for the **current document only**.
*
* These are stored separately from persistent entities. They are wiped on
* the next `getInstance()` call so they never leak into subsequent documents.
*
* Also resets the per-document expansion counters.
*
* Accepts both plain string values and `{ regx, val }` / `{ regex, val }`
* objects as produced by `DocTypeReader`.
*
* @param map Raw entity map from the DOCTYPE reader.
*/
addInputEntities(

@@ -203,111 +201,26 @@ map: Record<

// -------------------------------------------------------------------------
// Builder factory integration
// -------------------------------------------------------------------------
/**
* Reset all per-document state and return `this`.
*
* Clears:
* - input / runtime entities (DOCTYPE)
* - `_totalExpansions` counter
* - `_expandedLength` counter
*
* Does **not** clear persistent external entities set via
* `setExternalEntities()` / `addExternalEntity()`.
*
* The builder factory calls this when creating a new builder instance,
* ensuring each document starts clean regardless of whether it has a DOCTYPE.
*
*/
reset(): this;
// -------------------------------------------------------------------------
// Primary API
// -------------------------------------------------------------------------
/**
* Replace all entity references in `str`.
* Returns `str` unchanged if it contains no `&` character (fast path).
*/
replace(str: string): string;
/**
* wrapper on replace()
*/
parse(str: string): string;
decode(str: string): string;
}
// ---------------------------------------------------------------------------
// EntitiesValueParser
// Named entity group exports (for use with `namedEntities` option)
// ---------------------------------------------------------------------------
/**
* Raw DOCTYPE entity map shape as produced by `DocTypeReader`.
* Values are either plain strings or `{ regx, val }` objects
* (note: `regx`, not `regex` — matches the reader's output field name).
*/
export type DocTypeEntityMap = Record<
string,
| string
| { regx: RegExp; val: string | EntityValFn }
| { regex: RegExp; val: string | EntityValFn }
>;
/**
* ValueParser context object passed by `@nodable/flexible-xml-parser`.
* All fields are optional; `parse()` accepts but ignores this argument.
*/
export interface ValueParserContext {
elementName?: string;
elementValue?: string;
elementType?: string;
matcher?: unknown;
isLeafNode?: boolean;
}
// ---------------------------------------------------------------------------
// Named entity group exports
// ---------------------------------------------------------------------------
/**
* ~20 most commonly needed HTML named entities.
* Includes: `&nbsp;` `&copy;` `&reg;` `&trade;` `&mdash;` `&ndash;`
* `&hellip;` `&laquo;` `&raquo;` `&lsquo;` `&rsquo;` `&ldquo;` `&rdquo;`
* `&bull;` `&para;` `&sect;` `&deg;` `&frac12;` `&frac14;` `&frac34;`
*/
export const COMMON_HTML: EntityTable;
/**
* Currency symbol entities.
* Includes: `&cent;` `&pound;` `&yen;` `&euro;` `&inr;` `&curren;` `&fnof;`
*/
export const CURRENCY_ENTITIES: EntityTable;
/**
* Mathematical operator entities.
* Includes: `&times;` `&divide;` `&plusmn;` `&minus;` `&sup2;` `&sup3;`
* `&permil;` `&infin;` `&sum;` `&prod;` `&radic;` `&ne;` `&le;` `&ge;`
*/
export const MATH_ENTITIES: EntityTable;
/**
* Arrow entities.
* Includes: `&larr;` `&uarr;` `&rarr;` `&darr;` `&harr;`
* and their double-stroke variants `&lArr;` `&uArr;` `&rArr;` `&dArr;` `&hArr;`
*/
export const ARROW_ENTITIES: EntityTable;
/**
* Numeric character reference entities.
* Handles any valid decimal `&#NNN;` and hex `&#xHH;` code point reference.
*/
export const NUMERIC_ENTITIES: EntityTable;
/** The built-in XML entity table (`lt`, `gt`, `apos`, `quot`). */
export const DEFAULT_XML_ENTITIES: EntityTable;
/** The `&amp;` entity entry used in the final expansion pass. */
export const AMP_ENTITY: EntityEntry;
export const COMMON_HTML: Record<string, string>;
export const ALL_ENTITIES: Record<string, string>;
export const XML: Record<string, string>;
export const BASIC_LATIN: Record<string, string>;
export const LATIN_ACCENTS: Record<string, string>;
export const LATIN_EXTENDED: Record<string, string>;
export const GREEK: Record<string, string>;
export const CYRILLIC: Record<string, string>;
export const MATH: Record<string, string>;
export const MATH_ADVANCED: Record<string, string>;
export const ARROWS: Record<string, string>;
export const SHAPES: Record<string, string>;
export const PUNCTUATION: Record<string, string>;
export const CURRENCY: Record<string, string>;
export const FRACTIONS: Record<string, string>;
export const MISC_SYMBOLS: Record<string, string>;

@@ -6,22 +6,25 @@ /**

*
* @example
* import EntityReplacer, { COMMON_HTML, CURRENCY_ENTITIES } from '@nodable/entities';
*
* const replacer = new EntityReplacer({
* default: true,
* system: { ...COMMON_HTML, ...CURRENCY_ENTITIES },
* });
*
* replacer.replace('Price: &pound;9.99 &mdash; &copy; 2024');
* // → 'Price: £9.99 — © 2024'
*/
export { default } from './EntityReplacer.js';
export { DEFAULT_XML_ENTITIES, AMP_ENTITY } from './EntityReplacer.js';
export { default as EntityDecoder } from './EntityDecoder.js';
export {
COMMON_HTML,
CURRENCY_ENTITIES,
MATH_ENTITIES,
ARROW_ENTITIES,
NUMERIC_ENTITIES,
} from './groups.js';
XML,
ALL_ENTITIES,
ARROWS,
BASIC_LATIN,
CURRENCY,
MATH,
MATH_ADVANCED,
CYRILLIC,
FRACTIONS,
GREEK,
LATIN_ACCENTS,
LATIN_EXTENDED,
MISC_SYMBOLS,
PUNCTUATION,
SHAPES,
} from './entities.js';
export { default as EntityEncoder } from './EntityEncoder.js';
// ---------------------------------------------------------------------------
// Built-in entity tables
// ---------------------------------------------------------------------------
/**
* Standard XML entities (`apos`, `gt`, `lt`, `quot`).
*
* `EntityReplacer#replace` applies this table after the persistent external
* and input (DOCTYPE) entities and before the system table. `&amp;` is not
* part of this table: it is deferred to its own final pass (see AMP_ENTITY).
*
* Each regex matches the named reference as well as the decimal and
* hexadecimal numeric reference for the same character, with any number of
* leading zeros.
*
* Each entry: { regex: RegExp, val: string }
*/
const DEFAULT_XML_ENTITIES = {
apos: { regex: /&(apos|#0*39|#x0*27);/g, val: "'" },
gt: { regex: /&(gt|#0*62|#x0*3[Ee]);/g, val: '>' },
lt: { regex: /&(lt|#0*60|#x0*3[Cc]);/g, val: '<' },
quot: { regex: /&(quot|#0*34|#x0*22);/g, val: '"' },
};
/**
* `&amp;` (also `&#38;` / `&#x26;`, leading zeros allowed).
* Expanded in a separate, final pass so that the `&` it produces is never
* re-read as the start of another reference (e.g. `&amp;lt;` yields `&lt;`).
*/
const AMP_ENTITY = { regex: /&(amp|#0*38|#x0*26);/g, val: '&' };
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
const SPECIAL_CHARS = new Set('!?\\\\/[]$%{}^&*()<>|+');
/**
 * Reject entity names that contain any character listed in SPECIAL_CHARS.
 *
 * @param {string} name - bare entity name (no `&` / `;`)
 * @returns {string} `name`, unchanged, when every character is acceptable
 * @throws {Error} naming the first offending character
 */
function validateEntityName(name) {
  // Spread iterates by code point, exactly like a for...of over the string.
  const offender = [...name].find((symbol) => SPECIAL_CHARS.has(symbol));
  if (offender !== undefined) {
    throw new Error(`[EntityReplacer] Invalid character '${offender}' in entity name: "${name}"`);
  }
  return name;
}
/**
 * Backslash-escape the characters `.`, `-`, `+`, `*` and `:` in `str` so the
 * result can be embedded in a RegExp source. No other characters are touched.
 *
 * @param {string} str
 * @returns {string}
 */
function escapeForRegex(str) {
  return str.replace(/[.\-+*:]/g, (ch) => `\\${ch}`);
}
/**
 * Turn a constructor option into an entity table or `null`.
 *
 * - `true`       → the built-in table
 * - `undefined`  → the built-in table only when `enabledByDefault` is set
 * - an object    → that object (caller-supplied table)
 * - anything else (`false`, `null`, other primitives, functions) → `null`
 *
 * @param {*} option
 * @param {object|null} builtIn
 * @param {boolean} [enabledByDefault=false]
 * @returns {object|null}
 */
function resolveTable(option, builtIn, enabledByDefault = false) {
  if (option === true) return builtIn;
  if (option === undefined) return enabledByDefault ? builtIn : null;
  const isCustomTable = option !== null && typeof option === 'object';
  return isCustomTable ? option : null;
}
/**
 * Normalise the `applyLimitsTo` option.
 *
 * The literal string `'all'` is passed through as-is; any other string
 * becomes a one-element Set, an array becomes a Set of its items, and every
 * other input falls back to a Set containing only `'external'`.
 *
 * @param {*} spec
 * @returns {'all' | Set<string>}
 */
function resolveApplyLimitsTo(spec) {
  if (typeof spec === 'string') {
    return spec === 'all' ? 'all' : new Set([spec]);
  }
  const categories = Array.isArray(spec) ? spec : ['external'];
  return new Set(categories);
}
/**
 * Convert a raw `name → string | { regex | regx, val }` map into the
 * `[name, { regex, val }]` pairs consumed by the replacer.
 *
 * - String values containing `&` are dropped (they could expand recursively).
 * - Other string values have their name validated and get a freshly built
 *   `/&name;/g` pattern.
 * - Object values with a defined `val` are passed through; the `regx`
 *   spelling is accepted as a fallback for `regex`.
 * - Everything else is ignored.
 *
 * NOTE(review): pass-through objects are not validated — an object that has
 * `val` but neither `regex` nor `regx` yields an entry whose `regex` is
 * `undefined`.
 *
 * @param {object} map
 * @returns {Array<[string, {regex: RegExp, val: string}]>}
 * @throws {Error} when a string-valued key fails name validation
 */
function buildEntries(map) {
  const built = [];
  for (const [name, raw] of Object.entries(map)) {
    if (typeof raw === 'string') {
      if (raw.includes('&')) continue; // would cause recursive expansion
      validateEntityName(name);
      const pattern = new RegExp('&' + escapeForRegex(name) + ';', 'g');
      built.push([name, { regex: pattern, val: raw }]);
      continue;
    }
    const isPrebuilt = raw !== null && typeof raw === 'object' && raw.val !== undefined;
    if (isPrebuilt) {
      built.push([name, { regex: raw.regex ?? raw.regx, val: raw.val }]);
    }
  }
  return built;
}
// ---------------------------------------------------------------------------
// EntityReplacer
// ---------------------------------------------------------------------------
/**
 * Standalone entity replacer for XML/HTML content.
 *
 * Entity categories:
 * - **persistent external** — configured once, survive across documents.
 *   Set via `setExternalEntities()` or built up via `addExternalEntity()`.
 * - **input / runtime** — DOCTYPE entities for the *current* document only.
 *   Injected via `addInputEntities()` and wiped by `reset()` so they never
 *   leak between documents.
 *
 * Replacement order (fixed):
 *   1. persistent external
 *   2. input / runtime (DOCTYPE)
 *   3. default (lt / gt / apos / quot)
 *   4. system (named entity groups)
 *   5. amp (&amp; final pass)
 *   6. postCheck hook
 *
 * String replacement values are always inserted literally; `$`-sequences
 * such as `$$` or `$&` have no special meaning.
 *
 * @example
 * const replacer = new EntityReplacer({ default: true, system: COMMON_HTML });
 * replacer.setExternalEntities({ brand: 'Acme' });
 *
 * // Before each document, clear per-document state:
 * const instance = replacer.reset();
 * // Register DOCTYPE entities if the document declares any:
 * instance.addInputEntities({ version: '1.0' });
 * instance.replace('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 */
export default class EntityReplacer {
  /**
   * @param {object} [options]
   * @param {boolean|object|null} [options.default=true] - `true`/omitted uses the built-in XML table, an object supplies a custom table, `false`/`null` disables the pass
   * @param {boolean|object|null} [options.amp=true] - `false`/`null` disables the final `&amp;` pass
   * @param {boolean|object|null} [options.system=false] - table of `{ regex, val }` entries, or falsy to disable
   * @param {number} [options.maxTotalExpansions=0] - 0 disables the expansion-count limit
   * @param {number} [options.maxExpandedLength=0] - 0 disables the added-length limit
   * @param {'external'|'all'|string[]} [options.applyLimitsTo='external'] - categories the limits apply to
   * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
   */
  constructor(options = {}) {
    // Tolerate an explicit `null` options bag as well as an omitted one.
    const opts = options ?? {};

    // Configuration resolved once at construction
    this._defaultTable = resolveTable(opts.default, DEFAULT_XML_ENTITIES, true);
    this._systemTable = resolveTable(opts.system, null, false);
    this._ampEnabled = opts.amp !== false && opts.amp !== null;
    this._maxTotalExpansions = opts.maxTotalExpansions || 0;
    this._maxExpandedLength = opts.maxExpandedLength || 0;
    this._applyLimitsTo = resolveApplyLimitsTo(opts.applyLimitsTo ?? 'external');
    this._postCheck = typeof opts.postCheck === 'function' ? opts.postCheck : (r) => r;

    // Pre-computed per-category limit flags
    const limited = this._applyLimitsTo;
    const isLimited = (category) => limited === 'all' || (limited instanceof Set && limited.has(category));
    this._limitExternal = isLimited('external');
    this._limitSystem = isLimited('system');
    this._limitDefault = isLimited('default');

    // Entry arrays derived from the configured tables
    this._defaultEntries = this._defaultTable ? Object.entries(this._defaultTable) : [];
    this._systemEntries = this._systemTable ? Object.entries(this._systemTable) : [];

    // Persistent external entities — survive across documents
    /** @type {Array<[string, {regex: RegExp, val: string}]>} */
    this._persistentEntries = [];

    // Input / runtime entities — current document only, cleared by reset()
    /** @type {Array<[string, {regex: RegExp, val: string}]>} */
    this._inputEntries = [];

    // Per-document counters — cleared by reset() and addInputEntities()
    this._totalExpansions = 0;
    this._expandedLength = 0;
  }

  // -------------------------------------------------------------------------
  // Persistent external entity registration (survives across documents)
  // -------------------------------------------------------------------------

  /**
   * Replace the full set of persistent external entities.
   * These are never wiped between documents.
   *
   * @param {Record<string, string | { regex: RegExp, val: string | Function }>} map
   */
  setExternalEntities(map) {
    this._persistentEntries = buildEntries(map);
  }

  /**
   * Add a single persistent external entity without disturbing existing ones.
   * The entity is silently ignored when `value` is not a string or contains
   * `&` (which could expand recursively).
   *
   * @param {string} key — bare entity name, e.g. `'copy'`
   * @param {string} value — replacement string, e.g. `'©'`
   * @throws {Error} when `key` contains a disallowed character
   */
  addExternalEntity(key, value) {
    validateEntityName(key);
    if (typeof value === 'string' && value.indexOf('&') === -1) {
      this._persistentEntries.push([key, {
        regex: new RegExp('&' + escapeForRegex(key) + ';', 'g'),
        val: value,
      }]);
    }
  }

  // -------------------------------------------------------------------------
  // Input / runtime entity registration (per document)
  // -------------------------------------------------------------------------

  /**
   * Inject DOCTYPE (input/runtime) entities for the current document,
   * replacing any previously injected set. They are stored separately from
   * persistent entities and are wiped by the next `reset()` call.
   *
   * Also resets the per-document expansion counters.
   *
   * @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string | Function }>} map
   */
  addInputEntities(map) {
    this._totalExpansions = 0;
    this._expandedLength = 0;
    this._inputEntries = buildEntries(map);
  }

  // -------------------------------------------------------------------------
  // reset — per-document integration point
  // -------------------------------------------------------------------------

  /**
   * Reset all per-document state (input entities + expansion counters).
   * Persistent external entities are kept.
   *
   * @returns {this} the same instance, for chaining
   */
  reset() {
    this._inputEntries = [];
    this._totalExpansions = 0;
    this._expandedLength = 0;
    return this;
  }

  // -------------------------------------------------------------------------
  // Primary API
  // -------------------------------------------------------------------------

  /**
   * Replace all entity references in `str`.
   *
   * Processing order:
   *   1. persistent external
   *   2. input / runtime (DOCTYPE)
   *   3. default (lt/gt/apos/quot)
   *   4. system
   *   5. amp
   *   6. postCheck hook
   *
   * Non-string and empty inputs, and strings without `&`, are returned
   * untouched (the postCheck hook is not invoked for them).
   *
   * @param {string} str
   * @returns {string}
   * @throws {Error} when a configured expansion limit is exceeded
   */
  replace(str) {
    if (typeof str !== 'string' || str.length === 0) return str;
    if (str.indexOf('&') === -1) return str; // fast path
    const original = str;

    // 1. Persistent external entities
    if (this._persistentEntries.length > 0) {
      str = this._applyEntries(str, this._persistentEntries, this._limitExternal);
    }
    // 2. Input / runtime entities (DOCTYPE) — limited under the 'external' category
    if (this._inputEntries.length > 0 && str.indexOf('&') !== -1) {
      str = this._applyEntries(str, this._inputEntries, this._limitExternal);
    }
    // 3. Default XML entities (lt / gt / apos / quot)
    if (this._defaultEntries.length > 0 && str.indexOf('&') !== -1) {
      str = this._applyEntries(str, this._defaultEntries, this._limitDefault);
    }
    // 4. System (named groups)
    if (this._systemEntries.length > 0 && str.indexOf('&') !== -1) {
      str = this._applyEntries(str, this._systemEntries, this._limitSystem);
    }
    // 5. &amp; — always last
    if (this._ampEnabled && str.indexOf('&') !== -1) {
      str = str.replace(AMP_ENTITY.regex, AMP_ENTITY.val);
    }
    // 6. postCheck
    str = this._postCheck(str, original);
    return str;
  }

  /**
   * Alias of `replace()`.
   *
   * @param {string} val
   * @returns {string}
   */
  parse(val) {
    return this.replace(val);
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Apply every entry of `entries` to `str` in order, stopping early once no
   * `&` remains. When `track` is set, the configured expansion-count and
   * added-length limits are enforced against the per-document counters.
   *
   * @param {string} str
   * @param {Array<[string, {regex: RegExp, val: string | Function}]>} entries
   * @param {boolean} track - whether limits apply to this category
   * @returns {string}
   * @throws {Error} when a limit is exceeded
   */
  _applyEntries(str, entries, track) {
    const limitExpansions = track && this._maxTotalExpansions > 0;
    const limitLength = track && this._maxExpandedLength > 0;

    for (let i = 0; i < entries.length; i++) {
      if (str.indexOf('&') === -1) break;
      const { regex, val } = entries[i][1];
      const before = str.length;
      let count = 0;

      // Always go through a replacer function: a string passed directly to
      // String.prototype.replace would have `$$`, `$&`, `` $` `` etc. expanded
      // as replacement patterns, corrupting values that contain `$`.
      str = str.replace(regex, (...args) => {
        count++;
        return typeof val === 'function' ? val(...args) : val;
      });

      if (limitExpansions && count > 0) {
        this._totalExpansions += count;
        if (this._totalExpansions > this._maxTotalExpansions) {
          throw new Error(
            `[EntityReplacer] Entity expansion count limit exceeded: ` +
            `${this._totalExpansions} > ${this._maxTotalExpansions}`
          );
        }
      }

      if (limitLength) {
        // Only growth counts; replacements that shrink the text are ignored.
        const delta = str.length - before;
        if (delta > 0) {
          this._expandedLength += delta;
          if (this._expandedLength > this._maxExpandedLength) {
            throw new Error(
              `[EntityReplacer] Expanded content length limit exceeded: ` +
              `${this._expandedLength} > ${this._maxExpandedLength}`
            );
          }
        }
      }
    }
    return str;
  }
}
// Re-export the built-in tables for advanced users who want to extend them
export { DEFAULT_XML_ENTITIES, AMP_ENTITY };
// ---------------------------------------------------------------------------
// Named entity groups — importable separately and freely composable.
// All groups are plain objects; no magic, no classes.
// ---------------------------------------------------------------------------
/**
 * ~20 most commonly needed HTML named entities.
 *
 * Every entry's regex matches the named reference plus the decimal and
 * hexadecimal numeric reference for the same code point (leading zeros
 * allowed, hex digits in either case).
 *
 * @type {Record<string, { regex: RegExp, val: string | ((m: string, s: string) => string) }>}
 */
export const COMMON_HTML = {
  nbsp: { regex: /&(nbsp|#0*160|#x0*[Aa]0);/g, val: '\u00a0' },
  copy: { regex: /&(copy|#0*169|#x0*[Aa]9);/g, val: '\u00a9' },
  reg: { regex: /&(reg|#0*174|#x0*[Aa][Ee]);/g, val: '\u00ae' },
  trade: { regex: /&(trade|#0*8482|#x0*2122);/g, val: '\u2122' },
  mdash: { regex: /&(mdash|#0*8212|#x0*2014);/g, val: '\u2014' },
  ndash: { regex: /&(ndash|#0*8211|#x0*2013);/g, val: '\u2013' },
  hellip: { regex: /&(hellip|#0*8230|#x0*2026);/g, val: '\u2026' },
  laquo: { regex: /&(laquo|#0*171|#x0*[Aa][Bb]);/g, val: '\u00ab' },
  raquo: { regex: /&(raquo|#0*187|#x0*[Bb][Bb]);/g, val: '\u00bb' },
  lsquo: { regex: /&(lsquo|#0*8216|#x0*2018);/g, val: '\u2018' },
  rsquo: { regex: /&(rsquo|#0*8217|#x0*2019);/g, val: '\u2019' },
  ldquo: { regex: /&(ldquo|#0*8220|#x0*201[Cc]);/g, val: '\u201c' },
  rdquo: { regex: /&(rdquo|#0*8221|#x0*201[Dd]);/g, val: '\u201d' },
  bull: { regex: /&(bull|#0*8226|#x0*2022);/g, val: '\u2022' },
  para: { regex: /&(para|#0*182|#x0*[Bb]6);/g, val: '\u00b6' },
  sect: { regex: /&(sect|#0*167|#x0*[Aa]7);/g, val: '\u00a7' },
  deg: { regex: /&(deg|#0*176|#x0*[Bb]0);/g, val: '\u00b0' },
  frac12: { regex: /&(frac12|#0*189|#x0*[Bb][Dd]);/g, val: '\u00bd' },
  frac14: { regex: /&(frac14|#0*188|#x0*[Bb][Cc]);/g, val: '\u00bc' },
  frac34: { regex: /&(frac34|#0*190|#x0*[Bb][Ee]);/g, val: '\u00be' },
  // Hex form added so this entry agrees with CURRENCY_ENTITIES.inr and with
  // every other entry in this table.
  inr: { regex: /&(inr|#0*8377|#x0*20[Bb]9);/g, val: '\u20b9' },
};
/**
* Currency symbol entities.
*
* Each entry pairs a global regex — matching the named reference and the
* decimal / hexadecimal numeric reference for the same code point, with
* optional leading zeros — with the replacement character.
*/
export const CURRENCY_ENTITIES = {
cent: { regex: /&(cent|#0*162|#x0*[Aa]2);/g, val: '\u00a2' },
pound: { regex: /&(pound|#0*163|#x0*[Aa]3);/g, val: '\u00a3' },
yen: { regex: /&(yen|#0*165|#x0*[Aa]5);/g, val: '\u00a5' },
euro: { regex: /&(euro|#0*8364|#x0*20[Aa][Cc]);/g, val: '\u20ac' },
inr: { regex: /&(inr|#0*8377|#x0*20[Bb]9);/g, val: '\u20b9' },
curren: { regex: /&(curren|#0*164|#x0*[Aa]4);/g, val: '\u00a4' },
fnof: { regex: /&(fnof|#0*402|#x0*192);/g, val: '\u0192' },
};
/**
* Mathematical operator entities.
*
* Each entry pairs a global regex — matching the named reference and the
* decimal / hexadecimal numeric reference for the same code point, with
* optional leading zeros — with the replacement character.
* `frac12`, `frac14` and `frac34` are also present in COMMON_HTML with
* identical definitions.
*/
export const MATH_ENTITIES = {
times: { regex: /&(times|#0*215|#x0*[Dd]7);/g, val: '\u00d7' },
divide: { regex: /&(divide|#0*247|#x0*[Ff]7);/g, val: '\u00f7' },
plusmn: { regex: /&(plusmn|#0*177|#x0*[Bb]1);/g, val: '\u00b1' },
minus: { regex: /&(minus|#0*8722|#x0*2212);/g, val: '\u2212' },
sup2: { regex: /&(sup2|#0*178|#x0*[Bb]2);/g, val: '\u00b2' },
sup3: { regex: /&(sup3|#0*179|#x0*[Bb]3);/g, val: '\u00b3' },
sup1: { regex: /&(sup1|#0*185|#x0*[Bb]9);/g, val: '\u00b9' },
frac12: { regex: /&(frac12|#0*189|#x0*[Bb][Dd]);/g, val: '\u00bd' },
frac14: { regex: /&(frac14|#0*188|#x0*[Bb][Cc]);/g, val: '\u00bc' },
frac34: { regex: /&(frac34|#0*190|#x0*[Bb][Ee]);/g, val: '\u00be' },
permil: { regex: /&(permil|#0*8240|#x0*2030);/g, val: '\u2030' },
infin: { regex: /&(infin|#0*8734|#x0*221[Ee]);/g, val: '\u221e' },
sum: { regex: /&(sum|#0*8721|#x0*2211);/g, val: '\u2211' },
prod: { regex: /&(prod|#0*8719|#x0*220[Ff]);/g, val: '\u220f' },
radic: { regex: /&(radic|#0*8730|#x0*221[Aa]);/g, val: '\u221a' },
ne: { regex: /&(ne|#0*8800|#x0*2260);/g, val: '\u2260' },
le: { regex: /&(le|#0*8804|#x0*2264);/g, val: '\u2264' },
ge: { regex: /&(ge|#0*8805|#x0*2265);/g, val: '\u2265' },
};
/**
* Arrow entities.
*
* Lower-case names (`larr`, `uarr`, …) are the single arrows U+2190–U+2194;
* the names with a capital `A` (`lArr`, `uArr`, …) are the double arrows
* U+21D0–U+21D4. Each regex matches the named reference and the decimal /
* hexadecimal numeric reference, with optional leading zeros.
*/
export const ARROW_ENTITIES = {
larr: { regex: /&(larr|#0*8592|#x0*2190);/g, val: '\u2190' },
uarr: { regex: /&(uarr|#0*8593|#x0*2191);/g, val: '\u2191' },
rarr: { regex: /&(rarr|#0*8594|#x0*2192);/g, val: '\u2192' },
darr: { regex: /&(darr|#0*8595|#x0*2193);/g, val: '\u2193' },
harr: { regex: /&(harr|#0*8596|#x0*2194);/g, val: '\u2194' },
lArr: { regex: /&(lArr|#0*8656|#x0*21[Dd]0);/g, val: '\u21d0' },
uArr: { regex: /&(uArr|#0*8657|#x0*21[Dd]1);/g, val: '\u21d1' },
rArr: { regex: /&(rArr|#0*8658|#x0*21[Dd]2);/g, val: '\u21d2' },
dArr: { regex: /&(dArr|#0*8659|#x0*21[Dd]3);/g, val: '\u21d3' },
hArr: { regex: /&(hArr|#0*8660|#x0*21[Dd]4);/g, val: '\u21d4' },
};
/**
 * Numeric character references — decimal `&#NNN;` and hex `&#xHH;`.
 *
 * These are function-replacers: any reference whose value is a code point in
 * the range 0–0x10FFFF is expanded to that character. A reference that
 * matches the pattern but lies outside that range is left exactly as it
 * appeared in the input (leading zeros included).
 */
export const NUMERIC_ENTITIES = {
  num_dec: {
    regex: /&#0*([0-9]{1,7});/g,
    val: (match, digits) => fromCodePoint(digits, 10, '&#', match),
  },
  num_hex: {
    regex: /&#x0*([0-9a-fA-F]{1,6});/g,
    val: (match, digits) => fromCodePoint(digits, 16, '&#x', match),
  },
};

/**
 * Convert the digits of a numeric character reference into a character.
 *
 * @param {string} str - digits of the reference, without leading zeros
 * @param {number} base - radix of `str` (10 or 16)
 * @param {string} prefix - reference prefix (`'&#'` or `'&#x'`)
 * @param {string} [original] - full text of the matched reference, returned
 *   verbatim when the code point is out of range; defaults to the reference
 *   rebuilt from `prefix` and `str`
 * @returns {string} the decoded character, or the reference text
 */
function fromCodePoint(str, base, prefix, original = prefix + str + ';') {
  const codePoint = Number.parseInt(str, base);
  if (codePoint >= 0 && codePoint <= 0x10ffff) {
    return String.fromCodePoint(codePoint);
  }
  // Out of range (or unparsable): keep the input text untouched rather than
  // emitting a normalised form with the leading zeros stripped.
  return original;
}