Comparing version 3.1.0 to 4.0.0
@@ -1,2 +0,2 @@ | ||
var Regex=(()=>{var H=Object.defineProperty;var ge=Object.getOwnPropertyDescriptor;var me=Object.getOwnPropertyNames;var Ne=Object.prototype.hasOwnProperty;var he=(e,t)=>{for(var n in t)H(e,n,{get:t[n],enumerable:!0})},Ae=(e,t,n,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of me(t))!Ne.call(e,o)&&o!==n&&H(e,o,{get:()=>t[o],enumerable:!(r=ge(t,o))||r.enumerable});return e};var we=e=>Ae(H({},"__esModule",{value:!0}),e);var Ge={};he(Ge,{pattern:()=>M,regex:()=>Pe});var d=Object.freeze({DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"});function w(e,t,n,r){let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu"),i=0,s="";for(let u of e.matchAll(o)){let{0:a,groups:{skip:E}}=u;if(!E&&(!r||r===d.DEFAULT==!i)){n instanceof Function?s+=n(u):s+=n;continue}a==="["?i++:a==="]"&&i&&i--,s+=a}return s}function k(e,t,n,r){w(e,t,n,r)}function y(e,t,n=0,r){if(!new RegExp(t,"su").test(e))return null;let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu");o.lastIndex=n;let i=0,s;for(;s=o.exec(e);){let{0:u,groups:{skip:a}}=s;if(!a&&(!r||r===d.DEFAULT==!i))return s;u==="["?i++:u==="]"&&i&&i--,o.lastIndex==s.index&&o.lastIndex++}return null}function O(e,t,n){return!!y(e,t,0,n)}function Q(e,t){let n=/\\?./gsu;n.lastIndex=t;let r=e.length,o=0,i=1,s;for(;s=n.exec(e);){let[u]=s;if(u==="[")o++;else if(o)u==="]"&&o--;else if(u==="(")i++;else if(u===")"&&(i--,!i)){r=s.index;break}}return e.slice(t,r)}var _=class{#e;constructor(t){this.#e=t}toString(){return String(this.#e)}};function M(e,...t){if(Array.isArray(e?.raw))return new _(e.raw.flatMap((n,r)=>r<e.raw.length-1?[n,t[r]]:n).join(""));if(!t.length)return new _(e??"");throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var 
l={DEFAULT:"R_DEFAULT",CHAR_CLASS:"R_CHAR_CLASS",GROUP_NAME:"R_GROUP_NAME",ENCLOSED_TOKEN:"R_ENCLOSED_TOKEN",INTERVAL_QUANTIFIER:"R_INTERVAL_QUANTIFIER",INVALID_INCOMPLETE_TOKEN:"R_INVALID_INCOMPLETE_TOKEN"},p={DEFAULT:"CC_DEFAULT",RANGE:"CC_RANGE",ENCLOSED_TOKEN:"CC_ENCLOSED_TOKEN",Q_TOKEN:"CC_Q_TOKEN",INVALID_INCOMPLETE_TOKEN:"CC_INVALID_INCOMPLETE_TOKEN"},x=(()=>{try{new RegExp("(?i:)")}catch{return!1}return!0})(),te=(()=>{try{new RegExp("","v")}catch{return!1}return!0})(),D="&!#$%*+,.:;<=>?@^`~",V=String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`,v=String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${V}`,R=String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;function q(e,t){return t===d.CHAR_CLASS?e.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${D}]`,"g"),"\\$&"):e.replace(/[()\[\]{}|\\^$*+?.]/g,"\\$&")}function j(e){return e.replace(new RegExp(`^([${D}])(?!\\1)`),(t,n,r)=>`\\${t}${r+1===e.length?"":t}`)}function ne(e){return e.replace(/^\^/,"\\^^")}function G(e,t){return w(e,String.raw`\\0(?!\d)`,"\\u{0}",t)}function Y(e,t,n){let r=0;for(let[o]of e.matchAll(new RegExp(`[${q(t+n,d.CHAR_CLASS)}]`,"g")))if(r+=o===t?1:-1,r<0)return n;return r>0?t:""}function re(e,t,n){let r=e.replace(/\\./gsu,"");if(r.endsWith("\\"))return"\\";if(t===l.DEFAULT)return Y(r,"(",")");if(t===l.CHAR_CLASS&&!(n===p.ENCLOSED_TOKEN||n===p.Q_TOKEN))return Y(r,"[","]");if(t===l.ENCLOSED_TOKEN||t===l.INTERVAL_QUANTIFIER||n===p.ENCLOSED_TOKEN||n===p.Q_TOKEN){if(r.includes("}"))return"}"}else if(t===l.GROUP_NAME&&r.includes(">"))return">";return""}var ee=new RegExp(String.raw` | ||
var Regex=(()=>{var q=Object.defineProperty;var Ae=Object.getOwnPropertyDescriptor;var we=Object.getOwnPropertyNames;var Te=Object.prototype.hasOwnProperty;var Se=(e,t)=>{for(var n in t)q(e,n,{get:t[n],enumerable:!0})},Le=(e,t,n,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of we(t))!Te.call(e,o)&&o!==n&&q(e,o,{get:()=>t[o],enumerable:!(r=Ae(t,o))||r.enumerable});return e};var Ce=e=>Le(q({},"__esModule",{value:!0}),e);var ve={};Se(ve,{pattern:()=>j,regex:()=>Ke});var g=Object.freeze({DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"});function h(e,t,n,r){let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu"),i=0,s="";for(let u of e.matchAll(o)){let{0:l,groups:{skip:f}}=u;if(!f&&(!r||r===g.DEFAULT==!i)){n instanceof Function?s+=n(u):s+=n;continue}l==="["?i++:l==="]"&&i&&i--,s+=l}return s}function G(e,t,n,r){h(e,t,n,r)}function v(e,t,n=0,r){if(!new RegExp(t,"su").test(e))return null;let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu");o.lastIndex=n;let i=0,s;for(;s=o.exec(e);){let{0:u,groups:{skip:l}}=s;if(!l&&(!r||r===g.DEFAULT==!i))return s;u==="["?i++:u==="]"&&i&&i--,o.lastIndex==s.index&&o.lastIndex++}return null}function O(e,t,n){return!!v(e,t,0,n)}function W(e,t){let n=/\\?./gsu;n.lastIndex=t;let r=e.length,o=0,i=1,s;for(;s=n.exec(e);){let[u]=s;if(u==="[")o++;else if(o)u==="]"&&o--;else if(u==="(")i++;else if(u===")"&&(i--,!i)){r=s.index;break}}return e.slice(t,r)}var _=class{#e;constructor(t){this.#e=t}toString(){return String(this.#e)}};function j(e,...t){if(Array.isArray(e?.raw))return new _(e.raw.flatMap((n,r)=>r<e.raw.length-1?[n,t[r]]:n).join(""));if(!t.length)return new _(e??"");throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var 
c={DEFAULT:"R_DEFAULT",CHAR_CLASS:"R_CHAR_CLASS",GROUP_NAME:"R_GROUP_NAME",ENCLOSED_TOKEN:"R_ENCLOSED_TOKEN",INTERVAL_QUANTIFIER:"R_INTERVAL_QUANTIFIER",INVALID_INCOMPLETE_TOKEN:"R_INVALID_INCOMPLETE_TOKEN"},d={DEFAULT:"CC_DEFAULT",RANGE:"CC_RANGE",ENCLOSED_TOKEN:"CC_ENCLOSED_TOKEN",Q_TOKEN:"CC_Q_TOKEN",INVALID_INCOMPLETE_TOKEN:"CC_INVALID_INCOMPLETE_TOKEN"},K=(()=>{try{new RegExp("(?i:)")}catch{return!1}return!0})(),oe=(()=>{try{new RegExp("","v")}catch{return!1}return!0})(),R="&!#$%*+,.:;<=>?@^`~",C="$E$",H=String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`,U=String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${H}`,b=String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;function B(e,t){return t===g.CHAR_CLASS?e.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${R}]`,"g"),"\\$&"):e.replace(/[()\[\]{}|\\^$*+?.]/g,"\\$&")}function Q(e){return e.replace(new RegExp(`^([${R}])(?!\\1)`),(t,n,r)=>`\\${t}${r+1===e.length?"":t}`)}function se(e){return e.replace(/^\^/,"\\^^")}function M(e,t){return h(e,String.raw`\\0(?!\d)`,"\\u{0}",t)}function ne(e,t,n){let r=0;for(let[o]of e.matchAll(new RegExp(`[${B(t+n,g.CHAR_CLASS)}]`,"g")))if(r+=o===t?1:-1,r<0)return n;return r>0?t:""}function ie(e,t,n){let r=e.replace(/\\./gsu,"");if(r.endsWith("\\"))return"\\";if(t===c.DEFAULT)return ne(r,"(",")");if(t===c.CHAR_CLASS&&!(n===d.ENCLOSED_TOKEN||n===d.Q_TOKEN))return ne(r,"[","]");if(t===c.ENCLOSED_TOKEN||t===c.INTERVAL_QUANTIFIER||n===d.ENCLOSED_TOKEN||n===d.Q_TOKEN){if(r.includes("}"))return"}"}else if(t===c.GROUP_NAME&&r.includes(">"))return">";return""}var re=new RegExp(String.raw` | ||
(?<groupN>\(\?<(?![=!])|\\[gk]<) | ||
@@ -14,5 +14,5 @@ | (?<enclosedT>\\[pPu]\{) | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function U(e,{regexContext:t=l.DEFAULT,charClassContext:n=p.DEFAULT,charClassDepth:r=0,lastPos:o=0}){ee.lastIndex=o;let i;for(;i=ee.exec(e);){let{0:s,groups:{groupN:u,enclosedT:a,qT:E,intervalQ:g,incompleteT:c}}=i;s==="["?(r++,t=l.CHAR_CLASS,n=p.DEFAULT):s==="]"&&t===l.CHAR_CLASS?(r&&r--,r||(t=l.DEFAULT),n=p.DEFAULT):t===l.CHAR_CLASS?c?n=p.INVALID_INCOMPLETE_TOKEN:s==="-"?n=p.RANGE:a?n=p.ENCLOSED_TOKEN:E?n=p.Q_TOKEN:(s==="}"&&(n===p.ENCLOSED_TOKEN||n===p.Q_TOKEN)||n===p.INVALID_INCOMPLETE_TOKEN||n===p.RANGE)&&(n=p.DEFAULT):c?t=l.INVALID_INCOMPLETE_TOKEN:u?t=l.GROUP_NAME:a?t=l.ENCLOSED_TOKEN:g?t=l.INTERVAL_QUANTIFIER:(s===">"&&t===l.GROUP_NAME||s==="}"&&(t===l.ENCLOSED_TOKEN||t===l.INTERVAL_QUANTIFIER)||t===l.INVALID_INCOMPLETE_TOKEN)&&(t=l.DEFAULT)}return{regexContext:t,charClassContext:n,charClassDepth:r,lastPos:e.length}}function b(e){let t=0;return k(e,v,()=>t++,d.DEFAULT),t}function oe(e,t){return w(e,String.raw`\\(?<num>[1-9]\d*)`,({groups:{num:n}})=>`\\${+n+t}`,d.DEFAULT)}var Te=["Basic_Emoji","Emoji_Keycap_Sequence","RGI_Emoji_Modifier_Sequence","RGI_Emoji_Flag_Sequence","RGI_Emoji_Tag_Sequence","RGI_Emoji_ZWJ_Sequence","RGI_Emoji"].join("|"),_e=new RegExp(String.raw` | ||
`.replace(/\s+/g,""),"gsu");function F(e,{regexContext:t=c.DEFAULT,charClassContext:n=d.DEFAULT,charClassDepth:r=0,lastPos:o=0}={}){re.lastIndex=o;let i;for(;i=re.exec(e);){let{0:s,groups:{groupN:u,enclosedT:l,qT:f,intervalQ:p,incompleteT:a}}=i;s==="["?(r++,t=c.CHAR_CLASS,n=d.DEFAULT):s==="]"&&t===c.CHAR_CLASS?(r&&r--,r||(t=c.DEFAULT),n=d.DEFAULT):t===c.CHAR_CLASS?a?n=d.INVALID_INCOMPLETE_TOKEN:s==="-"?n=d.RANGE:l?n=d.ENCLOSED_TOKEN:f?n=d.Q_TOKEN:(s==="}"&&(n===d.ENCLOSED_TOKEN||n===d.Q_TOKEN)||n===d.INVALID_INCOMPLETE_TOKEN||n===d.RANGE)&&(n=d.DEFAULT):a?t=c.INVALID_INCOMPLETE_TOKEN:u?t=c.GROUP_NAME:l?t=c.ENCLOSED_TOKEN:p?t=c.INTERVAL_QUANTIFIER:(s===">"&&t===c.GROUP_NAME||s==="}"&&(t===c.ENCLOSED_TOKEN||t===c.INTERVAL_QUANTIFIER)||t===c.INVALID_INCOMPLETE_TOKEN)&&(t=c.DEFAULT)}return{regexContext:t,charClassContext:n,charClassDepth:r,lastPos:e.length}}function k(e){let t=0;return G(e,U,()=>t++,g.DEFAULT),t}function ue(e,t){return h(e,String.raw`\\(?<num>[1-9]\d*)`,({groups:{num:n}})=>`\\${+n+t}`,g.DEFAULT)}var _e=["Basic_Emoji","Emoji_Keycap_Sequence","RGI_Emoji_Modifier_Sequence","RGI_Emoji_Flag_Sequence","RGI_Emoji_Tag_Sequence","RGI_Emoji_ZWJ_Sequence","RGI_Emoji"].join("|"),Ie=new RegExp(String.raw` | ||
\\(?: c[A-Za-z] | ||
| p\{(?<pStrProp>${Te})\} | ||
| p\{(?<pStrProp>${_e})\} | ||
| [pP]\{[^\}]+\} | ||
@@ -27,8 +27,8 @@ | (?<qStrProp>q) | ||
| . | ||
`.replace(/\s+/g,""),"gsu");function W(e){let t=!1,n;for(let{0:r,groups:o}of e.matchAll(_e)){if(o.pStrProp||o.qStrProp||r==="["&&t)return!0;if(["-","--","&&"].includes(r))t=!1;else if(!["[","]"].includes(r)){if(t||n==="]")return!0;t=!0}n=r}return!1}function z(e,t,n){let r={raw:[]},o=[],i={};return e.raw.forEach((s,u)=>{let a=n(s,{...i,lastPos:0});if(r.raw.push(a.transformed),i=a.runningContext,u<e.raw.length-1){let E=t[u];if(E instanceof _){let g=n(E,{...i,lastPos:0});o.push(M(g.transformed)),i=g.runningContext}else o.push(E)}}),{template:r,substitutions:o}}var Se=new RegExp(String.raw` | ||
${R} | ||
`.replace(/\s+/g,""),"gsu");function z(e){let t=!1,n;for(let{0:r,groups:o}of e.matchAll(Ie)){if(o.pStrProp||o.qStrProp||r==="["&&t)return!0;if(["-","--","&&"].includes(r))t=!1;else if(!["[","]"].includes(r)){if(t||n==="]")return!0;t=!0}n=r}return!1}function Z(e,t,n){let r={raw:[]},o=[],i;return e.raw.forEach((s,u)=>{let l=n(s,{...i,lastPos:0});if(r.raw.push(l.transformed),i=l.runningContext,u<e.raw.length-1){let f=t[u];if(f instanceof _){let p=n(f,{...i,lastPos:0});o.push(j(p.transformed)),i=p.runningContext}else o.push(f)}}),{template:r,substitutions:o}}var $e=new RegExp(String.raw` | ||
${b} | ||
| \(\?< | ||
| (?<backrefNum>\\[1-9]\d*) | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function se(e,t){e=String(e);let n="",r="";for(let{0:o,groups:{backrefNum:i}}of e.matchAll(Se)){n+=o,t=U(n,t);let{regexContext:s}=t;if(s===l.DEFAULT)if(o==="(")r+="(?:";else{if(i)throw new Error(`Invalid decimal escape "${o}" with implicit flag n; replace with named backreference`);r+=o}else r+=o}return{transformed:r,runningContext:t}}var ie=/^\s$/,Le=/^\\[\s#]$/,ae=/^[ \t]$/,Ce=/^\\[ \t]$/,Ie=new RegExp(String.raw` | ||
`.replace(/\s+/g,""),"gsu");function ae(e,t){e=String(e);let n="",r="";for(let{0:o,groups:{backrefNum:i}}of e.matchAll($e)){n+=o,t=F(n,t);let{regexContext:s}=t;if(s===c.DEFAULT)if(o==="(")r+="(?:";else{if(i)throw new Error(`Invalid decimal escape "${o}" with implicit flag n; replace with named backreference`);r+=o}else r+=o}return{transformed:r,runningContext:t}}var le=/^\s$/,De=/^\\[\s#]$/,ce=/^[ \t]$/,Oe=/^\\[ \t]$/,Re=new RegExp(String.raw` | ||
\\(?: [gk]< | ||
@@ -42,22 +42,22 @@ | [pPu]\{ | ||
| \[\^ | ||
| ${R} | ||
| ${b} | ||
| \(\?< | ||
| (?<dp>[${D}])\k<dp> | ||
| (?<dp>[${R}])\k<dp> | ||
| -- | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function ue(e,t){e=String(e);let n=!1,r=!1,o=!1,i="",s="",u="",a="",E=!1,g=(c,{prefix:f=!0,postfix:m=!1}={})=>(c=(E&&f?"(?:)":"")+c+(m?"(?:)":""),E=!1,c);for(let[c]of e.matchAll(Ie)){if(o){c===` | ||
`&&(o=!1,E=!0);continue}if(n){if(ie.test(c))continue;n=!1,E=!0}else if(r){if(ae.test(c))continue;r=!1}i+=c,t=U(i,t);let{regexContext:f,charClassContext:m}=t;if(c==="-"&&f===l.CHAR_CLASS&&a===p.RANGE)throw new Error("Invalid unescaped hyphen as the end value for a range");if(f===l.DEFAULT&&/^(?:[?*+]|\?\?)$/.test(c)||f===l.INTERVAL_QUANTIFIER&&c==="{")s+=g(c,{prefix:!1,postfix:u==="("&&c==="?"});else if(f===l.DEFAULT)ie.test(c)?n=!0:c.startsWith("#")?o=!0:Le.test(c)?s+=g(c[1],{prefix:!1}):s+=g(c);else if(f===l.CHAR_CLASS&&c!=="["&&c!=="[^")if(ae.test(c)&&(m===p.DEFAULT||m===p.RANGE||m===p.Q_TOKEN))r=!0;else{if(m===p.INVALID_INCOMPLETE_TOKEN)throw new Error(`Invalid incomplete token in character class: "${c}"`);Ce.test(c)&&(m===p.DEFAULT||m===p.Q_TOKEN)?s+=g(c[1],{prefix:!1}):m===p.DEFAULT?s+=g(j(G(c))):s+=g(c)}else s+=g(c);n||r||o||(u=c,a=m)}return{transformed:s,runningContext:t}}function ce(e){let t=String.raw`\(\?:\)`;return e=w(e,`(?:${t}){2,}`,"(?:)",d.DEFAULT),e=w(e,String.raw`${t}(?=[)|.[$\\]|\((?!DEFINE)|$)|(?<=[()|.\]^>]|\\[bBdDfnrsStvwW]|\(\?(?:[:=!]|<[=!])|^)${t}(?![?*+{])`,"",d.DEFAULT),e}function le(e){if(!O(e,"\\(\\?>",d.DEFAULT))return e;let t=new RegExp(String.raw`(?<noncapturingStart>${R})|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`,"gsu"),n="(?>",r="(?:(?=(",o=0,i=0,s=NaN,u;do{u=!1;let a=0,E=0,g=!1,c;for(t.lastIndex=Number.isNaN(s)?0:s+r.length;c=t.exec(e);){let{0:f,index:m,groups:{backrefNum:N,capturingStart:T,noncapturingStart:A}}=c;if(f==="[")a++;else if(a)f==="]"&&a--;else if(f===n&&!g)s=m,g=!0;else if(g&&A)E++;else if(T)g&&E++,o++;else if(f===")"&&g){if(!E){i++,e=`${e.slice(0,s)}${r}${e.slice(s+n.length,m)}))\\k<$$${i+o}>)${e.slice(m+1)}`,u=!0;break}E--}else if(N)throw new Error(`Invalid decimal escape "${f}" in interpolated regex; cannot be used with atomic group`)}}while(u);return e=w(e,String.raw`\\k<\$\$(?<backrefNum>\d+)>`,({groups:{backrefNum:a}})=>`\\${a}`,d.DEFAULT),e}function pe(e){let 
t=Ee(e,{includeContents:!0}),n=$e(e,t);return De(n,t)}var Oe=String.raw`\\g<(?<subroutineName>[^>&]+)>`,K=new RegExp(String.raw` | ||
${Oe} | ||
| (?<capturingStart>${v}) | ||
`.replace(/\s+/g,""),"gsu");function fe(e,t){e=String(e);let n=!1,r=!1,o=!1,i="",s="",u="",l="",f=!1,p=(a,{prefix:m=!0,postfix:E=!1}={})=>(a=(f&&m?"(?:)":"")+a+(E?"(?:)":""),f=!1,a);for(let[a]of e.matchAll(Re)){if(o){a===` | ||
`&&(o=!1,f=!0);continue}if(n){if(le.test(a))continue;n=!1,f=!0}else if(r){if(ce.test(a))continue;r=!1}i+=a,t=F(i,t);let{regexContext:m,charClassContext:E}=t;if(a==="-"&&m===c.CHAR_CLASS&&l===d.RANGE)throw new Error("Invalid unescaped hyphen as the end value for a range");if(m===c.DEFAULT&&/^(?:[?*+]|\?\?)$/.test(a)||m===c.INTERVAL_QUANTIFIER&&a==="{")s+=p(a,{prefix:!1,postfix:u==="("&&a==="?"});else if(m===c.DEFAULT)le.test(a)?n=!0:a.startsWith("#")?o=!0:De.test(a)?s+=p(a[1],{prefix:!1}):s+=p(a);else if(m===c.CHAR_CLASS&&a!=="["&&a!=="[^")if(ce.test(a)&&(E===d.DEFAULT||E===d.RANGE||E===d.Q_TOKEN))r=!0;else{if(E===d.INVALID_INCOMPLETE_TOKEN)throw new Error(`Invalid incomplete token in character class: "${a}"`);Oe.test(a)&&(E===d.DEFAULT||E===d.Q_TOKEN)?s+=p(a[1],{prefix:!1}):E===d.DEFAULT?s+=p(Q(M(a))):s+=p(a)}else s+=p(a);n||r||o||(u=a,l=E)}return{transformed:s,runningContext:t}}function pe(e){let t=String.raw`\(\?:\)`;e=h(e,`(?:${t}){2,}`,"(?:)",g.DEFAULT);let n=C.replace(/\$/g,"\\$");return e=h(e,String.raw`(?:${t}(?=[)|.[$\\]|\((?!DEFINE)|$)|(?<=[()|.\]^>]|\\[bBdDfnrsStvwW]|\(\?(?:[:=!]|<[=!])|^)${t}(?![?*+{]))(?!${n})`,"",g.DEFAULT),e}var Ee=new RegExp(String.raw`(?<noncapturingStart>${b})|(?<capturingStart>\((?:\?<[^>]+>)?)|\\?.`,"gsu");function ge(e,t){if(!O(e,"\\(\\?>",g.DEFAULT))return e;let n="(?>",r=`(?:(?=(${t.useEmulationGroups?C:""}`,o=[0],i=0,s=0,u=NaN,l;do{l=!1;let f=0,p=0,a=!1,m;for(Ee.lastIndex=Number.isNaN(u)?0:u+r.length;m=Ee.exec(e);){let{0:E,index:A,groups:{capturingStart:N,noncapturingStart:S}}=m;if(E==="[")f++;else if(f)E==="]"&&f--;else if(E===n&&!a)u=A,a=!0;else if(a&&S)p++;else if(N)a?p++:(i++,o.push(i+s));else if(E===")"&&a){if(!p){s++,e=`${e.slice(0,u)}${r}${e.slice(u+n.length,A)}))<$$${s+i}>)${e.slice(A+1)}`,l=!0;break}p--}}}while(l);return e=h(e,String.raw`\\(?<backrefNum>[1-9]\d*)|<\$\$(?<wrappedBackrefNum>\d+)>`,({0:f,groups:{backrefNum:p,wrappedBackrefNum:a}})=>{if(p){let m=+p;if(m>o.length-1)throw new Error(`Backref "${f}" greater 
than number of captures`);return`\\${o[m]}`}return`\\${a}`},g.DEFAULT),e}function me(e,t){let n=Ne(e,{includeContents:!0}),r=be(e,n,t.useEmulationGroups);return Fe(r,n)}var Ue=String.raw`\\g<(?<subroutineName>[^>&]+)>`,y=new RegExp(String.raw` | ||
${Ue} | ||
| (?<capturingStart>${U}) | ||
| \\(?<backrefNum>[1-9]\d*) | ||
| \\k<(?<backrefName>[^>]+)> | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function $e(e,t){if(!O(e,"\\\\g<",d.DEFAULT))return e;let n=O(e,"\\\\(?:[1-9]|k<[^>]+>)",d.DEFAULT),r=n?"(":"(?:",o=new Map,i=[],s=[0],u=0,a=0,E=0,g=0,c=0,f=e,m;for(K.lastIndex=0;m=K.exec(f);){let{0:N,index:T,groups:{subroutineName:A,capturingStart:F,backrefNum:L,backrefName:$}}=m;if(N==="[")c++;else if(c)N==="]"&&c--;else if(A){if(!t.has(A))throw new Error(`Invalid named capture referenced by subroutine ${N}`);if(o.has(A))throw new Error(`Subroutine ${N} followed a recursive reference`);let h=t.get(A).contents,C=`${r}${h})`;n&&(E=0,a++),o.set(A,{unclosedGroupCount:Re(C)}),i.push(A),f=J(f,T,N,C),K.lastIndex-=N.length-r.length}else if(F)o.size?(n&&(E++,a++),N!=="("&&(f=J(f,T,N,r),K.lastIndex-=N.length-r.length)):n&&(s.push(Z(s)+1+a-g),g=a,u++);else if((L||$)&&o.size){let h=L?+L:t.get($)?.groupNum,C=!1;for(let S of i){let I=t.get(S);if(h>=I.groupNum&&h<=I.groupNum+I.numCaptures){C=!0;break}}if(C){let S=t.get(Z(i)),I=u+a-E,P=`\\k<$$b${h}s${I}r${S.groupNum}c${S.numCaptures}>`;f=J(f,T,N,P),K.lastIndex+=P.length-N.length}}else if(N===")"&&o.size){let h=o.get(Z(i));h.unclosedGroupCount--,h.unclosedGroupCount||o.delete(i.pop())}}return n&&(f=w(f,String.raw`\\(?:(?<bNum>[1-9]\d*)|k<\$\$b(?<bNumSub>\d+)s(?<subNum>\d+)r(?<refNum>\d+)c(?<refCaps>\d+)>)`,({0:N,groups:{bNum:T,bNumSub:A,subNum:F,refNum:L,refCaps:$}})=>{if(T){let P=+T;if(P>s.length-1)throw new Error(`Backref "${N}" greater than number of captures`);return`\\${s[P]}`}let h=+A,C=+F,S=+L,I=+$;return h<S||h>S+I?`\\${s[h]}`:`\\${C-S+h}`},d.DEFAULT)),f}var B=new RegExp(String.raw`${V}|\(\?:\)|(?<invalid>\\?.)`,"gsu");function De(e,t){let n=y(e,String.raw`\(\?\(DEFINE\)`,0,d.DEFAULT);if(!n)return e;let r=fe(e,n);if(r.afterPos<e.length)throw new Error("DEFINE group allowed only at the end of a regex");if(r.afterPos>e.length)throw new Error("DEFINE group is unclosed");let o;for(B.lastIndex=0;o=B.exec(r.contents);){let{captureName:i,invalid:s}=o.groups;if(i){let 
u=fe(r.contents,o),a;if(!t.get(i).isUnique)a=i;else{let E=Ee(u.contents);for(let g of E.keys())if(!t.get(g).isUnique){a=g;break}}if(a)throw new Error(`Duplicate group name "${a}" within DEFINE`);B.lastIndex=u.afterPos}else if(s)throw new Error("DEFINE group includes unsupported syntax at top level")}return e.slice(0,n.index)}function Re(e){let t=0;return k(e,"\\(",()=>t++,d.DEFAULT),t}function Ue(e,t){let n=0,r=0,o;for(;o=y(e,v,r,d.DEFAULT);){let{0:i,index:s,groups:{captureName:u}}=o;if(n++,u===t)break;r=s+i.length}return n}function fe(e,t){let n=t.index+t[0].length,r=Q(e,n),o=n+r.length+1;return{contents:r,afterPos:o}}function Ee(e,{includeContents:t}={}){let n=new Map;return k(e,V,({0:r,index:o,groups:{captureName:i}})=>{if(n.has(i))n.get(i).isUnique=!1;else{let s={isUnique:!0};if(t){let u=Q(e,o+r.length);Object.assign(s,{contents:u,groupNum:Ue(e,i),numCaptures:b(u)})}n.set(i,s)}},d.DEFAULT),n}function Z(e){return e[e.length-1]}function J(e,t,n,r){return e.slice(0,t)+r+e.slice(t+n.length)}var be="&!#%,:;<=>@`~",Fe=new RegExp(String.raw` | ||
`.replace(/\s+/g,""),"gsu");function be(e,t,n){if(!O(e,"\\\\g<",g.DEFAULT))return e;let r=O(e,"\\\\(?:[1-9]|k<[^>]+>)",g.DEFAULT),o=r?`(${n?C:""}`:"(?:",i=new Map,s=[],u=[0],l=0,f=0,p=0,a=0,m=0,E=e,A;for(y.lastIndex=0;A=y.exec(E);){let{0:N,index:S,groups:{subroutineName:L,capturingStart:P,backrefNum:T,backrefName:V}}=A;if(N==="[")m++;else if(m)N==="]"&&m--;else if(L){if(!t.has(L))throw new Error(`Invalid named capture referenced by subroutine ${N}`);if(i.has(L))throw new Error(`Subroutine ${N} followed a recursive reference`);let w=t.get(L).contents,$=`${o}${w})`;r&&(p=0,f++),i.set(L,{unclosedGroupCount:ke($)}),s.push(L),E=Y(E,S,N,$),y.lastIndex-=N.length-o.length}else if(P)i.size?(r&&(p++,f++),N!=="("&&(E=Y(E,S,N,o),y.lastIndex-=N.length-o.length)):r&&(u.push(X(u)+1+f-a),a=f,l++);else if((T||V)&&i.size){let w=T?+T:t.get(V)?.groupNum,$=!1;for(let I of s){let D=t.get(I);if(w>=D.groupNum&&w<=D.groupNum+D.numCaptures){$=!0;break}}if($){let I=t.get(X(s)),D=l+f-p,x=`\\k<$$b${w}s${D}r${I.groupNum}c${I.numCaptures}>`;E=Y(E,S,N,x),y.lastIndex+=x.length-N.length}}else if(N===")"&&i.size){let w=i.get(X(s));w.unclosedGroupCount--,w.unclosedGroupCount||i.delete(s.pop())}}return r&&(E=h(E,String.raw`\\(?:(?<bNum>[1-9]\d*)|k<\$\$b(?<bNumSub>\d+)s(?<subNum>\d+)r(?<refNum>\d+)c(?<refCaps>\d+)>)`,({0:N,groups:{bNum:S,bNumSub:L,subNum:P,refNum:T,refCaps:V}})=>{if(S){let x=+S;if(x>u.length-1)throw new Error(`Backref "${N}" greater than number of captures`);return`\\${u[x]}`}let w=+L,$=+P,I=+T,D=+V;return w<I||w>I+D?`\\${u[w]}`:`\\${$-I+w}`},g.DEFAULT)),E}var J=new RegExp(String.raw`${H}|\(\?:\)|(?<invalid>\\?.)`,"gsu");function Fe(e,t){let n=v(e,String.raw`\(\?\(DEFINE\)`,0,g.DEFAULT);if(!n)return e;let r=de(e,n);if(r.afterPos<e.length)throw new Error("DEFINE group allowed only at the end of a regex");if(r.afterPos>e.length)throw new Error("DEFINE group is unclosed");let o;for(J.lastIndex=0;o=J.exec(r.contents);){let{captureName:i,invalid:s}=o.groups;if(i){let 
u=de(r.contents,o),l;if(!t.get(i).isUnique)l=i;else{let f=Ne(u.contents);for(let p of f.keys())if(!t.get(p).isUnique){l=p;break}}if(l)throw new Error(`Duplicate group name "${l}" within DEFINE`);J.lastIndex=u.afterPos}else if(s)throw new Error("DEFINE group includes unsupported syntax at top level")}return e.slice(0,n.index)}function ke(e){let t=0;return G(e,"\\(",()=>t++,g.DEFAULT),t}function Pe(e,t){let n=0,r=0,o;for(;o=v(e,U,r,g.DEFAULT);){let{0:i,index:s,groups:{captureName:u}}=o;if(n++,u===t)break;r=s+i.length}return n}function de(e,t){let n=t.index+t[0].length,r=W(e,n),o=n+r.length+1;return{contents:r,afterPos:o}}function Ne(e,{includeContents:t}={}){let n=new Map;return G(e,H,({0:r,index:o,groups:{captureName:i}})=>{if(n.has(i))n.get(i).isUnique=!1;else{let s={isUnique:!0};if(t){let u=W(e,o+r.length);Object.assign(s,{contents:u,groupNum:Pe(e,i),numCaptures:k(u)})}n.set(i,s)}},g.DEFAULT),n}function X(e){return e[e.length-1]}function Y(e,t,n,r){return e.slice(0,t)+r+e.slice(t+n.length)}var xe="&!#%,:;<=>@`~",Ge=new RegExp(String.raw` | ||
\[\^?-? | ||
| --?\] | ||
| (?<dp>[${D}])\k<dp> | ||
| (?<dp>[${R}])\k<dp> | ||
| -- | ||
| \\(?<vOnlyEscape>[${be}]) | ||
| \\(?<vOnlyEscape>[${xe}]) | ||
| \\[pPu]\{[^}]+\} | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function de(e,t){let n='Invalid unescaped "-" in character class',r=!1,o=!1,i="";for(let{0:s,groups:{dp:u,vOnlyEscape:a}}of e.matchAll(Fe)){if(s[0]==="["){if(r)throw new Error("Invalid nested character class when flag v not supported; possibly from interpolation");if(s.endsWith("-"))throw new Error(n);r=!0,o=s[1]==="^"}else if(s.endsWith("]")){if(s[0]==="-")throw new Error(n);r=o=!1}else if(r){if(s==="&&"||s==="--")throw new Error(`Invalid set operator "${s}" when flag v not supported`);if(u)throw new Error(`Invalid double punctuator "${s}", reserved by flag v`);if("(){}/|".includes(s))throw new Error(`Invalid unescaped "${s}" in character class`);if(o&&s.startsWith("\\P")&&t.includes("i"))throw new Error("Negated \\P in negated character class with flag i works differently with flag v");if(a){i+=a;continue}}i+=s}return i}function Pe(e,...t){let n=this instanceof Function?this:RegExp;if(Array.isArray(e?.raw))return X(n,{flags:""},e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return X.bind(null,n,{flags:e});if({}.toString.call(e)==="[object Object]"&&!t.length)return X.bind(null,n,e);throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}function X(e,t,n,...r){let{flags:o="",postprocessors:i=[],__extendSyntax:s=t.__flagN??!0,__flagN:u=!0,__flagV:a=te,__flagX:E=!0,__rake:g=!0}=t;if(/[vu]/.test(o))throw new Error("Flags v/u cannot be explicitly added");E&&({template:n,substitutions:r}=z(n,r,ue)),u&&({template:n,substitutions:r}=z(n,r,se));let c=0,f="",m={};n.raw.forEach((T,A)=>{let F=!!(n.raw[A]||n.raw[A+1]);c+=b(T),f+=G(T,d.CHAR_CLASS),m=U(f,m);let{regexContext:L,charClassContext:$}=m;if(A<n.raw.length-1){let h=r[A];f+=ke(h,o,L,$,F,c),h instanceof RegExp?c+=b(h.source):h instanceof _&&(c+=b(String(h)))}});let N=[...i];return s&&N.push(le,pe),a||N.push(de),g&&N.push(ce),N.forEach(T=>f=T(f,o)),new e(f,(a?"v":"u")+o)}function ke(e,t,n,r,o,i){if(e instanceof RegExp&&n!==l.DEFAULT)throw new Error("Cannot interpolate a 
RegExp at this position because the syntax context does not match");if(n===l.INVALID_INCOMPLETE_TOKEN||r===p.INVALID_INCOMPLETE_TOKEN)throw new Error("Interpolation preceded by invalid incomplete token");let s=e instanceof _,u;if(!(e instanceof RegExp)){e=String(e),s||(u=q(e,n===l.CHAR_CLASS?d.CHAR_CLASS:d.DEFAULT));let a=re(u||e,n,r);if(a)throw new Error(`Unescaped stray "${a}" in the interpolated value would have side effects outside it`)}if(n===l.ENCLOSED_TOKEN||n===l.INTERVAL_QUANTIFIER||n===l.GROUP_NAME||r===p.ENCLOSED_TOKEN||r===p.Q_TOKEN)return s?e:u;if(n===l.CHAR_CLASS){if(s){if(O(e,"^-|^&&|-$|&&$"))throw new Error("Cannot use range or set operator at boundary of interpolated pattern; move the operation into the pattern or the operator outside of it");let a=ne(j(e));return W(e)?`[${a}]`:G(a)}return W(u)?`[${u}]`:u}if(e instanceof RegExp){let a=xe(e,t),E=oe(a.value,i);return a.usedModifier?E:`(?:${E})`}return s?`(?:${e})`:o?`(?:${u})`:u}function xe(e,t){let n={i:null,m:null,s:null},r="\\n\\r\\u2028\\u2029",o=e.source;if(e.ignoreCase!==t.includes("i"))if(x)n.i=e.ignoreCase;else throw new Error("Pattern modifiers not supported, so the value of flag i on the interpolated RegExp must match the outer regex");if(e.dotAll!==t.includes("s")&&(x?n.s=e.dotAll:o=w(o,"\\.",e.dotAll?"[^]":`[^${r}]`,d.DEFAULT)),e.multiline!==t.includes("m")&&(x?n.m=e.multiline:(o=w(o,"\\^",e.multiline?`(?<=^|[${r}])`:"(?<![^])",d.DEFAULT),o=w(o,"\\$",e.multiline?`(?=$|[${r}])`:"(?![^])",d.DEFAULT))),x){let i=Object.keys(n),s=i.filter(a=>n[a]===!0).join(""),u=i.filter(a=>n[a]===!1).join("");if(u&&(s+=`-${u}`),s)return{value:`(?${s}:${o})`,usedModifier:!0}}return{value:o}}return we(Ge);})(); | ||
`.replace(/\s+/g,""),"gsu");function he(e){let t='Invalid unescaped "-" in character class',n=!1,r="";for(let{0:o,groups:{dp:i,vOnlyEscape:s}}of e.matchAll(Ge)){if(o[0]==="["){if(n)throw new Error("Invalid nested character class when flag v not supported; possibly from interpolation");if(o.endsWith("-"))throw new Error(t);n=!0}else if(o.endsWith("]")){if(o[0]==="-")throw new Error(t);n=!1}else if(n){if(o==="&&"||o==="--")throw new Error(`Invalid set operator "${o}" when flag v not supported`);if(i)throw new Error(`Invalid double punctuator "${o}", reserved by flag v`);if("(){}/|".includes(o))throw new Error(`Invalid unescaped "${o}" in character class`);if(s){r+=s;continue}}r+=o}return r}var Ke=(e,...t)=>{if(Array.isArray(e?.raw))return ee({},e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return ee.bind(null,{flags:e});if({}.toString.call(e)==="[object Object]"&&!t.length)return ee.bind(null,e);throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)},ee=(e,t,...n)=>{let{flags:r="",subclass:o=!1,plugins:i=[],unicodeSetsPlugin:s=he,disable:u={},force:l={}}=e;if(/[vu]/.test(r))throw new Error("Flags v/u cannot be explicitly added");let f=l.v||(u.v?!1:oe),p=(f?"v":"u")+r;u.x||({template:t,substitutions:n}=Z(t,n,fe)),u.n||({template:t,substitutions:n}=Z(t,n,ae));let a=0,m="",E;if(t.raw.forEach((A,N)=>{let S=!!(t.raw[N]||t.raw[N+1]);a+=k(A),m+=M(A,g.CHAR_CLASS),E=F(m,E);let{regexContext:L,charClassContext:P}=E;if(N<t.raw.length-1){let T=n[N];m+=Me(T,r,L,P,S,a),T instanceof RegExp?a+=k(T.source):T instanceof _&&(a+=k(String(T)))}}),[...i,...u.atomic?[]:[ge],...u.subroutines?[]:[me],...u.x?[]:[pe],...f||!s?[]:[s]].forEach(A=>m=A(m,{flags:p,useEmulationGroups:o})),o){let A=Ve(m);return new te(A.expression,p,{captureNums:A.captureNums})}return new RegExp(m,p)},te=class e extends RegExp{#e;constructor(t,n,r){super(t,n),r?this.#e=r.captureNums:t instanceof e&&(this.#e=t.#e)}exec(t){let n=RegExp.prototype.exec.call(this,t);if(!n||!this.#e)return n;let 
r=[...n];n.length=1;for(let o=1;o<r.length;o++)this.#e[o]!==null&&n.push(r[o]);return n}};function Me(e,t,n,r,o,i){if(e instanceof RegExp&&n!==c.DEFAULT)throw new Error("Cannot interpolate a RegExp at this position because the syntax context does not match");if(n===c.INVALID_INCOMPLETE_TOKEN||r===d.INVALID_INCOMPLETE_TOKEN)throw new Error("Interpolation preceded by invalid incomplete token");let s=e instanceof _,u="";if(!(e instanceof RegExp)){e=String(e),s||(u=B(e,n===c.CHAR_CLASS?g.CHAR_CLASS:g.DEFAULT));let l=ie(u||e,n,r);if(l)throw new Error(`Unescaped stray "${l}" in the interpolated value would have side effects outside it`)}if(n===c.ENCLOSED_TOKEN||n===c.INTERVAL_QUANTIFIER||n===c.GROUP_NAME||r===d.ENCLOSED_TOKEN||r===d.Q_TOKEN)return s?String(e):u;if(n===c.CHAR_CLASS){if(s){if(O(String(e),"^-|^&&|-$|&&$"))throw new Error("Cannot use range or set operator at boundary of interpolated pattern; move the operation into the pattern or the operator outside of it");let l=se(Q(e));return z(e)?`[${l}]`:M(l)}return z(u)?`[${u}]`:u}if(e instanceof RegExp){let l=ye(e,t),f=ue(l.value,i);return l.usedModifier?f:`(?:${f})`}return s?`(?:${e})`:o?`(?:${u})`:u}function ye(e,t){let n={i:null,m:null,s:null},r="\\n\\r\\u2028\\u2029",o=e.source;if(e.ignoreCase!==t.includes("i"))if(K)n.i=e.ignoreCase;else throw new Error("Pattern modifiers not supported, so the value of flag i on the interpolated RegExp must match the outer regex");if(e.dotAll!==t.includes("s")&&(K?n.s=e.dotAll:o=h(o,"\\.",e.dotAll?"[^]":`[^${r}]`,g.DEFAULT)),e.multiline!==t.includes("m")&&(K?n.m=e.multiline:(o=h(o,"\\^",e.multiline?`(?<=^|[${r}])`:"(?<![^])",g.DEFAULT),o=h(o,"\\$",e.multiline?`(?=$|[${r}])`:"(?![^])",g.DEFAULT))),K){let i=Object.keys(n),s=i.filter(l=>n[l]===!0).join(""),u=i.filter(l=>n[l]===!1).join("");if(u&&(s+=`-${u}`),s)return{value:`(?${s}:${o})`,usedModifier:!0}}return{value:o}}function Ve(e){let t=C.replace(/\$/g,"\\$"),n=[0],r=0;return 
e=h(e,`(?:${U})${t}`,({0:o})=>(r++,o.endsWith(C)?(n.push(null),o.slice(0,-C.length)):(n.push(r),o)),g.DEFAULT),{captureNums:n,expression:e}}return Ce(ve);})(); |
{ | ||
"name": "regex", | ||
"version": "3.1.0", | ||
"version": "4.0.0", | ||
"description": "Regex template tag with extended syntax, context-aware interpolation, and always-on best practices", | ||
@@ -14,2 +14,3 @@ "author": "Steven Levithan", | ||
}, | ||
"browser": "./dist/regex.min.js", | ||
"scripts": { | ||
@@ -21,7 +22,7 @@ "bundle:global": "esbuild src/regex.js --global-name=Regex --bundle --minify --outfile=dist/regex.min.js", | ||
"build": "npm run bundle:global && npm run bundle:esm && npm run types", | ||
"test": "jasmine", | ||
"prepare": "npm run build && npm test" | ||
"pretest": "npm run build", | ||
"test": "jasmine && tsc --project spec/types/tsconfig.test.json", | ||
"prepare": "npm test" | ||
}, | ||
"files": [ | ||
"src", | ||
"dist", | ||
@@ -43,2 +44,3 @@ "types" | ||
"esbuild": "^0.23.0", | ||
"expect-type": "^0.19.0", | ||
"jasmine": "^5.2.0", | ||
@@ -45,0 +47,0 @@ "rimraf": "^6.0.1", |
280
README.md
<div align="center"> | ||
<a href="https://github.com/slevithan/regex#readme"><img src="https://github.com/slevithan/regex/raw/main/regex-logo.svg" height="130" alt="regex logo"></a> | ||
<a href="https://github.com/slevithan/regex#readme"><img src="https://github.com/slevithan/regex/raw/main/regex-logo.svg" height="180" alt="regex logo"></a> | ||
@@ -10,7 +10,7 @@ [![build status](https://github.com/slevithan/regex/workflows/CI/badge.svg)](https://github.com/slevithan/regex/actions) | ||
`regex` is a template tag that extends JavaScript regular expressions with features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that equal or exceed native performance. It's also lightweight, supports all ES2024+ regex features, and can be used as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex) to avoid any runtime dependencies or added runtime cost. | ||
`regex` is a template tag that extends JavaScript regular expressions with features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that equal or exceed native performance. It's also lightweight, supports all ES2025 regex features, and can be used as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex) to avoid any runtime dependencies or added runtime cost. | ||
Highlights include support for free spacing and comments, atomic groups via `(?>…)` that can help you avoid [ReDoS](https://en.wikipedia.org/wiki/ReDoS), subroutines via `\g<name>` and definition groups via `(?(DEFINE)…)` that enable powerful subpattern composition, and context-aware interpolation of regexes, escaped strings, and partial patterns. | ||
Highlights include support for free spacing and comments, atomic groups via `(?>…)` that can help you avoid [ReDoS](https://en.wikipedia.org/wiki/ReDoS), subroutines via `\g<name>` and subroutine definition groups via `(?(DEFINE)…)` that enable powerful subpattern composition, and context-aware interpolation of regexes, escaped strings, and partial patterns. | ||
With the `regex` package, JavaScript steps up as one of the best regex flavors alongside PCRE and Perl, and maybe surpassing C++, Java, .NET, and Python. | ||
With the `regex` library, JavaScript steps up as one of the best regex flavors alongside PCRE and Perl, possibly surpassing C++, Java, .NET, Python, and Ruby. | ||
@@ -24,6 +24,6 @@ <details> | ||
- [Context](#-context) | ||
- [New regex syntax](#-new-regex-syntax) | ||
- [Extended regex syntax](#-extended-regex-syntax) | ||
- [Atomic groups](#atomic-groups) | ||
- [Subroutines](#subroutines) | ||
- [Definition groups](#definition-groups) | ||
- [Subroutine definition groups](#subroutine-definition-groups) | ||
- [Recursion](#recursion) | ||
@@ -41,2 +41,3 @@ - [Flags](#-flags) | ||
- [Interpolation contexts](#interpolation-contexts) | ||
- [Options](#-options) | ||
- [Performance](#-performance) | ||
@@ -50,10 +51,11 @@ - [Compatibility](#-compatibility) | ||
- **A modern regex baseline** so you don't need to continually opt-in to best practices. | ||
- Always-on flag <kbd>v</kbd> gives you the best level of Unicode support and strict errors. In environments without native <kbd>v</kbd>, flag <kbd>u</kbd> is used with <kbd>v</kbd>'s rules applied. | ||
- Always-on flag <kbd>x</kbd> allows you to freely add whitespace and comments to your regexes. | ||
- Always-on flag <kbd>n</kbd> (*named capture only* mode) improves regex readability and efficiency. | ||
- Always-on flag <kbd>v</kbd> gives you the best level of Unicode support and strict errors. | ||
- New flags: | ||
- Always-on flag <kbd>x</kbd> allows you to freely add whitespace and comments to your regexes. | ||
- Always-on flag <kbd>n</kbd> (*named capture only* mode) improves regex readability and efficiency. | ||
- No unreadable escaped backslashes `\\\\` since it's a raw string template tag. | ||
- **New regex syntax**. | ||
- **Extended regex syntax**. | ||
- Atomic groups via `(?>…)` can dramatically improve performance and prevent ReDoS. | ||
- Subroutines via `\g<name>` enable powerful subpattern composition, improving readability and maintainability. | ||
- Definition groups via `(?(DEFINE)…)` allow defining subpatterns within them for use by reference only. | ||
- Subroutines via `\g<name>` enable powerful composition, improving readability and maintainability. | ||
- Subroutine definition groups via `(?(DEFINE)…)` allow groups within them to be used by reference only. | ||
- Recursive matching is enabled by an extension. | ||
@@ -69,3 +71,3 @@ - **Context-aware and safe interpolation** of regexes, strings, and partial patterns. | ||
// Definition group and subroutines | ||
// Subroutines and a subroutine definition group | ||
const record = regex` | ||
@@ -99,6 +101,6 @@ ^ Admitted:\ (?<admitted> \g<date>) \n | ||
// Adjusts numbered backreferences in interpolated regexes | ||
const double = /(\w)\1/; | ||
regex`^ ${double} ${double} $`; | ||
// → /^(\w)\1(\w)\2$/v | ||
// Numbered backreferences in interpolated regexes are adjusted | ||
const double = /(.)\1/; | ||
const re2 = regex`^ (?<first>.) ${double} ${double} $`; | ||
// re2 → /^(?<first>.)(.)\2(.)\3$/v | ||
``` | ||
@@ -119,7 +121,19 @@ | ||
```html | ||
<script src="https://cdn.jsdelivr.net/npm/regex/dist/regex.min.js"></script> | ||
<script type="module"> | ||
import {regex, pattern} from 'https://cdn.jsdelivr.net/npm/regex@4.0.0/+esm'; | ||
// … | ||
</script> | ||
``` | ||
<details> | ||
<summary>Using a global name (no import)</summary> | ||
```html | ||
<script src="https://cdn.jsdelivr.net/npm/regex@4.0.0/dist/regex.min.js"></script> | ||
<script> | ||
const {regex, pattern} = Regex; | ||
// … | ||
</script> | ||
``` | ||
</details> | ||
@@ -136,3 +150,3 @@ ## ❓ Context | ||
3. Unicode mode with flag <kbd>u</kbd> adds strict errors (for unreserved letter escapes, octal escapes, escaped literal digits, and unescaped special characters in some contexts), switches to code-point-based matching (changing the potential handling of the dot, negated sets like `\W`, character class ranges, and quantifiers), changes flag <kbd>i</kbd> to apply Unicode case-folding, and adds support for new syntax. | ||
4. UnicodeSets mode with flag <kbd>v</kbd> (an upgrade to <kbd>u</kbd>) incompatibly changes escaping rules within character classes, fixes case-insensitive matching for doubly-negated `[^\P{…}]`, and adds support for new features/syntax. | ||
4. UnicodeSets mode with flag <kbd>v</kbd> (an upgrade to <kbd>u</kbd>) incompatibly changes escaping rules within character classes, fixes case-insensitive matching for `\p` and `\P` within negated `[^…]`, and adds support for new features/syntax. | ||
</details> | ||
@@ -144,5 +158,5 @@ | ||
## 🦾 New regex syntax | ||
## 🦾 Extended regex syntax | ||
Historically, JavaScript regexes were not as powerful or readable as other major regex flavors like PCRE, Perl, Java, .NET, and Python. With recent advancements and the `regex` package, those days are over. Modern JavaScript regexes have [significantly improved](https://github.com/slevithan/awesome-regex#javascript-regex-evolution) (adding lookbehind, named capture, Unicode properties, character class subtraction and intersection, etc.). The `regex` package, with its extended syntax and implicit flags, adds the key remaining pieces needed to stand alongside or surpass other major flavors. | ||
Historically, JavaScript regexes were not as powerful or readable as other major regex flavors like Java, .NET, PCRE, Perl, Python, and Ruby. With recent advancements and the `regex` library, those days are over. Modern JavaScript regexes have [significantly improved](https://github.com/slevithan/awesome-regex#javascript-regex-evolution) (adding lookbehind, named capture, Unicode properties, character class subtraction and intersection, etc.). The `regex` library, with its extended syntax and implicit flags, adds the key remaining pieces needed to stand alongside or surpass other major flavors. | ||
@@ -197,22 +211,42 @@ ### Atomic groups | ||
```js | ||
regex`(?<double> (?<char>.) \k<char> ) \g<double> \k<double>` | ||
// The backreference \k<double> matches whatever was matched by capturing group | ||
// `double`, regardless of what was matched by the subroutine. For example, the | ||
// regex matches 'xx!!xx' but not 'xx!!!!' | ||
regex` | ||
(?<double> (?<char>.)\k<char>) | ||
\g<double> | ||
\k<double> | ||
` | ||
``` | ||
You can also define subpatterns for use by reference only: | ||
The backreference `\k<double>` matches whatever was matched by capturing group `(?<double>…)`, regardless of what was matched in between by the subroutine `\g<double>`. For example, this regex matches `'xx!!xx'`, but not `'xx!!!!'`. | ||
<details> | ||
<summary>👉 <b>Show more details</b></summary> | ||
- Subroutines can appear before the groups they reference. | ||
- If there are [duplicate capture names](https://github.com/tc39/proposal-duplicate-named-capturing-groups), subroutines refer to the first instance of the given group (matching the behavior of PCRE and Perl). | ||
- Although subroutines can be chained to any depth, a descriptive error is thrown if they're used recursively. Support for recursion can be added via an extension (see [*Recursion*](#recursion)). | ||
- Like backreferences, subroutines can't be used *within* character classes. | ||
- As with all extended syntax in `regex`, subroutines are applied after interpolation, giving them maximal flexibility. | ||
</details> | ||
<details> | ||
<summary>👉 <b>Show how to define subpatterns for use by reference only</b></summary> | ||
The following regex matches an IPv4 address such as "192.168.12.123": | ||
```js | ||
// Matches an IPv4 address such as '192.168.12.123' | ||
regex`\b \g<byte> (\.\g<byte>){3} \b | ||
const ipv4 = regex` | ||
\b \g<byte> (\.\g<byte>){3} \b | ||
# The {0} quantifier allows defining a subpattern without matching it | ||
# Define the 'byte' subpattern | ||
(?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d ){0} | ||
` | ||
`; | ||
``` | ||
// Matches a record with several date fields, and captures each value | ||
regex` | ||
^ Born:\ (?<born> \g<date>) \n | ||
Admitted:\ (?<admitted> \g<date>) \n | ||
Above, the `{0}` quantifier at the end of the `(?<byte>…)` group allows *defining* the group without *matching* it at that position. The subpattern within it can then be used by reference elsewhere within the pattern. | ||
This next regex matches a record with multiple date fields, and captures each value: | ||
```js | ||
const record = regex` | ||
^ Admitted:\ (?<admitted> \g<date>) \n | ||
Released:\ (?<released> \g<date>) $ | ||
@@ -226,29 +260,23 @@ | ||
){0} | ||
` | ||
`; | ||
``` | ||
See the next section on definition groups for another way to do this. | ||
Here, the `{0}` quantifier at the end once again prevents matching its group at that position, while enabling all of the named groups within it to be used by reference. | ||
When using a regex to find matches (e.g. via the string `matchAll` method), named groups defined this way appear on each match's `groups` object, with the value `undefined` (which is the value for any capturing group that didn't participate in a match). See the next section [*Subroutine definition groups*](#subroutine-definition-groups) for a way to prevent such groups from appearing on the `groups` object. | ||
</details> | ||
> [!NOTE] | ||
> Subroutines are based on the feature in PCRE and Perl. PCRE allows several syntax options including `\g<name>`, whereas Perl uses `(?&name)`. Ruby also supports subroutines (and uses the `\g<name>` syntax), but it has behavior differences that make its subroutines not always act as independent subpatterns. | ||
<details> | ||
<summary>👉 <b>Show more details</b></summary> | ||
### Subroutine definition groups | ||
- Subroutines can appear before the groups they reference, as shown in examples above. | ||
- If there are [duplicate capture names](https://github.com/tc39/proposal-duplicate-named-capturing-groups), subroutines refer to the first instance of the given group (matching the behavior of PCRE and Perl). | ||
- Although subroutines can be chained to any depth, a descriptive error is thrown if they're used recursively. Support for recursion can be added via an extension (see [*Recursion*](#recursion)). | ||
- Like backreferences, subroutines can't be used from *within* character classes. | ||
- As with all new syntax in `regex`, subroutines are applied after interpolation, giving them maximal flexibility. | ||
</details> | ||
The syntax `(?(DEFINE)…)` can be used at the end of a regex to define subpatterns for use by reference only. When combined with [subroutines](#subroutines), this enables writing regexes in a grammatical way that can significantly improve readability and maintainability. | ||
### Definition groups | ||
> Named groups defined within subroutine definition groups don't appear on the `groups` object of matches. | ||
The syntax `(?(DEFINE)…)` can be used at the end of a regex to define subpatterns for use by reference only. Compared to the `(…){0}` syntax described in the preceding section on subroutines, definition groups have the advantage that the named groups within them don't appear on a match's `groups` object. | ||
Example: | ||
```js | ||
const record = 'Admitted: 2024-01-01\nReleased: 2024-01-02'; | ||
const match = regex` | ||
const re = regex` | ||
^ Admitted:\ (?<admitted> \g<date>) \n | ||
@@ -263,10 +291,15 @@ Released:\ (?<released> \g<date>) $ | ||
) | ||
`.exec(record); | ||
`; | ||
const record = 'Admitted: 2024-01-01\nReleased: 2024-01-03'; | ||
const match = re.exec(record); // Same as `record.match(re)` | ||
console.log(match.groups); | ||
// → {admitted: '2024-01-01', released: '2024-01-02'} | ||
/* → { | ||
admitted: '2024-01-01', | ||
released: '2024-01-03' | ||
} */ | ||
``` | ||
> [!NOTE] | ||
> Definition groups are based on the feature in PCRE and Perl. However, `regex` supports a stricter version of definition groups since it limits their placement, quantity, and the top-level syntax that can be used within them. | ||
> Subroutine definition groups are based on the feature in PCRE and Perl. However, `regex` supports a stricter version since it limits their placement, quantity, and the top-level syntax that can be used within them. | ||
@@ -276,6 +309,7 @@ <details> | ||
- Only one definition group is allowed per regex, and it must appear at the end of its pattern (trailing whitespace and comments are allowed by implicit flag <kbd>x</kbd>). | ||
- At the top level of definition groups, only named groups, whitespace, and comments are allowed. | ||
- Within definition groups, all named groups must use unique names, and all are excluded from the `groups` object of resulting matches. | ||
- The word `DEFINE` must appear in uppercase. | ||
- **Quantity:** Only one definition group is allowed per regex, but it can contain any number of named groups (and those groups can appear in any order). | ||
- **Placement:** Apart from trailing whitespace and comments (allowed by implicit flag <kbd>x</kbd>), definition groups must appear at the end of their pattern. | ||
- **Contents:** At the top level of definition groups, only named groups, whitespace, and comments are allowed. | ||
- **Duplicate names:** All named groups within definition groups must use unique names. | ||
- **Casing:** The word `DEFINE` must appear in uppercase. | ||
</details> | ||
@@ -285,3 +319,3 @@ | ||
You can use the `regex` extension package [`regex-recursion`](https://github.com/slevithan/regex-recursion) to match recursive patterns via `(?R)` and `\g<name>`, up to a specified max depth. | ||
You can use the `regex` extension package [regex-recursion](https://github.com/slevithan/regex-recursion) to match recursive patterns via `(?R)` and `\g<name>`, up to a specified max depth. | ||
@@ -302,14 +336,4 @@ ## 🚩 Flags | ||
<details> | ||
<summary>🐜 Debugging</summary> | ||
> For special situations such as when using `regex` within other tools, implicit flags can be disabled. See: [*Options*](#-options). | ||
For debugging purposes, you can disable implicit flags via experimental options: | ||
```js | ||
regex({__flagV: false, __flagX: false, __flagN: false})`…` | ||
``` | ||
However, disabling flag <kbd>n</kbd> also disables extended syntax. This is because flag <kbd>n</kbd>'s behavior is needed to emulate atomic groups and subroutines without side effects. | ||
</details> | ||
### Flag `v` | ||
@@ -409,3 +433,3 @@ | ||
- Regexes can't be interpolated inside character classes (so `` regex`[${/./}]` `` is an error) because the syntax context doesn't match. See [*Interpolating partial patterns*](#interpolating-partial-patterns) for a way to safely embed regex syntax (rather than `RegExp` instances) in character classes and other edge-case locations with different context. | ||
- To change the flags used by an interpolated regex, use the built-in capability of `RegExp` to copy a regex while providing new flags. Ex: `new RegExp(/./, 's')`. | ||
- To change the flags used by an interpolated regex, use the built-in capability of `RegExp` to copy a regex while providing new flags. E.g. `new RegExp(/./, 's')`. | ||
</details> | ||
@@ -575,3 +599,3 @@ | ||
> Implementation note: `pattern` returns an object with a custom `toString` that simply returns `String(value)`. So, if you wanted to, you could use it anywhere values are coerced to strings. | ||
> Implementation note: `pattern` returns an object with a custom `toString` that simply returns `String(value)`. | ||
@@ -631,9 +655,80 @@ ### Interpolation principles | ||
- *Sandboxed* means that the value can't change the meaning or error status of characters outside of the interpolation, and vice versa. | ||
- Character classes have a sub-context on the borders of ranges. Only one character node (ex: `a` or `\u0061`) can be interpolated at these positions. | ||
- Character classes have a sub-context on the borders of ranges. Only one character node (e.g. `a` or `\u0061`) can be interpolated at these positions. | ||
> The implementation details vary for how `regex` accomplishes sandboxing and atomization, based on the details of the specific pattern. But the concepts should always hold up. | ||
## 🔩 Options | ||
Typically, `regex` is used as follows: | ||
```js | ||
regex`…` // Without flags | ||
regex('gi')`…` // With flags | ||
``` | ||
However, several options are available that can be provided via an options object in place of the flags argument. These options aren't usually needed, and are primarily intended for use within other tools. | ||
Following are the available options and their default values: | ||
```js | ||
regex({ | ||
flags: '', | ||
subclass: false, | ||
plugins: [], | ||
unicodeSetsPlugin: <function>, | ||
disable: { | ||
x: false, | ||
n: false, | ||
v: false, | ||
atomic: false, | ||
subroutines: false, | ||
}, | ||
force: { | ||
v: false, | ||
}, | ||
})`…`; | ||
``` | ||
<details> | ||
<summary>👉 <b>See details for each option</b></summary> | ||
**`flags`** - For providing flags when using an options object. | ||
**`subclass`** - When `true`, the resulting regex is constructed using a `RegExp` subclass that avoids edge case issues with numbered backreferences. Without subclassing, submatches referenced *by number* from outside of the regex (e.g. in replacement strings) might reference the wrong values, because `regex`'s emulation of extended syntax (atomic groups and subroutines) can add anonymous captures to generated regex source that might affect group numbering. | ||
Context: `regex`'s implicit flag <kbd>n</kbd> (*named capture only* mode) means that all captures have names, so normally there's no need to reference submatches by number. In fact, flag <kbd>n</kbd> *prevents* you from doing so within the regex. And even in edge cases (such as when interpolating `RegExp` instances with numbered backreferences, or when flag <kbd>n</kbd> is explicitly disabled), any numbered backreferences within the regex are automatically adjusted to work correctly. However, issues can arise if you reference submatches by number (instead of their group names) from outside of the regex. Setting `subclass: true` resolves this, since the subclass knows about added "emulation groups" and automatically adjusts match results in all contexts. | ||
> This option isn't enabled by default because it would prevent `regex`'s Babel plugin from emitting regex literals. It also has a small performance cost, and is rarely needed. The primary use case is tools that use `regex` internally with flag <kbd>n</kbd> disabled. | ||
**`plugins`** - An array of functions. Plugins are called in order, after applying emulated flags and interpolation, but before the built-in plugins for extended syntax. This means that plugins can output extended syntax like atomic groups and subroutines. Plugins are expected to return an updated pattern string, and are called with two arguments: | ||
1. The pattern, as processed so far by preceding plugins, etc. | ||
2. An object with a `flags` property that includes the native (non-emulated) flags that will be used by the regex. | ||
The final result after running all plugins is provided to the `RegExp` constructor. | ||
> The tiny [regex-utilities](https://github.com/slevithan/regex-utilities) library is intended for use in plugins, and can make it easier to work with regex syntax. | ||
**`unicodeSetsPlugin`** - A plugin function that's used when flag <kbd>v</kbd> isn't supported natively, or when implicit flag <kbd>v</kbd> is disabled. The default value is a built-in function that provides basic backward compatibility by applying flag <kbd>v</kbd>'s escaping rules and throwing on use of <kbd>v</kbd>-only syntax (nested character classes, set subtraction/intersection, etc.). | ||
> `regex` is not primarily a backward compatibility library, so in order to remain lightweight, it doesn't transpile flag <kbd>v</kbd>'s new features out of the box. By replacing the default function, you can add backward compatible support for these features. See also: [*Compatibility*](#-compatibility). | ||
> This plugin runs last, which means it's possible to wrap an existing library (e.g. [regexpu-core](https://github.com/mathiasbynens/regexpu-core), used by Babel to [transpile <kbd>v</kbd>](https://babel.dev/docs/babel-plugin-transform-unicode-sets-regex)), without the library needing to understand `regex`'s extended syntax. | ||
**`disable`** - A set of options that can be individually disabled by setting their values to `true`. | ||
- **`x`** - Disables implicit, emulated [flag <kbd>x</kbd>](#flag-x). | ||
- **`n`** - Disables implicit, emulated [flag <kbd>n</kbd>](#flag-n). Note that, although it's safe to use anonymous captures and numbered backreferences within a regex when flag <kbd>n</kbd> is disabled, referencing submatches by number from *outside* a regex (e.g. in replacement strings) can result in incorrect values because extended syntax (atomic groups and subroutines) might add "emulation groups" to generated regex source. It's therefore recommended to enable option `subclass` when disabling `n`. | ||
- **`v`** - Disables implicit [flag <kbd>v</kbd>](#flag-v) even when it's supported natively, resulting in flag <kbd>u</kbd> being added instead (in combination with the `unicodeSetsPlugin`). | ||
- **`atomic`** - Prevents transpiling [atomic groups](#atomic-groups), resulting in a syntax error if they're used. | ||
- **`subroutines`** - Prevents transpiling [subroutines](#subroutines) and [subroutine definition groups](#subroutine-definition-groups), resulting in a syntax error if they're used. | ||
**`force`** - Options that, if set to `true`, override default settings (as well as options set on the `disable` object). | ||
- **`v`** - Force the use of flag <kbd>v</kbd> even when it's not supported natively (resulting in an error). | ||
</details> | ||
## ⚡ Performance | ||
`regex` transpiles its input to native `RegExp` instances. Therefore regexes created by `regex` perform equally fast as native regular expressions. `regex` calls can also be transpiled via a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex), avoiding the tiny overhead of transpiling at runtime. | ||
`regex` transpiles its input to native `RegExp` instances. Therefore, regexes created by `regex` perform just as fast as native regular expressions. Calls to `regex` can also be transpiled via a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex), avoiding the tiny overhead of transpiling at runtime. | ||
@@ -644,7 +739,7 @@ For regexes that rely on or have the potential to trigger heavy backtracking, you can dramatically improve beyond native performance via the [atomic groups](#atomic-groups) feature built into `regex`. | ||
`regex` uses flag <kbd>v</kbd> (`unicodeSets`) when it's supported natively. Flag <kbd>v</kbd> is supported by 2023-era browsers ([compat table](https://caniuse.com/mdn-javascript_builtins_regexp_unicodesets)) and Node.js 20. When <kbd>v</kbd> isn't available, flag <kbd>u</kbd> is automatically used instead (while still enforcing <kbd>v</kbd>'s rules), which extends support to Node.js 14 and 2020-era browsers (2017-era with a build step that transpiles private class fields, string `matchAll`, array `flatMap`, and the `??` and `?.` operators). | ||
`regex` uses flag <kbd>v</kbd> (`unicodeSets`) when it's supported natively. Flag <kbd>v</kbd> is supported by 2023-era browsers ([compat table](https://caniuse.com/mdn-javascript_builtins_regexp_unicodesets)) and Node.js 20. When <kbd>v</kbd> isn't available, flag <kbd>u</kbd> is automatically used instead (while still enforcing <kbd>v</kbd>'s escaping rules), which extends support to Node.js 14 and 2020-era browsers (2017-era with a build step that transpiles private class fields, string `matchAll`, array `flatMap`, and the `??` and `?.` operators). | ||
The following edge cases rely on modern JavaScript features: | ||
- To ensure atomization, `regex` uses nested character classes (which require native flag <kbd>v</kbd>) when interpolating more than one token at a time *inside character classes*. A descriptive error is thrown when this isn't supported, which you can avoid by not interpolating multi-token patterns or strings into character classes. | ||
- To ensure atomization, `regex` uses nested character classes (which require flag <kbd>v</kbd>) when interpolating more than one token at a time *inside character classes*. A descriptive error is thrown when this isn't supported, which you can avoid by not interpolating multi-token patterns or strings into character classes. | ||
- Using an interpolated `RegExp` instance with a different value for flag <kbd>i</kbd> than its outer regex relies on [regex modifiers](https://github.com/tc39/proposal-regexp-modifiers), a bleeding-edge feature available in Chrome/Edge 125 and Opera 111. A descriptive error is thrown in environments without support, which you can avoid by aligning the use of flag <kbd>i</kbd> on inner and outer regexes. Local-only application of other flags doesn't rely on this feature. | ||
@@ -654,27 +749,36 @@ | ||
<details> | ||
<details name="faq"> | ||
<summary><b>How are you comparing regex flavors?</b></summary> | ||
The claim that JavaScript with the `regex` package is among the best regex flavors is based on a holistic view. Following are some of the aspects considered: | ||
The claim that JavaScript with the `regex` library is among the best regex flavors is based on a holistic view. Following are some of the aspects considered: | ||
1. **Performance:** An important aspect, but not the main one since mature regex implementations are generally pretty fast. JavaScript is strong on regex performance (at least considering V8's Irregexp engine and JavaScriptCore), but it uses a backtracking engine that is missing any syntax for backtracking control—a major limitation that makes ReDoS vulnerability more common. The `regex` package adds atomic groups to native JavaScript regexes, which is a solution to this problem and therefore can dramatically improve performance. | ||
1. **Performance:** An important aspect, but not the main one since mature regex implementations are generally pretty fast. JavaScript is strong on regex performance (at least considering V8's Irregexp engine and JavaScriptCore), but it uses a backtracking engine that is missing any syntax for backtracking control—a major limitation that makes ReDoS vulnerability more common. The `regex` library adds atomic groups to native JavaScript regexes, which is a solution to this problem and therefore can dramatically improve performance. | ||
2. **Support for advanced features** that enable easily creating patterns for common or important use cases: Here, JavaScript stepped up its game with ES2018 and ES2024. JavaScript is now best in class for some features like lookbehind (with its infinite-length support) and Unicode properties (with multicharacter "properties of strings", character class subtraction and intersection, and Script_Extensions). These features are either not supported or not as robust in many other flavors. | ||
3. **Ability to write readable and maintainable patterns:** Here, native JavaScript has long been the worst of the major flavors, since it lacks the `x` (extended) flag that allows insignificant whitespace and comments. The `regex` package not only adds `x` and turns it on by default, but it additionally adds regex subroutines (matched only by PCRE and Perl, although some other flavors have inferior versions) which enable powerful subpattern composition and reuse. And it includes context-aware interpolation of `RegExp` instances, escaped strings, and partial patterns, all of which can also help with composition and readability. | ||
3. **Ability to write readable and maintainable patterns:** Here, native JavaScript has long been the worst of the major flavors, since it lacks the `x` (extended) flag that allows insignificant whitespace and comments. The `regex` library not only adds `x` and turns it on by default, but it additionally adds regex subroutines (matched only by PCRE and Perl, although some other flavors have inferior versions) which enable powerful subpattern composition and reuse. And it includes context-aware interpolation of `RegExp` instances, escaped strings, and partial patterns, all of which can also help with composition and readability. | ||
</details> | ||
<details> | ||
<summary><b>Does <code>regex</code> support extensions?</b></summary> | ||
<details name="faq"> | ||
<summary><b>Can <code>regex</code> be called as a function instead of using it with backticks?</b></summary> | ||
Yes. There are two approaches for this: | ||
Yes, although you might not need to. If you want to use `regex` with dynamic input, you can interpolate a `pattern` call as the full expression. For example: | ||
1. **Alternative constructors:** If you want `regex` to use a `RegExp` subclass or other constructor, you can do so by modifying `this`: `` regex.bind(RegExpSubclass)`…` ``. The constructor is expected to accept two arguments (the pattern and flags) and return a `RegExp` instance. | ||
2. **Postprocessors:** `regex` can be called with an options object that includes an array of postprocessor functions. Ex: `` regex({flags: 'g', postprocessors: [myExtension]})`…` ``. Postprocessors are called in order after applying emulated flags and interpolation. They're called with two arguments (the pattern and flags) and are expected to return an updated pattern string. The final result is provided to the `RegExp` (or alternative) constructor. | ||
```js | ||
import {regex, pattern} from 'regex'; | ||
const str = '…'; | ||
const re = regex('gi')`${pattern(str)}`; | ||
``` | ||
You can make extensions easier to use by wrapping the use of these features in your own function or template tag. See extension [`regex-recursion`](https://github.com/slevithan/regex-recursion) for an example of using all of these features. For a much simpler example of a postprocessor, see `regex`'s built-in `rakePostprocessor`. | ||
If you prefer to call `regex` as a function (rather than using it as a template tag), that requires explicitly providing the raw template strings array, as follows: | ||
```js | ||
import {regex} from 'regex'; | ||
const str = '…'; | ||
const re = regex('gi')({raw: [str]}); | ||
``` | ||
</details> | ||
<details> | ||
<details name="faq"> | ||
<summary><b>Why are flags added via <code>regex('g')`…`</code> rather than <code>regex`/…/g`</code>?</b></summary> | ||
There are several disadvantages to the alternative syntax: | ||
The alternative syntax isn't used because it has several disadvantages: | ||
@@ -684,3 +788,3 @@ - It doesn't match the `RegExp` constructor's syntax. | ||
- Flags-up-front can be more readable, especially with long or multiline regexes that make flags easy to miss when they're at the end. And since some flags change the meaning of regex syntax, it can help to read them first. | ||
- It would most likely be incompatible with any future standardized regex template tag. To date, TC39 discussions about a standardized tag for regexes have not favored the `` `/…/g` `` format. | ||
- It would most likely be incompatible if a standardized regex template tag were added to the JavaScript language in the future. To date, TC39 discussions about a standardized tag for regexes have not favored the `` `/…/g` `` format. | ||
</details> | ||
@@ -690,5 +794,5 @@ | ||
`regex` was partly inspired by [XRegExp](https://github.com/slevithan/xregexp)'s `.tag` and [regexp-make-js](https://github.com/mikesamuel/regexp-make-js). `regex`'s only dependency is the ultra-lightweight [`regex-utilities`](https://github.com/slevithan/regex-utilities), which was separated so it can be reused by `regex` extensions. | ||
`regex` was partly inspired by [XRegExp](https://github.com/slevithan/xregexp)'s `.tag` and [regexp-make-js](https://github.com/mikesamuel/regexp-make-js). `regex`'s only dependency is the ultra-lightweight [regex-utilities](https://github.com/slevithan/regex-utilities), which was separated so it can be reused by `regex` plugins. | ||
Crafted by Steven Levithan with ❤︎ for regular expressions and their enthusiasts.<br> | ||
MIT License. |
/** | ||
@typedef {import('./regex.js').PluginData} PluginData | ||
*/ | ||
/** | ||
@param {string} expression | ||
@param {PluginData} data | ||
@returns {string} | ||
*/ | ||
export function atomicGroupsPostprocessor(expression: string): string; | ||
export function atomicPlugin(expression: string, data: PluginData): string; | ||
export type PluginData = import("./regex.js").PluginData; |
@@ -5,5 +5,4 @@ /** | ||
@param {string} expression | ||
@param {string} flags | ||
@returns {string} | ||
*/ | ||
export function backcompatPostprocessor(expression: string, flags: string): string; | ||
export function backcompatPlugin(expression: string): string; |
@@ -1,4 +0,5 @@ | ||
export function flagNPreprocessor(value: any, runningContext: any): { | ||
export function flagNPreprocessor(value: import("./utils.js").InterpolatedValue, runningContext: import("./utils.js").RunningContext): { | ||
transformed: string; | ||
runningContext: any; | ||
runningContext: import("./utils.js").RunningContext; | ||
}; | ||
export type Preprocessor = import("./utils.js").Preprocessor; |
@@ -1,5 +0,11 @@ | ||
export function flagXPreprocessor(value: any, runningContext: any): { | ||
export function flagXPreprocessor(value: import("./utils.js").InterpolatedValue, runningContext: import("./utils.js").RunningContext): { | ||
transformed: string; | ||
runningContext: any; | ||
runningContext: import("./utils.js").RunningContext; | ||
}; | ||
export function rakePostprocessor(expression: any): any; | ||
/** | ||
Remove `(?:)` token separators (most likely added by flag x) in cases where it's safe to do so. | ||
@param {string} expression | ||
@returns {string} | ||
*/ | ||
export function cleanPlugin(expression: string): string; | ||
export type Preprocessor = import("./utils.js").Preprocessor; |
@@ -10,3 +10,3 @@ /** | ||
@overload | ||
@param {string} value | ||
@param {string | number} value | ||
@returns {Pattern} | ||
@@ -19,3 +19,3 @@ | ||
*/ | ||
export function pattern(value: string): Pattern; | ||
export function pattern(value: string | number): Pattern; | ||
/** | ||
@@ -30,3 +30,3 @@ Returns a value that can be interpolated into a `regex` template string without having its special | ||
@overload | ||
@param {string} value | ||
@param {string | number} value | ||
@returns {Pattern} | ||
@@ -41,11 +41,7 @@ | ||
export class Pattern { | ||
/** | ||
@param {string} value | ||
*/ | ||
/** @param {string} value */ | ||
constructor(value: string); | ||
/** | ||
@returns {string} | ||
*/ | ||
/** @returns {string} */ | ||
toString(): string; | ||
#private; | ||
} |
@@ -0,84 +1,97 @@ | ||
export type InterpolatedValue = string | RegExp | Pattern | number; | ||
export type PluginData = { | ||
flags: string; | ||
useEmulationGroups: boolean; | ||
}; | ||
export type RawTemplate = TemplateStringsArray | { | ||
raw: Array<string>; | ||
}; | ||
export type RegexTagOptions = { | ||
flags?: string; | ||
postprocessors?: Array<(expression: string, flags: string) => string>; | ||
__extendSyntax?: boolean; | ||
__flagN?: boolean; | ||
__flagV?: boolean; | ||
__flagX?: boolean; | ||
__rake?: boolean; | ||
subclass?: boolean; | ||
plugins?: Array<(expression: string, data: PluginData) => string>; | ||
unicodeSetsPlugin?: ((expression: string, data: PluginData) => string) | null; | ||
disable?: { | ||
x?: boolean; | ||
n?: boolean; | ||
v?: boolean; | ||
atomic?: boolean; | ||
subroutines?: boolean; | ||
}; | ||
force?: { | ||
v?: boolean; | ||
}; | ||
}; | ||
export type RegexTag<T> = { | ||
(template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T; | ||
(flags?: string): RegexTag<T>; | ||
(options: RegexTagOptions & { | ||
subclass?: false; | ||
}): RegexTag<T>; | ||
(options: RegexTagOptions & { | ||
subclass: true; | ||
}): RegexTag<WrappedRegex>; | ||
}; | ||
export type RegexFromTemplate<T> = { | ||
(options: RegexTagOptions, template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T; | ||
}; | ||
export type EmulationGroupSlots = Array<number | null>; | ||
/** | ||
Template tag for constructing a regex with advanced features and context-aware interpolation of | ||
regexes, strings, and patterns. | ||
Can be called in multiple ways: | ||
1. `` regex`…` `` - Regex pattern as a raw string. | ||
2. `` regex('gi')`…` `` - To specify flags. | ||
3. `` regex({flags: 'gi'})`…` `` - With options. | ||
4. `` regex.bind(RegExpSubclass)`…` `` - With a `this` that specifies a different constructor. | ||
@overload | ||
@param {TemplateStringsArray} template | ||
@param {...(string | RegExp | Pattern)} substitutions | ||
@returns {RegExp} | ||
@overload | ||
@param {string} [flags] | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@overload | ||
@param {RegexTagOptions} options | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@typedef {string | RegExp | Pattern | number} InterpolatedValue | ||
@typedef {{flags: string; useEmulationGroups: boolean;}} PluginData | ||
@typedef {TemplateStringsArray | {raw: Array<string>}} RawTemplate | ||
@typedef {{ | ||
flags?: string; | ||
subclass?: boolean; | ||
plugins?: Array<(expression: string, data: PluginData) => string>; | ||
unicodeSetsPlugin?: ((expression: string, data: PluginData) => string) | null; | ||
disable?: { | ||
x?: boolean; | ||
n?: boolean; | ||
v?: boolean; | ||
atomic?: boolean; | ||
subroutines?: boolean; | ||
}; | ||
force?: { | ||
v?: boolean; | ||
}; | ||
}} RegexTagOptions | ||
*/ | ||
export function regex(template: TemplateStringsArray, ...substitutions: (string | RegExp | Pattern)[]): RegExp; | ||
/** | ||
Template tag for constructing a regex with advanced features and context-aware interpolation of | ||
regexes, strings, and patterns. | ||
Can be called in multiple ways: | ||
1. `` regex`…` `` - Regex pattern as a raw string. | ||
2. `` regex('gi')`…` `` - To specify flags. | ||
3. `` regex({flags: 'gi'})`…` `` - With options. | ||
4. `` regex.bind(RegExpSubclass)`…` `` - With a `this` that specifies a different constructor. | ||
@overload | ||
@param {TemplateStringsArray} template | ||
@param {...(string | RegExp | Pattern)} substitutions | ||
@returns {RegExp} | ||
@overload | ||
@param {string} [flags] | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@overload | ||
@param {RegexTagOptions} options | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@template T | ||
@typedef RegexTag | ||
@type {{ | ||
(template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T; | ||
(flags?: string): RegexTag<T>; | ||
(options: RegexTagOptions & {subclass?: false}): RegexTag<T>; | ||
(options: RegexTagOptions & {subclass: true}): RegexTag<WrappedRegex>; | ||
}} | ||
*/ | ||
export function regex(flags?: string): (template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp; | ||
/** | ||
Template tag for constructing a regex with advanced features and context-aware interpolation of | ||
Template tag for constructing a regex with extended syntax and context-aware interpolation of | ||
regexes, strings, and patterns. | ||
Can be called in multiple ways: | ||
Can be called in several ways: | ||
1. `` regex`…` `` - Regex pattern as a raw string. | ||
2. `` regex('gi')`…` `` - To specify flags. | ||
3. `` regex({flags: 'gi'})`…` `` - With options. | ||
4. `` regex.bind(RegExpSubclass)`…` `` - With a `this` that specifies a different constructor. | ||
@overload | ||
@param {TemplateStringsArray} template | ||
@param {...(string | RegExp | Pattern)} substitutions | ||
@returns {RegExp} | ||
@overload | ||
@param {string} [flags] | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@overload | ||
@param {RegexTagOptions} options | ||
@returns {(template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp} | ||
@type {RegexTag<RegExp>} | ||
*/ | ||
export function regex(options: RegexTagOptions): (template: TemplateStringsArray, ...substitutions: Array<string | RegExp | Pattern>) => RegExp; | ||
export const regex: RegexTag<RegExp>; | ||
import { pattern } from './pattern.js'; | ||
import { Pattern } from './pattern.js'; | ||
/** | ||
@typedef {Array<number | null>} EmulationGroupSlots | ||
*/ | ||
declare class WrappedRegex extends RegExp { | ||
/** | ||
@param {string | WrappedRegex} expression | ||
@param {string} [flags] | ||
@param {{captureNums: EmulationGroupSlots;}} [data] | ||
*/ | ||
constructor(expression: string | WrappedRegex, flags?: string, data?: { | ||
captureNums: EmulationGroupSlots; | ||
}); | ||
#private; | ||
} | ||
export { pattern }; |
/** | ||
@typedef {import('./regex.js').PluginData} PluginData | ||
*/ | ||
/** | ||
@param {string} expression | ||
@param {PluginData} data | ||
@returns {string} | ||
*/ | ||
export function subroutinesPostprocessor(expression: string): string; | ||
export function subroutinesPlugin(expression: string, data: PluginData): string; | ||
export type PluginData = import("./regex.js").PluginData; | ||
export type NamedCapturingGroupsMap = Map<string, { | ||
@@ -7,0 +12,0 @@ isUnique: boolean; |
@@ -18,14 +18,19 @@ /** | ||
export function getBreakoutChar(expression: any, regexContext: any, charClassContext: any): any; | ||
export function getEndContextForIncompleteExpression(incompleteExpression: any, { regexContext, charClassContext, charClassDepth, lastPos, }: { | ||
regexContext?: string; | ||
charClassContext?: string; | ||
charClassDepth?: number; | ||
lastPos?: number; | ||
}): { | ||
regexContext: string; | ||
charClassContext: string; | ||
charClassDepth: number; | ||
lastPos: any; | ||
}; | ||
/** | ||
@typedef {{ | ||
regexContext: string; | ||
charClassContext: string; | ||
charClassDepth: number; | ||
lastPos: number; | ||
}} RunningContext | ||
*/ | ||
/** | ||
Accepts and returns its full state so it doesn't have to reprocess parts that have already been | ||
seen. Assumes flag v and doesn't worry about syntax errors that are caught by it. | ||
@param {string} incompleteExpression | ||
@param {Partial<RunningContext>} [runningContext] | ||
@returns {RunningContext} | ||
*/ | ||
export function getEndContextForIncompleteExpression(incompleteExpression: string, { regexContext, charClassContext, charClassDepth, lastPos, }?: Partial<RunningContext>): RunningContext; | ||
/** | ||
@param {string} expression | ||
@@ -43,15 +48,20 @@ @returns {number} | ||
/** | ||
@typedef {import('./regex.js').InterpolatedValue} InterpolatedValue | ||
@typedef {import('./regex.js').RawTemplate} RawTemplate | ||
@typedef {(value: InterpolatedValue, runningContext: RunningContext) => { | ||
transformed: string; | ||
runningContext: RunningContext; | ||
}} Preprocessor | ||
*/ | ||
/** | ||
Returns transformed versions of a template and substitutions, using the given preprocessor. Only | ||
processes substitutions that are instanceof `Pattern`. | ||
@param {TemplateStringsArray} template | ||
@param {Array<string | RegExp | Pattern>} substitutions | ||
@param {(value, runningContext) => {transformed: string; runningContext: Object}} preprocessor | ||
@returns {{template: TemplateStringsArray; substitutions: Array<string | RegExp | Pattern>}} | ||
@param {RawTemplate} template | ||
@param {ReadonlyArray<InterpolatedValue>} substitutions | ||
@param {Preprocessor} preprocessor | ||
@returns {{template: RawTemplate; substitutions: ReadonlyArray<InterpolatedValue>;}} | ||
*/ | ||
export function preprocess(template: TemplateStringsArray, substitutions: Array<string | RegExp | Pattern>, preprocessor: (value: any, runningContext: any) => { | ||
transformed: string; | ||
runningContext: any; | ||
}): { | ||
template: TemplateStringsArray; | ||
substitutions: Array<string | RegExp | Pattern>; | ||
export function preprocess(template: RawTemplate, substitutions: ReadonlyArray<InterpolatedValue>, preprocessor: Preprocessor): { | ||
template: RawTemplate; | ||
substitutions: ReadonlyArray<InterpolatedValue>; | ||
}; | ||
@@ -79,5 +89,17 @@ export namespace RegexContext { | ||
export const doublePunctuatorChars: "&!#$%*+,.:;<=>?@^`~"; | ||
export const emulationGroupMarker: "$E$"; | ||
export const namedCapturingDelim: any; | ||
export const capturingDelim: any; | ||
export const noncapturingDelim: any; | ||
import { Pattern } from './pattern.js'; | ||
export type RunningContext = { | ||
regexContext: string; | ||
charClassContext: string; | ||
charClassDepth: number; | ||
lastPos: number; | ||
}; | ||
export type InterpolatedValue = import("./regex.js").InterpolatedValue; | ||
export type RawTemplate = import("./regex.js").RawTemplate; | ||
export type Preprocessor = (value: InterpolatedValue, runningContext: RunningContext) => { | ||
transformed: string; | ||
runningContext: RunningContext; | ||
}; |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
777
0
109968
5
13
1443