regex-recursion
Advanced tools
Comparing version 3.1.0 to 4.0.0
@@ -1,60 +0,1 @@ | ||
var Regex;(Regex||={}).ext=(()=>{var y=Object.defineProperty;var me=Object.getOwnPropertyDescriptor;var Ne=Object.getOwnPropertyNames;var Ae=Object.prototype.hasOwnProperty;var Te=(e,t)=>{for(var n in t)y(e,n,{get:t[n],enumerable:!0})},Se=(e,t,n,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of Ne(t))!Ae.call(e,o)&&o!==n&&y(e,o,{get:()=>t[o],enumerable:!(r=me(t,o))||r.enumerable});return e};var _e=e=>Se(y({},"__esModule",{value:!0}),e);var Xe={};Te(Xe,{recursion:()=>we,rregex:()=>Je});var w=Object.freeze({DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"});function T(e,t,n,r){let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu"),i=0,s="";for(let a of e.matchAll(o)){let{0:u,groups:{skip:l}}=a;if(!l&&(!r||r===w.DEFAULT==!i)){n instanceof Function?s+=n(a):s+=n;continue}u==="["?i++:u==="]"&&i&&i--,s+=u}return s}function H(e,t,n,r){T(e,t,n,r)}function x(e,t,n=0,r){if(!new RegExp(t,"su").test(e))return null;let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu");o.lastIndex=n;let i=0,s;for(;s=o.exec(e);){let{0:a,groups:{skip:u}}=s;if(!u&&(!r||r===w.DEFAULT==!i))return s;a==="["?i++:a==="]"&&i&&i--,o.lastIndex==s.index&&o.lastIndex++}return null}function P(e,t,n){return!!x(e,t,0,n)}function oe(e,t){let n=/\\?./gsu;n.lastIndex=t;let r=e.length,o=0,i=1,s;for(;s=n.exec(e);){let[a]=s;if(a==="[")o++;else if(o)a==="]"&&o--;else if(a==="(")i++;else if(a===")"&&(i--,!i)){r=s.index;break}}return e.slice(t,r)}var U=class{#e;constructor(e){this.#e=e}toString(){return String(this.#e)}};function Le(e,...t){if(Array.isArray(e?.raw))return new U(e.raw.flatMap((n,r)=>r<e.raw.length-1?[n,t[r]]:n).join(""));if(!t.length)return new U(e??"");throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var E={DEFAULT:"R_DEFAULT",CHAR_CLASS:"R_CHAR_CLASS",GROUP_NAME:"R_GROUP_NAME",ENCLOSED_TOKEN:"R_ENCLOSED_TOKEN",INTERVAL_QUANTIFIER:"R_INTERVAL_QUANTIFIER",INVALID_INCOMPLETE_TOKEN:"R_INVALID_INCOMPLETE_TOKEN"},g={DEFAULT:"CC_DEFAULT",RANGE:"CC_RANGE",ENCLOSED_TOKEN:"CC_ENCLOSED_TOKEN",Q_TOKEN:"CC_Q_TOKEN",INVALID_INCOMPLETE_TOKEN:"CC_INVALID_INCOMPLETE_TOKEN"},b=(()=>{try{new RegExp("(?i:)")}catch{return!1}return!0})(),Ie=(()=>{try{new RegExp("","v")}catch{return!1}return!0})(),v="&!#$%*+,.:;<=>?@^`~",Q=String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`,q=String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${Q}`,W=String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;function ie(e,t){return t===w.CHAR_CLASS?e.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${v}]`,"g"),"\\$&"):e.replace(/[()\[\]{}|\\^$*+?.]/g,"\\$&")}function ae(e){return e.replace(new RegExp(`^([${v}])(?!\\1)`),(t,n,r)=>`\\${t}${r+1===e.length?"":t}`)}function $e(e){return e.replace(/^\^/,"\\^^")}function z(e,t){return T(e,String.raw`\\0(?!\d)`,"\\u{0}",t)}function X(e,t,n){let r=0;for(let[o]of e.matchAll(new RegExp(`[${ie(t+n,w.CHAR_CLASS)}]`,"g")))if(r+=o===t?1:-1,r<0)return n;return r>0?t:""}function Re(e,t,n){let r=e.replace(/\\./gsu,"");if(r.endsWith("\\"))return"\\";if(t===E.DEFAULT)return X(r,"(",")");if(t===E.CHAR_CLASS&&!(n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN))return X(r,"[","]");if(t===E.ENCLOSED_TOKEN||t===E.INTERVAL_QUANTIFIER||n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN){if(r.includes("}"))return"}"}else if(t===E.GROUP_NAME&&r.includes(">"))return">";return""}var Y=new RegExp(String.raw` | ||
(?<groupN>\(\?<(?![=!])|\\[gk]<) | ||
| (?<enclosedT>\\[pPu]\{) | ||
| (?<qT>\\q\{) | ||
| (?<intervalQ>\{) | ||
| (?<incompleteT>\\(?: $ | ||
| c(?![A-Za-z]) | ||
| u(?![A-Fa-f\d]{4})[A-Fa-f\d]{0,3} | ||
| x(?![A-Fa-f\d]{2})[A-Fa-f\d]? | ||
) | ||
) | ||
| -- | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function B(e,{regexContext:t=E.DEFAULT,charClassContext:n=g.DEFAULT,charClassDepth:r=0,lastPos:o=0}){Y.lastIndex=o;let i;for(;i=Y.exec(e);){let{0:s,groups:{groupN:a,enclosedT:u,qT:l,intervalQ:p,incompleteT:c}}=i;s==="["?(r++,t=E.CHAR_CLASS,n=g.DEFAULT):s==="]"&&t===E.CHAR_CLASS?(r&&r--,r||(t=E.DEFAULT),n=g.DEFAULT):t===E.CHAR_CLASS?c?n=g.INVALID_INCOMPLETE_TOKEN:s==="-"?n=g.RANGE:u?n=g.ENCLOSED_TOKEN:l?n=g.Q_TOKEN:(s==="}"&&(n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN)||n===g.INVALID_INCOMPLETE_TOKEN||n===g.RANGE)&&(n=g.DEFAULT):c?t=E.INVALID_INCOMPLETE_TOKEN:a?t=E.GROUP_NAME:u?t=E.ENCLOSED_TOKEN:p?t=E.INTERVAL_QUANTIFIER:(s===">"&&t===E.GROUP_NAME||s==="}"&&(t===E.ENCLOSED_TOKEN||t===E.INTERVAL_QUANTIFIER)||t===E.INVALID_INCOMPLETE_TOKEN)&&(t=E.DEFAULT)}return{regexContext:t,charClassContext:n,charClassDepth:r,lastPos:e.length}}function k(e){let t=0;return H(e,q,()=>t++,w.DEFAULT),t}function De(e,t){return T(e,String.raw`\\(?<num>[1-9]\d*)`,({groups:{num:n}})=>`\\${+n+t}`,w.DEFAULT)}var Oe=["Basic_Emoji","Emoji_Keycap_Sequence","RGI_Emoji_Modifier_Sequence","RGI_Emoji_Flag_Sequence","RGI_Emoji_Tag_Sequence","RGI_Emoji_ZWJ_Sequence","RGI_Emoji"].join("|"),Ce=new RegExp(String.raw` | ||
\\(?: c[A-Za-z] | ||
| p\{(?<pStrProp>${Oe})\} | ||
| [pP]\{[^\}]+\} | ||
| (?<qStrProp>q) | ||
| u(?:[A-Fa-f\d]{4}|\{[A-Fa-f\d]+\}) | ||
| x[A-Fa-f\d]{2} | ||
| . | ||
) | ||
| -- | ||
| && | ||
| . | ||
`.replace(/\s+/g,""),"gsu");function ee(e){let t=!1,n;for(let{0:r,groups:o}of e.matchAll(Ce)){if(o.pStrProp||o.qStrProp||r==="["&&t)return!0;if(["-","--","&&"].includes(r))t=!1;else if(!["[","]"].includes(r)){if(t||n==="]")return!0;t=!0}n=r}return!1}function te(e,t,n){let r={raw:[]},o=[],i={};return e.raw.forEach((s,a)=>{let u=n(s,{...i,lastPos:0});if(r.raw.push(u.transformed),i=u.runningContext,a<e.raw.length-1){let l=t[a];if(l instanceof U){let p=n(l,{...i,lastPos:0});o.push(Le(p.transformed)),i=p.runningContext}else o.push(l)}}),{template:r,substitutions:o}}var Fe=new RegExp(String.raw` | ||
${W} | ||
| \(\?< | ||
| (?<backrefNum>\\[1-9]\d*) | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function Ue(e,t){e=String(e);let n="",r="";for(let{0:o,groups:{backrefNum:i}}of e.matchAll(Fe)){n+=o,t=B(n,t);let{regexContext:s}=t;if(s===E.DEFAULT)if(o==="(")r+="(?:";else{if(i)throw new Error(`Invalid decimal escape "${o}" with implicit flag n; replace with named backreference`);r+=o}else r+=o}return{transformed:r,runningContext:t}}var ne=/^\s$/,be=/^\\[\s#]$/,re=/^[ \t]$/,ke=/^\\[ \t]$/,Pe=new RegExp(String.raw` | ||
\\(?: [gk]< | ||
| [pPu]\{ | ||
| c[A-Za-z] | ||
| u[A-Fa-f\d]{4} | ||
| x[A-Fa-f\d]{2} | ||
| 0\d+ | ||
) | ||
| \[\^ | ||
| ${W} | ||
| \(\?< | ||
| (?<dp>[${v}])\k<dp> | ||
| -- | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function ve(e,t){e=String(e);let n=!1,r=!1,o=!1,i="",s="",a="",u="",l=!1,p=(c,{prefix:f=!0,postfix:d=!1}={})=>(c=(l&&f?"(?:)":"")+c+(d?"(?:)":""),l=!1,c);for(let[c]of e.matchAll(Pe)){if(o){c===` | ||
`&&(o=!1,l=!0);continue}if(n){if(ne.test(c))continue;n=!1,l=!0}else if(r){if(re.test(c))continue;r=!1}i+=c,t=B(i,t);let{regexContext:f,charClassContext:d}=t;if(c==="-"&&f===E.CHAR_CLASS&&u===g.RANGE)throw new Error("Invalid unescaped hyphen as the end value for a range");if(f===E.DEFAULT&&/^(?:[?*+]|\?\?)$/.test(c)||f===E.INTERVAL_QUANTIFIER&&c==="{")s+=p(c,{prefix:!1,postfix:a==="("&&c==="?"});else if(f===E.DEFAULT)ne.test(c)?n=!0:c.startsWith("#")?o=!0:be.test(c)?s+=p(c[1],{prefix:!1}):s+=p(c);else if(f===E.CHAR_CLASS&&c!=="["&&c!=="[^")if(re.test(c)&&(d===g.DEFAULT||d===g.RANGE||d===g.Q_TOKEN))r=!0;else{if(d===g.INVALID_INCOMPLETE_TOKEN)throw new Error(`Invalid incomplete token in character class: "${c}"`);ke.test(c)&&(d===g.DEFAULT||d===g.Q_TOKEN)?s+=p(c[1],{prefix:!1}):d===g.DEFAULT?s+=p(ae(z(c))):s+=p(c)}else s+=p(c);n||r||o||(a=c,u=d)}return{transformed:s,runningContext:t}}function Ge(e){let t=String.raw`\(\?:\)`;return e=T(e,`(?:${t}){2,}`,"(?:)",w.DEFAULT),e=T(e,String.raw`${t}(?=[)|.[$\\]|\((?!DEFINE)|$)|(?<=[()|.\]^>]|\\[bBdDfnrsStvwW]|\(\?(?:[:=!]|<[=!])|^)${t}(?![?*+{])`,"",w.DEFAULT),e}function ye(e){if(!P(e,"\\(\\?>",w.DEFAULT))return e;let t=new RegExp(String.raw`(?<noncapturingStart>${W})|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`,"gsu"),n="(?>",r="(?:(?=(",o=0,i=0,s=NaN,a;do{a=!1;let u=0,l=0,p=!1,c;for(t.lastIndex=Number.isNaN(s)?0:s+r.length;c=t.exec(e);){let{0:f,index:d,groups:{backrefNum:h,capturingStart:A,noncapturingStart:N}}=c;if(f==="[")u++;else if(u)f==="]"&&u--;else if(f===n&&!p)s=d,p=!0;else if(p&&N)l++;else if(A)p&&l++,o++;else if(f===")"&&p){if(!l){i++,e=`${e.slice(0,s)}${r}${e.slice(s+n.length,d)}))\\k<$$${i+o}>)${e.slice(d+1)}`,a=!0;break}l--}else if(h)throw new Error(`Invalid decimal escape "${f}" in interpolated regex; cannot be used with atomic group`)}}while(a);return e=T(e,String.raw`\\k<\$\$(?<backrefNum>\d+)>`,({groups:{backrefNum:u}})=>`\\${u}`,w.DEFAULT),e}function Ke(e){let t=ue(e,{includeContents:!0}),n=Ve(e,t);return je(n,t)}var Me=String.raw`\\g<(?<subroutineName>[^>&]+)>`,F=new RegExp(String.raw` | ||
${Me} | ||
| (?<capturingStart>${q}) | ||
| \\(?<backrefNum>[1-9]\d*) | ||
| \\k<(?<backrefName>[^>]+)> | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function Ve(e,t){if(!P(e,"\\\\g<",w.DEFAULT))return e;let n=P(e,"\\\\(?:[1-9]|k<[^>]+>)",w.DEFAULT),r=n?"(":"(?:",o=new Map,i=[],s=[0],a=0,u=0,l=0,p=0,c=0,f=e,d;for(F.lastIndex=0;d=F.exec(f);){let{0:h,index:A,groups:{subroutineName:N,capturingStart:O,backrefNum:L,backrefName:R}}=d;if(h==="[")c++;else if(c)h==="]"&&c--;else if(N){if(!t.has(N))throw new Error(`Invalid named capture referenced by subroutine ${h}`);if(o.has(N))throw new Error(`Subroutine ${h} followed a recursive reference`);let m=t.get(N).contents,I=`${r}${m})`;n&&(l=0,u++),o.set(N,{unclosedGroupCount:He(I)}),i.push(N),f=V(f,A,h,I),F.lastIndex-=h.length-r.length}else if(O)o.size?(n&&(l++,u++),h!=="("&&(f=V(f,A,h,r),F.lastIndex-=h.length-r.length)):n&&(s.push(M(s)+1+u-p),p=u,a++);else if((L||R)&&o.size){let m=L?+L:t.get(R)?.groupNum,I=!1;for(let _ of i){let $=t.get(_);if(m>=$.groupNum&&m<=$.groupNum+$.numCaptures){I=!0;break}}if(I){let _=t.get(M(i)),$=a+u-l,C=`\\k<$$b${m}s${$}r${_.groupNum}c${_.numCaptures}>`;f=V(f,A,h,C),F.lastIndex+=C.length-h.length}}else if(h===")"&&o.size){let m=o.get(M(i));m.unclosedGroupCount--,m.unclosedGroupCount||o.delete(i.pop())}}return n&&(f=T(f,String.raw`\\(?:(?<bNum>[1-9]\d*)|k<\$\$b(?<bNumSub>\d+)s(?<subNum>\d+)r(?<refNum>\d+)c(?<refCaps>\d+)>)`,({0:h,groups:{bNum:A,bNumSub:N,subNum:O,refNum:L,refCaps:R}})=>{if(A){let C=+A;if(C>s.length-1)throw new Error(`Backref "${h}" greater than number of captures`);return`\\${s[C]}`}let m=+N,I=+O,_=+L,$=+R;return m<_||m>_+$?`\\${s[m]}`:`\\${I-_+m}`},w.DEFAULT)),f}var K=new RegExp(String.raw`${Q}|\(\?:\)|(?<invalid>\\?.)`,"gsu");function je(e,t){let n=x(e,String.raw`\(\?\(DEFINE\)`,0,w.DEFAULT);if(!n)return e;let r=se(e,n);if(r.afterPos<e.length)throw new Error("DEFINE group allowed only at the end of a regex");if(r.afterPos>e.length)throw new Error("DEFINE group is unclosed");let o;for(K.lastIndex=0;o=K.exec(r.contents);){let{captureName:i,invalid:s}=o.groups;if(i){let a=se(r.contents,o),u;if(!t.get(i).isUnique)u=i;else{let l=ue(a.contents);for(let p of l.keys())if(!t.get(p).isUnique){u=p;break}}if(u)throw new Error(`Duplicate group name "${u}" within DEFINE`);K.lastIndex=a.afterPos}else if(s)throw new Error("DEFINE group includes unsupported syntax at top level")}return e.slice(0,n.index)}function He(e){let t=0;return H(e,"\\(",()=>t++,w.DEFAULT),t}function xe(e,t){let n=0,r=0,o;for(;o=x(e,q,r,w.DEFAULT);){let{0:i,index:s,groups:{captureName:a}}=o;if(n++,a===t)break;r=s+i.length}return n}function se(e,t){let n=t.index+t[0].length,r=oe(e,n),o=n+r.length+1;return{contents:r,afterPos:o}}function ue(e,{includeContents:t}={}){let n=new Map;return H(e,Q,({0:r,index:o,groups:{captureName:i}})=>{if(n.has(i))n.get(i).isUnique=!1;else{let s={isUnique:!0};if(t){let a=oe(e,o+r.length);Object.assign(s,{contents:a,groupNum:xe(e,i),numCaptures:k(a)})}n.set(i,s)}},w.DEFAULT),n}function M(e){return e[e.length-1]}function V(e,t,n,r){return e.slice(0,t)+r+e.slice(t+n.length)}var Qe="&!#%,:;<=>@`~",qe=new RegExp(String.raw` | ||
\[\^?-? | ||
| --?\] | ||
| (?<dp>[${v}])\k<dp> | ||
| -- | ||
| \\(?<vOnlyEscape>[${Qe}]) | ||
| \\[pPu]\{[^}]+\} | ||
| \\?. | ||
`.replace(/\s+/g,""),"gsu");function We(e,t){let n='Invalid unescaped "-" in character class',r=!1,o=!1,i="";for(let{0:s,groups:{dp:a,vOnlyEscape:u}}of e.matchAll(qe)){if(s[0]==="["){if(r)throw new Error("Invalid nested character class when flag v not supported; possibly from interpolation");if(s.endsWith("-"))throw new Error(n);r=!0,o=s[1]==="^"}else if(s.endsWith("]")){if(s[0]==="-")throw new Error(n);r=o=!1}else if(r){if(s==="&&"||s==="--")throw new Error(`Invalid set operator "${s}" when flag v not supported`);if(a)throw new Error(`Invalid double punctuator "${s}", reserved by flag v`);if("(){}/|".includes(s))throw new Error(`Invalid unescaped "${s}" in character class`);if(o&&s.startsWith("\\P")&&t.includes("i"))throw new Error("Negated \\P in negated character class with flag i works differently with flag v");if(u){i+=u;continue}}i+=s}return i}function Z(e,...t){let n=this instanceof Function?this:RegExp;if(Array.isArray(e?.raw))return j(n,{flags:""},e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return j.bind(null,n,{flags:e});if({}.toString.call(e)==="[object Object]"&&!t.length)return j.bind(null,n,e);throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}function j(e,t,n,...r){let{flags:o="",postprocessors:i=[],__extendSyntax:s=t.__flagN??!0,__flagN:a=!0,__flagV:u=Ie,__flagX:l=!0,__rake:p=!0}=t;if(/[vu]/.test(o))throw new Error("Flags v/u cannot be explicitly added");l&&({template:n,substitutions:r}=te(n,r,ve)),a&&({template:n,substitutions:r}=te(n,r,Ue));let c=0,f="",d={};n.raw.forEach((A,N)=>{let O=!!(n.raw[N]||n.raw[N+1]);c+=k(A),f+=z(A,w.CHAR_CLASS),d=B(f,d);let{regexContext:L,charClassContext:R}=d;if(N<n.raw.length-1){let m=r[N];f+=ze(m,o,L,R,O,c),m instanceof RegExp?c+=k(m.source):m instanceof U&&(c+=k(String(m)))}});let h=[...i];return s&&h.push(ye,Ke),u||h.push(We),p&&h.push(Ge),h.forEach(A=>f=A(f,o)),new e(f,(u?"v":"u")+o)}function ze(e,t,n,r,o,i){if(e instanceof RegExp&&n!==E.DEFAULT)throw new Error("Cannot interpolate a RegExp at this position because the syntax context does not match");if(n===E.INVALID_INCOMPLETE_TOKEN||r===g.INVALID_INCOMPLETE_TOKEN)throw new Error("Interpolation preceded by invalid incomplete token");let s=e instanceof U,a;if(!(e instanceof RegExp)){e=String(e),s||(a=ie(e,n===E.CHAR_CLASS?w.CHAR_CLASS:w.DEFAULT));let u=Re(a||e,n,r);if(u)throw new Error(`Unescaped stray "${u}" in the interpolated value would have side effects outside it`)}if(n===E.ENCLOSED_TOKEN||n===E.INTERVAL_QUANTIFIER||n===E.GROUP_NAME||r===g.ENCLOSED_TOKEN||r===g.Q_TOKEN)return s?e:a;if(n===E.CHAR_CLASS){if(s){if(P(e,"^-|^&&|-$|&&$"))throw new Error("Cannot use range or set operator at boundary of interpolated pattern; move the operation into the pattern or the operator outside of it");let u=$e(ae(e));return ee(e)?`[${u}]`:z(u)}return ee(a)?`[${a}]`:a}if(e instanceof RegExp){let u=Be(e,t),l=De(u.value,i);return u.usedModifier?l:`(?:${l})`}return s?`(?:${e})`:o?`(?:${a})`:a}function Be(e,t){let n={i:null,m:null,s:null},r="\\n\\r\\u2028\\u2029",o=e.source;if(e.ignoreCase!==t.includes("i"))if(b)n.i=e.ignoreCase;else throw new Error("Pattern modifiers not supported, so the value of flag i on the interpolated RegExp must match the outer regex");if(e.dotAll!==t.includes("s")&&(b?n.s=e.dotAll:o=T(o,"\\.",e.dotAll?"[^]":`[^${r}]`,w.DEFAULT)),e.multiline!==t.includes("m")&&(b?n.m=e.multiline:(o=T(o,"\\^",e.multiline?`(?<=^|[${r}])`:"(?<![^])",w.DEFAULT),o=T(o,"\\$",e.multiline?`(?=$|[${r}])`:"(?![^])",w.DEFAULT))),b){let i=Object.keys(n),s=i.filter(u=>n[u]===!0).join(""),a=i.filter(u=>n[u]===!1).join("");if(a&&(s+=`-${a}`),s)return{value:`(?${s}:${o})`,usedModifier:!0}}return{value:o}}var S=Object.freeze({DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"});function ce(e,t,n,r){let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu"),i=0,s="";for(let a of e.matchAll(o)){let{0:u,groups:{skip:l}}=a;if(!l&&(!r||r===S.DEFAULT==!i)){n instanceof Function?s+=n(a):s+=n;continue}u==="["?i++:u==="]"&&i&&i--,s+=u}return s}function Ze(e,t,n=0,r){if(!new RegExp(t,"su").test(e))return null;let o=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu");o.lastIndex=n;let i=0,s;for(;s=o.exec(e);){let{0:a,groups:{skip:u}}=s;if(!u&&(!r||r===S.DEFAULT==!i))return s;a==="["?i++:a==="]"&&i&&i--,o.lastIndex==s.index&&o.lastIndex++}return null}function D(e,t,n){return!!Ze(e,t,0,n)}function le(e,t){let n=/\\?./gsu;n.lastIndex=t;let r=e.length,o=0,i=1,s;for(;s=n.exec(e);){let[a]=s;if(a==="[")o++;else if(o)a==="]"&&o--;else if(a==="(")i++;else if(a===")"&&(i--,!i)){r=s.index;break}}return e.slice(t,r)}function Je(e,...t){let n=(e?.postprocessors||[]).concat(we),r=this instanceof Function?Z.bind(this):Z;if(Array.isArray(e?.raw))return r({flags:"",postprocessors:n})(e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return r({flags:e,postprocessors:n});if({}.toString.call(e)==="[object Object]"&&!t.length)return r({...e,postprocessors:n});throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var de=String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`,J=String.raw`\(\?R=(?<rDepth>\d+)\)|${de}`,he=String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`,G=new RegExp(String.raw`${he}|${J}|\\?.`,"gsu");function we(e){if(!D(e,J,S.DEFAULT))return e;if(D(e,String.raw`\\[1-9]`,S.DEFAULT))throw new Error("Invalid decimal escape in interpolated regex; cannot be used with recursion");if(D(e,String.raw`\(\?\(DEFINE\)`,S.DEFAULT))throw new Error("Definition groups cannot be used with recursion");let t=new Map,n=0,r;for(G.lastIndex=0;r=G.exec(e);){let{0:o,groups:{captureName:i,rDepth:s,gRName:a,gRDepth:u}}=r;if(o==="[")n++;else if(n)o==="]"&&n--;else if(i)t.set(i,G.lastIndex);else if(s){let l=+s;fe(l);let p=e.slice(0,r.index),c=e.slice(G.lastIndex);return pe(c),Ee(p,c,l)}else if(a){let l=+u;fe(l);let p=`Recursion via \\g<${a}&R=${u}> must be used within the referenced group`;if(!t.has(a))throw new Error(p);let c=t.get(a),f=le(e,c);if(!D(f,de,S.DEFAULT))throw new Error(p);let d=e.slice(c,r.index),h=f.slice(d.length+o.length);return pe(h),e.slice(0,c)+Ee(d,h,l)+e.slice(c+f.length)}}throw new Error("Unexpected error; recursion was not processed")}function fe(e){if(e<2||e>100)throw new Error(`Max depth must be between 2 and 100; used ${e}`)}function pe(e){if(D(e,J,S.DEFAULT))throw new Error("Recursion can only be used once per regex")}function Ee(e,t,n){let r=n-1;return`${e}${ge(`(?:${e}`,r)}(?:)${ge(`${t})`,r,"backward")}${t}`}function ge(e,t,n="forward"){let o=s=>n==="backward"?t-s+2-1:s+2,i="";for(let s=0;s<t;s++){let a=o(s);i+=ce(e,String.raw`${he}|\\k<(?<backref>[^>]+)>`,({groups:{captureName:u,backref:l}})=>{let p=`_$${a}`;return u?`(?<${u}${p}>`:`\\k<${l}${p}>`},S.DEFAULT)}return i}return _e(Xe);})(); | ||
var Regex;(Regex||={}).plugins=(()=>{var w=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var N=Object.getOwnPropertyNames;var b=Object.prototype.hasOwnProperty;var I=(e,t)=>{for(var n in t)w(e,n,{get:t[n],enumerable:!0})},S=(e,t,n,s)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of N(t))!b.call(e,r)&&r!==n&&w(e,r,{get:()=>t[r],enumerable:!(s=T(t,r))||s.enumerable});return e};var O=e=>S(w({},"__esModule",{value:!0}),e);var M={};I(M,{recursion:()=>v});var l=Object.freeze({DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"});function m(e,t,n,s){let r=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu"),c=0,u="";for(let o of e.matchAll(r)){let{0:a,groups:{skip:f}}=o;if(!f&&(!s||s===l.DEFAULT==!c)){n instanceof Function?u+=n(o):u+=n;continue}a==="["?c++:a==="]"&&c&&c--,u+=a}return u}function U(e,t,n,s){m(e,t,n,s)}function G(e,t,n=0,s){if(!new RegExp(t,"su").test(e))return null;let r=new RegExp(`${t}|(?<skip>\\\\?.)`,"gsu");r.lastIndex=n;let c=0,u;for(;u=r.exec(e);){let{0:o,groups:{skip:a}}=u;if(!a&&(!s||s===l.DEFAULT==!c))return u;o==="["?c++:o==="]"&&c&&c--,r.lastIndex==u.index&&r.lastIndex++}return null}function d(e,t,n){return!!G(e,t,0,n)}function x(e,t){let n=/\\?./gsu;n.lastIndex=t;let s=e.length,r=0,c=1,u;for(;u=n.exec(e);){let[o]=u;if(o==="[")r++;else if(r)o==="]"&&r--;else if(o==="(")c++;else if(o===")"&&(c--,!c)){s=u.index;break}}return e.slice(t,s)}var L=String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`,E=String.raw`\(\?R=(?<rDepth>\d+)\)|${L}`,D=String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`,g=new RegExp(String.raw`${D}|${E}|\\?.`,"gsu");function v(e){if(!d(e,E,l.DEFAULT))return e;if(d(e,String.raw`\\[1-9]`,l.DEFAULT))throw new Error("Numbered backrefs cannot be used with recursion; use named backref");if(d(e,String.raw`\(\?\(DEFINE\)`,l.DEFAULT))throw new Error("DEFINE groups cannot be used with recursion");let t=new Map,n=0,s;for(g.lastIndex=0;s=g.exec(e);){let{0:r,groups:{captureName:c,rDepth:u,gRName:o,gRDepth:a}}=s;if(r==="[")n++;else if(n)r==="]"&&n--;else if(c)t.set(c,g.lastIndex);else if(u){let f=+u;R(f);let h=e.slice(0,s.index),i=e.slice(g.lastIndex);return A(i),k(h,i,f,!1)}else if(o){let f=+a;R(f);let h=`Recursion via \\g<${o}&R=${a}> must be used within the referenced group`;if(!t.has(o))throw new Error(h);let i=t.get(o),p=x(e,i);if(!d(p,L,l.DEFAULT))throw new Error(h);let $=e.slice(i,s.index),C=p.slice($.length+r.length);return A(C),e.slice(0,i)+k($,C,f,!0)+e.slice(i+p.length)}}throw new Error("Unexpected error; recursion was not processed")}function R(e){if(e<2||e>100)throw new Error(`Max depth must be between 2 and 100; used ${e}`)}function A(e){if(d(e,E,l.DEFAULT))throw new Error("Recursion can only be used once per regex")}function k(e,t,n,s){let r=new Set;s&&U(e+t,D,({groups:{captureName:u}})=>{r.add(u)},l.DEFAULT);let c=n-1;return`${e}${F(`(?:${e}`,c,s?r:null)}(?:)${F(`${t})`,c,s?r:null,"backward")}${t}`}function F(e,t,n,s="forward"){let c=o=>s==="backward"?t-o+2-1:o+2,u="";for(let o=0;o<t;o++){let a=c(o);u+=m(e,String.raw`${D}|\\k<(?<backref>[^>]+)>`,({0:f,groups:{captureName:h,backref:i}})=>{if(i&&n&&!n.has(i))return f;let p=`_$${a}`;return h?`(?<${h}${p}>`:`\\k<${i}${p}>`},l.DEFAULT)}return u}return O(M);})(); |
{ | ||
"name": "regex-recursion", | ||
"version": "3.1.0", | ||
"description": "Recursive matching extension for the regex package", | ||
"version": "4.0.0", | ||
"description": "Recursive matching plugin for the regex package", | ||
"author": "Steven Levithan", | ||
@@ -9,5 +9,6 @@ "license": "MIT", | ||
"exports": "./src/index.js", | ||
"browser": "./dist/regex-recursion.min.js", | ||
"scripts": { | ||
"prebuild": "rimraf --glob dist/*", | ||
"build": "esbuild src/index.js --bundle --minify --outfile=dist/regex-recursion.min.js --global-name=Regex.ext", | ||
"build": "esbuild src/index.js --bundle --minify --outfile=dist/regex-recursion.min.js --global-name=Regex.plugins", | ||
"pretest": "npm run build", | ||
@@ -31,6 +32,6 @@ "test": "jasmine", | ||
"dependencies": { | ||
"regex": "^3.1.0", | ||
"regex-utilities": "^2.1.0" | ||
}, | ||
"devDependencies": { | ||
"regex": "^4.0.0", | ||
"esbuild": "^0.23.0", | ||
@@ -37,0 +38,0 @@ "jasmine": "^5.2.0", |
@@ -1,9 +0,14 @@ | ||
# regex-recursion [![npm](https://img.shields.io/npm/v/regex-recursion)](https://www.npmjs.com/package/regex-recursion) | ||
# regex-recursion | ||
This is an extension for the [`regex`](https://github.com/slevithan/regex) package that adds support for recursive matching up to a specified max depth *N*, where *N* must be between 2 and 100. Generated regexes are native `RegExp` instances, and support all JavaScript regular expression features. | ||
[![build status](https://github.com/slevithan/regex-recursion/workflows/CI/badge.svg)](https://github.com/slevithan/regex-recursion/actions) | ||
[![npm](https://img.shields.io/npm/v/regex-recursion)](https://www.npmjs.com/package/regex-recursion) | ||
[![bundle size](https://deno.bundlejs.com/badge?q=regex-recursion&treeshake=[*])](https://bundlejs.com/?q=regex-recursion&treeshake=[*]) | ||
This is a plugin for the [`regex`](https://github.com/slevithan/regex) library that adds support for recursive matching up to a specified max depth *N*, where *N* must be between 2 and 100. Generated regexes are native `RegExp` instances, and support all JavaScript regular expression features. | ||
Recursive matching is added to a regex via one of the following: | ||
- `(?R=N)` — Recursively match the entire regex at this position. | ||
- `\g<name&R=N>` — Recursively match the contents of group *name* at this position. The `\g` subroutine must be called *within* the referenced group. | ||
- `\g<name&R=N>` — Recursively match the contents of group *name* at this position. | ||
- The `\g` subroutine must be called *within* the referenced group. | ||
@@ -15,15 +20,22 @@ Recursive matching supports named captures/backreferences, and makes them independent per depth level. So e.g. `groups.name` on a match object is the value captured by group `name` at the top level of the recursion stack. | ||
```sh | ||
npm install regex-recursion | ||
npm install regex regex-recursion | ||
``` | ||
```js | ||
import {rregex} from 'regex-recursion'; | ||
import {regex} from 'regex'; | ||
import {recursion} from 'regex-recursion'; | ||
const re = regex({plugins: [recursion]})`…`; | ||
``` | ||
In browsers: | ||
In browsers (using a global name): | ||
```html | ||
<script src="https://cdn.jsdelivr.net/npm/regex-recursion/dist/regex-recursion.min.js"></script> | ||
<script src="https://cdn.jsdelivr.net/npm/regex@4.0.0/dist/regex.min.js"></script> | ||
<script src="https://cdn.jsdelivr.net/npm/regex-recursion@4.0.0/dist/regex-recursion.min.js"></script> | ||
<script> | ||
const {rregex} = Regex.ext; | ||
const {regex} = Regex; | ||
const {recursion} = Regex.plugins; | ||
const re = regex({plugins: [recursion]})`…`; | ||
</script> | ||
@@ -40,3 +52,4 @@ ``` | ||
// Matches sequences of up to 50 'a' chars followed by the same number of 'b' | ||
rregex`a(?R=50)?b`.exec('test aaaaaabbb')[0]; | ||
const re = regex({plugins: [recursion]})`a(?R=50)?b`; | ||
re.exec('test aaaaaabbb')[0]; | ||
// → 'aaabbb' | ||
@@ -48,3 +61,3 @@ ``` | ||
```js | ||
const re = rregex`^ | ||
const re = regex({plugins: [recursion]})`^ | ||
(?<balanced> | ||
@@ -61,2 +74,4 @@ a | ||
Note the `^` and `$` anchors outside of the recursive subpattern. | ||
### Match balanced parentheses | ||
@@ -66,3 +81,3 @@ | ||
// Matches all balanced parentheses up to depth 50 | ||
const parens = rregex('g')`\( | ||
const parens = regex({flags: 'g', plugins: [recursion]})`\( | ||
( [^\(\)] | (?R=50) )* | ||
@@ -80,6 +95,6 @@ \)`; | ||
Here's an alternative that matches the same strings, but adds a nested quantifier. It then uses an atomic group to prevent this nested quantifier from creating the potential for runaway backtracking: | ||
Following is an alternative that matches the same strings, but adds a nested quantifier. It then uses an atomic group to prevent this nested quantifier from creating the potential for [catastrophic backtracking](https://www.regular-expressions.info/catastrophic.html). | ||
```js | ||
const parens = rregex('g')`\( | ||
const parens = regex({flags: 'g', plugins: [recursion]})`\( | ||
( (?> [^\(\)]+ ) | (?R=50) )* | ||
@@ -89,5 +104,5 @@ \)`; | ||
This matches sequences of non-parens in one step with the nested `+` quantifier, and avoids backtracking into these sequences by wrapping it with an atomic group `(?>…)`. Given that what the nested quantifier `+` matches overlaps with what the outer group can match with its `*` quantifier, the atomic group is important here. It avoids runaway backtracking when matching long strings with unbalanced parens. | ||
This matches sequences of non-parens in one step with the nested `+` quantifier, and avoids backtracking into these sequences by wrapping it with an atomic group `(?>…)`. Given that what the nested quantifier `+` matches overlaps with what the outer group can match with its `*` quantifier, the atomic group is important here. It avoids exponential backtracking when matching long strings with unbalanced parens. | ||
Atomic groups are provided by the base `regex` package. | ||
Atomic groups are provided by the base `regex` library. | ||
@@ -99,3 +114,8 @@ ### Match palindromes | ||
```js | ||
const palindromes = rregex('gi')`(?<char>\w) ((?R=15)|\w?) \k<char>`; | ||
const palindromes = regex({flags: 'gi', plugins: [recursion]})` | ||
(?<char> \w ) | ||
# Recurse, or match a lone unbalanced char in the middle | ||
( (?R=15) | \w? ) | ||
\k<char> | ||
`; | ||
@@ -106,3 +126,3 @@ 'Racecar, ABBA, and redivided'.match(palindromes); | ||
In this example, the max length of matched palindromes is 31. That's because it sets the max recursion depth to 15 with `(?R=15)`. So, depth 15 × 2 chars (left + right) for each depth level + 1 optional unbalanced char in the center = 31. The max recursion depth can increased (up to a max of 100) to match longer palindromes. | ||
In the example above, the max length of matched palindromes is 31. That's because it sets the max recursion depth to 15 with `(?R=15)`. So, depth 15 × 2 chars (left + right) for each depth level + 1 optional unbalanced char in the middle = 31. To match longer palindromes, the max recursion depth can be increased to a max of 100, which would enable matching palindromes up to 201 characters long. | ||
@@ -112,6 +132,5 @@ #### Match palindromes as complete words | ||
```js | ||
const palindromeWords = rregex('gi')`\b | ||
const palindromeWords = regex({flags: 'gi', plugins: [recursion]})`\b | ||
(?<palindrome> | ||
(?<char> \w ) | ||
# Recurse, or match a lone unbalanced char in the center | ||
( \g<palindrome&R=15> | \w? ) | ||
@@ -126,11 +145,2 @@ \k<char> | ||
## Sugar free | ||
Template tag `rregex` is sugar for using the base `regex` tag and adding recursion support via a postprocessor. You can also add recursion support the verbose way: | ||
```js | ||
import {regex} from 'regex'; | ||
import {recursion} from 'regex-recursion'; | ||
regex({flags: 'i', postprocessors: [recursion]})`a(?R=2)?b`; | ||
``` | ||
Note the `\b` word boundaries outside of the recursive subpattern. |
@@ -1,21 +0,3 @@ | ||
import {regex} from 'regex'; | ||
import {Context, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities'; | ||
import {Context, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities'; | ||
export function rregex(first, ...values) { | ||
const postprocessors = (first?.postprocessors || []).concat(recursion); | ||
// Allow binding to other constructors | ||
const tag = this instanceof Function ? regex.bind(this) : regex; | ||
// Given a template | ||
if (Array.isArray(first?.raw)) { | ||
return tag({flags: '', postprocessors})(first, ...values); | ||
// Given flags | ||
} else if ((typeof first === 'string' || first === undefined) && !values.length) { | ||
return tag({flags: first, postprocessors}); | ||
// Given an options object | ||
} else if ({}.toString.call(first) === '[object Object]' && !values.length) { | ||
return tag({...first, postprocessors}); | ||
} | ||
throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...values])}`); | ||
} | ||
const gRToken = String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`; | ||
@@ -35,12 +17,18 @@ const recursiveToken = String.raw`\(\?R=(?<rDepth>\d+)\)|${gRToken}`; | ||
if (hasUnescaped(expression, String.raw`\\[1-9]`, Context.DEFAULT)) { | ||
// Could allow this with extra effort but it's probably not worth it. To trigger this, the | ||
// regex must contain both recursion and an interpolated regex with a numbered backref (since | ||
// numbered backrefs outside regex interpolation are prevented by implicit flag n). Note that | ||
// some of `regex`'s built-in features (atomic groups and subroutines) can add numbered | ||
// backrefs. However, those work fine with recursion because postprocessors from extensions | ||
// (like `regex-recursion`) run before built-in postprocessors | ||
throw new Error(`Invalid decimal escape in interpolated regex; cannot be used with recursion`); | ||
// Could add support for numbered backrefs with extra effort, but it's probably not worth it. | ||
// To trigger this error, the regex must include recursion and one of the following: | ||
// - An interpolated regex that contains a numbered backref (since other numbered backrefs are | ||
// prevented by implicit flag n). | ||
// - A numbered backref, when flag n is explicitly disabled. | ||
// Note that `regex`'s extended syntax (atomic groups and sometimes subroutines) can also add | ||
// numbered backrefs, but those work fine because external plugins like this one run *before* | ||
// the transpilation of built-in syntax extensions. | ||
// To support numbered backrefs, they would need to be automatically adjusted when they're | ||
// duplicated by recursion and refer to a group inside the expression being recursed. | ||
// Additionally, numbered backrefs inside and outside of the recursed expression would need to | ||
// be adjusted based on any capturing groups added by recursion. | ||
throw new Error(`Numbered backrefs cannot be used with recursion; use named backref`); | ||
} | ||
if (hasUnescaped(expression, String.raw`\(\?\(DEFINE\)`, Context.DEFAULT)) { | ||
throw new Error(`Definition groups cannot be used with recursion`); | ||
throw new Error(`DEFINE groups cannot be used with recursion`); | ||
} | ||
@@ -66,3 +54,3 @@ const groupContentsStartPos = new Map(); | ||
assertNoFollowingRecursion(post); | ||
return makeRecursive(pre, post, maxDepth); | ||
return makeRecursive(pre, post, maxDepth, false); | ||
// \g<name&R=N> | ||
@@ -87,3 +75,3 @@ } else if (gRName) { | ||
return expression.slice(0, startPos) + | ||
makeRecursive(pre, post, maxDepth) + | ||
makeRecursive(pre, post, maxDepth, true) + | ||
expression.slice(startPos + recursiveGroupContents.length); | ||
@@ -118,9 +106,21 @@ } | ||
@param {number} maxDepth | ||
@param {boolean} isSubpattern | ||
@returns {string} | ||
*/ | ||
function makeRecursive(pre, post, maxDepth) { | ||
function makeRecursive(pre, post, maxDepth, isSubpattern) { | ||
const namesInRecursed = new Set(); | ||
// Avoid this work if not needed | ||
if (isSubpattern) { | ||
forEachUnescaped(pre + post, namedCapturingDelim, ({groups: {captureName}}) => { | ||
namesInRecursed.add(captureName); | ||
}, Context.DEFAULT); | ||
} | ||
const reps = maxDepth - 1; | ||
// Depth 2: 'pre(?:pre(?:)post)post' | ||
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post' | ||
return `${pre}${repeatWithDepth(`(?:${pre}`, reps)}(?:)${repeatWithDepth(`${post})`, reps, 'backward')}${post}`; | ||
return `${pre}${ | ||
repeatWithDepth(`(?:${pre}`, reps, isSubpattern ? namesInRecursed: null) | ||
}(?:)${ | ||
repeatWithDepth(`${post})`, reps, isSubpattern ? namesInRecursed: null, 'backward') | ||
}${post}`; | ||
} | ||
@@ -131,6 +131,7 @@ | ||
@param {number} reps | ||
@param {Set<string> | null} namesInRecursed | ||
@param {'forward' | 'backward'} [direction] | ||
@returns {string} | ||
*/ | ||
function repeatWithDepth(expression, reps, direction = 'forward') { | ||
function repeatWithDepth(expression, reps, namesInRecursed, direction = 'forward') { | ||
const startNum = 2; | ||
@@ -144,3 +145,6 @@ const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum; | ||
String.raw`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>`, | ||
({groups: {captureName, backref}}) => { | ||
({0: m, groups: {captureName, backref}}) => { | ||
if (backref && namesInRecursed && !namesInRecursed.has(backref)) { | ||
return m; | ||
} | ||
const suffix = `_$${captureNum}`; | ||
@@ -147,0 +151,0 @@ return captureName ? `(?<${captureName}${suffix}>` : `\\k<${backref}${suffix}>`; |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
1
136
15778
4
155
- Removedregex@^3.1.0
- Removedregex@3.1.0(transitive)