regex-recursion
Advanced tools
Comparing version 1.0.0 to 2.0.0
@@ -1,3 +0,3 @@ | ||
var Regex;(Regex||={}).ext=(()=>{var C=Object.defineProperty;var ne=Object.getOwnPropertyDescriptor;var oe=Object.getOwnPropertyNames;var se=Object.prototype.hasOwnProperty;var ie=(e,t)=>{for(var r in t)C(e,r,{get:t[r],enumerable:!0})},ae=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of oe(t))!se.call(e,o)&&o!==r&&C(e,o,{get:()=>t[o],enumerable:!(n=ne(t,o))||n.enumerable});return e};var ce=e=>ae(C({},"__esModule",{value:!0}),e);var Ae={};ie(Ae,{recursion:()=>X,rregex:()=>ge});var g={DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"};function A(e,t,r,n){let o=new RegExp(String.raw`(?<found>${t})|\\?.`,"gsu"),c=0,s="";for(let i of e.matchAll(o)){let{0:l,groups:{found:u}}=i;if(u&&(!n||n===g.DEFAULT==!c)){r instanceof Function?s+=r(i):s+=r;continue}l==="["?c++:l==="]"&&c&&c--,s+=l}return s}function le(e,t,r,n){if(!new RegExp(t,"su").test(e))return!1;let o=new RegExp(String.raw`(?<found>${t})|\\?.`,"gsu"),c=0;for(let s of e.matchAll(o)){let{0:i,groups:{found:l}}=s;if(l&&(!n||n===g.DEFAULT==!c))return r&&r(s),!0;i==="["?c++:i==="]"&&c&&c--}return!1}function _(e,t,r){return le(e,t,null,r)}function G(e){if(!_(e,String.raw`\(\?>`,g.DEFAULT))return e;let t=new RegExp(String.raw`(?<noncapturingStart>\(\?(?:[:=!>A-Za-z\-]|<[=!]))|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`,"gsu"),r=3,n,o=0,c=0,s=NaN;do{n=!1;let i=0,l=0,u=!1,E;for(t.lastIndex=Number.isNaN(s)?0:s+r;E=t.exec(e);){let{0:a,index:m,groups:{backrefNum:d,capturingStart:T,noncapturingStart:$}}=E;if(a==="[")i++;else if(i)a==="]"&&i--;else if(a==="(?>"&&!u)s=m,u=!0;else if(u&&$)l++;else if(T)u&&l++,o++;else if(a===")"&&u){if(!l){c++,e=`${e.slice(0,s)}(?:(?=(${e.slice(s+r,m)}))\\k<${c+o}>)${e.slice(m+1)}`,n=!0,o--;break}l--}else if(d)throw new Error(`Invalid decimal escape "${a}" in interpolated regex; cannot be used with atomic group`)}}while(n);return e=A(e,String.raw`\\k<(?<backrefNum>\d+)>`,({groups:{backrefNum:i}})=>`\\${i}`,g.DEFAULT),e}var 
N=class{#e;constructor(t){this.#e=t}toString(){return String(this.#e)}};function U(e,...t){if(Array.isArray(e?.raw))return new N(e.raw.flatMap((r,n)=>n<e.raw.length-1?[r,t[n]]:r).join(""));if(!t.length)return new N(e??"");throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var f={...g,GROUP_NAME:"GROUP_NAME",ENCLOSED_TOKEN:"ENCLOSED_TOKEN",INTERVAL_QUANTIFIER:"INTERVAL_QUANTIFIER",INVALID_INCOMPLETE_TOKEN:"INVALID_INCOMPLETE_TOKEN"},p={DEFAULT:"CC_DEFAULT",RANGE:"CC_RANGE",ENCLOSED_TOKEN:"CC_ENCLOSED_TOKEN",Q_TOKEN:"CC_Q_TOKEN",INVALID_INCOMPLETE_TOKEN:"CC_INVALID_INCOMPLETE_TOKEN"},O=(()=>{let e=!0;try{new RegExp("(?i-ms:)")}catch{e=!1}return e})(),D="&!#$%*+,.:;<=>?@^`~";function F(e,t){return t===f.CHAR_CLASS?e.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${D}]`,"g"),"\\$&"):e.replace(/[()\[\]{}|\\^$*+?.]/g,"\\$&")}function S(e){return e.replace(new RegExp(String.raw`^([${D}])(?!\1)`),(t,r,n)=>`\\${t}${n+1===e.length?"":t}`)}function y(e){return e.replace(/^\^/,"\\^^")}function L(e,t){return A(e,String.raw`\\0(?!\d)`,"\\u{0}",t)}function M(e,t,r){let n=0;for(let[o]of e.matchAll(new RegExp(`[${F(t+r)}]`,"g")))if(n+=o===t?1:-1,n<0)return r;return n>0?t:""}function V(e,t,r){let n=e.replace(/\\./gsu,"");if(n.at(-1)==="\\")return"\\";if(t===f.DEFAULT)return M(n,"(",")");if(t===f.CHAR_CLASS&&!(r===p.ENCLOSED_TOKEN||r===p.Q_TOKEN))return M(n,"[","]");if(t===f.ENCLOSED_TOKEN||t===f.INTERVAL_QUANTIFIER||r===p.ENCLOSED_TOKEN||r===p.Q_TOKEN){if(n.includes("}"))return"}"}else if(t===f.GROUP_NAME&&n.includes(">"))return">";return""}var h=new RegExp(String.raw` | ||
(?<groupN> \(\?< (?! [=!] ) | \\k< ) | ||
var Regex;(Regex||={}).ext=(()=>{var y=Object.defineProperty;var de=Object.getOwnPropertyDescriptor;var ge=Object.getOwnPropertyNames;var me=Object.prototype.hasOwnProperty;var Ae=(e,t)=>{for(var n in t)y(e,n,{get:t[n],enumerable:!0})},he=(e,t,n,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let s of ge(t))!me.call(e,s)&&s!==n&&y(e,s,{get:()=>t[s],enumerable:!(r=de(t,s))||r.enumerable});return e};var Ne=e=>he(y({},"__esModule",{value:!0}),e);var be={};Ae(be,{recursion:()=>Ee,rregex:()=>De});var E={DEFAULT:"DEFAULT",CHAR_CLASS:"CHAR_CLASS"};function A(e,t,n,r){let s=new RegExp(String.raw`${t}|(?<skip>\\?.)`,"gsu"),a=0,o="";for(let u of e.matchAll(s)){let{0:c,groups:{skip:l}}=u;if(!l&&(!r||r===E.DEFAULT==!a)){n instanceof Function?o+=n(u):o+=n;continue}c==="["?a++:c==="]"&&a&&a--,o+=c}return o}function I(e,t,n,r){A(e,t,n,r)}function F(e,t,n=0,r){if(!new RegExp(t,"su").test(e))return null;let s=new RegExp(String.raw`${t}|(?<skip>\\?.)`,"gsu");s.lastIndex=n;let a=0,o;for(;o=s.exec(e);){let{0:u,groups:{skip:c}}=o;if(!c&&(!r||r===E.DEFAULT==!a))return o;u==="["?a++:u==="]"&&a&&a--,s.lastIndex==o.index&&s.lastIndex++}return null}function h(e,t,n){return!!F(e,t,0,n)}function P(e,t){let n=/\\?./gsu;n.lastIndex=t;let r=e.length,s=0,a=1,o;for(;o=n.exec(e);){let[u]=o;if(u==="[")s++;else if(s)u==="]"&&s--;else if(u==="(")a++;else if(u===")"&&(a--,!a)){r=o.index;break}}return e.slice(t,r)}var L=class{#e;constructor(t){this.#e=t}toString(){return String(this.#e)}};function V(e,...t){if(Array.isArray(e?.raw))return new L(e.raw.flatMap((n,r)=>r<e.raw.length-1?[n,t[r]]:n).join(""));if(!t.length)return new L(e??"");throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var 
p={DEFAULT:"R_DEFAULT",CHAR_CLASS:"R_CHAR_CLASS",GROUP_NAME:"R_GROUP_NAME",ENCLOSED_TOKEN:"R_ENCLOSED_TOKEN",INTERVAL_QUANTIFIER:"R_INTERVAL_QUANTIFIER",INVALID_INCOMPLETE_TOKEN:"R_INVALID_INCOMPLETE_TOKEN"},g={DEFAULT:"CC_DEFAULT",RANGE:"CC_RANGE",ENCLOSED_TOKEN:"CC_ENCLOSED_TOKEN",Q_TOKEN:"CC_Q_TOKEN",INVALID_INCOMPLETE_TOKEN:"CC_INVALID_INCOMPLETE_TOKEN"},x=(()=>{let e=!0;try{new RegExp("(?i-ms:)")}catch{e=!1}return e})(),k="&!#$%*+,.:;<=>?@^`~",$=String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!])`;function j(e,t){return t===E.CHAR_CLASS?e.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${k}]`,"g"),"\\$&"):e.replace(/[()\[\]{}|\\^$*+?.]/g,"\\$&")}function G(e){return e.replace(new RegExp(String.raw`^([${k}])(?!\1)`),(t,n,r)=>`\\${t}${r+1===e.length?"":t}`)}function X(e){return e.replace(/^\^/,"\\^^")}function D(e,t){return A(e,String.raw`\\0(?!\d)`,"\\u{0}",t)}function Z(e,t,n){let r=0;for(let[s]of e.matchAll(new RegExp(`[${j(t+n,E.CHAR_CLASS)}]`,"g")))if(r+=s===t?1:-1,r<0)return n;return r>0?t:""}function v(e,t,n){let r=e.replace(/\\./gsu,"");if(r.at(-1)==="\\")return"\\";if(t===p.DEFAULT)return Z(r,"(",")");if(t===p.CHAR_CLASS&&!(n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN))return Z(r,"[","]");if(t===p.ENCLOSED_TOKEN||t===p.INTERVAL_QUANTIFIER||n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN){if(r.includes("}"))return"}"}else if(t===p.GROUP_NAME&&r.includes(">"))return">";return""}var J=new RegExp(String.raw` | ||
(?<groupN> \(\?< (?! [=!] ) | \\[gk]< ) | ||
| (?<enclosedT> \\[pPu]\{ ) | ||
@@ -13,20 +13,10 @@ | (?<qT> \\q\{ ) | ||
) | ||
| \\ (?: | ||
c [A-Za-z] | ||
| u [A-Fa-f\d]{4} | ||
| x [A-Fa-f\d]{2} | ||
| 0 \d+ | ||
) | ||
| \[\^ | ||
| \(\? [:=!<>A-Za-z\-] | ||
| (?<dp> [${D}] ) \k<dp> | ||
| \\[1-9]\d* | ||
| -- | ||
| \\? . | ||
`.replace(/\s+/g,""),"gsu");function w(e,{regexContext:t=f.DEFAULT,charClassContext:r=p.DEFAULT,charClassDepth:n=0,lastPos:o=0}){h.lastIndex=o;let c;for(;c=h.exec(e);){let{0:s,groups:{groupN:i,enclosedT:l,qT:u,intervalQ:E,incompleteT:a}}=c;s==="["||s==="[^"?(n++,t=f.CHAR_CLASS,r=p.DEFAULT):s==="]"&&t===f.CHAR_CLASS?(n&&n--,n||(t=f.DEFAULT),r=p.DEFAULT):t===f.CHAR_CLASS?a?r=p.INVALID_INCOMPLETE_TOKEN:s==="-"?r=p.RANGE:l?r=p.ENCLOSED_TOKEN:u?r=p.Q_TOKEN:(s==="}"&&(r===p.ENCLOSED_TOKEN||r===p.Q_TOKEN)||r===p.INVALID_INCOMPLETE_TOKEN||r===p.RANGE)&&(r=p.DEFAULT):a?t=f.INVALID_INCOMPLETE_TOKEN:i?t=f.GROUP_NAME:l?t=f.ENCLOSED_TOKEN:E?t=f.INTERVAL_QUANTIFIER:(s===">"&&t===f.GROUP_NAME||s==="}"&&(t===f.ENCLOSED_TOKEN||t===f.INTERVAL_QUANTIFIER)||t===f.INVALID_INCOMPLETE_TOKEN)&&(t=f.DEFAULT)}return{regexContext:t,charClassContext:r,charClassDepth:n,lastPos:e.length}}function x(e){let t=/(?<capture>\((?:(?!\?)|\?<[^>]+>))|\\?./gsu;return Array.from(e.matchAll(t)).filter(r=>r.groups.capture).length}function j(e,t){return e.replace(/\\([1-9]\d*)|\\?./gsu,(r,n)=>n?"\\"+(Number(n)+t):r)}var ue=["Basic_Emoji","Emoji_Keycap_Sequence","RGI_Emoji_Modifier_Sequence","RGI_Emoji_Flag_Sequence","RGI_Emoji_Tag_Sequence","RGI_Emoji_ZWJ_Sequence","RGI_Emoji"].join("|");function b(e){let t=new RegExp(String.raw` | ||
`.replace(/\s+/g,""),"gsu");function U(e,{regexContext:t=p.DEFAULT,charClassContext:n=g.DEFAULT,charClassDepth:r=0,lastPos:s=0}){J.lastIndex=s;let a;for(;a=J.exec(e);){let{0:o,groups:{groupN:u,enclosedT:c,qT:l,intervalQ:f,incompleteT:i}}=a;o==="["?(r++,t=p.CHAR_CLASS,n=g.DEFAULT):o==="]"&&t===p.CHAR_CLASS?(r&&r--,r||(t=p.DEFAULT),n=g.DEFAULT):t===p.CHAR_CLASS?i?n=g.INVALID_INCOMPLETE_TOKEN:o==="-"?n=g.RANGE:c?n=g.ENCLOSED_TOKEN:l?n=g.Q_TOKEN:(o==="}"&&(n===g.ENCLOSED_TOKEN||n===g.Q_TOKEN)||n===g.INVALID_INCOMPLETE_TOKEN||n===g.RANGE)&&(n=g.DEFAULT):i?t=p.INVALID_INCOMPLETE_TOKEN:u?t=p.GROUP_NAME:c?t=p.ENCLOSED_TOKEN:f?t=p.INTERVAL_QUANTIFIER:(o===">"&&t===p.GROUP_NAME||o==="}"&&(t===p.ENCLOSED_TOKEN||t===p.INTERVAL_QUANTIFIER)||t===p.INVALID_INCOMPLETE_TOKEN)&&(t=p.DEFAULT)}return{regexContext:t,charClassContext:n,charClassDepth:r,lastPos:e.length}}function b(e){let t=0;return I(e,String.raw`\((?:(?!\?)|\?<[^>]+>)`,()=>t++,E.DEFAULT),t}function Y(e,t){return A(e,String.raw`\\(?<num>[1-9]\d*)`,({groups:{num:n}})=>`\\${+n+t}`,E.DEFAULT)}var we=["Basic_Emoji","Emoji_Keycap_Sequence","RGI_Emoji_Modifier_Sequence","RGI_Emoji_Flag_Sequence","RGI_Emoji_Tag_Sequence","RGI_Emoji_ZWJ_Sequence","RGI_Emoji"].join("|"),Se=new RegExp(String.raw` | ||
\\ (?: | ||
c [A-Za-z] | ||
| p \{ (?<pPropOfStr> ${ue} ) \} | ||
| p \{ (?<pStrProp> ${we} ) \} | ||
| [pP] \{ [^\}]+ \} | ||
| (?<qPropOfStr> q ) | ||
| (?<qStrProp> q ) | ||
| u (?: [A-Fa-f\d]{4} | \{ [A-Fa-f\d]+ \} ) | ||
@@ -39,9 +29,35 @@ | x [A-Fa-f\d]{2} | ||
| . | ||
`.replace(/\s+/g,""),"gsu"),r=!1,n;for(let{0:o,groups:c}of e.matchAll(t)){if(c.pPropOfStr||c.qPropOfStr||o==="["&&r)return!0;if(["-","--","&&"].includes(o))r=!1;else if(!["[","]"].includes(o)){if(r||n==="]")return!0;r=!0}n=o}return!1}function P(e,t,r){let n={raw:[]},o=[],c={};return e.raw.forEach((s,i)=>{let l=r(s,{...c,lastPos:0});if(n.raw.push(l.transformed),c=l.runningContext,i<e.raw.length-1){let u=t[i];if(u instanceof N){let E=r(u,{...c,lastPos:0});o.push(U(E.transformed)),c=E.runningContext}else o.push(u)}}),{template:n,values:o}}function Q(e,t){e=String(e);let r="",n="";for(let[o]of e.matchAll(h)){r+=o,t=w(r,t);let{regexContext:c}=t;if(c===f.DEFAULT)if(o==="(")n+="(?:";else{if(/^\\[1-9]/.test(o))throw new Error(`Invalid decimal escape "${o}" with implicit flag n; replace with named backreference`);n+=o}else n+=o}return{transformed:n,runningContext:t}}var H=/^\s$/,fe=/^\\[\s#]$/,q=/^[ \t]$/,pe=/^\\[ \t]$/;function W(e,t){e=String(e);let r=!1,n=!1,o=!1,c="",s="",i="",l="",u=!1,E=(a,{prefix:m=!0,postfix:d=!1}={})=>(a=(u&&m?"(?:)":"")+a+(d?"(?:)":""),u=!1,a);for(let[a]of e.matchAll(h)){if(o){a===` | ||
`&&(o=!1,u=!0);continue}if(r){if(H.test(a))continue;r=!1,u=!0}else if(n){if(q.test(a))continue;n=!1}c+=a,t=w(c,t);let{regexContext:m,charClassContext:d}=t;if(a==="-"&&m===f.CHAR_CLASS&&l===p.RANGE)throw new Error("Invalid unescaped hyphen as the end value for a range");if(m===f.DEFAULT&&/^[?*+]\??$/.test(a)||m===f.INTERVAL_QUANTIFIER&&a==="{")s+=E(a,{prefix:!1,postfix:i==="("});else if(m===f.DEFAULT)H.test(a)?r=!0:a.startsWith("#")?o=!0:fe.test(a)?s+=E(a[1],{prefix:!1}):s+=E(a);else if(m===f.CHAR_CLASS&&a!=="["&&a!=="[^")if(q.test(a)&&(d===p.DEFAULT||d===p.RANGE||d===p.Q_TOKEN))n=!0;else{if(d===p.INVALID_INCOMPLETE_TOKEN)throw new Error(`Invalid incomplete token in character class: "${a}"`);pe.test(a)&&(d===p.DEFAULT||d===p.Q_TOKEN)?s+=E(a[1],{prefix:!1}):d===p.DEFAULT?s+=E(S(L(a))):s+=E(a)}else s+=E(a);r||n||o||(i=a,l=d)}return{transformed:s,runningContext:t}}function v(e){let t=String.raw`\(\?:\)`;return e=A(e,`${t}(?:${t})+`,"(?:)",g.DEFAULT),e=A(e,String.raw`^${t}(?![?*+{])|${t}$|${t}(?=[()|$\\])|(?<=[()|>^]|\(\?(?:[:=!]|<[=!]))${t}`,"",g.DEFAULT),e}var I=function(e,...t){let r=this instanceof Function?this:RegExp;if(Array.isArray(e?.raw))return k(r,{flags:""},e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return k.bind(null,r,{flags:e});if({}.toString.call(e)==="[object Object]"&&!t.length)return k.bind(null,r,e);throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)};function k(e,t,r,...n){let{flags:o="",postprocessors:c=[],__flagN:s=!0,__flagX:i=!0,__rake:l=!0}=t;if(/[vu]/.test(o))throw new Error("Flags v/u cannot be explicitly added since v is always enabled");i&&({template:r,values:n}=P(r,n,W)),s&&({template:r,values:n}=P(r,n,Q));let u=0,E="",a={};r.raw.forEach((d,T)=>{let $=r.raw[T]||r.raw[T+1];u+=x(d),E+=L(d,g.CHAR_CLASS),a=w(E,a);let{regexContext:te,charClassContext:re}=a;if(T<r.raw.length-1){let K=Ee(n[T],o,te,re,$,u);u+=K.capturesAdded||0,E+=K.value}});let m=[G,...c];l&&m.push(v);for(let d of m)E=d(E);return new 
e(E,`v${o}`)}function Ee(e,t,r,n,o,c){if(e instanceof RegExp&&r!==f.DEFAULT)throw new Error("Cannot interpolate a RegExp at this position because the syntax context does not match");if(r===f.INVALID_INCOMPLETE_TOKEN||n===p.INVALID_INCOMPLETE_TOKEN)throw new Error("Interpolation preceded by invalid incomplete token");let s=e instanceof N,i;if(!(e instanceof RegExp)){e=String(e),s||(i=F(e,r));let l=V(i||e,r,n);if(l)throw new Error(`Unescaped stray "${l}" in the interpolated value would have side effects outside it`)}if(r===f.ENCLOSED_TOKEN||r===f.INTERVAL_QUANTIFIER||r===f.GROUP_NAME||n===p.ENCLOSED_TOKEN||n===p.Q_TOKEN)return{value:s?e:i};if(r===f.CHAR_CLASS){if(s){if(A(e,"^-|^&&|-$|&&$","")!==e)throw new Error("In character classes, a partial cannot use a range/set operator at its boundary; move the operation into the partial or the operator outside of it");let u=y(S(e));return{value:b(e)?`[${u}]`:L(u)}}return{value:b(i)?`[${i}]`:i}}if(e instanceof RegExp){let l=de(e,t),u=j(l.value,c);return{value:l.usedModifier?u:`(?:${u})`,capturesAdded:x(e.source)}}return s?{value:`(?:${e})`}:{value:o?`(?:${i})`:i}}function de(e,t){let r={i:null,m:null,s:null},n="\\n\\r\\u2028\\u2029",o=e.source;if(e.ignoreCase!==t.includes("i"))if(O)r.i=e.ignoreCase;else throw new Error("Pattern modifiers not supported, so the value of flag i on the interpolated RegExp must match the outer regex");if(e.dotAll!==t.includes("s")&&(O?r.s=e.dotAll:o=A(o,"\\.",e.dotAll?"[^]":`[^${n}]`,g.DEFAULT)),e.multiline!==t.includes("m")&&(O?r.m=e.multiline:(o=A(o,"\\^",e.multiline?`(?<=^|[${n}])`:"(?<![^])",g.DEFAULT),o=A(o,"\\$",e.multiline?`(?=$|[${n}])`:"(?![^])",g.DEFAULT))),O){let c=Object.keys(r),s=c.filter(l=>r[l]===!0).join(""),i=c.filter(l=>r[l]===!1).join("");if(i&&(s+=`-${i}`),s)return{value:`(?${s}:${o})`,usedModifier:!0}}return{value:o}}function ge(e,...t){let r=(e?.postprocessors||[]).concat(X),n=this instanceof Function?this:RegExp;if(Array.isArray(e?.raw))return 
I.bind(n)({flags:"",postprocessors:r})(e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return I.bind(n)({flags:e,postprocessors:r});if({}.toString.call(e)==="[object Object]"&&!t.length)return I.bind(n)({...e,postprocessors:r});throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}function X(e){let t={},r=0,n;for(R.lastIndex=0;n=R.exec(e);){let{0:o,groups:{capGroupName:c,rDepth:s,gRName:i,gRDepth:l}}=n;if(o==="[")r++;else if(r)o==="]"&&r--;else if(c)t[c]=R.lastIndex;else if(s){let u=+s;Z(u);let E=e.slice(0,n.index),a=e.slice(R.lastIndex);return z(a),B(E,a,u)}else if(i){let u=+l;Z(u);let E=`Recursion via \\g<${i}> must be within the referenced group`;if(!Object.hasOwn(t,i))throw new Error(E);let a=me(e,t[i]);if(!_(a,Y,g.DEFAULT))throw new Error(E);let m=e.slice(t[i],n.index),d=a.slice(m.length+o.length);return z(d),e.slice(0,t[i])+B(m,d,u)+e.slice(t[i]+a.length)}}return e}var Y=String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`,ee=String.raw`\(\?R=(?<rDepth>\d+)\)|${Y}`,R=new RegExp(String.raw`\(\?<(?![=!])(?<capGroupName>[^>]+)>|${ee}|\\?.`,"gsu");function Z(e){if(e<2||e>100)throw new Error(`Max depth must be between 2 and 100; used ${e}`)}function z(e){if(_(e,ee,g.DEFAULT))throw new Error("Cannot use recursion more than once in a pattern")}function me(e,t){let r=/(?<groupStart>\(\?[:=!<>A-Za-z\-])|\\?./gsu;r.lastIndex=t;let n=e.length,o=0,c=1,s;for(;s=r.exec(e);){let{0:i,groups:{groupStart:l}}=s;if(i==="[")o++;else if(o)i==="]"&&o--;else if(l)c++;else if(i===")"&&(c--,!c)){n=s.index;break}}return e.slice(t,n)}function B(e,t,r){let n=r-1;return`${e}${J(`(?:${e}`,n)}(?:)${J(`${t})`,n,"backward")}${t}`}function J(e,t,r="forward"){let o=s=>r==="backward"?t-s+2-1:s+2,c="";for(let s=0;s<t;s++){let i=o(s);c+=A(e,String.raw`\(\?<(?<captureName>[^>]+)>|\\k<(?<backref>[^>]+)>`,({groups:{captureName:l,backref:u}})=>l?`(?<${l}$r${i}>`:`\\k<${u}$r${i}>`,g.DEFAULT)}return c}return ce(Ae);})(); | ||
`.replace(/\s+/g,""),"gsu");function Q(e){let t=!1,n;for(let{0:r,groups:s}of e.matchAll(Se)){if(s.pStrProp||s.qStrProp||r==="["&&t)return!0;if(["-","--","&&"].includes(r))t=!1;else if(!["[","]"].includes(r)){if(t||n==="]")return!0;t=!0}n=r}return!1}function H(e,t,n){let r={raw:[]},s=[],a={};return e.raw.forEach((o,u)=>{let c=n(o,{...a,lastPos:0});if(r.raw.push(c.transformed),a=c.runningContext,u<e.raw.length-1){let l=t[u];if(l instanceof L){let f=n(l,{...a,lastPos:0});s.push(V(f.transformed)),a=f.runningContext}else s.push(l)}}),{template:r,values:s}}function ee(e){if(!h(e,String.raw`\(\?>`,E.DEFAULT))return e;let t=new RegExp(String.raw`(?<noncapturingStart>${$})|(?<capturingStart>\((?:\?<[^>]+>)?)|(?<backrefNum>\\[1-9]\d*)|\\?.`,"gsu"),n="(?>",r="(?:(?=(",s=0,a=0,o=NaN,u;do{u=!1;let c=0,l=0,f=!1,i;for(t.lastIndex=Number.isNaN(o)?0:o+r.length;i=t.exec(e);){let{0:m,index:d,groups:{backrefNum:_,capturingStart:R,noncapturingStart:C}}=i;if(m==="[")c++;else if(c)m==="]"&&c--;else if(m===n&&!f)o=d,f=!0;else if(f&&C)l++;else if(R)f&&l++,s++;else if(m===")"&&f){if(!l){a++,e=`${e.slice(0,o)}${r}${e.slice(o+n.length,d)}))\\k<$$${a+s}>)${e.slice(d+1)}`,u=!0;break}l--}else if(_)throw new Error(`Invalid decimal escape "${m}" in interpolated regex; cannot be used with atomic group`)}}while(u);return e=A(e,String.raw`\\k<\$\$(?<backrefNum>\d+)>`,({groups:{backrefNum:c}})=>`\\${c}`,E.DEFAULT),e}var Te=new RegExp(String.raw` | ||
${$} | ||
| \(\?< | ||
| (?<backrefNum> \\[1-9]\d* ) | ||
| \\? . | ||
`.replace(/\s+/g,""),"gsu");function te(e,t){e=String(e);let n="",r="";for(let{0:s,groups:{backrefNum:a}}of e.matchAll(Te)){n+=s,t=U(n,t);let{regexContext:o}=t;if(o===p.DEFAULT)if(s==="(")r+="(?:";else{if(a)throw new Error(`Invalid decimal escape "${s}" with implicit flag n; replace with named backreference`);r+=s}else r+=s}return{transformed:r,runningContext:t}}var ne=/^\s$/,Le=/^\\[\s#]$/,re=/^[ \t]$/,_e=/^\\[ \t]$/,Ce=new RegExp(String.raw` | ||
\\ (?: | ||
[gk] < | ||
| [pPu] \{ | ||
| c [A-Za-z] | ||
| u [A-Fa-f\d]{4} | ||
| x [A-Fa-f\d]{2} | ||
| 0 \d+ | ||
) | ||
| \[\^ | ||
| ${$} | ||
| \(\?< | ||
| (?<dp> [${k}] ) \k<dp> | ||
| -- | ||
| \\? . | ||
`.replace(/\s+/g,""),"gsu");function oe(e,t){e=String(e);let n=!1,r=!1,s=!1,a="",o="",u="",c="",l=!1,f=(i,{prefix:m=!0,postfix:d=!1}={})=>(i=(l&&m?"(?:)":"")+i+(d?"(?:)":""),l=!1,i);for(let[i]of e.matchAll(Ce)){if(s){i===` | ||
`&&(s=!1,l=!0);continue}if(n){if(ne.test(i))continue;n=!1,l=!0}else if(r){if(re.test(i))continue;r=!1}a+=i,t=U(a,t);let{regexContext:m,charClassContext:d}=t;if(i==="-"&&m===p.CHAR_CLASS&&c===g.RANGE)throw new Error("Invalid unescaped hyphen as the end value for a range");if(m===p.DEFAULT&&/^(?:[?*+]|\?\?)$/.test(i)||m===p.INTERVAL_QUANTIFIER&&i==="{")o+=f(i,{prefix:!1,postfix:u==="("&&i==="?"});else if(m===p.DEFAULT)ne.test(i)?n=!0:i.startsWith("#")?s=!0:Le.test(i)?o+=f(i[1],{prefix:!1}):o+=f(i);else if(m===p.CHAR_CLASS&&i!=="["&&i!=="[^")if(re.test(i)&&(d===g.DEFAULT||d===g.RANGE||d===g.Q_TOKEN))r=!0;else{if(d===g.INVALID_INCOMPLETE_TOKEN)throw new Error(`Invalid incomplete token in character class: "${i}"`);_e.test(i)&&(d===g.DEFAULT||d===g.Q_TOKEN)?o+=f(i[1],{prefix:!1}):d===g.DEFAULT?o+=f(G(D(i))):o+=f(i)}else o+=f(i);n||r||s||(u=i,c=d)}return{transformed:o,runningContext:t}}function se(e){let t=String.raw`\(\?:\)`;return e=A(e,`(?:${t}){2,}`,"(?:)",E.DEFAULT),e=A(e,String.raw`^${t}(?![?*+{])|${t}$|${t}(?=[()|$\\])|(?<=[()|>^]|\(\?(?:[:=!]|<[=!]))${t}`,"",E.DEFAULT),e}var ie=String.raw`\\g<(?<subroutineName>[^>&]+)>`,q=String.raw`\((?:(?!\?)|\?<(?![=!])(?<captureName>[^>]+)>)`,O=new RegExp(String.raw` | ||
${ie} | ||
| (?<capturingStart> ${q} ) | ||
| \\ (?<backrefNum> [1-9]\d* ) | ||
| \\k< (?<backrefName> [^>]+ ) > | ||
| \\? . | ||
`.replace(/\s+/g,""),"gsu");function ae(e){if(!h(e,"\\\\g<",E.DEFAULT))return e;let t=$e(e),n=[0],r=Re(e),s=0,a=0,o=new Map,u=[],c=0,l=e,f;for(O.lastIndex=0;f=O.exec(l);){let{0:i,index:m,groups:{subroutineName:d,capturingStart:_,backrefNum:R,backrefName:C}}=f;if(i==="[")c++;else if(c)i==="]"&&c--;else{let N=o.size?o.get(u.at(-1)):null;if(d){if(!t.has(d))throw new Error(`Invalid named capture referenced by subroutine ${i}`);if(o.has(d))throw new Error(`Subroutine ${i} followed a recursive reference`);let w=t.get(d)[0].contents,S=b(w)+1;a+=S;let T=`(${w})`;o.set(d,{contents:w,unclosedGroupCount:Ue(T),numCaptures:S}),u.push(d),l=K(l,m,i,T),O.lastIndex-=i.length}else if(_)o.size?(i!=="("&&(l=K(l,m,i,"("),O.lastIndex-=i.length),n.push(n.at(-1)+N.numCaptures)):(s++,n.length===s&&n.push(n.at(-1)));else if(R){let w=+R,S;if(o.size?w>r&&(S=s+a-r-N.numCaptures):S=n[w],S){let T=`\\${w+S}`;l=K(l,m,i,T),O.lastIndex+=T.length-i.length}}else if(C){if(o.size){let w=!1;for(let S of u){let T=o.get(S).contents;if(h(T,String.raw`\(\?<${C}>`,E.DEFAULT)){w=!0;break}}if(w){let T=`\\${Ie(e,C)+1}`;l=K(l,m,i,T),O.lastIndex-=i.length}}}else i===")"&&o.size&&(N.unclosedGroupCount--,N.unclosedGroupCount||o.delete(u.pop()))}}return l}function Re(e){let t=new Set;I(e,ie,({groups:{subroutineName:a}})=>{t.add(a)},E.DEFAULT);let n=0,r=0,s;for(;s=F(e,q,r,E.DEFAULT);){let{0:a,index:o,groups:{captureName:u}}=s;if(t.has(u))break;n++,r=o+a.length}return n}function Ie(e,t){let n=0,r=0,s;for(;s=F(e,q,r,E.DEFAULT);){let{0:a,index:o,groups:{captureName:u}}=s;if(u===t)break;n++,r=o+a.length}return n}function K(e,t,n,r){return e.slice(0,t)+r+e.slice(t+n.length)}function $e(e){let t=new Map;return I(e,String.raw`\(\?<(?<captureName>[^>]+)>`,({0:n,index:r,groups:{captureName:s}})=>{t.has(s)||t.set(s,[]);let a=r+n.length,o=P(e,a);t.get(s).push({contents:o,endPos:a+o.length})},E.DEFAULT),t}function Ue(e){let t=0;return I(e,String.raw`\(`,()=>t++,E.DEFAULT),t}var z=function(e,...t){let n=this instanceof 
Function?this:RegExp;if(Array.isArray(e?.raw))return B(n,{flags:""},e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return B.bind(null,n,{flags:e});if({}.toString.call(e)==="[object Object]"&&!t.length)return B.bind(null,n,e);throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)};function B(e,t,n,...r){let{flags:s="",postprocessors:a=[],__flagN:o=!0,__flagX:u=!0,__rake:c=!0}=t;if(/[vu]/.test(s))throw new Error("Flags v/u cannot be explicitly added since v is always enabled");u&&({template:n,values:r}=H(n,r,oe)),o&&({template:n,values:r}=H(n,r,te));let l=0,f="",i={};return n.raw.forEach((m,d)=>{let _=n.raw[d]||n.raw[d+1];l+=b(m),f+=D(m,E.CHAR_CLASS),i=U(f,i);let{regexContext:R,charClassContext:C}=i;if(d<n.raw.length-1){let N=r[d];f+=Oe(N,s,R,C,_,l),(N instanceof RegExp||N instanceof L)&&(l+=b(N.source||String(N)))}}),[...a,ee,ae,...c?[se]:[]].forEach(m=>f=m(f)),new e(f,`v${s}`)}function Oe(e,t,n,r,s,a){if(e instanceof RegExp&&n!==p.DEFAULT)throw new Error("Cannot interpolate a RegExp at this position because the syntax context does not match");if(n===p.INVALID_INCOMPLETE_TOKEN||r===g.INVALID_INCOMPLETE_TOKEN)throw new Error("Interpolation preceded by invalid incomplete token");let o=e instanceof L,u;if(!(e instanceof RegExp)){e=String(e),o||(u=j(e,n===p.CHAR_CLASS?E.CHAR_CLASS:E.DEFAULT));let c=v(u||e,n,r);if(c)throw new Error(`Unescaped stray "${c}" in the interpolated value would have side effects outside it`)}if(n===p.ENCLOSED_TOKEN||n===p.INTERVAL_QUANTIFIER||n===p.GROUP_NAME||r===g.ENCLOSED_TOKEN||r===g.Q_TOKEN)return o?e:u;if(n===p.CHAR_CLASS){if(o){if(h(e,"^-|^&&|-$|&&$"))throw new Error("In character classes, a partial cannot use a range or set operator at its boundary; move the operation into the partial or the operator outside of it");let c=X(G(e));return Q(e)?`[${c}]`:D(c)}return Q(u)?`[${u}]`:u}if(e instanceof RegExp){let c=xe(e,t),l=Y(c.value,a);return c.usedModifier?l:`(?:${l})`}return o?`(?:${e})`:s?`(?:${u})`:u}function 
xe(e,t){let n={i:null,m:null,s:null},r="\\n\\r\\u2028\\u2029",s=e.source;if(e.ignoreCase!==t.includes("i"))if(x)n.i=e.ignoreCase;else throw new Error("Pattern modifiers not supported, so the value of flag i on the interpolated RegExp must match the outer regex");if(e.dotAll!==t.includes("s")&&(x?n.s=e.dotAll:s=A(s,"\\.",e.dotAll?"[^]":`[^${r}]`,E.DEFAULT)),e.multiline!==t.includes("m")&&(x?n.m=e.multiline:(s=A(s,"\\^",e.multiline?`(?<=^|[${r}])`:"(?<![^])",E.DEFAULT),s=A(s,"\\$",e.multiline?`(?=$|[${r}])`:"(?![^])",E.DEFAULT))),x){let a=Object.keys(n),o=a.filter(c=>n[c]===!0).join(""),u=a.filter(c=>n[c]===!1).join("");if(u&&(o+=`-${u}`),o)return{value:`(?${o}:${s})`,usedModifier:!0}}return{value:s}}function De(e,...t){let n=(e?.postprocessors||[]).concat(Ee),r=this instanceof Function?z.bind(this):z;if(Array.isArray(e?.raw))return r({flags:"",postprocessors:n})(e,...t);if((typeof e=="string"||e===void 0)&&!t.length)return r({flags:e,postprocessors:n});if({}.toString.call(e)==="[object Object]"&&!t.length)return r({...e,postprocessors:n});throw new Error(`Unexpected arguments: ${JSON.stringify([e,...t])}`)}var pe=String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`,W=String.raw`\(\?R=(?<rDepth>\d+)\)|${pe}`,M=new RegExp(String.raw`\(\?<(?![=!])(?<capturingGroupName>[^>]+)>|${W}|\\?.`,"gsu");function Ee(e){if(!h(e,W,E.DEFAULT))return e;if(h(e,String.raw`\\[1-9]`,E.DEFAULT))throw new Error("Invalid decimal escape in interpolated regex; cannot be used with recursion");let t=new Map,n=0,r;for(M.lastIndex=0;r=M.exec(e);){let{0:s,groups:{capturingGroupName:a,rDepth:o,gRName:u,gRDepth:c}}=r;if(s==="[")n++;else if(n)s==="]"&&n--;else if(a)t.set(a,M.lastIndex);else if(o){let l=+o;ue(l);let f=e.slice(0,r.index),i=e.slice(M.lastIndex);return ce(i),le(f,i,l)}else if(u){let l=+c;ue(l);let f=`Recursion via \\g<${u}&R=${c}> must be used within the referenced group`;if(!t.has(u))throw new Error(f);let i=t.get(u),m=P(e,i);if(!h(m,pe,E.DEFAULT))throw new Error(f);let 
d=e.slice(i,r.index),_=m.slice(d.length+s.length);return ce(_),e.slice(0,i)+le(d,_,l)+e.slice(i+m.length)}}throw new Error("Unexpected error; recursion was not processed")}function ue(e){if(e<2||e>100)throw new Error(`Max depth must be between 2 and 100; used ${e}`)}function ce(e){if(h(e,W,E.DEFAULT))throw new Error("Cannot use recursion more than once in a pattern")}function le(e,t,n){let r=n-1;return`${e}${fe(`(?:${e}`,r)}(?:)${fe(`${t})`,r,"backward")}${t}`}function fe(e,t,n="forward"){let s=o=>n==="backward"?t-o+2-1:o+2,a="";for(let o=0;o<t;o++){let u=s(o);a+=A(e,String.raw`\(\?<(?<captureName>[^>]+)>|\\k<(?<backref>[^>]+)>`,({groups:{captureName:c,backref:l}})=>{let f=`_$${u}`;return c?`(?<${c}${f}>`:`\\k<${l}${f}>`},E.DEFAULT)}return a}return Ne(be);})(); | ||
/*! Bundled license information: | ||
regex/src/index.js: | ||
(*! regex 1.1.1; Steven Levithan; MIT License *) | ||
(*! regex 2.0.0; Steven Levithan; MIT License *) | ||
*/ | ||
//# sourceMappingURL=regex-recursion.min.js.map |
{ | ||
"name": "regex-recursion", | ||
"version": "1.0.0", | ||
"description": "Recursive matching extension for tag regex", | ||
"version": "2.0.0", | ||
"description": "Recursive matching extension for the regex package", | ||
"author": "Steven Levithan", | ||
"license": "MIT", | ||
"type": "module", | ||
"exports": "./src/index.js", | ||
"type": "module", | ||
"scripts": { | ||
@@ -13,4 +15,6 @@ "build": "esbuild src/index.js --bundle --minify --sourcemap --outfile=dist/regex-recursion.min.js --global-name=Regex.ext", | ||
}, | ||
"author": "Steven Levithan", | ||
"license": "MIT", | ||
"files": [ | ||
"src", | ||
"dist" | ||
], | ||
"repository": { | ||
@@ -26,9 +30,9 @@ "type": "git", | ||
"dependencies": { | ||
"regex": "^1.1.1", | ||
"regex-utilities": "^1.0.0" | ||
"regex": "^2.0.0", | ||
"regex-utilities": "^2.0.0" | ||
}, | ||
"devDependencies": { | ||
"esbuild": "^0.21.4", | ||
"esbuild": "^0.21.5", | ||
"jasmine": "^5.1.0" | ||
} | ||
} |
# `regex-recursion` | ||
This is an extension for the [`regex`](https://github.com/slevithan/regex) template tag that adds support for recursive patterns up to a specified max depth *N*, where *N* must be 2–100. | ||
This is an extension for the [`regex`](https://github.com/slevithan/regex) package that adds support for matching recursive patterns up to a specified max depth *N*, where *N* must be 2–100. | ||
You can add recursion to a regex pattern via one of the following: | ||
Recursive matching is added to a regex pattern via one of the following: | ||
- `(?R=N)` — Recursively match the entire pattern at this position. | ||
- `\g<name&R=N>` — Recursively match the contents of group *name* at this position. The `\g` subroutine must be used within the referenced group. | ||
- `\g<name&R=N>` — Recursively match the contents of group *name* at this position. The `\g` subroutine must be called within the referenced group. | ||
Any backreferences are unique per depth level. | ||
Recursive matching supports named captures and backreferences, and makes them independent per depth level. For example, `groups.name` on a `RegExp` match array is the value captured by group `name` at the top level of the recursion stack. | ||
@@ -24,7 +24,8 @@ ## Examples | ||
Match an equal number of two different patterns, as the whole string: | ||
Match an equal number of two different patterns, as the entire string: | ||
```js | ||
const re = rregex` | ||
^ | ||
import {rregex} from 'regex-recursion'; | ||
const re = rregex`^ | ||
(?<balanced> | ||
@@ -36,4 +37,3 @@ a | ||
) | ||
$ | ||
`; | ||
$`; | ||
re.test('aaabbb'); // → true | ||
@@ -46,5 +46,7 @@ re.test('aaabb'); // → false | ||
```js | ||
// Matches all balanced parentheses up to depth 10 | ||
import {rregex} from 'regex-recursion'; | ||
// Matches all balanced parentheses up to depth 50 | ||
const parens = rregex('g')`\( | ||
( [^\(\)] | (?R=10) )* | ||
( [^\(\)] | (?R=50) )* | ||
\)`; | ||
@@ -54,2 +56,13 @@ | ||
// → ['(balanced ((parens)))', '()', '((a))', '(b)'] | ||
// ---------- | ||
// Here's an alternative that matches the same strings | ||
const parens = rregex('g')`\( | ||
( (?> [^\(\)]+ ) | (?R=50) )* | ||
\)`; | ||
// This matches sequences of non-parens in one step with the `+` quantifier, | ||
// and avoids backtracking into these sequences by using an atomic group | ||
// `(?>…)`. Given the nested quantifier, the atomic group is important here. | ||
// It avoids runaway backtracking when matching long strings with unbalanced | ||
// parens. Atomic groups are provided by the base `regex` package. | ||
``` | ||
@@ -60,5 +73,7 @@ | ||
```js | ||
const palindromes = rregex('gi')`(?<char>\w) ((?R=10)|\w?) \k<char>`; | ||
// Palindrome maxlength: 21 = 2 chars (left+right) × depth 10 + 1 in the center | ||
import {rregex} from 'regex-recursion'; | ||
const palindromes = rregex('gi')`(?<char>\w) ((?R=15)|\w?) \k<char>`; | ||
// Palindrome max length: 31 = 2 chars (left + right) × depth 15 + 1 in center | ||
'Racecar, ABBA, and redivided'.match(palindromes); | ||
@@ -71,12 +86,12 @@ // → ['Racecar', 'ABBA', 'edivide'] | ||
```js | ||
const palindromeWords = rregex('gi')` | ||
\b | ||
import {rregex} from 'regex-recursion'; | ||
const palindromeWords = rregex('gi')`\b | ||
(?<palindrome> | ||
(?<char>\w) | ||
(?<char> \w ) | ||
# Recurse, or match a lone unbalanced char in the center | ||
( \g<palindrome&R=10> | \w? ) | ||
( \g<palindrome&R=15> | \w? ) | ||
\k<char> | ||
) | ||
\b | ||
`; | ||
\b`; | ||
@@ -89,3 +104,3 @@ 'Racecar, ABBA, and redivided'.match(palindromeWords); | ||
Template tag `rregex` is sugar for applying recursion support via a postprocessor with tag `regex`. You can also add recursion support the verbose way: | ||
Template tag `rregex` is sugar for using the base `regex` tag and adding recursion support via a postprocessor. You can also add recursion support the verbose way: | ||
@@ -96,3 +111,3 @@ ```js | ||
regex({flags: 'g', postprocessors: [recursion]})`a(?R=2)?b`; | ||
regex({flags: 'i', postprocessors: [recursion]})`a(?R=2)?b`; | ||
``` | ||
@@ -102,3 +117,3 @@ | ||
```bash | ||
```sh | ||
npm install regex-recursion | ||
@@ -116,5 +131,4 @@ ``` | ||
<script> | ||
// Recommended | ||
const {rregex} = Regex.ext; | ||
</script> | ||
``` |
117
src/index.js
import {regex} from 'regex'; | ||
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities'; | ||
import {Context, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities'; | ||
@@ -7,12 +7,12 @@ export function rregex(first, ...values) { | ||
// Allow binding to other constructors | ||
const constructor = this instanceof Function ? this : RegExp; | ||
const tag = this instanceof Function ? regex.bind(this) : regex; | ||
// Given a template | ||
if (Array.isArray(first?.raw)) { | ||
return regex.bind(constructor)({flags: '', postprocessors})(first, ...values); | ||
return tag({flags: '', postprocessors})(first, ...values); | ||
// Given flags | ||
} else if ((typeof first === 'string' || first === undefined) && !values.length) { | ||
return regex.bind(constructor)({flags: first, postprocessors}); | ||
return tag({flags: first, postprocessors}); | ||
// Given an options object | ||
} else if ({}.toString.call(first) === '[object Object]' && !values.length) { | ||
return regex.bind(constructor)({...first, postprocessors}); | ||
return tag({...first, postprocessors}); | ||
} | ||
@@ -22,4 +22,24 @@ throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...values])}`); | ||
const gRToken = String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`; | ||
const recursiveToken = String.raw`\(\?R=(?<rDepth>\d+)\)|${gRToken}`; | ||
const token = new RegExp(String.raw`\(\?<(?![=!])(?<capturingGroupName>[^>]+)>|${recursiveToken}|\\?.`, 'gsu'); | ||
/** | ||
@param {string} pattern | ||
@returns {string} | ||
*/ | ||
export function recursion(pattern) { | ||
const groupContentsStartPos = {}; | ||
if (!hasUnescaped(pattern, recursiveToken, Context.DEFAULT)) { | ||
return pattern; | ||
} | ||
if (hasUnescaped(pattern, String.raw`\\[1-9]`, Context.DEFAULT)) { | ||
// Could allow this with extra effort but it's probably not worth it. To trigger this, the | ||
// regex must contain both recursion and an interpolated regex with a numbered backref (since | ||
// numbered backrefs outside regex interpolation are prevented by implicit flag n). Note that | ||
// some of `regex`'s built-in features (atomic groups and subroutines) can add numbered | ||
// backrefs. However, those work fine with recursion because postprocessors from extensions | ||
// (like `regex-recursion`) run before built-in postprocessors | ||
throw new Error(`Invalid decimal escape in interpolated regex; cannot be used with recursion`); | ||
} | ||
const groupContentsStartPos = new Map(); | ||
let numCharClassesOpen = 0; | ||
@@ -29,8 +49,9 @@ let match; | ||
while (match = token.exec(pattern)) { | ||
const {0: m, groups: {capGroupName, rDepth, gRName, gRDepth}} = match; | ||
const {0: m, groups: {capturingGroupName, rDepth, gRName, gRDepth}} = match; | ||
if (m === '[') { | ||
numCharClassesOpen++; | ||
} else if (!numCharClassesOpen) { | ||
if (capGroupName) { | ||
groupContentsStartPos[capGroupName] = token.lastIndex; | ||
if (capturingGroupName) { | ||
groupContentsStartPos.set(capturingGroupName, token.lastIndex); | ||
// (?R=N) | ||
@@ -48,19 +69,21 @@ } else if (rDepth) { | ||
assertMaxInBounds(maxDepth); | ||
const outsideOwnGroupMsg = `Recursion via \\g<${gRName}> must be within the referenced group`; | ||
// Appears before/outside group | ||
if (!Object.hasOwn(groupContentsStartPos, gRName)) { | ||
const outsideOwnGroupMsg = `Recursion via \\g<${gRName}&R=${gRDepth}> must be used within the referenced group`; | ||
// Appears before/outside the referenced group | ||
if (!groupContentsStartPos.has(gRName)) { | ||
throw new Error(outsideOwnGroupMsg); | ||
} | ||
const recursiveGroupContents = getContentsOfGroup(pattern, groupContentsStartPos[gRName]); | ||
// Appears after/outside group | ||
if (!hasUnescaped(recursiveGroupContents, gToken, Context.DEFAULT)) { | ||
const startPos = groupContentsStartPos.get(gRName); | ||
const recursiveGroupContents = getGroupContents(pattern, startPos); | ||
// Appears after/outside the referenced group | ||
if (!hasUnescaped(recursiveGroupContents, gRToken, Context.DEFAULT)) { | ||
throw new Error(outsideOwnGroupMsg) | ||
} | ||
const pre = pattern.slice(groupContentsStartPos[gRName], match.index); | ||
const pre = pattern.slice(startPos, match.index); | ||
const post = recursiveGroupContents.slice(pre.length + m.length); | ||
assertNoFollowingRecursion(post); | ||
return pattern.slice(0, groupContentsStartPos[gRName]) + | ||
return pattern.slice(0, startPos) + | ||
makeRecursive(pre, post, maxDepth) + | ||
pattern.slice(groupContentsStartPos[gRName] + recursiveGroupContents.length); | ||
pattern.slice(startPos + recursiveGroupContents.length); | ||
} | ||
} else if (m === ']') { | ||
@@ -70,10 +93,8 @@ numCharClassesOpen--; | ||
} | ||
// No change | ||
return pattern; | ||
throw new Error('Unexpected error; recursion was not processed'); | ||
} | ||
const gToken = String.raw`\\g<(?<gRName>[^>&]+)&R=(?<gRDepth>\d+)>`; | ||
const recursiveToken = String.raw`\(\?R=(?<rDepth>\d+)\)|${gToken}`; | ||
const token = new RegExp(String.raw`\(\?<(?![=!])(?<capGroupName>[^>]+)>|${recursiveToken}|\\?.`, 'gsu'); | ||
/** | ||
@param {number} max | ||
*/ | ||
function assertMaxInBounds(max) { | ||
@@ -91,31 +112,8 @@ if (max < 2 || max > 100) { | ||
function getContentsOfGroup(pattern, contentsStartPos) { | ||
const token = /(?<groupStart>\(\?[:=!<>A-Za-z\-])|\\?./gsu; | ||
token.lastIndex = contentsStartPos; | ||
let contentsEndPos = pattern.length; | ||
let numCharClassesOpen = 0; | ||
// Starting search within an open group, after the group's opening | ||
let numGroupsOpen = 1; | ||
let match; | ||
while (match = token.exec(pattern)) { | ||
const {0: m, groups: {groupStart}} = match; | ||
if (m === '[') { | ||
numCharClassesOpen++; | ||
} else if (!numCharClassesOpen) { | ||
if (groupStart) { | ||
numGroupsOpen++; | ||
} else if (m === ')') { | ||
numGroupsOpen--; | ||
if (!numGroupsOpen) { | ||
contentsEndPos = match.index; | ||
break; | ||
} | ||
} | ||
} else if (m === ']') { | ||
numCharClassesOpen--; | ||
} | ||
} | ||
return pattern.slice(contentsStartPos, contentsEndPos); | ||
} | ||
/** | ||
@param {string} pre | ||
@param {string} post | ||
@param {number} maxDepth | ||
@returns {string} | ||
*/ | ||
function makeRecursive(pre, post, maxDepth) { | ||
@@ -128,8 +126,14 @@ const reps = maxDepth - 1; | ||
/** | ||
@param {string} pattern | ||
@param {number} reps | ||
@param {'forward' | 'backward'} [direction] | ||
@returns {string} | ||
*/ | ||
function repeatWithDepth(pattern, reps, direction = 'forward') { | ||
const startNum = 2; | ||
const value = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum; | ||
const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum; | ||
let result = ''; | ||
for (let i = 0; i < reps; i++) { | ||
const captureNum = value(i); | ||
const captureNum = depthNum(i); | ||
result += replaceUnescaped( | ||
@@ -139,3 +143,4 @@ pattern, | ||
({groups: {captureName, backref}}) => { | ||
return captureName ? `(?<${captureName}$r${captureNum}>` : `\\k<${backref}$r${captureNum}>`; | ||
const suffix = `_$${captureNum}`; | ||
return captureName ? `(?<${captureName}${suffix}>` : `\\k<${backref}${suffix}>`; | ||
}, | ||
@@ -142,0 +147,0 @@ Context.DEFAULT |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
103096
125
6
244
1
+ Added regex@2.1.0 (transitive)
+ Added regex-utilities@2.3.0 (transitive)
- Removed regex@1.1.1 (transitive)
- Removed regex-utilities@1.1.1 (transitive)
Updated regex@^2.0.0
Updated regex-utilities@^2.0.0