url-sanitizer
Advanced tools
Comparing version 0.4.1 to 0.4.2
@@ -594,2 +594,10 @@ // src/mjs/common.js | ||
// Shared constants.
// HEX: the number 16, passed as the radix when parsing hexadecimal digits.
// REG_CHARS: angle brackets, double and single quotes, and whitespace (global).
// REG_DATA_URL: a data URL header, its comma, then every following character
//   that is not a double quote (global).
// REG_DATA_URL_BASE64: a data URL whose header ends in base64, followed by a
//   run of base64 / base64url alphabet characters.
// REG_DATA_URL_HEADER: only the data URL header up to and including the comma.
// REG_NUM_REF: an ampersand-hash numeric character reference, either x plus an
//   optional 00 plus two hex digits, or an optional 0 plus one to three decimal
//   digits, with an optional trailing semicolon (case-insensitive, global).
// REG_SCHEME: a whole string made of a lowercase letter followed by lowercase
//   letters, digits, plus, hyphen or dot.
// REG_SCHEME_CUSTOM: a whole string of the form ext+name or web+name, where
//   name is lowercase letters only.
// REG_SCRIPT: the substring javascript or vbscript anywhere in the input
//   (unanchored).
var HEX = 16; | |
var REG_CHARS = /[<>"'\s]/g; | |
var REG_DATA_URL = /data:[^,]*,[^"]+/g; | |
var REG_DATA_URL_BASE64 = /data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/; | |
var REG_DATA_URL_HEADER = /data:[^,]*,/; | |
var REG_NUM_REF = /&#(x(?:00)?[\dA-F]{2}|0?\d{1,3});?/ig; | |
var REG_SCHEME = /^[a-z][a-z0-9+\-.]*$/; | |
var REG_SCHEME_CUSTOM = /^(?:ext|web)\+[a-z]+$/; | |
var REG_SCRIPT = /(?:java|vb)script/; | |
var getUrlEncodedString = (str) => { | ||
@@ -647,2 +655,34 @@ if (!isString(str)) { | ||
}; | ||
/**
 * Parse URL encoded numeric character references.
 *
 * The input is first percent-decoded with `decodeURIComponent`. Every
 * reference matched by `REG_NUM_REF` is then handled as follows:
 *   - if its code is listed in the text character table, it is replaced by
 *     the corresponding character;
 *   - otherwise, if its code is below 256 (`HEX * HEX`), it is removed;
 *   - otherwise it is left untouched.
 *
 * @param {string} str - string
 * @returns {string} - parsed string
 * @throws {TypeError} if `str` is not a string
 * @throws {URIError} propagated from `decodeURIComponent` on malformed input
 */
var parseUrlEncodedNumCharRef = (str) => {
  if (!isString(str)) {
    throw new TypeError(`Expected String but got ${getType(str)}.`);
  }
  let res = decodeURIComponent(str);
  if (/&#/.test(res)) {
    const textChars = new Set(text_chars_default);
    // Process matches from right to left so that the indices of the
    // not-yet-processed matches stay valid while `res` is rewritten.
    const items = [...res.matchAll(REG_NUM_REF)].reverse();
    for (const item of items) {
      const [ref, value] = item;
      let num;
      if (/^\d+$/.test(value)) {
        num = Number.parseInt(value, 10);
      } else if (/^x/i.test(value)) {
        // REG_NUM_REF is case-insensitive, so the hex marker may be `x` or
        // `X`; with radix 16, parseInt accepts both `0x` and `0X` prefixes.
        num = Number.parseInt(`0${value}`, HEX);
      }
      if (Number.isInteger(num)) {
        const { index } = item;
        const preNum = res.substring(0, index);
        const postNum = res.substring(index + ref.length);
        if (textChars.has(num)) {
          res = `${preNum}${String.fromCharCode(num)}${postNum}`;
        } else if (num < HEX * HEX) {
          res = `${preNum}${postNum}`;
        }
      }
    }
  }
  return res;
};
var URISchemes = class { | ||
@@ -687,3 +727,3 @@ /* private fields */ | ||
throw new TypeError(`Expected String but got ${getType(scheme)}.`); | ||
} else if (/(?:java|vb)script/.test(scheme) || !/^[a-z][a-z0-9+\-.]*$/.test(scheme)) { | ||
} else if (REG_SCRIPT.test(scheme) || !REG_SCHEME.test(scheme)) { | ||
throw new Error(`Invalid scheme: ${scheme}`); | ||
@@ -716,3 +756,3 @@ } | ||
const schemeParts = scheme.split("+"); | ||
res = /^(?:ext|web)\+[a-z]+$/.test(scheme) || schemeParts.every((s) => this.#schemes.has(s)); | ||
res = !REG_SCRIPT.test(scheme) && REG_SCHEME_CUSTOM.test(scheme) || schemeParts.every((s) => this.#schemes.has(s)); | ||
} catch (e) { | ||
@@ -756,6 +796,11 @@ res = false; | ||
const { allow, deny } = opt ?? {}; | ||
const { href, pathname, protocol } = new URL(url); | ||
const { hash, href, pathname, protocol, search } = new URL(url); | ||
const scheme = protocol.replace(/:$/, ""); | ||
const schemeParts = scheme.split("+"); | ||
const schemeMap = /* @__PURE__ */ new Map([["data", false], ["file", false]]); | ||
const schemeMap = /* @__PURE__ */ new Map([ | ||
["data", false], | ||
["file", false], | ||
["javascrpt", false], | ||
["vbscript", false] | ||
]); | ||
if (Array.isArray(allow) && allow.length) { | ||
@@ -766,5 +811,3 @@ const items = Object.values(allow); | ||
item = item.trim(); | ||
if (/(?:java|vb)script/.test(item)) { | ||
schemeMap.set(item, false); | ||
} else if (item) { | ||
if (!REG_SCRIPT.test(item)) { | ||
schemeMap.set(item, true); | ||
@@ -795,3 +838,2 @@ } | ||
const [amp, lt, gt, quot, apos] = ["&", "<", ">", '"', "'"].map(getUrlEncodedString); | ||
const regChars = /[<>"']/g; | ||
const regAmp = new RegExp(amp, "g"); | ||
@@ -802,32 +844,43 @@ const regEncodedChars = new RegExp(`(${lt}|${gt}|${quot}|${apos})`, "g"); | ||
if (schemeParts.includes("data")) { | ||
const [header, data] = pathname.split(","); | ||
const [header, ...body] = pathname.split(","); | ||
const data = `${body.join(",")}${search}${hash}`; | ||
const mediaType = header.split(";"); | ||
const isBase64 = mediaType.pop() === "base64"; | ||
if (isBase64) { | ||
let parsedData = parseBase64(data); | ||
if (parsedData !== data) { | ||
if (/data:[^,]*,/.test(parsedData)) { | ||
const regDataUrl = /data:[^,]*,[^"]+/g; | ||
const regBase64DataUrl = /data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/; | ||
const matchedDataUrls = parsedData.matchAll(regDataUrl); | ||
const items = [...matchedDataUrls].reverse(); | ||
for (const item of items) { | ||
let [dataUrl] = item; | ||
if (regBase64DataUrl.test(dataUrl)) { | ||
[dataUrl] = regBase64DataUrl.exec(dataUrl); | ||
} | ||
this.#nest++; | ||
this.#recurse.add(dataUrl); | ||
const parsedDataUrl = this.sanitize(dataUrl, { | ||
allow: ["data"] | ||
}); | ||
if (parsedDataUrl) { | ||
const { index } = item; | ||
const [beforeDataUrl, afterDataUrl] = [ | ||
parsedData.substring(0, index), | ||
parsedData.substring(index + dataUrl.length) | ||
]; | ||
parsedData = `${beforeDataUrl}${parsedDataUrl}${afterDataUrl}`; | ||
} | ||
let parsedData = data; | ||
if (mediaType[mediaType.length - 1] === "base64") { | ||
mediaType.pop(); | ||
parsedData = parseBase64(data); | ||
} else { | ||
try { | ||
const decodedData = parseUrlEncodedNumCharRef(parsedData); | ||
const { protocol: dataScheme } = new URL(decodedData.trim()); | ||
const dataSchemeParts = dataScheme.replace(/:$/, "").split("+"); | ||
if (dataSchemeParts.some((s) => REG_SCRIPT.test(s))) { | ||
urlToSanitize = ""; | ||
} | ||
} catch (e) { | ||
} | ||
} | ||
const containsDataUrl = REG_DATA_URL_HEADER.test(parsedData); | ||
if (parsedData !== data || containsDataUrl) { | ||
if (containsDataUrl) { | ||
const matchedDataUrls = parsedData.matchAll(REG_DATA_URL); | ||
const items = [...matchedDataUrls].reverse(); | ||
for (const item of items) { | ||
let [dataUrl] = item; | ||
if (REG_DATA_URL_BASE64.test(dataUrl)) { | ||
[dataUrl] = REG_DATA_URL_BASE64.exec(dataUrl); | ||
} | ||
this.#nest++; | ||
this.#recurse.add(dataUrl); | ||
const parsedDataUrl = this.sanitize(dataUrl, { | ||
allow: ["data"] | ||
}); | ||
if (parsedDataUrl) { | ||
const { index } = item; | ||
const [preDataUrl, postDataUrl] = [ | ||
parsedData.substring(0, index), | ||
parsedData.substring(index + dataUrl.length) | ||
]; | ||
parsedData = `${preDataUrl}${parsedDataUrl}${postDataUrl}`; | ||
} | ||
} | ||
@@ -839,6 +892,8 @@ if (this.#recurse.has(url)) { | ||
} | ||
urlToSanitize = `${scheme}:${mediaType.join(";")},${parsedData}`; | ||
} else if (this.#recurse.has(url)) { | ||
this.#recurse.delete(url); | ||
} else { | ||
escapeHtml = true; | ||
} | ||
urlToSanitize = `${scheme}:${mediaType.join(";")},${parsedData}`; | ||
} else if (this.#recurse.has(url)) { | ||
@@ -852,5 +907,10 @@ this.#recurse.delete(url); | ||
} | ||
sanitizedUrl = urlToSanitize.replace(regChars, getUrlEncodedString).replace(regAmp, escapeUrlEncodedHtmlChars); | ||
if (escapeHtml) { | ||
sanitizedUrl = sanitizedUrl.replace(regEncodedChars, escapeUrlEncodedHtmlChars); | ||
if (urlToSanitize) { | ||
sanitizedUrl = urlToSanitize.replace(REG_CHARS, getUrlEncodedString).replace(regAmp, escapeUrlEncodedHtmlChars); | ||
if (escapeHtml) { | ||
sanitizedUrl = sanitizedUrl.replace(regEncodedChars, escapeUrlEncodedHtmlChars); | ||
this.#nest = 0; | ||
} | ||
} else { | ||
sanitizedUrl = urlToSanitize; | ||
this.#nest = 0; | ||
@@ -857,0 +917,0 @@ } |
@@ -1,2 +0,2 @@ | ||
var g=s=>Object.prototype.toString.call(s).slice(8,-1),c=s=>typeof s=="string"||s instanceof String;var A=[7,8,9,10,11,12,13,27,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255];var C=["aaa","aaas","about","acap","acct","acd","acr","adiumxtra","adt","afp","afs","aim","amss","android","appdata","apt","ar","ark","attachment","aw","barion","beshare","bitcoin","bitcoincash","blob","bolo","browserext","cabal","calculator","callto","cap","cast","casts","chrome","chrome-extension","cid","coap","coaps","com-eventbrite-attendee","content","content-type","crid","cstr","cvs","dab","dat","data","dav","diaspora","dict","did","dis","dlna-playcontainer","dlna-playsingle","dns","dntp","doi","dpp","drm","dtmi","dtn","dvb","dvx","dweb","ed2k","eid","elsi","embedded","ens","ethereum","example","facetime","feed","feedready","fido","file","finger","first-run-pen-experience","fish","fm","ftp","fuchsia-pkg","geo","gg","git","gitoid","gizmoproject","go","gopher","graph","gtalk","h323","ham","hcap","hcp","http","https","hxxp","hxxps","hydrazone","hyper","iax","icap","icon","im","imap","info","iotdisco","ipfs","ipn","ipns","ipp","ipps","irc","irc6","ircs","iris","iris.beep","iris.lwz","iris.xpc","iris.xpcs","isostore","itms","jabbe
r","jar","jms","keyparc","lastfm","lbry","ldap","ldaps","leaptofrogans","lorawan","lpa","lvlt","magnet","mailto","maps","market","matrix","message","microsoft.windows.camera","microsoft.windows.camera.multipicker","microsoft.windows.camera.picker","mid","mms","mongodb","moz","moz-extension","ms-access","ms-appinstaller","ms-browser-extension","ms-calculator","ms-drive-to","ms-enrollment","ms-excel","ms-eyecontrolspeech","ms-gamebarservices","ms-gamingoverlay","ms-getoffice","ms-help","ms-infopath","ms-inputapp","ms-lockscreencomponent-config","ms-media-stream-id","ms-meetnow","ms-mixedrealitycapture","ms-mobileplans","ms-newsandinterests","ms-officeapp","ms-people","ms-powerpoint","ms-project","ms-publisher","ms-remotedesktop-launch","ms-restoretabcompanion","ms-screenclip","ms-screensketch","ms-search","ms-search-repair","ms-secondary-screen-controller","ms-secondary-screen-setup","ms-settings","ms-settings-airplanemode","ms-settings-bluetooth","ms-settings-camera","ms-settings-cellular","ms-settings-cloudstorage","ms-settings-connectabledevices","ms-settings-displays-topology","ms-settings-emailandaccounts","ms-settings-language","ms-settings-location","ms-settings-lock","ms-settings-nfctransactions","ms-settings-notifications","ms-settings-power","ms-settings-privacy","ms-settings-proximity","ms-settings-screenrotation","ms-settings-wifi","ms-settings-workplace","ms-spd","ms-stickers","ms-sttoverlay","ms-transit-to","ms-useractivityset","ms-virtualtouchpad","ms-visio","ms-walk-to","ms-whiteboard","ms-whiteboard-cmd","ms-word","msnim","msrp","msrps","mss","mt","mtqp","mumble","mupdate","mvn","news","nfs","ni","nih","nntp","notes","num","ocf","oid","onenote","onenote-cmd","opaquelocktoken","openpgp4fpr","otpauth","palm","paparazzi","payment","payto","pkcs11","platform","pop","pres","proxy","psyc","pttp","pwid","qb","query","quic-transport","redis","rediss","reload","res","resource","rmi","rsync","rtmfp","rtmp","rtsp","rtsps","rtspu","sarif","secondlife","secret-tok
en","service","session","sftp","sgn","shc","sieve","simpleledger","simplex","sip","sips","skype","smb","smp","sms","smtp","snmp","soap.beep","soap.beeps","soldat","spiffe","spotify","ssb","ssh","starknet","steam","stun","stuns","submit","svn","swh","swid","swidpath","tag","taler","teamspeak","tel","teliaeid","telnet","tftp","things","thismessage","tip","tn3270","tool","turn","turns","tv","udp","unreal","urn","ut2004","uuid-in-package","v-event","vemmi","ventrilo","ves","view-source","vnc","vscode","vscode-insiders","vsls","w3","wcr","web3","webcal","wifi","ws","wss","wtai","wyciwyg","xcon","xcon-userid","xfire","xmlrpc.beep","xmlrpc.beeps","xmpp","xri","ymsgr","z39.50r","z39.50s"];var q=16,u=s=>{if(!c(s))throw new TypeError(`Expected String but got ${g(s)}.`);let e=[];for(let r of s)e.push(`%${r.charCodeAt(0).toString(q).toUpperCase()}`);return e.join("")},D=s=>{if(c(s))if(/^%[\dA-F]{2}$/i.test(s))s=s.toUpperCase();else throw new Error(`${s} is not a URL encoded character.`);else throw new TypeError(`Expected String but got ${g(s)}.`);let[e,r,i,a,o,d]=["&","#","<",">",'"',"'"].map(u),n;return s===e?n=`${e}amp;`:s===i?n=`${e}lt;`:s===a?n=`${e}gt;`:s===o?n=`${e}quot;`:s===d?n=`${e}${r}39;`:n=s,n},N=s=>{if(!c(s))throw new TypeError(`Expected String but got ${g(s)}.`);let e=atob(s),r=Uint8Array.from([...e].map(o=>o.charCodeAt(0))),i=new Set(A),a;return r.every(o=>i.has(o))?a=e.replace(/\s/g,u):a=s,a},x=class{#e;constructor(){this.#e=new Set(C)}get(){return[...this.#e]}has(e){return this.#e.has(e)}add(e){if(c(e)){if(/(?:java|vb)script/.test(e)||!/^[a-z][a-z0-9+\-.]*$/.test(e))throw new Error(`Invalid scheme: ${e}`)}else throw new TypeError(`Expected String but got ${g(e)}.`);return this.#e.add(e),[...this.#e]}remove(e){return this.#e.delete(e)}isURI(e){let r;if(c(e))try{let{protocol:i}=new URL(e),a=i.replace(/:$/,""),o=a.split("+");r=/^(?:ext|web)\+[a-z]+$/.test(a)||o.every(d=>this.#e.has(d))}catch{r=!1}return!!r}},v=class extends 
x{#e;#s;constructor(){super(),this.#e=0,this.#s=new Set}sanitize(e,r={allow:[],deny:[]}){if(this.#e>q)throw this.#e=0,new Error("The nesting of data URLs is too deep.");let i;if(super.isURI(e)){let{allow:a,deny:o}=r??{},{href:d,pathname:n,protocol:O}=new URL(e),w=O.replace(/:$/,""),$=w.split("+"),h=new Map([["data",!1],["file",!1]]);if(Array.isArray(a)&&a.length){let p=Object.values(a);for(let t of p)c(t)&&(t=t.trim(),/(?:java|vb)script/.test(t)?h.set(t,!1):t&&h.set(t,!0))}if(Array.isArray(o)&&o.length){let p=Object.values(o);for(let t of p)c(t)&&(t=t.trim(),t&&h.set(t,!1))}let b;for(let[p,t]of h.entries())if(b=t||w!==p&&$.every(y=>y!==p),!b)break;if(b){let[p,t,y,P,_]=["&","<",">",'"',"'"].map(u),B=/[<>"']/g,M=new RegExp(p,"g"),Y=new RegExp(`(${t}|${y}|${P}|${_})`,"g"),f,k=d;if($.includes("data")){let[F,z]=n.split(","),E=F.split(";");if(E.pop()==="base64"){let m=N(z);if(m!==z){if(/data:[^,]*,/.test(m)){let H=/data:[^,]*,[^"]+/g,S=/data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/,X=[...m.matchAll(H)].reverse();for(let R of X){let[l]=R;S.test(l)&&([l]=S.exec(l)),this.#e++,this.#s.add(l);let T=this.sanitize(l,{allow:["data"]});if(T){let{index:j}=R,[Z,G]=[m.substring(0,j),m.substring(j+l.length)];m=`${Z}${T}${G}`}}}this.#s.has(e)?this.#s.delete(e):f=!0,k=`${w}:${E.join(";")},${m}`}else f=!0}else this.#s.has(e)?this.#s.delete(e):f=!0}else f=!0;i=k.replace(B,u).replace(M,D),f&&(i=i.replace(Y,D),this.#e=0)}}return i||null}},U=new v,I=s=>U.isURI(s),Q=async s=>await I(s),L=(s,e)=>U.sanitize(s,e??{allow:[],deny:[]}),V=async(s,e)=>await L(s,e);export{U as default,Q as isURI,I as isURISync,V as sanitizeURL,L as sanitizeURLSync}; | ||
var f=t=>Object.prototype.toString.call(t).slice(8,-1),m=t=>typeof t=="string"||t instanceof String;var _=[7,8,9,10,11,12,13,27,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255];var G=["aaa","aaas","about","acap","acct","acd","acr","adiumxtra","adt","afp","afs","aim","amss","android","appdata","apt","ar","ark","attachment","aw","barion","beshare","bitcoin","bitcoincash","blob","bolo","browserext","cabal","calculator","callto","cap","cast","casts","chrome","chrome-extension","cid","coap","coaps","com-eventbrite-attendee","content","content-type","crid","cstr","cvs","dab","dat","data","dav","diaspora","dict","did","dis","dlna-playcontainer","dlna-playsingle","dns","dntp","doi","dpp","drm","dtmi","dtn","dvb","dvx","dweb","ed2k","eid","elsi","embedded","ens","ethereum","example","facetime","feed","feedready","fido","file","finger","first-run-pen-experience","fish","fm","ftp","fuchsia-pkg","geo","gg","git","gitoid","gizmoproject","go","gopher","graph","gtalk","h323","ham","hcap","hcp","http","https","hxxp","hxxps","hydrazone","hyper","iax","icap","icon","im","imap","info","iotdisco","ipfs","ipn","ipns","ipp","ipps","irc","irc6","ircs","iris","iris.beep","iris.lwz","iris.xpc","iris.xpcs","isostore","itms","jabbe
r","jar","jms","keyparc","lastfm","lbry","ldap","ldaps","leaptofrogans","lorawan","lpa","lvlt","magnet","mailto","maps","market","matrix","message","microsoft.windows.camera","microsoft.windows.camera.multipicker","microsoft.windows.camera.picker","mid","mms","mongodb","moz","moz-extension","ms-access","ms-appinstaller","ms-browser-extension","ms-calculator","ms-drive-to","ms-enrollment","ms-excel","ms-eyecontrolspeech","ms-gamebarservices","ms-gamingoverlay","ms-getoffice","ms-help","ms-infopath","ms-inputapp","ms-lockscreencomponent-config","ms-media-stream-id","ms-meetnow","ms-mixedrealitycapture","ms-mobileplans","ms-newsandinterests","ms-officeapp","ms-people","ms-powerpoint","ms-project","ms-publisher","ms-remotedesktop-launch","ms-restoretabcompanion","ms-screenclip","ms-screensketch","ms-search","ms-search-repair","ms-secondary-screen-controller","ms-secondary-screen-setup","ms-settings","ms-settings-airplanemode","ms-settings-bluetooth","ms-settings-camera","ms-settings-cellular","ms-settings-cloudstorage","ms-settings-connectabledevices","ms-settings-displays-topology","ms-settings-emailandaccounts","ms-settings-language","ms-settings-location","ms-settings-lock","ms-settings-nfctransactions","ms-settings-notifications","ms-settings-power","ms-settings-privacy","ms-settings-proximity","ms-settings-screenrotation","ms-settings-wifi","ms-settings-workplace","ms-spd","ms-stickers","ms-sttoverlay","ms-transit-to","ms-useractivityset","ms-virtualtouchpad","ms-visio","ms-walk-to","ms-whiteboard","ms-whiteboard-cmd","ms-word","msnim","msrp","msrps","mss","mt","mtqp","mumble","mupdate","mvn","news","nfs","ni","nih","nntp","notes","num","ocf","oid","onenote","onenote-cmd","opaquelocktoken","openpgp4fpr","otpauth","palm","paparazzi","payment","payto","pkcs11","platform","pop","pres","proxy","psyc","pttp","pwid","qb","query","quic-transport","redis","rediss","reload","res","resource","rmi","rsync","rtmfp","rtmp","rtsp","rtsps","rtspu","sarif","secondlife","secret-tok
en","service","session","sftp","sgn","shc","sieve","simpleledger","simplex","sip","sips","skype","smb","smp","sms","smtp","snmp","soap.beep","soap.beeps","soldat","spiffe","spotify","ssb","ssh","starknet","steam","stun","stuns","submit","svn","swh","swid","swidpath","tag","taler","teamspeak","tel","teliaeid","telnet","tftp","things","thismessage","tip","tn3270","tool","turn","turns","tv","udp","unreal","urn","ut2004","uuid-in-package","v-event","vemmi","ventrilo","ves","view-source","vnc","vscode","vscode-insiders","vsls","w3","wcr","web3","webcal","wifi","ws","wss","wtai","wyciwyg","xcon","xcon-userid","xfire","xmlrpc.beep","xmlrpc.beeps","xmpp","xri","ymsgr","z39.50r","z39.50s"];var u=16,ee=/[<>"'\s]/g,te=/data:[^,]*,[^"]+/g,M=/data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/,se=/data:[^,]*,/,re=/&#(x(?:00)?[\dA-F]{2}|0?\d{1,3});?/ig,ae=/^[a-z][a-z0-9+\-.]*$/,ie=/^(?:ext|web)\+[a-z]+$/,v=/(?:java|vb)script/,E=t=>{if(!m(t))throw new TypeError(`Expected String but got ${f(t)}.`);let e=[];for(let i of t)e.push(`%${i.charCodeAt(0).toString(u).toUpperCase()}`);return e.join("")},O=t=>{if(m(t))if(/^%[\dA-F]{2}$/i.test(t))t=t.toUpperCase();else throw new Error(`${t} is not a URL encoded character.`);else throw new TypeError(`Expected String but got ${f(t)}.`);let[e,i,o,s,n,c]=["&","#","<",">",'"',"'"].map(E),r;return t===e?r=`${e}amp;`:t===o?r=`${e}lt;`:t===s?r=`${e}gt;`:t===n?r=`${e}quot;`:t===c?r=`${e}${i}39;`:r=t,r},oe=t=>{if(!m(t))throw new TypeError(`Expected String but got ${f(t)}.`);let e=atob(t),i=Uint8Array.from([...e].map(n=>n.charCodeAt(0))),o=new Set(_),s;return i.every(n=>o.has(n))?s=e.replace(/\s/g,E):s=t,s},ne=t=>{if(!m(t))throw new TypeError(`Expected String but got ${f(t)}.`);let e=decodeURIComponent(t);if(/&#/.test(e)){let i=new Set(_),o=[...e.matchAll(re)].reverse();for(let s of 
o){let[n,c]=s,r;if(/^[\d]+$/.test(c)?r=parseInt(c):c.startsWith("x")&&(r=parseInt(`0${c}`,u)),Number.isInteger(r)){let{index:w}=s,[b,y]=[e.substring(0,w),e.substring(w+n.length)];i.has(r)?e=`${b}${String.fromCharCode(r)}${y}`:r<u*u&&(e=`${b}${y}`)}}}return e},C=class{#e;constructor(){this.#e=new Set(G)}get(){return[...this.#e]}has(e){return this.#e.has(e)}add(e){if(m(e)){if(v.test(e)||!ae.test(e))throw new Error(`Invalid scheme: ${e}`)}else throw new TypeError(`Expected String but got ${f(e)}.`);return this.#e.add(e),[...this.#e]}remove(e){return this.#e.delete(e)}isURI(e){let i;if(m(e))try{let{protocol:o}=new URL(e),s=o.replace(/:$/,""),n=s.split("+");i=!v.test(s)&&ie.test(s)||n.every(c=>this.#e.has(c))}catch{i=!1}return!!i}},j=class extends C{#e;#t;constructor(){super(),this.#e=0,this.#t=new Set}sanitize(e,i={allow:[],deny:[]}){if(this.#e>u)throw this.#e=0,new Error("The nesting of data URLs is too deep.");let o;if(super.isURI(e)){let{allow:s,deny:n}=i??{},{hash:c,href:r,pathname:w,protocol:b,search:y}=new URL(e),$=b.replace(/:$/,""),I=$.split("+"),U=new Map([["data",!1],["file",!1],["javascrpt",!1],["vbscript",!1]]);if(Array.isArray(s)&&s.length){let l=Object.values(s);for(let a of l)m(a)&&(a=a.trim(),v.test(a)||U.set(a,!0))}if(Array.isArray(n)&&n.length){let l=Object.values(n);for(let a of l)m(a)&&(a=a.trim(),a&&U.set(a,!1))}let R;for(let[l,a]of U.entries())if(R=a||$!==l&&I.every(S=>S!==l),!R)break;if(R){let[l,a,S,N,Y]=["&","<",">",'"',"'"].map(E),B=new RegExp(l,"g"),W=new RegExp(`(${a}|${S}|${N}|${Y})`,"g"),h,g=r;if(I.includes("data")){let[X,...Z]=w.split(","),k=`${Z.join(",")}${y}${c}`,x=X.split(";"),p=k;if(x[x.length-1]==="base64")x.pop(),p=oe(k);else try{let z=ne(p),{protocol:A}=new URL(z.trim());A.replace(/:$/,"").split("+").some(d=>v.test(d))&&(g="")}catch{}let L=se.test(p);if(p!==k||L){if(L){let A=[...p.matchAll(te)].reverse();for(let T of A){let[d]=T;M.test(d)&&([d]=M.exec(d)),this.#e++,this.#t.add(d);let 
P=this.sanitize(d,{allow:["data"]});if(P){let{index:q}=T,[J,K]=[p.substring(0,q),p.substring(q+d.length)];p=`${J}${P}${K}`}}this.#t.has(e)?this.#t.delete(e):h=!0}else this.#t.has(e)?this.#t.delete(e):h=!0;g=`${$}:${x.join(";")},${p}`}else this.#t.has(e)?this.#t.delete(e):h=!0}else h=!0;g?(o=g.replace(ee,E).replace(B,O),h&&(o=o.replace(W,O),this.#e=0)):(o=g,this.#e=0)}}return o||null}},D=new j,H=t=>D.isURI(t),ce=async t=>await H(t),F=(t,e)=>D.sanitize(t,e??{allow:[],deny:[]}),pe=async(t,e)=>await F(t,e);export{D as default,ce as isURI,H as isURISync,pe as sanitizeURL,F as sanitizeURLSync}; | ||
//# sourceMappingURL=url-sanitizer.min.js.map |
@@ -23,3 +23,3 @@ { | ||
"scripts": { | ||
"build": "npm run include && npm run lint && npm test && npm run bundle", | ||
"build": "npm run include && npm run char && npm run lint && npm test && npm run bundle", | ||
"bundle": "npm-run-all -s bundle-*", | ||
@@ -54,3 +54,3 @@ "bundle-src": "esbuild --format=esm --platform=browser --outfile=./dist/url-sanitizer.js --bundle --sourcemap ./index.js", | ||
}, | ||
"version": "0.4.1" | ||
"version": "0.4.2" | ||
} |
@@ -35,3 +35,2 @@ # URL Sanitizer | ||
* `data` and `file` schemes must be explicitly allowed. | ||
* `javascript` and `vbscript` schemes can not be allowed. | ||
@@ -41,7 +40,7 @@ ### Parameters | ||
* `url` **[string][1]** URL input | ||
* `opt` **[object][3]** options | ||
* `opt.allow` **[Array][4]<[string][1]>** array of allowed schemes | ||
* `opt.deny` **[Array][4]<[string][1]>** array of denied schemes | ||
* `opt` **[object][3]** Options | ||
* `opt.allow` **[Array][4]<[string][1]>** Array of allowed schemes, e.g. `['data']` | ||
* `opt.deny` **[Array][4]<[string][1]>** Array of denied schemes, e.g. `['web+foo']` | ||
Returns **[Promise][5]<[string][1]?>** sanitized URL, `null`able | ||
Returns **[Promise][5]<[string][1]?>** Sanitized URL, `null`able | ||
@@ -64,2 +63,7 @@ ```javascript | ||
// -> 'data:text/html,<script>alert(1);</script>' | ||
const res4 = await sanitizeURL('web+foo://example.com', { | ||
deny: ['web+foo'] | ||
}); | ||
// -> null | ||
``` | ||
@@ -79,3 +83,3 @@ | ||
Returns **[Promise][5]<[boolean][2]>** result | ||
Returns **[Promise][5]<[boolean][2]>** Result | ||
* Always `true` for `web+*` and `ext+*` schemes | ||
@@ -103,2 +107,4 @@ | ||
Instance of the sanitizer. | ||
### urlSanitizer.get() | ||
@@ -110,3 +116,3 @@ | ||
Returns **[Array][4]<[string][1]>** array of registered URI schemes | ||
Returns **[Array][4]<[string][1]>** Array of registered URI schemes | ||
@@ -124,5 +130,5 @@ ```javascript | ||
* `scheme` **[string][1]** scheme | ||
* `scheme` **[string][1]** Scheme | ||
Returns **[boolean][2]** result | ||
Returns **[boolean][2]** Result | ||
@@ -144,5 +150,5 @@ ```javascript | ||
* `scheme` **[string][1]** scheme | ||
* `scheme` **[string][1]** Scheme | ||
Returns **[Array][4]<[string][1]>** array of registered URI schemes | ||
Returns **[Array][4]<[string][1]>** Array of registered URI schemes | ||
@@ -166,5 +172,5 @@ ```javascript | ||
* `scheme` **[string][1]** scheme | ||
* `scheme` **[string][1]** Scheme | ||
Returns **[boolean][2]** result | ||
Returns **[boolean][2]** Result | ||
* `true` if the scheme is successfully removed, `false` otherwise. | ||
@@ -171,0 +177,0 @@ |
@@ -12,2 +12,10 @@ /** | ||
// Shared constants.
// HEX: the number 16, passed as the radix when parsing hexadecimal digits.
// REG_CHARS: angle brackets, double and single quotes, and whitespace (global).
// REG_DATA_URL: a data URL header, its comma, then every following character
//   that is not a double quote (global).
// REG_DATA_URL_BASE64: a data URL whose header ends in base64, followed by a
//   run of base64 / base64url alphabet characters.
// REG_DATA_URL_HEADER: only the data URL header up to and including the comma.
// REG_NUM_REF: an ampersand-hash numeric character reference, either x plus an
//   optional 00 plus two hex digits, or an optional 0 plus one to three decimal
//   digits, with an optional trailing semicolon (case-insensitive, global).
// REG_SCHEME: a whole string made of a lowercase letter followed by lowercase
//   letters, digits, plus, hyphen or dot.
// REG_SCHEME_CUSTOM: a whole string of the form ext+name or web+name, where
//   name is lowercase letters only.
// REG_SCRIPT: the substring javascript or vbscript anywhere in the input
//   (unanchored).
const HEX = 16; | |
const REG_CHARS = /[<>"'\s]/g; | |
const REG_DATA_URL = /data:[^,]*,[^"]+/g; | |
const REG_DATA_URL_BASE64 = /data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/; | |
const REG_DATA_URL_HEADER = /data:[^,]*,/; | |
const REG_NUM_REF = /&#(x(?:00)?[\dA-F]{2}|0?\d{1,3});?/ig; | |
const REG_SCHEME = /^[a-z][a-z0-9+\-.]*$/; | |
const REG_SCHEME_CUSTOM = /^(?:ext|web)\+[a-z]+$/; | |
const REG_SCRIPT = /(?:java|vb)script/; | |
@@ -89,2 +97,41 @@ /** | ||
/**
 * parse URL encoded numeric character reference
 *
 * The input is first percent-decoded with `decodeURIComponent`. Every
 * reference matched by `REG_NUM_REF` is then handled as follows:
 *   - if its code is listed in the text character table, it is replaced by
 *     the corresponding character;
 *   - otherwise, if its code is below 256 (`HEX * HEX`), it is removed;
 *   - otherwise it is left untouched.
 *
 * @param {string} str - string
 * @returns {string} - parsed string
 * @throws {TypeError} - if `str` is not a string
 * @throws {URIError} - propagated from `decodeURIComponent` on malformed input
 */
export const parseUrlEncodedNumCharRef = str => {
  if (!isString(str)) {
    throw new TypeError(`Expected String but got ${getType(str)}.`);
  }
  let res = decodeURIComponent(str);
  if (/&#/.test(res)) {
    const textChars = new Set(textCharTable);
    // Process matches from right to left so that the indices of the
    // not-yet-processed matches stay valid while `res` is rewritten.
    const items = [...res.matchAll(REG_NUM_REF)].reverse();
    for (const item of items) {
      const [ref, value] = item;
      let num;
      if (/^\d+$/.test(value)) {
        num = Number.parseInt(value, 10);
      } else if (/^x/i.test(value)) {
        // REG_NUM_REF is case-insensitive, so the hex marker may be `x` or
        // `X`; with radix 16, parseInt accepts both `0x` and `0X` prefixes.
        num = Number.parseInt(`0${value}`, HEX);
      }
      if (Number.isInteger(num)) {
        const { index } = item;
        const preNum = res.substring(0, index);
        const postNum = res.substring(index + ref.length);
        if (textChars.has(num)) {
          res = `${preNum}${String.fromCharCode(num)}${postNum}`;
        } else if (num < HEX * HEX) {
          res = `${preNum}${postNum}`;
        }
      }
    }
  }
  return res;
};
/** | ||
* URI schemes | ||
@@ -136,4 +183,3 @@ * | ||
throw new TypeError(`Expected String but got ${getType(scheme)}.`); | ||
} else if (/(?:java|vb)script/.test(scheme) || | ||
!/^[a-z][a-z0-9+\-.]*$/.test(scheme)) { | ||
} else if (REG_SCRIPT.test(scheme) || !REG_SCHEME.test(scheme)) { | ||
throw new Error(`Invalid scheme: ${scheme}`); | ||
@@ -168,3 +214,3 @@ } | ||
const schemeParts = scheme.split('+'); | ||
res = /^(?:ext|web)\+[a-z]+$/.test(scheme) || | ||
res = (!REG_SCRIPT.test(scheme) && REG_SCHEME_CUSTOM.test(scheme)) || | ||
schemeParts.every(s => this.#schemes.has(s)); | ||
@@ -213,6 +259,11 @@ } catch (e) { | ||
const { allow, deny } = opt ?? {}; | ||
const { href, pathname, protocol } = new URL(url); | ||
const { hash, href, pathname, protocol, search } = new URL(url); | ||
const scheme = protocol.replace(/:$/, ''); | ||
const schemeParts = scheme.split('+'); | ||
const schemeMap = new Map([['data', false], ['file', false]]); | ||
const schemeMap = new Map([ | ||
['data', false], | ||
['file', false], | ||
['javascrpt', false], | ||
['vbscript', false] | ||
]); | ||
if (Array.isArray(allow) && allow.length) { | ||
@@ -223,5 +274,3 @@ const items = Object.values(allow); | ||
item = item.trim(); | ||
if (/(?:java|vb)script/.test(item)) { | ||
schemeMap.set(item, false); | ||
} else if (item) { | ||
if (!REG_SCRIPT.test(item)) { | ||
schemeMap.set(item, true); | ||
@@ -253,3 +302,2 @@ } | ||
['&', '<', '>', '"', "'"].map(getUrlEncodedString); | ||
const regChars = /[<>"']/g; | ||
const regAmp = new RegExp(amp, 'g'); | ||
@@ -261,33 +309,44 @@ const regEncodedChars = | ||
if (schemeParts.includes('data')) { | ||
const [header, data] = pathname.split(','); | ||
const [header, ...body] = pathname.split(','); | ||
const data = `${body.join(',')}${search}${hash}`; | ||
const mediaType = header.split(';'); | ||
const isBase64 = mediaType.pop() === 'base64'; | ||
if (isBase64) { | ||
let parsedData = parseBase64(data); | ||
if (parsedData !== data) { | ||
if (/data:[^,]*,/.test(parsedData)) { | ||
const regDataUrl = /data:[^,]*,[^"]+/g; | ||
const regBase64DataUrl = /data:[^,]*;?base64,[\dA-Za-z+/\-_=]+/; | ||
const matchedDataUrls = parsedData.matchAll(regDataUrl); | ||
const items = [...matchedDataUrls].reverse(); | ||
for (const item of items) { | ||
let [dataUrl] = item; | ||
if (regBase64DataUrl.test(dataUrl)) { | ||
[dataUrl] = regBase64DataUrl.exec(dataUrl); | ||
} | ||
this.#nest++; | ||
this.#recurse.add(dataUrl); | ||
const parsedDataUrl = this.sanitize(dataUrl, { | ||
allow: ['data'] | ||
}); | ||
if (parsedDataUrl) { | ||
const { index } = item; | ||
const [beforeDataUrl, afterDataUrl] = [ | ||
parsedData.substring(0, index), | ||
parsedData.substring(index + dataUrl.length) | ||
]; | ||
parsedData = | ||
`${beforeDataUrl}${parsedDataUrl}${afterDataUrl}`; | ||
} | ||
let parsedData = data; | ||
if (mediaType[mediaType.length - 1] === 'base64') { | ||
mediaType.pop(); | ||
parsedData = parseBase64(data); | ||
} else { | ||
try { | ||
const decodedData = parseUrlEncodedNumCharRef(parsedData); | ||
const { protocol: dataScheme } = new URL(decodedData.trim()); | ||
const dataSchemeParts = dataScheme.replace(/:$/, '').split('+'); | ||
if (dataSchemeParts.some(s => REG_SCRIPT.test(s))) { | ||
urlToSanitize = ''; | ||
} | ||
} catch (e) { | ||
// fall through | ||
} | ||
} | ||
const containsDataUrl = REG_DATA_URL_HEADER.test(parsedData); | ||
if (parsedData !== data || containsDataUrl) { | ||
if (containsDataUrl) { | ||
const matchedDataUrls = parsedData.matchAll(REG_DATA_URL); | ||
const items = [...matchedDataUrls].reverse(); | ||
for (const item of items) { | ||
let [dataUrl] = item; | ||
if (REG_DATA_URL_BASE64.test(dataUrl)) { | ||
[dataUrl] = REG_DATA_URL_BASE64.exec(dataUrl); | ||
} | ||
this.#nest++; | ||
this.#recurse.add(dataUrl); | ||
const parsedDataUrl = this.sanitize(dataUrl, { | ||
allow: ['data'] | ||
}); | ||
if (parsedDataUrl) { | ||
const { index } = item; | ||
const [preDataUrl, postDataUrl] = [ | ||
parsedData.substring(0, index), | ||
parsedData.substring(index + dataUrl.length) | ||
]; | ||
parsedData = `${preDataUrl}${parsedDataUrl}${postDataUrl}`; | ||
} | ||
} | ||
@@ -299,6 +358,8 @@ if (this.#recurse.has(url)) { | ||
} | ||
urlToSanitize = `${scheme}:${mediaType.join(';')},${parsedData}`; | ||
} else if (this.#recurse.has(url)) { | ||
this.#recurse.delete(url); | ||
} else { | ||
escapeHtml = true; | ||
} | ||
urlToSanitize = `${scheme}:${mediaType.join(';')},${parsedData}`; | ||
} else if (this.#recurse.has(url)) { | ||
@@ -312,7 +373,12 @@ this.#recurse.delete(url); | ||
} | ||
sanitizedUrl = urlToSanitize.replace(regChars, getUrlEncodedString) | ||
.replace(regAmp, escapeUrlEncodedHtmlChars); | ||
if (escapeHtml) { | ||
sanitizedUrl = | ||
sanitizedUrl.replace(regEncodedChars, escapeUrlEncodedHtmlChars); | ||
if (urlToSanitize) { | ||
sanitizedUrl = urlToSanitize.replace(REG_CHARS, getUrlEncodedString) | ||
.replace(regAmp, escapeUrlEncodedHtmlChars); | ||
if (escapeHtml) { | ||
sanitizedUrl = | ||
sanitizedUrl.replace(regEncodedChars, escapeUrlEncodedHtmlChars); | ||
this.#nest = 0; | ||
} | ||
} else { | ||
sanitizedUrl = urlToSanitize; | ||
this.#nest = 0; | ||
@@ -319,0 +385,0 @@ } |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
116436
1985
192