Comparing version
@@ -0,35 +1,424 @@ | ||
// Copyright Joyent, Inc. and other Node contributors. | ||
// | ||
// Permission is hereby granted, free of charge, to any person obtaining a | ||
// copy of this software and associated documentation files (the | ||
// "Software"), to deal in the Software without restriction, including | ||
// without limitation the rights to use, copy, modify, merge, publish, | ||
// distribute, sublicense, and/or sell copies of the Software, and to permit | ||
// persons to whom the Software is furnished to do so, subject to the | ||
// following conditions: | ||
// | ||
// The above copyright notice and this permission notice shall be included | ||
// in all copies or substantial portions of the Software. | ||
// | ||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN | ||
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
// USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
// Browser stand-in for Node's punycode module: encode() is the identity
// function, so host labels are passed through unchanged rather than being
// punycode-encoded.
var punycode = { encode : function (s) { return s; } };

// Minimal String.prototype.trim polyfill for engines that lack it.
// U+FEFF (BOM) and U+00A0 (NBSP) are listed explicitly because the \s class
// in the legacy engines that need this polyfill does not reliably include
// them, while the native trim() does strip them.
if (!String.prototype.trim) {
  String.prototype.trim = function () {
    return this.replace(/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, '');
  };
}
var isObject = require('../lib/util').isObject; | ||
var isString = require('../lib/util').isString; | ||
var keys = require('../lib/util').keys; | ||
var substr = require('../lib/util').substr; | ||
exports.parse = function(uri){ | ||
var a = document.createElement('a'); | ||
a.href = uri; | ||
// "a" has these properties: protocol, hostname, port, | ||
// pathname, search, hash, host | ||
var fields = ['protocol', 'hostname', 'port', 'pathname', | ||
'search', 'hash', 'host']; | ||
output = {}; | ||
for( var i = 0; i < fields.length; i++){ | ||
output[fields[i]] = a[fields[i]].toString(); | ||
exports.parse = urlParse; | ||
exports.format = urlFormat; | ||
exports.Url = Url; | ||
/**
 * Container for the pieces of a parsed URL.
 *
 * Every field starts out as null; Url.prototype.parse() fills in the ones
 * that are present in the input and Url.prototype.format() reads them back
 * to build a URL string.
 *
 * @constructor
 */
function Url() {
  this.protocol = null;
  this.slashes = null;
  this.auth = null;
  this.host = null;
  this.port = null;
  this.hostname = null;
  this.hash = null;
  this.search = null;
  this.query = null;
  this.pathname = null;
  this.path = null;
  this.href = null;
}
// Reference: RFC 3986, RFC 1808, RFC 2396

// define these here so at least they only have to be
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9.+-]+:)/i,
    portPattern = /:[0-9]*$/,

    // RFC 2396: characters reserved for delimiting URLs.
    // We actually just auto-escape these.
    delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'],

    // RFC 2396: characters not allowed for various reasons.
    // ('`' appears both here and in delims; the duplicate is harmless.)
    unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims),

    // Allowed by RFCs, but cause of XSS attacks.  Always escape these.
    autoEscape = ['\''].concat(unwise),

    // Characters that are never ever allowed in a hostname.
    // Note that any invalid chars are also handled, but these
    // are the ones that are *expected* to be seen, so we fast-path
    // them.
    nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape),
    hostEndingChars = ['/', '?', '#'],
    hostnameMaxLen = 255,
    hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/,
    hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/,

    // protocols that can allow "unsafe" and "unwise" chars.
    // (keyed both with and without the trailing ':')
    unsafeProtocol = {
      'javascript': true,
      'javascript:': true
    },

    // protocols that never have a hostname.
    hostlessProtocol = {
      'javascript': true,
      'javascript:': true
    },

    // protocols that always contain a // bit.
    slashedProtocol = {
      'http': true,
      'https': true,
      'ftp': true,
      'gopher': true,
      'file': true,
      'http:': true,
      'https:': true,
      'ftp:': true,
      'gopher:': true,
      'file:': true
    },
    querystring = require('querystring');
/**
 * Parse a URL string into a Url object.
 *
 * If `url` is already a Url instance it is returned as-is (not copied).
 * Otherwise a fresh Url is created and its parse() method does the work,
 * so any error parse() throws propagates to the caller.
 *
 * @param {string|Url} url URL string to parse, or an already parsed Url.
 * @param {boolean} [parseQueryString] Forwarded to Url.prototype.parse.
 * @param {boolean} [slashesDenoteHost] Forwarded to Url.prototype.parse.
 * @return {Url} The parsed (or passed-through) Url object.
 */
function urlParse(url, parseQueryString, slashesDenoteHost) {
  if (url && isObject(url) && url instanceof Url) return url;

  var u = new Url();
  u.parse(url, parseQueryString, slashesDenoteHost);
  return u;
}
/**
 * Parse `url` and store its components on this Url instance.
 *
 * @param {string} url The URL to parse. Leading/trailing whitespace is
 *     trimmed first.
 * @param {boolean} [parseQueryString] When truthy, `query` is run through
 *     querystring.parse(); if the URL has no '?', `search` becomes '' and
 *     `query` becomes {}.
 * @param {boolean} [slashesDenoteHost] When truthy, a leading '//' is
 *     checked for even when the URL has no protocol.
 * @return {Url} this, with `href` rebuilt via this.format().
 * @throws {TypeError} If `url` is not a string (as judged by isString).
 */
Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
  if (!isString(url)) {
    throw new TypeError("Parameter 'url' must be a string, not " + typeof url);
  }

  var rest = url;

  // trim before proceeding.
  // This is to support parse stuff like "  http://foo.com  \n"
  rest = rest.trim();

  var proto = protocolPattern.exec(rest);
  if (proto) {
    proto = proto[0];
    var lowerProto = proto.toLowerCase();
    this.protocol = lowerProto;
    rest = rest.substr(proto.length);
  }

  // figure out if it's got a host
  // user@server is *always* interpreted as a hostname, and url
  // resolution will treat //foo/bar as host=foo,path=bar because that's
  // how the browser resolves relative URLs.
  var slashes;
  if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) {
    slashes = rest.substr(0, 2) === '//';
    if (slashes && !(proto && hostlessProtocol[proto])) {
      rest = rest.substr(2);
      this.slashes = true;
    }
  }

  if (!hostlessProtocol[proto] &&
      (slashes || (proto && !slashedProtocol[proto]))) {

    // there's a hostname.
    // the first instance of /, ?, ;, or # ends the host.
    //
    // If there is an @ in the hostname, then non-host chars *are* allowed
    // to the left of the last @ sign, unless some host-ending character
    // comes *before* the @-sign.
    // URLs are obnoxious.
    //
    // ex:
    // http://a@b@c/ => user:a@b host:c
    // http://a@b?@c => user:a host:c path:/?@c

    // v0.12 TODO(isaacs): This is not quite how Chrome does things.
    // Review our test case against browsers more comprehensively.

    // find the first instance of any hostEndingChars
    var hostEnd = -1;
    for (var i = 0; i < hostEndingChars.length; i++) {
      var hec = rest.indexOf(hostEndingChars[i]);
      if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
        hostEnd = hec;
    }

    // at this point, either we have an explicit point where the
    // auth portion cannot go past, or the last @ char is the decider.
    var auth, atSign;
    if (hostEnd === -1) {
      // atSign can be anywhere.
      atSign = rest.lastIndexOf('@');
    } else {
      // atSign must be in auth portion.
      // http://a@b/c@d => host:b auth:a path:/c@d
      atSign = rest.lastIndexOf('@', hostEnd);
    }

    // Now we have a portion which is definitely the auth.
    // Pull that off.
    if (atSign !== -1) {
      auth = rest.slice(0, atSign);
      rest = rest.slice(atSign + 1);
      this.auth = decodeURIComponent(auth);
    }

    // the host is the remaining to the left of the first non-host char
    hostEnd = -1;
    for (var i = 0; i < nonHostChars.length; i++) {
      var hec = rest.indexOf(nonHostChars[i]);
      if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
        hostEnd = hec;
    }
    // if we still have not hit it, then the entire thing is a host.
    if (hostEnd === -1)
      hostEnd = rest.length;

    this.host = rest.slice(0, hostEnd);
    rest = rest.slice(hostEnd);

    // pull out port.
    this.parseHost();

    // we've indicated that there is a hostname,
    // so even if it's empty, it has to be present.
    this.hostname = this.hostname || '';

    // if hostname begins with [ and ends with ]
    // assume that it's an IPv6 address.
    var ipv6Hostname = this.hostname[0] === '[' &&
        this.hostname[this.hostname.length - 1] === ']';

    // validate a little.
    if (!ipv6Hostname) {
      var hostparts = this.hostname.split(/\./);
      for (var i = 0, l = hostparts.length; i < l; i++) {
        var part = hostparts[i];
        if (!part) continue;
        if (!part.match(hostnamePartPattern)) {
          var newpart = '';
          for (var j = 0, k = part.length; j < k; j++) {
            if (part.charCodeAt(j) > 127) {
              // we replace non-ASCII char with a temporary placeholder
              // we need this to make sure size of hostname is not
              // broken by replacing non-ASCII by nothing
              newpart += 'x';
            } else {
              newpart += part[j];
            }
          }
          // we test again with ASCII char only
          if (!newpart.match(hostnamePartPattern)) {
            var validParts = hostparts.slice(0, i);
            var notHost = hostparts.slice(i + 1);
            var bit = part.match(hostnamePartStart);
            if (bit) {
              validParts.push(bit[1]);
              notHost.unshift(bit[2]);
            }
            if (notHost.length) {
              rest = '/' + notHost.join('.') + rest;
            }
            this.hostname = validParts.join('.');
            break;
          }
        }
      }
    }

    if (this.hostname.length > hostnameMaxLen) {
      this.hostname = '';
    } else {
      // hostnames are always lower case.
      this.hostname = this.hostname.toLowerCase();
    }

    if (!ipv6Hostname) {
      // IDNA Support: Returns a puny coded representation of "domain".
      // It only converts the part of the domain name that
      // has non ASCII characters. I.e. it doesn't matter if
      // you call it with a domain that already is in ASCII.
      var domainArray = this.hostname.split('.');
      var newOut = [];
      for (var i = 0; i < domainArray.length; ++i) {
        var s = domainArray[i];
        newOut.push(s.match(/[^A-Za-z0-9_-]/) ?
            'xn--' + punycode.encode(s) : s);
      }
      this.hostname = newOut.join('.');
    }

    var p = this.port ? ':' + this.port : '';
    var h = this.hostname || '';
    this.host = h + p;
    // Intermediate value only: href is rebuilt from scratch by
    // this.format() at the end of this function.
    this.href += this.host;

    // strip [ and ] from the hostname
    // the host field still retains them, though
    if (ipv6Hostname) {
      this.hostname = this.hostname.substr(1, this.hostname.length - 2);
      if (rest[0] !== '/') {
        rest = '/' + rest;
      }
    }
  }

  // now rest is set to the post-host stuff.
  // chop off any delim chars.
  // (lowerProto is undefined when no protocol matched, so the escaping
  // below also runs for protocol-less input.)
  if (!unsafeProtocol[lowerProto]) {

    // First, make 100% sure that any "autoEscape" chars get
    // escaped, even if encodeURIComponent doesn't think they
    // need to be.
    for (var i = 0, l = autoEscape.length; i < l; i++) {
      var ae = autoEscape[i];
      var esc = encodeURIComponent(ae);
      if (esc === ae) {
        esc = escape(ae);
      }
      rest = rest.split(ae).join(esc);
    }
  }

  // chop off from the tail first.
  var hash = rest.indexOf('#');
  if (hash !== -1) {
    // got a fragment string.
    this.hash = rest.substr(hash);
    rest = rest.slice(0, hash);
  }
  var qm = rest.indexOf('?');
  if (qm !== -1) {
    this.search = rest.substr(qm);
    this.query = rest.substr(qm + 1);
    if (parseQueryString) {
      this.query = querystring.parse(this.query);
    }
    rest = rest.slice(0, qm);
  } else if (parseQueryString) {
    // no query string, but parseQueryString still requested
    this.search = '';
    this.query = {};
  }
  if (rest) this.pathname = rest;
  if (slashedProtocol[lowerProto] &&
      this.hostname && !this.pathname) {
    this.pathname = '/';
  }

  //to support http.request
  if (this.pathname || this.search) {
    var p = this.pathname || '';
    var s = this.search || '';
    this.path = p + s;
  }

  // finally, reconstruct the href based on what has been validated.
  this.href = this.format();
  return this;
};
var getAuth = function(uri, hostname){ | ||
var prefix = uri.slice(0, uri.indexOf(hostname)); | ||
if (prefix.indexOf('@') === -1){ | ||
return ''; | ||
// format a parsed object into a url string | ||
/**
 * Format a parsed URL object (or a URL string) into a URL string.
 *
 * @param {string|Url|Object} obj A URL string, a Url instance, or a plain
 *     object carrying Url-style fields (protocol, host, pathname, ...).
 * @return {string} The formatted URL.
 */
function urlFormat(obj) {
  // ensure it's an object, and not a string url.
  // If it's an obj, this is a no-op.
  // this way, you can call url_format() on strings
  // to clean up potentially wonky urls.
  if (isString(obj)) obj = urlParse(obj);
  // Plain objects do not inherit format(), so borrow it from the prototype.
  if (!(obj instanceof Url)) return Url.prototype.format.call(obj);
  return obj.format();
}
/**
 * Build a URL string from the fields currently set on this object.
 *
 * `host` takes precedence over `hostname` + `port`, and `search` takes
 * precedence over a `query` object. `href` and `path` are not read.
 *
 * @return {string} protocol + host + pathname + search + hash.
 */
Url.prototype.format = function() {
  var auth = this.auth || '';
  if (auth) {
    auth = encodeURIComponent(auth);
    // Restore only the first encoded colon.
    auth = auth.replace(/%3A/i, ':');
    auth += '@';
  }

  var protocol = this.protocol || '',
      pathname = this.pathname || '',
      hash = this.hash || '',
      host = false,
      query = '';

  if (this.host) {
    host = auth + this.host;
  } else if (this.hostname) {
    // A hostname containing ':' is wrapped in brackets.
    host = auth + (this.hostname.indexOf(':') === -1 ?
        this.hostname :
        '[' + this.hostname + ']');
    if (this.port) {
      host += ':' + this.port;
    }
  }

  if (this.query &&
      isObject(this.query) &&
      keys(this.query).length) {
    query = querystring.stringify(this.query);
  }

  var search = this.search || (query && ('?' + query)) || '';

  if (protocol && substr(protocol, -1) !== ':') protocol += ':';

  // only the slashedProtocols get the //.  Not mailto:, xmpp:, etc.
  // unless they had them to begin with.
  if (this.slashes ||
      (!protocol || slashedProtocol[protocol]) && host !== false) {
    host = '//' + (host || '');
    if (pathname && pathname.charAt(0) !== '/') pathname = '/' + pathname;
  } else if (!host) {
    host = '';
  }

  if (hash && hash.charAt(0) !== '#') hash = '#' + hash;
  if (search && search.charAt(0) !== '?') search = '?' + search;

  // '?' and '#' inside the path would otherwise start the query/fragment.
  pathname = pathname.replace(/[?#]/g, function(match) {
    return encodeURIComponent(match);
  });
  // String pattern: only the first '#' in the search is escaped.
  search = search.replace('#', '%23');

  return protocol + host + pathname + search + hash;
};
/**
 * Split this.host into this.hostname and this.port.
 *
 * A trailing ":<digits>" is removed from the host; the digits (if any) are
 * stored as a string in this.port. A bare trailing ':' is removed without
 * setting a port. this.hostname is only assigned when the remaining host
 * is non-empty. this.host itself is left unchanged.
 */
Url.prototype.parseHost = function() {
  var host = this.host;
  var port = portPattern.exec(host);
  if (port) {
    port = port[0];
    if (port !== ':') {
      // drop the leading ':'
      this.port = port.substr(1);
    }
    host = host.substr(0, host.length - port.length);
  }
  if (host) this.hostname = host;
};
@@ -1,7 +0,1 @@ | ||
var dir = './lib/'; | ||
if (process.env.URLGREY_COVERAGE){ | ||
dir = './lib-cov/'; | ||
} | ||
module.exports = require(dir + 'urlgrey'); | ||
module.exports = require('./lib/urlgrey'); |
@@ -8,3 +8,3 @@ { | ||
], | ||
"version": "0.1.1", | ||
"version": "0.3.0", | ||
"bugs": { | ||
@@ -16,2 +16,6 @@ "url": "https://github.com/cainus/urlgrey/issues" | ||
}, | ||
"browser": { | ||
"url": "./browser/url.js", | ||
"querystring": "./browser/querystring.js" | ||
}, | ||
"maintainers": [ | ||
@@ -21,11 +25,13 @@ "Gregg Caines <gregg@caines.ca> (http://caines.ca)" | ||
"dependencies": { | ||
"qs": "0.6.5", | ||
"chai": "1.8.1" | ||
"tape": "~2.3.0" | ||
}, | ||
"devDependencies": { | ||
"jshint": "2.3.0", | ||
"jscoverage": "0.3.6", | ||
"mocha-lcov-reporter": "0.0.1", | ||
"coveralls": "2.0.4", | ||
"coveralls": "2.5.0", | ||
"mocha": "1.8.1", | ||
"browserify": "2.35.2", | ||
"chai": "1.8.1", | ||
"istanbul": "0.1.45", | ||
"uglify-js": "2.4.3" | ||
@@ -40,4 +46,17 @@ }, | ||
}, | ||
"browser": { | ||
"url": "./browser/url.js" | ||
"testling": { | ||
"browsers": [ | ||
"ie10", | ||
"ie11", | ||
"firefox/nightly", | ||
"firefox/25", | ||
"firefox/8", | ||
"chrome/6", | ||
"chrome/18", | ||
"chrome/31", | ||
"chrome/canary", | ||
"opera/17" | ||
], | ||
"harness": "mocha", | ||
"files": "test/index.js" | ||
}, | ||
@@ -44,0 +63,0 @@ "repository": { |
@@ -8,2 +8,4 @@  | ||
[](http://ci.testling.com/cainus/urlgrey) | ||
Urlgrey is a library for url manipulation. It's got a chainable/fluent interface | ||
@@ -77,2 +79,12 @@ that makes a number of methods available for querying different aspects of a url, | ||
``` | ||
###url.extendedPath([string]); | ||
Setter/getter for the path, querystring and fragment portion of the url | ||
all at once. | ||
```javascript | ||
url.extendedPath(); // returns '/path/kid?asdf=1234#frag' | ||
url.extendedPath("/newpath?new=query#newfrag"); // returns a new uri object with the uri | ||
// https://user:newpass@subdomain.asdf.com/newpath?new=query#newfrag | ||
``` | ||
###url.path([mixed]); | ||
@@ -85,3 +97,3 @@ Setter/getter for the path portion of the url. | ||
// ALSO, req.uri.path() can take arrays of strings as input as well: | ||
// ALSO, .path() can take arrays of strings as input as well: | ||
url.path(['qwer', '/asdf'], 'qwer/1234/', '/1234/'); | ||
@@ -135,3 +147,3 @@ // this returns a new uri object with the uri | ||
Setter/getter for the querystring using a plain string representation. This is lower-level than $.req.query(), but allows complete control of the querystring. | ||
Setter/getter for the querystring using a plain string representation. This is lower-level than .query(), but allows complete control of the querystring. | ||
```javascript | ||
@@ -160,3 +172,3 @@ url.queryString(); // returns asdf=1234 (notice there is no leading '?') | ||
###url.toJson(); | ||
Returns the json representation of the uri object, which is simply the uri as a string. The output is exactly the same as req.uri.toString(). This method is read-only. | ||
Returns the json representation of the uri object, which is simply the uri as a string. The output is exactly the same as .toString(). This method is read-only. | ||
```javascript | ||
@@ -180,4 +192,14 @@ url.toJson(); // returns "https://user:pass@subdomain.asdf.com/path/kid/?asdf=1234#frag" | ||
##Installation: | ||
### node.js: | ||
`npm install urlgrey --save` | ||
Also! If you're using urlgrey in an http application, see [urlgrey-connect](https://github.com/cainus/urlgrey-connect). It gives you a urlgrey object already instantiated with the request url as req.uri in all your request handlers.
### in the browser: | ||
Lots of options: | ||
* grab urlgrey.js from the root of this repo for a [browserify](http://browserify.org/)-built, unminified version.
* grab urlgrey.min.js from the root of this repo for a [browserify](http://browserify.org/)-built, minified version. | ||
* use [browserify](http://browserify.org/) and include this like any other node package. | ||
##Contributing: | ||
@@ -187,5 +209,7 @@ ###Testing: | ||
* `make test` | ||
####Run the browser file:// tests: | ||
* `make browser-build` | ||
* ...then open test.html in a browser | ||
####Run the browser tests on a real server: | ||
@@ -192,0 +216,0 @@ * `make browser-build` |
@@ -0,1 +1,2 @@ | ||
var isBrowser = !(typeof module !== 'undefined' && module.exports); | ||
@@ -11,2 +12,3 @@ if (!isBrowser){ | ||
describe("urlgrey", function(){ | ||
@@ -27,2 +29,3 @@ describe("chainability", function(){ | ||
url.query(false); | ||
url.extendedPath("/asdf?qwer=asdf#swqertwert23"); | ||
url.toString().should.equal(urlStr); // original object is unmodified | ||
@@ -47,5 +50,6 @@ }); | ||
if (u.protocol() === 'file'){ | ||
chai.expect(u.hostname()).to.eql(''); | ||
// chrome uses localhost. other browsers don't | ||
chai.expect((u.hostname() === '') || (u.hostname() === 'localhost')).to.eql(true); | ||
} else { | ||
u.hostname().should.equal('localhost'); | ||
chai.expect(u.hostname()).to.equal(window.location.hostname + ''); | ||
} | ||
@@ -176,2 +180,13 @@ }); | ||
}); | ||
describe("#extendedPath", function(){ | ||
it("returns the part of the url after the host:port", function(){ | ||
var url = "http://asdf.com:8080/path?asdf=1234#frag"; | ||
urlgrey(url).extendedPath().should.equal('/path?asdf=1234#frag'); | ||
}); | ||
it("lets you set the part of the url after the host:port", function(){ | ||
var url = "http://asdf.com:8080/path?asdf=1234#frag"; | ||
urlgrey(url).extendedPath('/asdf?qwer=1234#fraggle').toString() | ||
.should.equal('http://asdf.com:8080/asdf?qwer=1234#fraggle'); | ||
}); | ||
}); | ||
describe("#rawChild", function(){ | ||
@@ -348,3 +363,10 @@ it("returns a url with the given path suffix added", function(){ | ||
}); | ||
describe("tape tests", function(){ | ||
it ("works", function(done){ | ||
var tapetest = require('../tapetest'); | ||
tapetest(); | ||
done(); | ||
}); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Dynamic require
Supply chain riskDynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Environment variable access
Supply chain riskPackage accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
556829
107.41%1
-50%25
92.31%10342
12.22%222
12.12%15
-11.76%1
-50%9
50%+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed