parse-domain
Advanced tools
Comparing version 0.3.0 to 1.0.0
"use strict"; | ||
var fs = require("fs"), | ||
path = require("path"); | ||
var fs = require("fs"); | ||
var path = require("path"); | ||
var txtPath = path.resolve(__dirname, "./tld.txt"), | ||
regexPath = path.resolve(__dirname, "../tld.js"), | ||
tld, | ||
src; | ||
var txtPath = path.resolve(__dirname, "./tld.txt"); | ||
var regexPath = path.resolve(__dirname, "../tld.js"); | ||
var txtContent; | ||
var icannContent; | ||
var privateContent; | ||
var icannTld; | ||
var privateTld; | ||
var src; | ||
var regex; | ||
console.log("Reading " + txtPath + " ..."); | ||
tld = fs.readFileSync(txtPath, "utf8") | ||
.replace(/(\/\/.+)\r?\n/gi, "") | ||
txtContent = fs.readFileSync(txtPath, "utf8"); | ||
icannContent = txtContent.slice( | ||
txtContent.indexOf("// ===BEGIN ICANN DOMAINS==="), | ||
txtContent.indexOf("// ===END ICANN DOMAINS===") | ||
); | ||
privateContent = txtContent.slice( | ||
txtContent.indexOf("// ===BEGIN PRIVATE DOMAINS==="), | ||
txtContent.indexOf("// ===END PRIVATE DOMAINS===") | ||
); | ||
icannTld = icannContent.replace(/(\/\/.+)\r?\n/gi, "") | ||
.replace(/[\r?\n]+/g, "|") | ||
@@ -19,4 +34,13 @@ .replace(/\./g, "\\.") | ||
.slice(1, -1); | ||
privateTld = privateContent.replace(/(\/\/.+)\r?\n/gi, "") | ||
.replace(/[\r?\n]+/g, "|") | ||
.replace(/\./g, "\\.") | ||
.replace(/\*/g, "[^.]+") | ||
.slice(1, -1); | ||
src = "module.exports = /\\.(" + tld + ")$/;"; | ||
src = [ | ||
"exports = module.exports = /\\.(" + icannTld + "|$" + privateTld + ")$/;", | ||
"exports.icann = /\\.(" + icannTld + ")$/;", | ||
"exports.private = /\\.(" + privateTld + ")$/;" | ||
].join("\n"); | ||
@@ -29,3 +53,3 @@ console.log("Writing module at " + regexPath + " ..."); | ||
var regex = require(regexPath); | ||
regex = require(regexPath); | ||
@@ -36,2 +60,2 @@ if (regex instanceof RegExp === false) { | ||
console.log("Regex is ok, exiting now"); | ||
console.log("Regex is ok, exiting now"); |
"use strict"; | ||
var urlParts = /^(https?:\/\/)?(.+@)?(.+?)(:\d{2,5})?([/?].*)?$/,// 1 = protocol, 2 = auth, 3 = domain, 4 = port, 5 = path | ||
knownTlds = require("./tld.js"), | ||
dot = /\./g; | ||
var urlParts = /^(https?:\/\/)?(.+@)?(.+?)(:\d{2,5})?([/?].*)?$/; // 1 = protocol, 2 = auth, 3 = domain, 4 = port, 5 = path | ||
var knownTlds = require("./tld.js"); | ||
var dot = /\./g; | ||
/** | ||
* Removes all unnecessary parts of the domain (e.g. protocol, auth, port, path, query) and parses the remaining domain. | ||
* Removes all unnecessary parts of the domain (e.g. protocol, auth, port, path, query) | ||
* and parses the remaining domain. The returned object contains the properties 'subdomain', 'domain' and 'tld'. | ||
* | ||
* For example: | ||
* | ||
* parseDomain("http://www.google.com:1234/some/path?q=something") | ||
* | ||
* yields | ||
* | ||
* { | ||
* subdomain: "www", | ||
* domain: "google", | ||
* tld: "com" | ||
* } | ||
* | ||
* Since the top-level domain is handled differently by every country, this function only | ||
* supports all tlds listed in lib/build/tld.txt | ||
* supports all tlds listed in lib/build/tld.txt. | ||
* | ||
* If the given url is not valid or isn't supported by the tld.txt, this function returns null. | ||
* | ||
* @param {String} url | ||
* @param {Object} [options] | ||
* @param {String[]|RegExp} [options.customTlds] | ||
* @param {string} url | ||
* @param {Object} [options] | ||
* @param {Array<string>|RegExp} [options.customTlds] | ||
* @param {boolean} [options.privateTlds] | ||
* @returns {Object|null} | ||
*/ | ||
function parseDomain(url, options) { | ||
var urlSplit, | ||
tld, | ||
domain, | ||
subdomain; | ||
var urlSplit; | ||
var tld = null; | ||
var domain; | ||
var subdomain; | ||
@@ -45,2 +35,5 @@ if (!url || typeof url !== "string") { | ||
} | ||
if ("privateTlds" in options === false) { | ||
options.privateTlds = false; | ||
} | ||
@@ -51,7 +44,4 @@ // urlSplit can't be null because urlParts will always match at the third capture | ||
// check if tld is supported | ||
tld = domain.match(knownTlds); | ||
// for potentially unrecognized tlds, try matching against custom tlds | ||
if (tld === null && options.customTlds) { | ||
if (options.customTlds) { | ||
if (options.customTlds instanceof RegExp === false) { | ||
@@ -65,5 +55,11 @@ // build regexp from options.customTlds | ||
// If no custom tlds, check if tld is supported | ||
if (tld === null) { | ||
tld = domain.match(options.privateTlds ? knownTlds : knownTlds.icann); | ||
} | ||
if (tld === null) { | ||
return null; | ||
} | ||
tld = tld[0]; | ||
@@ -70,0 +66,0 @@ |
{ | ||
"name": "parse-domain", | ||
"version": "0.3.0", | ||
"version": "1.0.0", | ||
"description": "Splits an url into sub-domain, domain and effective top-level-domain", | ||
@@ -8,2 +8,4 @@ "main": "./lib/parseDomain.js", | ||
"test": "mocha -R spec", | ||
"posttest": "eslint lib test", | ||
"coverage": "istanbul cover ./node_modules/mocha/bin/_mocha", | ||
"download-tld": "curl https://publicsuffix.org/list/public_suffix_list.dat > lib/build/tld.txt", | ||
@@ -33,4 +35,7 @@ "build-regex": "node lib/build/buildRegex.js", | ||
"chai": "^3.5.0", | ||
"mocha": "^3.1.0" | ||
"eslint": "^3.7.1", | ||
"eslint-config-peerigon": "^6.0.0", | ||
"eslint-plugin-jsdoc": "^2.3.1", | ||
"mocha": "^3.1.2" | ||
} | ||
} |
parse-domain | ||
============ | ||
**Splits a URL into sub-domain, domain and the effective top-level domain.** | ||
**Splits a URL into sub-domain, domain and the top-level domain.** | ||
@@ -8,7 +8,7 @@ [![](https://img.shields.io/npm/v/parse-domain.svg)](https://www.npmjs.com/package/parse-domain) | ||
[![Dependency Status](https://david-dm.org/peerigon/parse-domain.svg)](https://david-dm.org/peerigon/parse-domain) | ||
[![Build Status](https://travis-ci.org/peerigon/parse-domain.svg?branch=master)](https://travis-ci.org/peerigon/parse-domain) | ||
[![Coverage Status](https://img.shields.io/coveralls/peerigon/parse-domain.svg)](https://coveralls.io/r/peerigon/parse-domain?branch=master) | ||
Since domains are handled differently across different countries and organizations, splitting a URL into sub-domain, domain and top-level-domain parts is not a simple regexp. **parse-domain** uses a [large list of effective tld names](http://publicsuffix.org/list/effective_tld_names.dat) from publicsuffix.org to recognize different parts of the domain. | ||
Since domains are handled differently across different countries and organizations, splitting a URL into sub-domain, domain and top-level-domain parts is not a simple regexp. **parse-domain** uses a [large list of known top-level domains](https://publicsuffix.org/list/public_suffix_list.dat) from publicsuffix.org to recognize different parts of the domain. | ||
Please also read the note on [effective top-level domains](#note-on-effective-top-level-domains). | ||
<br /> | ||
@@ -36,3 +36,3 @@ | ||
// usernames, passwords and ports are disregarded | ||
// protocols, usernames, passwords, ports, paths, queries and hashes are disregarded | ||
expect(parseDomain("https://user:password@example.co.uk:8080/some/path?and&query#hash")).to.eql({ | ||
@@ -44,3 +44,3 @@ subdomain: "", | ||
// non-canonical top-level domains are ignored | ||
// unknown top-level domains are ignored | ||
expect(parseDomain("unknown.tld.kk")).to.equal(null); | ||
@@ -75,6 +75,5 @@ | ||
function parseLocalDomains(url) { | ||
var options = { | ||
return parseDomain(url, { | ||
customTlds: /localhost|\.local/ | ||
}; | ||
return parseDomain(url, options); | ||
}); | ||
} | ||
@@ -99,3 +98,3 @@ | ||
### `parseDomain(url: String, options: ParseOptions): ParsedDomain|null` | ||
### `parseDomain(url: string, options: ParseOptions): ParsedDomain|null` | ||
@@ -105,5 +104,17 @@ Returns `null` if `url` has an unknown tld or if it's not a valid url. | ||
#### `ParseOptions` | ||
```javascript | ||
{ | ||
customTlds: RegExp|String[] | ||
// A list of custom tlds that are first matched against the url. | ||
// Useful if you also need to split internal URLs like localhost. | ||
customTlds: RegExp|Array<string>, | ||
// There are a lot of private domains that act like top-level domains, | ||
// like blogspot.com, googleapis.com or s3.amazonaws.com. | ||
// By default, these domains would be split into: | ||
// { subdomain: ..., domain: "blogspot", tld: "com" } | ||
// When this flag is set to true, the domain will be split into | ||
// { subdomain: ..., domain: ..., tld: "blogspot.com" } | ||
// See also https://github.com/peerigon/parse-domain/issues/4 | ||
privateTlds: boolean | ||
} | ||
@@ -113,27 +124,11 @@ ``` | ||
#### `ParsedDomain` | ||
```javascript | ||
{ | ||
tld: String, | ||
domain: String, | ||
subdomain: String | ||
} | ||
``` | ||
<br /> | ||
Note on effective top-level domains | ||
------------------------------------------------------------------------ | ||
Technically, the top-level domain is *always* the part after the last dot. That's why publicsuffix.org is a list of *effective* top-level domains: It lists all top-level domains where users are allowed to host any content. That's why `foo.blogspot.com` will be split into | ||
```javascript | ||
{ | ||
tld: "blogspot.com", | ||
domain: "foo", | ||
subdomain: "" | ||
tld: string, | ||
domain: string, | ||
subdomain: string | ||
} | ||
``` | ||
See also [#4](https://github.com/peerigon/parse-domain/issues/4) | ||
<br /> | ||
@@ -140,0 +135,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
404172
12
878
1
5
133
1