Socket
Socket
Sign in · Demo · Install

parse-domain

Package Overview
Dependencies
Maintainers
1
Versions
55
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

parse-domain - npm Package Compare versions

Comparing version 0.3.0 to 1.0.0

.eslintrc.json

46

lib/build/buildRegex.js
"use strict";
var fs = require("fs"),
path = require("path");
var fs = require("fs");
var path = require("path");
var txtPath = path.resolve(__dirname, "./tld.txt"),
regexPath = path.resolve(__dirname, "../tld.js"),
tld,
src;
var txtPath = path.resolve(__dirname, "./tld.txt");
var regexPath = path.resolve(__dirname, "../tld.js");
var txtContent;
var icannContent;
var privateContent;
var icannTld;
var privateTld;
var src;
var regex;
console.log("Reading " + txtPath + " ...");
tld = fs.readFileSync(txtPath, "utf8")
.replace(/(\/\/.+)\r?\n/gi, "")
txtContent = fs.readFileSync(txtPath, "utf8");
icannContent = txtContent.slice(
txtContent.indexOf("// ===BEGIN ICANN DOMAINS==="),
txtContent.indexOf("// ===END ICANN DOMAINS===")
);
privateContent = txtContent.slice(
txtContent.indexOf("// ===BEGIN PRIVATE DOMAINS==="),
txtContent.indexOf("// ===END PRIVATE DOMAINS===")
);
icannTld = icannContent.replace(/(\/\/.+)\r?\n/gi, "")
.replace(/[\r?\n]+/g, "|")

@@ -19,4 +34,13 @@ .replace(/\./g, "\\.")

.slice(1, -1);
privateTld = privateContent.replace(/(\/\/.+)\r?\n/gi, "")
.replace(/[\r?\n]+/g, "|")
.replace(/\./g, "\\.")
.replace(/\*/g, "[^.]+")
.slice(1, -1);
src = "module.exports = /\\.(" + tld + ")$/;";
src = [
"exports = module.exports = /\\.(" + icannTld + "|$" + privateTld + ")$/;",
"exports.icann = /\\.(" + icannTld + ")$/;",
"exports.private = /\\.(" + privateTld + ")$/;"
].join("\n");

@@ -29,3 +53,3 @@ console.log("Writing module at " + regexPath + " ...");

var regex = require(regexPath);
regex = require(regexPath);

@@ -36,2 +60,2 @@ if (regex instanceof RegExp === false) {

console.log("Regex is ok, exiting now");
console.log("Regex is ok, exiting now");
"use strict";
var urlParts = /^(https?:\/\/)?(.+@)?(.+?)(:\d{2,5})?([/?].*)?$/,// 1 = protocol, 2 = auth, 3 = domain, 4 = port, 5 = path
knownTlds = require("./tld.js"),
dot = /\./g;
var urlParts = /^(https?:\/\/)?(.+@)?(.+?)(:\d{2,5})?([/?].*)?$/; // 1 = protocol, 2 = auth, 3 = domain, 4 = port, 5 = path
var knownTlds = require("./tld.js");
var dot = /\./g;
/**
* Removes all unnecessary parts of the domain (e.g. protocol, auth, port, path, query) and parses the remaining domain.
* Removes all unnecessary parts of the domain (e.g. protocol, auth, port, path, query)
* and parses the remaining domain. The returned object contains the properties 'subdomain', 'domain' and 'tld'.
*
* For example:
*
* parseDomain("http://www.google.com:1234/some/path?q=something")
*
* yields
*
* {
* subdomain: "www",
* domain: "google",
* tld: "com"
* }
*
* Since the top-level domain is handled differently by every country, this function only
* supports all tlds listed in lib/build/tld.txt
* supports all tlds listed in lib/build/tld.txt.
*
* If the given url is not valid or isn't supported by the tld.txt, this function returns null.
*
* @param {String} url
* @param {Object} [options]
* @param {String[]|RegExp} [options.customTlds]
* @param {string} url
* @param {Object} [options]
* @param {Array<string>|RegExp} [options.customTlds]
* @param {boolean} [options.privateTlds]
* @returns {Object|null}
*/
function parseDomain(url, options) {
var urlSplit,
tld,
domain,
subdomain;
var urlSplit;
var tld = null;
var domain;
var subdomain;

@@ -45,2 +35,5 @@ if (!url || typeof url !== "string") {

}
if ("privateTlds" in options === false) {
options.privateTlds = false;
}

@@ -51,7 +44,4 @@ // urlSplit can't be null because urlParts will always match at the third capture

// check if tld is supported
tld = domain.match(knownTlds);
// for potentially unrecognized tlds, try matching against custom tlds
if (tld === null && options.customTlds) {
if (options.customTlds) {
if (options.customTlds instanceof RegExp === false) {

@@ -65,5 +55,11 @@ // build regexp from options.customTlds

// If no custom tlds, check if tld is supported
if (tld === null) {
tld = domain.match(options.privateTlds ? knownTlds : knownTlds.icann);
}
if (tld === null) {
return null;
}
tld = tld[0];

@@ -70,0 +66,0 @@

{
"name": "parse-domain",
"version": "0.3.0",
"version": "1.0.0",
"description": "Splits an url into sub-domain, domain and effective top-level-domain",

@@ -8,2 +8,4 @@ "main": "./lib/parseDomain.js",

"test": "mocha -R spec",
"posttest": "eslint lib test",
"coverage": "istanbul cover ./node_modules/mocha/bin/_mocha",
"download-tld": "curl https://publicsuffix.org/list/public_suffix_list.dat > lib/build/tld.txt",

@@ -33,4 +35,7 @@ "build-regex": "node lib/build/buildRegex.js",

"chai": "^3.5.0",
"mocha": "^3.1.0"
"eslint": "^3.7.1",
"eslint-config-peerigon": "^6.0.0",
"eslint-plugin-jsdoc": "^2.3.1",
"mocha": "^3.1.2"
}
}
parse-domain
============
**Splits a URL into sub-domain, domain and the effective top-level domain.**
**Splits a URL into sub-domain, domain and the top-level domain.**

@@ -8,7 +8,7 @@ [![](https://img.shields.io/npm/v/parse-domain.svg)](https://www.npmjs.com/package/parse-domain)

[![Dependency Status](https://david-dm.org/peerigon/parse-domain.svg)](https://david-dm.org/peerigon/parse-domain)
[![Build Status](https://travis-ci.org/peerigon/parse-domain.svg?branch=master)](https://travis-ci.org/peerigon/parse-domain)
[![Coverage Status](https://img.shields.io/coveralls/peerigon/parse-domain.svg)](https://coveralls.io/r/peerigon/parse-domain?branch=master)
Since domains are handled differently across different countries and organizations, splitting a URL into sub-domain, domain and top-level-domain parts is not a simple regexp. **parse-domain** uses a [large list of effective tld names](http://publicsuffix.org/list/effective_tld_names.dat) from publicsuffix.org to recognize different parts of the domain.
Since domains are handled differently across different countries and organizations, splitting a URL into sub-domain, domain and top-level-domain parts is not a simple regexp. **parse-domain** uses a [large list of known top-level domains](https://publicsuffix.org/list/public_suffix_list.dat) from publicsuffix.org to recognize different parts of the domain.
Please also read the note on [effective top-level domains](#note-on-effective-top-level-domains).
<br />

@@ -36,3 +36,3 @@

// usernames, passwords and ports are disregarded
// protocols, usernames, passwords, ports, paths, queries and hashes are disregarded
expect(parseDomain("https://user:password@example.co.uk:8080/some/path?and&query#hash")).to.eql({

@@ -44,3 +44,3 @@ subdomain: "",

// non-canonical top-level domains are ignored
// unknown top-level domains are ignored
expect(parseDomain("unknown.tld.kk")).to.equal(null);

@@ -75,6 +75,5 @@

function parseLocalDomains(url) {
var options = {
return parseDomain(url, {
customTlds: /localhost|\.local/
};
return parseDomain(url, options);
});
}

@@ -99,3 +98,3 @@

### `parseDomain(url: String, options: ParseOptions): ParsedDomain|null`
### `parseDomain(url: string, options: ParseOptions): ParsedDomain|null`

@@ -105,5 +104,17 @@ Returns `null` if `url` has an unknown tld or if it's not a valid url.

#### `ParseOptions`
```javascript
{
customTlds: RegExp|String[]
// A list of custom tlds that are first matched against the url.
// Useful if you also need to split internal URLs like localhost.
customTlds: RegExp|Array<string>,
// There are a lot of private domains that act like top-level domains,
// like blogspot.com, googleapis.com or s3.amazonaws.com.
// By default, these domains would be split into:
// { subdomain: ..., domain: "blogspot", tld: "com" }
// When this flag is set to true, the domain will be split into
// { subdomain: ..., domain: ..., tld: "blogspot.com" }
// See also https://github.com/peerigon/parse-domain/issues/4
privateTlds: boolean
}

@@ -113,27 +124,11 @@ ```

#### `ParsedDomain`
```javascript
{
tld: String,
domain: String,
subdomain: String
}
```
<br />
Note on effective top-level domains
------------------------------------------------------------------------
Technically, the top-level domain is *always* the part after the last dot. That's why publicsuffix.org is a list of *effective* top-level domains: It lists all top-level domains where users are allowed to host any content. That's why `foo.blogspot.com` will be split into
```javascript
{
tld: "blogspot.com",
domain: "foo",
subdomain: ""
tld: string,
domain: string,
subdomain: string
}
```
See also [#4](https://github.com/peerigon/parse-domain/issues/4)
<br />

@@ -140,0 +135,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc