{
		"name": "deshortify",
		"version": "0.4.0",
		"version": "0.5.0",
		"description": "Turns short URLs into long, meaningful, crap-less URLs.",
		@@ -17,8 +17,15 @@ "main": "dist/deshortify.js",
		"scripts": {
		"lint": "eslint src",
		"lintfix": "eslint src --fix",
		"lint": "prettier src/**.js",
		"lintfix": "prettier src/**.js --write",
		"build": "rollup -c rollup-config.js"
		},
		"prettier": {
		"printWidth": 85,
		"tabWidth": 4,
		"useTabs": true,
		"trailingComma": "es5",
		"arrowParens": "always"
		},
		"devDependencies": {
		"eslint": "^4.0.0",
		"prettier": "^1.17.1",
		"rollup": "^0.52.1",
		@@ -25,0 +32,0 @@ "rollup-plugin-buble": "^0.18.0"

249

src/deshortify.js

		@@ -0,12 +1,10 @@
		const http = require("http");
		const https = require("https");
		const parseUrl = require("url").parse;
		const resolveUrl = require("url").resolve;
		const formatUrl = require("url").format;

		const http = require('http');
		const https = require('https');
		const parseUrl = require('url').parse;
		const resolveUrl = require('url').resolve;
		const formatUrl = require('url').format;

		export default class Deshortifier {
		// module.exports = class Deshortifier {
		// module.exports = class Deshortifier {
		constructor(options = {}) {

		this._cache = {};
		@@ -16,10 +14,16 @@ this._verbose = !!options.verbose;
		if (process) {
		let deshortifyVersion = require('../package.json').version;
		let nodejsVersion = process.release.name + '/' + process.version;
		let deshortifyVersion = require("../package.json").version;
		let nodejsVersion = process.release.name + "/" + process.version;
		// Nice user agent for most URLs
		this._userAgent = options.userAgent \|\|
		('Deshortify/' + deshortifyVersion + ' ' + nodejsVersion + ' (+https://gitlab.com/IvanSanchez/deshortify)');
		this._userAgent =
		options.userAgent \|\|
		"Deshortify/" +
		deshortifyVersion +
		" " +
		nodejsVersion +
		" (+https://gitlab.com/IvanSanchez/deshortify)";

		// User agent for asshole websites which filter requests based on it (I'm looking at you, facebook)
		this._assholeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12";
		this._assholeUserAgent =
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12";
		} else {
		@@ -31,3 +35,2 @@ // User agent when running on browser is the same as the navigator's


		// Returns a Promise for the deshortified URL
		@@ -38,3 +41,3 @@ deshortify(url) {
		if (!parsedUrl.protocol) {
		url = 'http://' + url;
		url = "http://" + url;
		}
		@@ -46,9 +49,9 @@
		_deshortify(url, breadcrumbs = []) {

		if (breadcrumbs.indexOf(url) !== -1 \|\| // Circular loop, break it.
		if (
		breadcrumbs.indexOf(url) !== -1 \|\| // Circular loop, break it.
		breadcrumbs.length > 20 \|\| // This looks like an infinite non-circular loop, break it.
		this._skipUrl(url)) // URL whitelisted from being deshortified because nuisances
		{
		this._skipUrl(url) // URL is on the skip list: deshortifying it is known to cause trouble
		) {
		if (this._verbose) {
		console.log('Skipping: ', url);
		console.log("Skipping: ", url);
		}
		@@ -61,3 +64,2 @@ return Promise.resolve(this._cleanUp(url));
		if (url in this._cache) {

		let cachedUrl = this._cache[url];
		@@ -67,3 +69,5 @@
		// return the promise already created.
		if (cachedUrl instanceof Promise) { return cachedUrl; }
		if (cachedUrl instanceof Promise) {
		return cachedUrl;
		}

		@@ -75,3 +79,11 @@ if (cachedUrl === url) {
		if (this._verbose) {
		console.log('cached follow: ', url, ' → ', cachedUrl, ' (breadcrumbs lenght is ', breadcrumbs.length, ')');
		console.log(
		"cached follow: ",
		url,
		" → ",
		cachedUrl,
		" (breadcrumbs lenght is ",
		breadcrumbs.length,
		")"
		);
		}
		@@ -81,6 +93,5 @@ return this._deshortify(cachedUrl, breadcrumbs);


		let parsedUrl = parseUrl(url);

		if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
		if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
		// Neither HTTP nor HTTPS, so just return the URL as-is (might be ftp, or something more esoteric such as irc or gopher)
		@@ -91,41 +102,40 @@ return Promise.resolve(url);
		let userAgent =
		(parsedUrl.hostname === 'fb.me') ?
		this._assholeUserAgent :
		this._userAgent;
		parsedUrl.hostname === "fb.me"
		? this._assholeUserAgent
		: this._userAgent;


		// Handle header-based redirects
		return this._cache[url] = new Promise((resolve)=>{
		return (this._cache[url] = new Promise((resolve) => {
		let backend = parsedUrl.protocol === "http:" ? http : https;

		let backend =
		parsedUrl.protocol === 'http:' ?
		http:
		https;
		let request = backend.request(
		{
		method: "HEAD",
		protocol: parsedUrl.protocol,
		hostname: parsedUrl.hostname,
		port: parsedUrl.port,
		path: parsedUrl.path,
		headers: {
		"User-Agent": userAgent,
		},
		},
		(res) => {
		// console.log(res.headers);

		let request = backend.request({
		method: 'HEAD',
		protocol: parsedUrl.protocol,
		hostname: parsedUrl.hostname,
		port: parsedUrl.port,
		path: parsedUrl.path,
		headers: {
		'User-Agent': userAgent
		}
		}, (res)=>{
		// console.log(res.headers);
		if ("location" in res.headers) {
		// w00t! We've got a 30x response and a redirect!
		let newUrl = resolveUrl(url, res.headers.location);
		if (this._verbose && url !== newUrl) {
		console.log("follow: ", url, " → ", newUrl);
		}
		this._cache[url] = newUrl;
		return resolve(this._deshortify(newUrl, breadcrumbs));
		}

		if ('location' in res.headers) { // w00t! We've got a 30x response and a redirect!
		let newUrl = resolveUrl(url, res.headers.location);
		if (this._verbose && (url !== newUrl)) {
		console.log('follow: ', url, ' → ', newUrl);
		}
		this._cache[url] = newUrl;
		return resolve(this._deshortify(newUrl, breadcrumbs));
		// Giving up, looks like this was the final URL.
		return resolve(this._cleanUp(url));
		}
		);

		// Giving up, looks like this was the final URL.
		return resolve(this._cleanUp(url));
		});

		request.on('error', ()=>{
		request.on("error", () => {
		// Panic and return the original url.
		@@ -135,12 +145,8 @@ return resolve(this._cleanUp(url));

		request.end(); // Actually send the request

		});
		request.end(); // Actually send the request
		}));
		}


		// Cleans up spammy query parameters and hash bits.
		_cleanUp(url) {
		/// TODO

		let parsedUrl = parseUrl(url, true);
		@@ -153,21 +159,20 @@ let host = parsedUrl.host;
		if (params) {
		// console.log(url);
		params.forEach((name)=>{
		// console.log(url);
		params.forEach((name) => {
		let val = parsedUrl.query[name];

		// console.log(host, ' / ', name, ' = ', val);
		// console.log(host, ' / ', name, ' = ', val);
		// console.log('Matches _source: ', name.match(/_source$/));

		// console.log('Matches _source: ', name.match(/_source$/));

		if (
		(typeof val !== 'string') \|\| // e.g. http://www.businessinsider.com/...&r=US&IR=T&IR=T
		(name.match(/_source$/)) \|\|
		(name.match(/_medium$/)) \|\|
		(name.match(/_term$/)) \|\|
		(name.match(/_content$/)) \|\|
		(name.match(/_campaign$/)) \|\|
		(name.match(/_mchannel/)) \|\|
		(name.match(/_kwd$/)) \|\|
		(name === 'utm_cid') \|\|
		(name === "cm_mmc") \|\|
		typeof val !== "string" \|\| // e.g. http://www.businessinsider.com/...&r=US&IR=T&IR=T
		name.match(/_source$/) \|\|
		name.match(/_medium$/) \|\|
		name.match(/_term$/) \|\|
		name.match(/_content$/) \|\|
		name.match(/_campaign$/) \|\|
		name.match(/_mchannel/) \|\|
		name.match(/_kwd$/) \|\|
		name === "utm_cid" \|\|
		name === "cm_mmc" \|\|
		(name === "tag" && val === "as.rss") \|\|
		@@ -184,14 +189,14 @@ (name === "ref" && val === "rss") \|\|
		(name === "spref" && val === "gr") \|\|
		(val.match(/^twitter/)) \|\|
		(val.match(/\.twitter$/)) \|\|
		(val === "share_btn_tw") \|\|
		val.match(/^twitter/) \|\|
		val.match(/\.twitter$/) \|\|
		val === "share_btn_tw" \|\|
		(name === "platform" && val === "hootsuite") \|\|
		(name === "mbid" && val === "social_retweet") \|\| // New Yorker et al
		(name === "mbid" && val === "social_twitter") \|\| // New Yorker et al
		(name === "mbid" && val === "social_retweet") \|\| // New Yorker et al
		(name === "mbid" && val === "social_twitter") \|\| // New Yorker et al
		(host === "www.youtube.com" && name === "feature") \|\|
		(host === "www.nytimes.com" && name === "smid") \|\|
		(host === "www.nytimes.com" && name === "seid") \|\|
		(name === "awesm") \|\| // Appears as a logger of awesm shortener, at least in storify
		(name === "CMP" && val === "twt_gu") \|\| // Guardian.co.uk short links
		(name === "CMP" && val.match(/^soc_/)) \|\| // Guardian.co.uk short links
		name === "awesm" \|\| // Appears as a logger of awesm shortener, at least in storify
		(name === "CMP" && val === "twt_gu") \|\| // Guardian.co.uk short links
		(name === "CMP" && val.match(/^soc_/)) \|\| // Guardian.co.uk short links
		(name === "CMP" && val.match(/^Share_/)) \|\|
		@@ -204,12 +209,14 @@ (name === "ex_cid" && val === "story-twitter") \|\|
		(name === "soc_trk" && val === "tw") \|\|
		(name === "hootPostID") \|\|
		(name === "a" && val === "socialmedia") \|\| // Meetup
		name === "hootPostID" \|\|
		(name === "a" && val === "socialmedia") \|\| // Meetup
		(host.match(/medium.com$/) && name === "source") \|\|
		(host.match(/elpais.com$/) && name === "id_externo_rsoc") \|\|
		(host.match(/washingtonpost.com$$/) && name === "postshare") \|\|
		(host.match(/washingtonpost.com$$/) && name === "ss_tw-bottom") \|\|
		(val === "rss-default") \|\|
		(name === "__twitter_impression") \|\|
		(name === 'src' && val === "syn"))
		{
		(host.match(/washingtonpost.com$$/) &&
		name === "ss_tw-bottom") \|\|
		val === "rss-default" \|\|
		name === "__twitter_impression" \|\|
		(name === "src" && val === "syn") \|\|
		name === "fbclid"
		) {
		// Noop
		@@ -223,3 +230,3 @@ return;

		// console.log('cleanup: ', JSON.stringify(parsedUrl.query), ' → ', JSON.stringify(cleanedParams) );
		// console.log('cleanup: ', JSON.stringify(parsedUrl.query), ' → ', JSON.stringify(cleanedParams) );

		@@ -232,6 +239,6 @@ // Replace query params and delete duplicated stuff

		// console.log(parsedUrl);
		// console.log(parsedUrl);
		var cleanedUrl = formatUrl(parsedUrl);
		if (this._verbose && (url !== cleanedUrl)) {
		console.log('cleanup: ', url, ' → ', cleanedUrl );
		if (this._verbose && url !== cleanedUrl) {
		console.log("cleanup: ", url, " → ", cleanedUrl);
		}
		@@ -241,7 +248,5 @@ return cleanedUrl;


		// Returns boolean true if the passed URL should be skipped altogether,
		// false otherwise
		_skipUrl(url) {

		let parsedUrl = parseUrl(url, true);
		@@ -252,30 +257,24 @@
		return (
		host === "youtu.be" \|\| // Does not add more info
		host === "spoti.fi" \|\| // Does not add more info
		host === "4sq.com" \|\| // Does not add more info
		host === "flic.kr" \|\| // Does not add more info
		host === "untp.beer" \|\| // Does not add more info
		host === "youtube.com" \|\| // Does not add more info
		host === "www.elmundo.es" \|\| // El Mundo newspaper will only timeout and waste time
		host === "www.economist.com" \|\| // "You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake."
		host === "pbs.twimg.com" \|\| // Might trigger verbose errors if twitter is over capacity
		// host === "www.linkedin.com" \|\| // Used to redirect to login
		host === "session.wikispaces.com" \|\| // Infinite redirect loop with different URLs params each time

		host.match(/twitter\.com$/) \|\| // Infinite redirect to login
		host.match(/blogspot\.[a-z]{2-3}$/) \|\| // Will redirect to a nearby geolocated server
		host.match(/facebook\.com$/) \|\| // Will redirect to fb.com/unsupportedbrowser due to user-agent
		host.match(/\.nytimes\.com$/) \|\| // Infinite nocookies loop
		host.match(/^www\.amazon\.$/) \|\| // 405 MethodNotAllowed

		url.match(/subscribe/) \|\| // Potential paywall
		url.match(/nocookie/) \|\| // Potential paywall/login
		url.match(/gdpr/) // Potential "GDPR consent" interstitial
		url.length > 400 \|\| // Just too long, maybe a REPL
		host === "youtu.be" \|\| // Does not add more info
		host === "spoti.fi" \|\| // Does not add more info
		host === "4sq.com" \|\| // Does not add more info
		host === "flic.kr" \|\| // Does not add more info
		host === "untp.beer" \|\| // Does not add more info
		host === "youtube.com" \|\| // Does not add more info
		host === "www.elmundo.es" \|\| // El Mundo newspaper will only timeout and waste time
		host === "www.economist.com" \|\| // "You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake."
		host === "pbs.twimg.com" \|\| // Might trigger verbose errors if twitter is over capacity
		// host === "www.linkedin.com" \|\| // Used to redirect to login
		host === "session.wikispaces.com" \|\| // Infinite redirect loop with different URLs params each time
		host.match(/twitter\.com$/) \|\| // Infinite redirect to login
		host.match(/blogspot\.[a-z]{2-3}$/) \|\| // Will redirect to a nearby geolocated server
		host.match(/facebook\.com$/) \|\| // Will redirect to fb.com/unsupportedbrowser due to user-agent
		host.match(/\.nytimes\.com$/) \|\| // Infinite nocookies loop
		host.match(/^www\.amazon\.$/) \|\| // 405 MethodNotAllowed
		url.match(/subscribe/) \|\| // Potential paywall
		url.match(/nocookie/) \|\| // Potential paywall/login
		url.match(/gdpr/) // Potential "GDPR consent" interstitial
		);

		}


		}

.eslintrc.js

src/deshortify.js~

deshortify - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics