New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

deshortify

Package Overview
Dependencies
Maintainers
1
Versions
14
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

deshortify - npm Package Compare versions

Comparing version 0.4.0 to 0.5.0

dist/deshortify.js

4

CHANGELOG.md
# v0.4.0
* Skip GDPR interstitials
# v0.3.0 (2017-12-06)

@@ -3,0 +7,0 @@

15

package.json
{
"name": "deshortify",
"version": "0.4.0",
"version": "0.5.0",
"description": "Turns short URLs into long, meaningful, crap-less URLs.",

@@ -17,8 +17,15 @@ "main": "dist/deshortify.js",

"scripts": {
"lint": "eslint src",
"lintfix": "eslint src --fix",
"lint": "prettier src/**.js",
"lintfix": "prettier src/**.js --write",
"build": "rollup -c rollup-config.js"
},
"prettier": {
"printWidth": 85,
"tabWidth": 4,
"useTabs": true,
"trailingComma": "es5",
"arrowParens": "always"
},
"devDependencies": {
"eslint": "^4.0.0",
"prettier": "^1.17.1",
"rollup": "^0.52.1",

@@ -25,0 +32,0 @@ "rollup-plugin-buble": "^0.18.0"

@@ -0,12 +1,10 @@

const http = require("http");
const https = require("https");
const parseUrl = require("url").parse;
const resolveUrl = require("url").resolve;
const formatUrl = require("url").format;
const http = require('http');
const https = require('https');
const parseUrl = require('url').parse;
const resolveUrl = require('url').resolve;
const formatUrl = require('url').format;
export default class Deshortifier {
// module.exports = class Deshortifier {
// module.exports = class Deshortifier {
constructor(options = {}) {
this._cache = {};

@@ -16,10 +14,16 @@ this._verbose = !!options.verbose;

if (process) {
let deshortifyVersion = require('../package.json').version;
let nodejsVersion = process.release.name + '/' + process.version;
let deshortifyVersion = require("../package.json").version;
let nodejsVersion = process.release.name + "/" + process.version;
// Nice user agent for most URLs
this._userAgent = options.userAgent ||
('Deshortify/' + deshortifyVersion + ' ' + nodejsVersion + ' (+https://gitlab.com/IvanSanchez/deshortify)');
this._userAgent =
options.userAgent ||
"Deshortify/" +
deshortifyVersion +
" " +
nodejsVersion +
" (+https://gitlab.com/IvanSanchez/deshortify)";
// User agent for asshole websites which filter requests based on it (I'm looking at you, facebook)
this._assholeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12";
this._assholeUserAgent =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12";
} else {

@@ -31,3 +35,2 @@ // User agent when running on browser is the same as the navigator's

// Returns a Promise for the deshortified URL

@@ -38,3 +41,3 @@ deshortify(url) {

if (!parsedUrl.protocol) {
url = 'http://' + url;
url = "http://" + url;
}

@@ -46,9 +49,9 @@

_deshortify(url, breadcrumbs = []) {
if (breadcrumbs.indexOf(url) !== -1 || // Circular loop, break it.
if (
breadcrumbs.indexOf(url) !== -1 || // Circular loop, break it.
breadcrumbs.length > 20 || // This looks like an infinite non-circular loop, break it.
this._skipUrl(url)) // URL whitelisted from being deshortified because nuisances
{
this._skipUrl(url) // URL whitelisted from being deshortified because nuisances
) {
if (this._verbose) {
console.log('Skipping: ', url);
console.log("Skipping: ", url);
}

@@ -61,3 +64,2 @@ return Promise.resolve(this._cleanUp(url));

if (url in this._cache) {
let cachedUrl = this._cache[url];

@@ -67,3 +69,5 @@

// return the promise already created.
if (cachedUrl instanceof Promise) { return cachedUrl; }
if (cachedUrl instanceof Promise) {
return cachedUrl;
}

@@ -75,3 +79,11 @@ if (cachedUrl === url) {

if (this._verbose) {
console.log('cached follow: ', url, ' → ', cachedUrl, ' (breadcrumbs lenght is ', breadcrumbs.length, ')');
console.log(
"cached follow: ",
url,
" → ",
cachedUrl,
" (breadcrumbs lenght is ",
breadcrumbs.length,
")"
);
}

@@ -81,6 +93,5 @@ return this._deshortify(cachedUrl, breadcrumbs);

let parsedUrl = parseUrl(url);
if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
// Neither HTTP or HTTPS, just return whatever (might be ftp, or something more esoteric as irc or gopher)

@@ -91,41 +102,40 @@ return Promise.resolve(url);

let userAgent =
(parsedUrl.hostname === 'fb.me') ?
this._assholeUserAgent :
this._userAgent;
parsedUrl.hostname === "fb.me"
? this._assholeUserAgent
: this._userAgent;
// Handle header-based redirects
return this._cache[url] = new Promise((resolve)=>{
return (this._cache[url] = new Promise((resolve) => {
let backend = parsedUrl.protocol === "http:" ? http : https;
let backend =
parsedUrl.protocol === 'http:' ?
http:
https;
let request = backend.request(
{
method: "HEAD",
protocol: parsedUrl.protocol,
hostname: parsedUrl.hostname,
port: parsedUrl.port,
path: parsedUrl.path,
headers: {
"User-Agent": userAgent,
},
},
(res) => {
// console.log(res.headers);
let request = backend.request({
method: 'HEAD',
protocol: parsedUrl.protocol,
hostname: parsedUrl.hostname,
port: parsedUrl.port,
path: parsedUrl.path,
headers: {
'User-Agent': userAgent
}
}, (res)=>{
// console.log(res.headers);
if ("location" in res.headers) {
// w00t! We've got a 30x response and a redirect!
let newUrl = resolveUrl(url, res.headers.location);
if (this._verbose && url !== newUrl) {
console.log("follow: ", url, " → ", newUrl);
}
this._cache[url] = newUrl;
return resolve(this._deshortify(newUrl, breadcrumbs));
}
if ('location' in res.headers) { // w00t! We've got a 30x response and a redirect!
let newUrl = resolveUrl(url, res.headers.location);
if (this._verbose && (url !== newUrl)) {
console.log('follow: ', url, ' → ', newUrl);
}
this._cache[url] = newUrl;
return resolve(this._deshortify(newUrl, breadcrumbs));
// Giving up, looks like this was the final URL.
return resolve(this._cleanUp(url));
}
);
// Giving up, looks like this was the final URL.
return resolve(this._cleanUp(url));
});
request.on('error', ()=>{
request.on("error", () => {
// Panic and return the original url.

@@ -135,12 +145,8 @@ return resolve(this._cleanUp(url));

request.end(); // Actually send the request
});
request.end(); // Actually send the request
}));
}
// Cleans up spammy query parameters and hash bits.
_cleanUp(url) {
/// TODO
let parsedUrl = parseUrl(url, true);

@@ -153,21 +159,20 @@ let host = parsedUrl.host;

if (params) {
// console.log(url);
params.forEach((name)=>{
// console.log(url);
params.forEach((name) => {
let val = parsedUrl.query[name];
// console.log(host, ' / ', name, ' = ', val);
// console.log(host, ' / ', name, ' = ', val);
// console.log('Matches _source: ', name.match(/_source$/));
// console.log('Matches _source: ', name.match(/_source$/));
if (
(typeof val !== 'string') || // e.g. http://www.businessinsider.com/...&r=US&IR=T&IR=T
(name.match(/_source$/)) ||
(name.match(/_medium$/)) ||
(name.match(/_term$/)) ||
(name.match(/_content$/)) ||
(name.match(/_campaign$/)) ||
(name.match(/_mchannel/)) ||
(name.match(/_kwd$/)) ||
(name === 'utm_cid') ||
(name === "cm_mmc") ||
typeof val !== "string" || // e.g. http://www.businessinsider.com/...&r=US&IR=T&IR=T
name.match(/_source$/) ||
name.match(/_medium$/) ||
name.match(/_term$/) ||
name.match(/_content$/) ||
name.match(/_campaign$/) ||
name.match(/_mchannel/) ||
name.match(/_kwd$/) ||
name === "utm_cid" ||
name === "cm_mmc" ||
(name === "tag" && val === "as.rss") ||

@@ -184,14 +189,14 @@ (name === "ref" && val === "rss") ||

(name === "spref" && val === "gr") ||
(val.match(/^twitter/)) ||
(val.match(/\.twitter$/)) ||
(val === "share_btn_tw") ||
val.match(/^twitter/) ||
val.match(/\.twitter$/) ||
val === "share_btn_tw" ||
(name === "platform" && val === "hootsuite") ||
(name === "mbid" && val === "social_retweet") || // New Yorker et al
(name === "mbid" && val === "social_twitter") || // New Yorker et al
(name === "mbid" && val === "social_retweet") || // New Yorker et al
(name === "mbid" && val === "social_twitter") || // New Yorker et al
(host === "www.youtube.com" && name === "feature") ||
(host === "www.nytimes.com" && name === "smid") ||
(host === "www.nytimes.com" && name === "seid") ||
(name === "awesm") || // Appears as a logger of awesm shortener, at least in storify
(name === "CMP" && val === "twt_gu") || // Guardian.co.uk short links
(name === "CMP" && val.match(/^soc_/)) || // Guardian.co.uk short links
name === "awesm" || // Appears as a logger of awesm shortener, at least in storify
(name === "CMP" && val === "twt_gu") || // Guardian.co.uk short links
(name === "CMP" && val.match(/^soc_/)) || // Guardian.co.uk short links
(name === "CMP" && val.match(/^Share_/)) ||

@@ -204,12 +209,14 @@ (name === "ex_cid" && val === "story-twitter") ||

(name === "soc_trk" && val === "tw") ||
(name === "hootPostID") ||
(name === "a" && val === "socialmedia") || // Meetup
name === "hootPostID" ||
(name === "a" && val === "socialmedia") || // Meetup
(host.match(/medium.com$/) && name === "source") ||
(host.match(/elpais.com$/) && name === "id_externo_rsoc") ||
(host.match(/washingtonpost.com$$/) && name === "postshare") ||
(host.match(/washingtonpost.com$$/) && name === "ss_tw-bottom") ||
(val === "rss-default") ||
(name === "__twitter_impression") ||
(name === 'src' && val === "syn"))
{
(host.match(/washingtonpost.com$$/) &&
name === "ss_tw-bottom") ||
val === "rss-default" ||
name === "__twitter_impression" ||
(name === "src" && val === "syn") ||
name === "fbclid"
) {
// Noop

@@ -223,3 +230,3 @@ return;

// console.log('cleanup: ', JSON.stringify(parsedUrl.query), ' → ', JSON.stringify(cleanedParams) );
// console.log('cleanup: ', JSON.stringify(parsedUrl.query), ' → ', JSON.stringify(cleanedParams) );

@@ -232,6 +239,6 @@ // Replace query params and delete duplicated stuff

// console.log(parsedUrl);
// console.log(parsedUrl);
var cleanedUrl = formatUrl(parsedUrl);
if (this._verbose && (url !== cleanedUrl)) {
console.log('cleanup: ', url, ' → ', cleanedUrl );
if (this._verbose && url !== cleanedUrl) {
console.log("cleanup: ", url, " → ", cleanedUrl);
}

@@ -241,7 +248,5 @@ return cleanedUrl;

// Returns boolean true if the passed URL should be skipped altogether,
// false otherwise
_skipUrl(url) {
let parsedUrl = parseUrl(url, true);

@@ -252,30 +257,24 @@

return (
host === "youtu.be" || // Does not add more info
host === "spoti.fi" || // Does not add more info
host === "4sq.com" || // Does not add more info
host === "flic.kr" || // Does not add more info
host === "untp.beer" || // Does not add more info
host === "youtube.com" || // Does not add more info
host === "www.elmundo.es" || // El Mundo newspaper will only timeout and waste time
host === "www.economist.com" || // "You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake."
host === "pbs.twimg.com" || // Might trigger verbose errors if twitter is over capacity
// host === "www.linkedin.com" || // Used to redirect to login
host === "session.wikispaces.com" || // Infinite redirect loop with different URLs params each time
host.match(/twitter\.com$/) || // Infinite redirect to login
host.match(/blogspot\.[a-z]{2-3}$/) || // Will redirect to a nearby geolocated server
host.match(/facebook\.com$/) || // Will redirect to fb.com/unsupportedbrowser due to user-agent
host.match(/\.nytimes\.com$/) || // Infinite nocookies loop
host.match(/^www\.amazon\.$/) || // 405 MethodNotAllowed
url.match(/subscribe/) || // Potential paywall
url.match(/nocookie/) || // Potential paywall/login
url.match(/gdpr/) // Potential "GDPR consent" interstitial
url.length > 400 || // Just too long, maybe a REPL
host === "youtu.be" || // Does not add more info
host === "spoti.fi" || // Does not add more info
host === "4sq.com" || // Does not add more info
host === "flic.kr" || // Does not add more info
host === "untp.beer" || // Does not add more info
host === "youtube.com" || // Does not add more info
host === "www.elmundo.es" || // El Mundo newspaper will only timeout and waste time
host === "www.economist.com" || // "You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake."
host === "pbs.twimg.com" || // Might trigger verbose errors if twitter is over capacity
// host === "www.linkedin.com" || // Used to redirect to login
host === "session.wikispaces.com" || // Infinite redirect loop with different URLs params each time
host.match(/twitter\.com$/) || // Infinite redirect to login
host.match(/blogspot\.[a-z]{2-3}$/) || // Will redirect to a nearby geolocated server
host.match(/facebook\.com$/) || // Will redirect to fb.com/unsupportedbrowser due to user-agent
host.match(/\.nytimes\.com$/) || // Infinite nocookies loop
host.match(/^www\.amazon\.$/) || // 405 MethodNotAllowed
url.match(/subscribe/) || // Potential paywall
url.match(/nocookie/) || // Potential paywall/login
url.match(/gdpr/) // Potential "GDPR consent" interstitial
);
}
}
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc