Socket
Socket
Sign in · Demo · Install

crawler-url-parser

Package Overview
Dependencies
Maintainers
1
Versions
21
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler-url-parser - npm Package Compare versions

Comparing version 2.0.2 to 2.0.4

75

crawler-url-parser.js

@@ -6,12 +6,2 @@ const URL = require('url');

const normalize_options = {
removeDirectoryIndex: false,
removeTrailingSlash: false,
stripWWW: false,
stripFragment: true,
normalizeHttps: false,
normalizeProtocol: true,
removeQueryParameters: [/^utm_\w+/i, 'ref']
}
const result_normalize_options = {

@@ -27,9 +17,4 @@ removeDirectoryIndex: true,

/**
 * Report whether a string contains any character outside the set accepted
 * in a URL by this module.
 *
 * Accepted characters: ASCII letters and digits (matched case-insensitively)
 * plus  : / ? # [ ] @ ! $ & ' ( ) * + , ; = . - _ ~ %
 *
 * @param {string} str - Candidate URL text to inspect.
 * @returns {boolean} `true` when at least one character is outside the
 *   accepted set, `false` otherwise (including for the empty string).
 */
function _has_illegal_chars(str) {
    // A single test is sufficient: the negated character class matches the
    // first character that is not in the accepted set.
    return /[^a-z0-9\:\/\?\#\[\]\@\!\$\&\'\(\)\*\+\,\;\=\.\-\_\~\%]/i.test(str);
}

@@ -41,3 +26,2 @@

baseurl: null,
normalized: null,
protocol: null,

@@ -55,27 +39,27 @@ host: null,

let currentNormUrlStr = cleanUrl(currentUrlStr, normalize_options);
let normalizedBaseUrl = null;
if (!currentNormUrlStr) {
normalizedBaseUrl = cleanUrl(baseUrlStr, normalize_options);
if (normalizedBaseUrl) {
currentUrlStr = normalizedBaseUrl;
currentNormUrlStr = normalizedBaseUrl;
normalizedBaseUrl = null;
baseUrlStr = null;
} else {
return null;
currentUrlStr = currentUrlStr.replace(/^\/\//, 'http://');
currentUrlStr = currentUrlStr.replace(/#.*$/, '');
if(baseUrlStr) {
baseUrlStr = baseUrlStr.replace(/^\/\//, 'http://');
baseUrlStr = baseUrlStr.replace(/#.*$/, '');
}
else {
if ( ! /^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)){
currentUrlStr = currentUrlStr.replace(/^(?!(?:\w+:)?\/\/)/, 'http://');
}
}
let parsedUrl = URL.parse(currentNormUrlStr, true, true);
let parsedUrl = URL.parse(currentUrlStr, true, true);
delete parsedUrl.hash ;
if (parsedUrl.protocol && parsedUrl.protocol != 'http:' && parsedUrl.protocol != 'https:') return null;
//current url is relative like "abc", "/abc" or "../abc"
if (parsedUrl.host == null && baseUrlStr) {
let parsedBaseUrl = URL.parse(baseUrlStr, true,true);
delete parsedUrl.hash;
ret.baseurl = URL.format(parsedBaseUrl);
if (parsedUrl.host == null && baseUrlStr) {
normalizedBaseUrl = cleanUrl(baseUrlStr, normalize_options);
ret.baseurl = normalizedBaseUrl;
let parsedBaseUrl = URL.parse(normalizedBaseUrl, normalize_options);
let absoluteUrl = URL.parse(URL.resolve(parsedBaseUrl, parsedUrl));

@@ -85,7 +69,6 @@ currentUrlStr = URL.format(absoluteUrl);

ret.url = currentUrlStr;
ret.normalized = cleanUrl(currentUrlStr, result_normalize_options);
parsedUrl = URL.parse(currentUrlStr, true, true);
delete parsedUrl.hash;
parsedUrl = URL.parse(ret.normalized, true, true);
ret.url = URL.format(parsedUrl);
ret.protocol = parsedUrl.protocol;

@@ -103,3 +86,2 @@ ret.host = parsedUrl.host;

ret.querycount = parsedUrl.search ? parsedUrl.search.split("=").length - 1 : 0;
//ret.type = normalizedBaseUrl ? gettype(ret.normalized,normalizedBaseUrl):"none";

@@ -117,3 +99,3 @@ return ret;

baseUrl = embedBaseUrl ? embedBaseUrl : baseUrl;
let baseUrlStr = baseUrl ? baseUrl.normalized : null;
let baseUrlStr = baseUrl ? baseUrl.url : null;

@@ -126,8 +108,7 @@ $('a').each(function (i, el) {

//let currentUrl = embedBaseUrl == null ? parse(href,baseUrl.normalized) : parse(href,embedBaseUrl.normalized);
let currentUrl = parse(href, baseUrlStr);
if (currentUrl && currentUrl.normalized) {
if (urlMap.has(currentUrl.normalized)) {
let tmpUrl = urlMap.get(currentUrl.normalized);
if (currentUrl && currentUrl.url) {
if (urlMap.has(currentUrl.url)) {
let tmpUrl = urlMap.get(currentUrl.url);
if (!tmpUrl.text.includes(text)) {

@@ -139,3 +120,3 @@ tmpUrl.text = `${tmpUrl.text} ${text}`;

currentUrl.baseurl = baseUrlStr;
urlMap.set(currentUrl.normalized, currentUrl);
urlMap.set(currentUrl.url, currentUrl);
}

@@ -156,3 +137,3 @@ }

return {
url: el.normalized,
url: el.url,
text: el.text,

@@ -177,2 +158,4 @@ type: el.type

let pageurl_path = pageurl.path ? pageurl.path : "";
linkurl_path = linkurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/');
pageurl_path = pageurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/');

@@ -179,0 +162,0 @@ let linkurl_parts = linkurl_path.split("/").filter(function (elem, index, array) {

{
"name": "crawler-url-parser",
"version": "2.0.2",
"version": "2.0.4",
"description": "An `URL` parser for crawling purpose.",

@@ -5,0 +5,0 @@ "main": "crawler-url-parser.js",

@@ -9,3 +9,3 @@ const assert = require('assert');

let res = cup.parse("/aaa");
assert.equal(res.normalized, "/aaa");
assert.equal(res.url, "/aaa");
});

@@ -15,3 +15,3 @@

let res = cup.parse("/aaa#hhh");
assert.equal(res.normalized, "/aaa");
assert.equal(res.url, "/aaa");
});

@@ -21,3 +21,3 @@

let res = cup.parse("/aaa?q=query#hhh");
assert.equal(res.normalized, "/aaa?q=query");
assert.equal(res.url, "/aaa?q=query");
});

@@ -27,3 +27,3 @@

let res = cup.parse("/aaa?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "/aaa?q1=query1&q2=query2");
assert.equal(res.url, "/aaa?q1=query1&q2=query2");
});

@@ -33,3 +33,3 @@

let res = cup.parse("/aaa/bbb");
assert.equal(res.normalized, "/aaa/bbb");
assert.equal(res.url, "/aaa/bbb");
});

@@ -39,3 +39,3 @@

let res = cup.parse("/aaa/bbb#hhh");
assert.equal(res.normalized, "/aaa/bbb");
assert.equal(res.url, "/aaa/bbb");
});

@@ -45,3 +45,3 @@

let res = cup.parse("/aaa/bbb?q=query#hhh");
assert.equal(res.normalized, "/aaa/bbb?q=query");
assert.equal(res.url, "/aaa/bbb?q=query");
});

@@ -51,3 +51,3 @@

let res = cup.parse("/aaa/bbb?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "/aaa/bbb?q1=query1&q2=query2");
assert.equal(res.url, "/aaa/bbb?q1=query1&q2=query2");
});

@@ -57,3 +57,3 @@

let res = cup.parse("/aaa/");
assert.equal(res.normalized, "/aaa");
assert.equal(res.url, "/aaa/");
});

@@ -63,3 +63,3 @@

let res = cup.parse("/aaa/#hhh");
assert.equal(res.normalized, "/aaa");
assert.equal(res.url, "/aaa/");
});

@@ -69,3 +69,3 @@

let res = cup.parse("/aaa/bbb/");
assert.equal(res.normalized, "/aaa/bbb");
assert.equal(res.url, "/aaa/bbb/");
});

@@ -75,3 +75,3 @@

let res = cup.parse("/aaa/bbb/#hhh");
assert.equal(res.normalized, "/aaa/bbb");
assert.equal(res.url, "/aaa/bbb/");
});

@@ -85,3 +85,3 @@

let res = cup.parse("aaa");
assert.equal(res.normalized, "aaa");
assert.equal(res.url, "http://aaa/");
});

@@ -91,3 +91,3 @@

let res = cup.parse("aaa#hhh");
assert.equal(res.normalized, "aaa");
assert.equal(res.url, "http://aaa/");
});

@@ -97,3 +97,3 @@

let res = cup.parse("aaa?q=query#hhh");
assert.equal(res.normalized, "aaa?q=query");
assert.equal(res.url, "http://aaa/?q=query");
});

@@ -103,3 +103,3 @@

let res = cup.parse("aaa?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "aaa?q1=query1&q2=query2");
assert.equal(res.url, "http://aaa/?q1=query1&q2=query2");
});

@@ -109,3 +109,3 @@

let res = cup.parse("aaa/bbb");
assert.equal(res.normalized, "aaa/bbb");
assert.equal(res.url, "http://aaa/bbb");
});

@@ -115,3 +115,3 @@

let res = cup.parse("aaa/bbb#hhh");
assert.equal(res.normalized, "aaa/bbb");
assert.equal(res.url, "http://aaa/bbb");
});

@@ -121,3 +121,3 @@

let res = cup.parse("aaa/bbb?q=query#hhh");
assert.equal(res.normalized, "aaa/bbb?q=query");
assert.equal(res.url, "http://aaa/bbb?q=query");
});

@@ -127,3 +127,3 @@

let res = cup.parse("aaa/bbb?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "aaa/bbb?q1=query1&q2=query2");
assert.equal(res.url, "http://aaa/bbb?q1=query1&q2=query2");
});

@@ -133,3 +133,3 @@

let res = cup.parse("aaa/");
assert.equal(res.normalized, "aaa");
assert.equal(res.url, "http://aaa/");
});

@@ -139,3 +139,3 @@

let res = cup.parse("aaa/#hhh");
assert.equal(res.normalized, "aaa");
assert.equal(res.url, "http://aaa/");
});

@@ -145,3 +145,3 @@

let res = cup.parse("aaa/bbb/");
assert.equal(res.normalized, "aaa/bbb");
assert.equal(res.url, "http://aaa/bbb/");
});

@@ -151,3 +151,3 @@

let res = cup.parse("aaa/bbb/#hhh");
assert.equal(res.normalized, "aaa/bbb");
assert.equal(res.url, "http://aaa/bbb/");
});

@@ -161,3 +161,3 @@

let res = cup.parse("../aaa");
assert.equal(res.normalized, "../aaa");
assert.equal(res.url, "../aaa");
});

@@ -167,3 +167,3 @@

let res = cup.parse("../aaa#hhh");
assert.equal(res.normalized, "../aaa");
assert.equal(res.url, "../aaa");
});

@@ -173,3 +173,3 @@

let res = cup.parse("../aaa?q=query#hhh");
assert.equal(res.normalized, "../aaa?q=query");
assert.equal(res.url, "../aaa?q=query");
});

@@ -179,3 +179,3 @@

let res = cup.parse("../aaa?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "../aaa?q1=query1&q2=query2");
assert.equal(res.url, "../aaa?q1=query1&q2=query2");
});

@@ -185,3 +185,3 @@

let res = cup.parse("../aaa/bbb");
assert.equal(res.normalized, "../aaa/bbb");
assert.equal(res.url, "../aaa/bbb");
});

@@ -191,3 +191,3 @@

let res = cup.parse("../aaa/bbb#hhh");
assert.equal(res.normalized, "../aaa/bbb");
assert.equal(res.url, "../aaa/bbb");
});

@@ -197,3 +197,3 @@

let res = cup.parse("../aaa/bbb?q=query#hhh");
assert.equal(res.normalized, "../aaa/bbb?q=query");
assert.equal(res.url, "../aaa/bbb?q=query");
});

@@ -203,3 +203,3 @@

let res = cup.parse("../aaa/bbb?q1=query1&q2=query2#hhh");
assert.equal(res.normalized, "../aaa/bbb?q1=query1&q2=query2");
assert.equal(res.url, "../aaa/bbb?q1=query1&q2=query2");
});

@@ -209,3 +209,3 @@

let res = cup.parse("../aaa/");
assert.equal(res.normalized, "../aaa");
assert.equal(res.url, "../aaa/");
});

@@ -215,3 +215,3 @@

let res = cup.parse("../aaa/#hhh");
assert.equal(res.normalized, "../aaa");
assert.equal(res.url, "../aaa/");
});

@@ -221,3 +221,3 @@

let res = cup.parse("../aaa/bbb/");
assert.equal(res.normalized, "../aaa/bbb");
assert.equal(res.url, "../aaa/bbb/");
});

@@ -227,3 +227,3 @@

let res = cup.parse("../aaa/bbb/#hhh");
assert.equal(res.normalized, "../aaa/bbb");
assert.equal(res.url, "../aaa/bbb/");
});

@@ -236,3 +236,3 @@

let res = cup.parse("http://www.google.com");
assert.equal(res.normalized, "http://google.com");
assert.equal(res.url, "http://www.google.com/");
});

@@ -242,3 +242,3 @@

let res = cup.parse("http://mail.google.com");
assert.equal(res.normalized, "http://mail.google.com");
assert.equal(res.url, "http://mail.google.com/");
});

@@ -248,3 +248,3 @@

let res = cup.parse("http://www.google.com/aaa");
assert.equal(res.normalized, "http://google.com/aaa");
assert.equal(res.url, "http://www.google.com/aaa");
});

@@ -254,3 +254,3 @@

let res = cup.parse("http://mail.google.com/aaa");
assert.equal(res.normalized, "http://mail.google.com/aaa");
assert.equal(res.url, "http://mail.google.com/aaa");
});

@@ -260,3 +260,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query");
assert.equal(res.normalized, "http://google.com/aaa?q=query");
assert.equal(res.url, "http://www.google.com/aaa?q=query");
});

@@ -266,3 +266,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query");
assert.equal(res.url, "http://mail.google.com/aaa?q=query");
});

@@ -272,3 +272,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.normalized, "http://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -278,3 +278,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -286,3 +286,3 @@ });

let res = cup.parse("https://www.google.com");
assert.equal(res.normalized, "https://google.com");
assert.equal(res.url, "https://www.google.com/");
});

@@ -292,3 +292,3 @@

let res = cup.parse("https://mail.google.com");
assert.equal(res.normalized, "https://mail.google.com");
assert.equal(res.url, "https://mail.google.com/");
});

@@ -298,3 +298,3 @@

let res = cup.parse("https://www.google.com/aaa");
assert.equal(res.normalized, "https://google.com/aaa");
assert.equal(res.url, "https://www.google.com/aaa");
});

@@ -304,3 +304,3 @@

let res = cup.parse("https://mail.google.com/aaa");
assert.equal(res.normalized, "https://mail.google.com/aaa");
assert.equal(res.url, "https://mail.google.com/aaa");
});

@@ -310,3 +310,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query");
assert.equal(res.normalized, "https://google.com/aaa?q=query");
assert.equal(res.url, "https://www.google.com/aaa?q=query");
});

@@ -316,3 +316,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query");
assert.equal(res.url, "https://mail.google.com/aaa?q=query");
});

@@ -322,3 +322,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.normalized, "https://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -328,3 +328,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -388,3 +388,3 @@ });

let res = cup.parse("http://www.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com");
assert.equal(res.url, "http://www.google.com/");
});

@@ -394,3 +394,3 @@

let res = cup.parse("http://mail.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com");
assert.equal(res.url, "http://mail.google.com/");
});

@@ -400,3 +400,3 @@

let res = cup.parse("http://www.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa");
assert.equal(res.url, "http://www.google.com/aaa");
});

@@ -406,3 +406,3 @@

let res = cup.parse("http://mail.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa");
assert.equal(res.url, "http://mail.google.com/aaa");
});

@@ -412,3 +412,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa?q=query");
assert.equal(res.url, "http://www.google.com/aaa?q=query");
});

@@ -418,3 +418,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query");
assert.equal(res.url, "http://mail.google.com/aaa?q=query");
});

@@ -424,3 +424,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -430,3 +430,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -438,3 +438,3 @@ });

let res = cup.parse("https://www.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "https://google.com");
assert.equal(res.url, "https://www.google.com/");
});

@@ -444,3 +444,3 @@

let res = cup.parse("https://mail.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "https://mail.google.com");
assert.equal(res.url, "https://mail.google.com/");
});

@@ -450,3 +450,3 @@

let res = cup.parse("https://www.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "https://google.com/aaa");
assert.equal(res.url, "https://www.google.com/aaa");
});

@@ -456,3 +456,3 @@

let res = cup.parse("https://mail.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "https://mail.google.com/aaa");
assert.equal(res.url, "https://mail.google.com/aaa");
});

@@ -462,3 +462,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "https://google.com/aaa?q=query");
assert.equal(res.url, "https://www.google.com/aaa?q=query");
});

@@ -468,3 +468,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query");
assert.equal(res.url, "https://mail.google.com/aaa?q=query");
});

@@ -474,3 +474,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "https://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -480,3 +480,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -542,3 +542,3 @@ });

let res = cup.parse("http://www.google.com", "#abcd");
assert.equal(res.normalized, "http://google.com");
assert.equal(res.url, "http://www.google.com/");
});

@@ -548,3 +548,3 @@

let res = cup.parse("http://mail.google.com", "#abcd");
assert.equal(res.normalized, "http://mail.google.com");
assert.equal(res.url, "http://mail.google.com/");
});

@@ -554,3 +554,3 @@

let res = cup.parse("http://www.google.com/aaa", "#abcd");
assert.equal(res.normalized, "http://google.com/aaa");
assert.equal(res.url, "http://www.google.com/aaa");
});

@@ -560,3 +560,3 @@

let res = cup.parse("http://mail.google.com/aaa", "#abcd");
assert.equal(res.normalized, "http://mail.google.com/aaa");
assert.equal(res.url, "http://mail.google.com/aaa");
});

@@ -566,3 +566,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query", "#abcd");
assert.equal(res.normalized, "http://google.com/aaa?q=query");
assert.equal(res.url, "http://www.google.com/aaa?q=query");
});

@@ -572,3 +572,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query", "#abcd");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query");
assert.equal(res.url, "http://mail.google.com/aaa?q=query");
});

@@ -578,3 +578,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query?q1=query1&q2=query2", "#abcd");
assert.equal(res.normalized, "http://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -584,3 +584,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query?q1=query1&q2=query2", "#abcd");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -593,3 +593,3 @@ });

let res = cup.parse("https://www.google.com", "#abcd");
assert.equal(res.normalized, "https://google.com");
assert.equal(res.url, "https://www.google.com/");
});

@@ -599,3 +599,3 @@

let res = cup.parse("https://mail.google.com", "#abcd");
assert.equal(res.normalized, "https://mail.google.com");
assert.equal(res.url, "https://mail.google.com/");
});

@@ -605,3 +605,3 @@

let res = cup.parse("https://www.google.com/aaa", "#abcd");
assert.equal(res.normalized, "https://google.com/aaa");
assert.equal(res.url, "https://www.google.com/aaa");
});

@@ -611,3 +611,3 @@

let res = cup.parse("https://mail.google.com/aaa", "#abcd");
assert.equal(res.normalized, "https://mail.google.com/aaa");
assert.equal(res.url, "https://mail.google.com/aaa");
});

@@ -617,3 +617,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query", "#abcd");
assert.equal(res.normalized, "https://google.com/aaa?q=query");
assert.equal(res.url, "https://www.google.com/aaa?q=query");
});

@@ -623,3 +623,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query", "#abcd");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query");
assert.equal(res.url, "https://mail.google.com/aaa?q=query");
});

@@ -629,3 +629,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query?q1=query1&q2=query2", "#abcd");
assert.equal(res.normalized, "https://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -635,3 +635,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query?q1=query1&q2=query2", "#abcd");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -687,3 +687,3 @@ });

let res = cup.parse("http://www.google.com", "");
assert.equal(res.normalized, "http://google.com");
assert.equal(res.url, "http://www.google.com/");
});

@@ -693,3 +693,3 @@

let res = cup.parse("http://mail.google.com", "");
assert.equal(res.normalized, "http://mail.google.com");
assert.equal(res.url, "http://mail.google.com/");
});

@@ -699,3 +699,3 @@

let res = cup.parse("http://www.google.com/aaa", "");
assert.equal(res.normalized, "http://google.com/aaa");
assert.equal(res.url, "http://www.google.com/aaa");
});

@@ -705,3 +705,3 @@

let res = cup.parse("http://mail.google.com/aaa", "");
assert.equal(res.normalized, "http://mail.google.com/aaa");
assert.equal(res.url, "http://mail.google.com/aaa");
});

@@ -711,3 +711,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query", "");
assert.equal(res.normalized, "http://google.com/aaa?q=query");
assert.equal(res.url, "http://www.google.com/aaa?q=query");
});

@@ -717,3 +717,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query", "");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query");
assert.equal(res.url, "http://mail.google.com/aaa?q=query");
});

@@ -723,3 +723,3 @@

let res = cup.parse("http://www.google.com/aaa?q=query?q1=query1&q2=query2", "");
assert.equal(res.normalized, "http://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -729,3 +729,3 @@

let res = cup.parse("http://mail.google.com/aaa?q=query?q1=query1&q2=query2", "");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -738,3 +738,3 @@ });

let res = cup.parse("https://www.google.com", "");
assert.equal(res.normalized, "https://google.com");
assert.equal(res.url, "https://www.google.com/");
});

@@ -744,3 +744,3 @@

let res = cup.parse("https://mail.google.com", "");
assert.equal(res.normalized, "https://mail.google.com");
assert.equal(res.url, "https://mail.google.com/");
});

@@ -750,3 +750,3 @@

let res = cup.parse("https://www.google.com/aaa", "");
assert.equal(res.normalized, "https://google.com/aaa");
assert.equal(res.url, "https://www.google.com/aaa");
});

@@ -756,3 +756,3 @@

let res = cup.parse("https://mail.google.com/aaa", "");
assert.equal(res.normalized, "https://mail.google.com/aaa");
assert.equal(res.url, "https://mail.google.com/aaa");
});

@@ -762,3 +762,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query", "");
assert.equal(res.normalized, "https://google.com/aaa?q=query");
assert.equal(res.url, "https://www.google.com/aaa?q=query");
});

@@ -768,3 +768,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query", "");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query");
assert.equal(res.url, "https://mail.google.com/aaa?q=query");
});

@@ -774,3 +774,3 @@

let res = cup.parse("https://www.google.com/aaa?q=query?q1=query1&q2=query2", "");
assert.equal(res.normalized, "https://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -780,3 +780,3 @@

let res = cup.parse("https://mail.google.com/aaa?q=query?q1=query1&q2=query2", "");
assert.equal(res.normalized, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "https://mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -832,3 +832,3 @@ });

let res = cup.parse("www.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com");
assert.equal(res.url, "http://www.facebook.com/www.google.com");
});

@@ -838,3 +838,3 @@

let res = cup.parse("mail.google.com", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com");
assert.equal(res.url, "http://www.facebook.com/mail.google.com");
});

@@ -844,3 +844,3 @@

let res = cup.parse("www.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa");
assert.equal(res.url, "http://www.facebook.com/www.google.com/aaa");
});

@@ -850,3 +850,3 @@

let res = cup.parse("mail.google.com/aaa", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa");
assert.equal(res.url, "http://www.facebook.com/mail.google.com/aaa");
});

@@ -856,3 +856,3 @@

let res = cup.parse("www.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa?q=query");
assert.equal(res.url, "http://www.facebook.com/www.google.com/aaa?q=query");
});

@@ -862,3 +862,3 @@

let res = cup.parse("mail.google.com/aaa?q=query", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query");
assert.equal(res.url, "http://www.facebook.com/mail.google.com/aaa?q=query");
});

@@ -868,3 +868,3 @@

let res = cup.parse("www.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "http://google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.facebook.com/www.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -874,3 +874,3 @@

let res = cup.parse("mail.google.com/aaa?q=query?q1=query1&q2=query2", "http://www.facebook.com");
assert.equal(res.normalized, "http://mail.google.com/aaa?q=query?q1=query1&q2=query2");
assert.equal(res.url, "http://www.facebook.com/mail.google.com/aaa?q=query?q1=query1&q2=query2");
});

@@ -887,3 +887,3 @@ });

let res = cup.parse("www.stackoverflow.com/questions.exe");
assert.equal(res.normalized, "http://stackoverflow.com/questions.exe");
assert.equal(res.url, "http://www.stackoverflow.com/questions.exe");
});

@@ -893,3 +893,3 @@

let res = cup.parse("//www.stackoverflow.com/questions.exe");
assert.equal(res.normalized, "http://stackoverflow.com/questions.exe");
assert.equal(res.url, "http://www.stackoverflow.com/questions.exe");
});

@@ -903,3 +903,3 @@ });

//http://www.stackoverflow.com/ddd check parse-test-html-01.html
assert.equal(res.normalized, "http://stackoverflow.com/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/ddd");
});

@@ -910,3 +910,3 @@

//http://www.stackoverflow.com/aaa/bbb/ddd check parse-test-html-01.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/bbb/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/bbb/ddd");
});

@@ -918,3 +918,3 @@

//http://www.stackoverflow.com/aaa/bbb/ddd check parse-test-html-01.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/bbb/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/bbb/ddd");
});

@@ -926,3 +926,3 @@

//http://www.stackoverflow.com/aaa/ddd check parse-test-html-01.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/ddd");
});

@@ -934,3 +934,3 @@

//http://www.stackoverflow.com/ddd check parse-test-html-02.html
assert.equal(res.normalized, "http://stackoverflow.com/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/ddd");
});

@@ -941,3 +941,3 @@

//http://www.stackoverflow.com/aaa/bbb/ccc/ddd check parse-test-html-02.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/bbb/ccc/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/bbb/ccc/ddd");
});

@@ -949,3 +949,3 @@

//http://www.stackoverflow.com/aaa/bbb/ccc/ddd check parse-test-html-02.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/bbb/ccc/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/bbb/ccc/ddd");
});

@@ -957,5 +957,5 @@

//http://www.stackoverflow.com/aaa/bbb/ddd check parse-test-html-02.html
assert.equal(res.normalized, "http://stackoverflow.com/aaa/bbb/ddd");
assert.equal(res.url, "http://www.stackoverflow.com/aaa/bbb/ddd");
});
});

@@ -6,2 +6,25 @@ const assert = require('assert');

// NOTE(review): the title says "sublevel" but the asserted value is "uplevel" —
// confirm which of the two is intended.
it('should gettype sublevel urls', function() {
    let res = cup.gettype("//sub.domain.com/aaa/bbb/","//sub.domain.com/aaa/bbb/ccc");
    assert.equal(res,"uplevel");
});
// NOTE(review): the title says "uplevel" but the asserted value is "sublevel" —
// confirm which of the two is intended.
it('should gettype uplevel urls', function() {
    let res = cup.gettype("//sub.domain.com/aaa/bbb/ccc/ddd","//sub.domain.com/aaa/bbb/ccc");
    assert.equal(res,"sublevel");
});
// Both URLs share the same host and parent path and differ only in the last segment.
it('should gettype samelevel urls', function() {
    let res = cup.gettype("//sub.domain.com/aaa/bbb/eee","//sub.domain.com/aaa/bbb/ccc");
    assert.equal(res,"samelevel");
});
// NOTE(review): the title says "invalid urls", yet both inputs are well-formed
// and differ by domain; the assertion checks for "external".
it('should handle invalid urls', function() {
    let res = cup.gettype("//sub.domain.com/aaa/bbb/eee","//sub.anotherdomain.com/aaa/bbb/ccc");
    assert.equal(res,"external");
});
});
describe('gettype url without protocol as samelevel, sublevel, uplevel', function() {
it('should gettype sublevel urls', function() {
let res = cup.gettype("sub.domain.com/aaa/bbb/","sub.domain.com/aaa/bbb/ccc");

@@ -8,0 +31,0 @@ assert.equal(res,"uplevel");

@@ -13,3 +13,3 @@ const assert = require('assert');

let result = cup.extract(htmlString,"http://journals.tubitak.gov.tr/");
//todo ///
let suplevelArr = result.filter((el, index, arr) => el.type == "sublevel");

@@ -22,7 +22,7 @@ let uplevelArr = result.filter((el, index, arr) => el.type == "uplevel");

let externalArr = result.filter((el, index, arr) => el.type == "external");
assert.equal(result.length,33);
assert.equal(suplevelArr.length+uplevelArr.length+samelevelArr.length+internalArr.length+subdomainArr.length+updomainArr.length+externalArr.length,33);
assert.equal(result.length,34);
assert.equal(suplevelArr.length+uplevelArr.length+samelevelArr.length+internalArr.length+subdomainArr.length+updomainArr.length+externalArr.length,34);
assert.equal(suplevelArr.length,12);
assert.equal(uplevelArr.length,0);
assert.equal(samelevelArr.length,0);
assert.equal(samelevelArr.length,1);
assert.equal(internalArr.length,13);

@@ -29,0 +29,0 @@ assert.equal(subdomainArr.length,2);

@@ -11,3 +11,2 @@ const assert = require('assert');

assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.path,"/aaa/bbb/ddd");

@@ -25,4 +24,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.question.stackoverflow.com");
assert.equal(result.path,"/aaa/bbb/ddd");

@@ -32,3 +30,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,"question");
assert.equal(result.subdomain,"www.question");
assert.equal(result.url,"http://www.question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");

@@ -41,9 +39,8 @@ });

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.question.stackoverflow.com");
assert.equal(result.path,"/aaa/bbb/ddd");
assert.equal(result.protocol,"http:");
assert.equal(result.querycount,2);
assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,"question");
assert.equal(result.search,"?q1=query1&q2=query2/");
assert.equal(result.subdomain,"www.question");
assert.equal(result.url,"http://www.question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2/");

@@ -56,9 +53,8 @@ });

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"https://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.question.stackoverflow.com");
assert.equal(result.path,"/aaa/bbb/ddd");
assert.equal(result.protocol,"https:");
assert.equal(result.querycount,2);
assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,"question");
assert.equal(result.search,"?q1=query1&q2=query2/");
assert.equal(result.subdomain,"www.question");
assert.equal(result.url,"https://www.question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2/");

@@ -76,3 +72,2 @@ });

assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.path,"/aaa/bbb/ddd");

@@ -90,4 +85,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.question.stackoverflow.com");
assert.equal(result.path,"/aaa/bbb/ddd");

@@ -97,3 +91,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,"question");
assert.equal(result.subdomain,"www.question");
assert.equal(result.url,"http://www.question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");

@@ -106,4 +100,3 @@ });

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"stackoverflow.com");
assert.equal(result.normalized,"http://stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.stackoverflow.com");
assert.equal(result.path,"/aaa/bbb/ddd");

@@ -113,3 +106,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,null);
assert.equal(result.subdomain,"www");
assert.equal(result.url,"http://www.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");

@@ -122,4 +115,3 @@ });

assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"stackoverflow.com");
assert.equal(result.normalized,"http://stackoverflow.com/aaa/ddd?q1=query1&q2=query2");
assert.equal(result.host,"www.stackoverflow.com");
assert.equal(result.path,"/aaa/ddd");

@@ -129,3 +121,3 @@ assert.equal(result.protocol,"http:");

assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,null);
assert.equal(result.subdomain,"www");
assert.equal(result.url,"http://www.stackoverflow.com/aaa/ddd?q1=query1&q2=query2");

@@ -132,0 +124,0 @@ });

@@ -12,3 +12,3 @@ const assert = require('assert');

let res = cup.parse("#start-of-content", url);
assert.equal(res.normalized, "https://github.com/Microsoft");
assert.equal(res.url, "https://github.com/Microsoft");

@@ -15,0 +15,0 @@ });

Sorry, the diff of this file is not supported yet

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc