
crawler-url-parser

Package Overview: 32 dependencies · 1 maintainer · 21 versions

Comparing version 2.0.4 to 2.0.5

.vscode/launch.json
```diff
 {
     // Use IntelliSense to learn about possible attributes.
     // Hover to view descriptions of existing attributes.
     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
         {
             "type": "node",
             "request": "launch",
             "name": "Launch Program",
             "program": "${workspaceFolder}\\crawler-url-parser.js"
         },
         {
             "type": "node",
             "request": "launch",
             "name": "Launch QuickStart",
-            "program": "${workspaceFolder}\\QUICKSTART.js"
+            "program": "${workspaceFolder}\\quickstart.js"
         },
         {
             "type": "node",
             "request": "launch",
             "name": "Launch Test",
             "program": "${workspaceFolder}\\node_modules\\mocha\\bin\\_mocha",
-            "args": ["--recursive"],
+            "args": [
+                "--recursive"
+            ],
             "cwd": "${workspaceRoot}"
         }
     ]
 }
```
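The "Launch Test" configuration points the debugger at mocha's internal `_mocha` script rather than the `mocha` wrapper binary, so breakpoints attach to the process that actually executes the tests. For orientation, roughly the same run can be reproduced with mocha's programmatic API; a minimal sketch, assuming mocha is installed as a devDependency and tests live in `./test` (unlike `--recursive`, this walk stays one level deep):

```js
// Rough programmatic equivalent of the "Launch Test" configuration above.
// Assumes mocha is a devDependency and test files sit directly in ./test.
const Mocha = require('mocha');
const fs = require('fs');
const path = require('path');

const mocha = new Mocha();
const testDir = path.join(__dirname, 'test');

// --recursive on the CLI also descends into nested folders; this sketch does not.
fs.readdirSync(testDir)
    .filter((file) => file.endsWith('.js'))
    .forEach((file) => mocha.addFile(path.join(testDir, file)));

mocha.run((failures) => {
    process.exitCode = failures ? 1 : 0;
});
```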
crawler-url-parser.js

```diff
 const URL = require('url');
 const psl = require('psl');
-const cleanUrl = require('url-clean');
 const cheerio = require('cheerio');

@@ -38,9 +37,8 @@
     currentUrlStr = currentUrlStr.replace(/#.*$/, '');
-    if(baseUrlStr) {
+    if (baseUrlStr) {
         baseUrlStr = baseUrlStr.replace(/^\/\//, 'http://');
         baseUrlStr = baseUrlStr.replace(/#.*$/, '');
-    }
-    else {
-        if ( ! /^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)){
+    } else {
+        if (!/^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)) {
             currentUrlStr = currentUrlStr.replace(/^(?!(?:\w+:)?\/\/)/, 'http://');

@@ -51,3 +49,3 @@
     let parsedUrl = URL.parse(currentUrlStr, true, true);
-    delete parsedUrl.hash ;
+    delete parsedUrl.hash;

@@ -59,3 +57,3 @@
     if (parsedUrl.host == null && baseUrlStr) {
-        let parsedBaseUrl = URL.parse(baseUrlStr, true,true);
+        let parsedBaseUrl = URL.parse(baseUrlStr, true, true);
         delete parsedUrl.hash;

@@ -151,4 +149,4 @@ ret.baseurl = URL.format(parsedBaseUrl);
     let pageurl_path = pageurl.path ? pageurl.path : "";
-    linkurl_path = linkurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/');
-    pageurl_path = pageurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/');
+    linkurl_path = linkurl_path.replace(/\/index\.[a-z]+$/, '/').replace(/\/default\.[a-z]+$/, '/');
+    pageurl_path = pageurl_path.replace(/\/index\.[a-z]+$/, '/').replace(/\/default\.[a-z]+$/, '/');

@@ -155,0 +153,0 @@ let linkurl_parts = linkurl_path.split("/").filter(function (elem, index, array) {
```
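Read together, these hunks sketch the package's URL normalization: fragments are stripped, scheme-less absolute URLs are defaulted to `http://`, and `index.*` / `default.*` documents are collapsed into their directory. Below is a standalone sketch combining those exact regexes; in the source they live in separate routines, and the `normalize` helper name is illustrative, not a package export:

```js
// Standalone illustration of the normalization regexes from the diff above.
// `normalize` is a hypothetical helper name, not part of the package's API.
function normalize(currentUrlStr) {
    // Drop the fragment: a crawler treats page#a and page#b as the same page.
    currentUrlStr = currentUrlStr.replace(/#.*$/, '');
    // If the input is neither a relative path nor already scheme-qualified,
    // default it to http://.
    if (!/^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)) {
        currentUrlStr = currentUrlStr.replace(/^(?!(?:\w+:)?\/\/)/, 'http://');
    }
    // Collapse directory-index documents into the directory itself.
    return currentUrlStr
        .replace(/\/index\.[a-z]+$/, '/')
        .replace(/\/default\.[a-z]+$/, '/');
}

console.log(normalize('example.com/blog/index.html#top'));
// -> http://example.com/blog/
```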

package.json

```diff
 {
     "name": "crawler-url-parser",
-    "version": "2.0.4",
+    "version": "2.0.5",
     "description": "An `URL` parser for crawling purpose.",

@@ -39,4 +39,3 @@ "main": "crawler-url-parser.js",
     "psl": "^1.1.20",
-    "url": "^0.11.0",
-    "url-clean": "1.0.2"
+    "url": "^0.11.0"
 },

@@ -43,0 +42,0 @@ "devDependencies": {
```

README.md

```diff
 # crawler-url-parser
-> **An URL parser for crawling purpose**
+![logo](https://assets.gitlab-static.net/uploads/-/system/project/avatar/4809017/crawler-url-parser.png)
+**An URL parser for crawling purpose**

@@ -149,6 +148,24 @@ [![version](https://img.shields.io/npm/v/crawler-url-parser.svg)](https://www.npmjs.org/package/crawler-url-parser)
 ## Test
-`mocha` or `npm test`
-> more than 200 unit test cases.
-> check test folder and QUICKSTART.js for extra usage.
+* `mocha` or `npm test`
+* More than 200 unit test cases.
+* Check [test folder](https://gitlab.com/autokent/crawler-url-parser/tree/master/test) and [quickstart.js](https://gitlab.com/autokent/crawler-url-parser/blob/master/quickstart.js) for extra usages.
+## Support
+I use this package actively myself, so it has my top priority. You can chat on WhatsApp about any infos, ideas and suggestions.
+[![WhatsApp](https://img.shields.io/badge/style-chat-green.svg?style=flat&label=whatsapp)](https://api.whatsapp.com/send?phone=905063042480&text=Hi%2C%0ALet%27s%20talk%20about%20crawler-url-parser)
+### Submitting an Issue
+If you find a bug or a mistake, you can help by submitting an issue to [GitLab Repository](https://gitlab.com/autokent/crawler-url-parser/issues)
+### Creating a Merge Request
+GitLab calls it merge request instead of pull request.
+* [A Guide for First-Timers](https://about.gitlab.com/2016/06/16/fearless-contribution-a-guide-for-first-timers/)
+* [How to create a merge request](https://docs.gitlab.com/ee/gitlab-basics/add-merge-request.html)
+* Check [Contributing Guide](https://gitlab.com/autokent/crawler-url-parser/blob/master/CONTRIBUTING.md)
+## License
+[MIT licensed](https://gitlab.com/autokent/crawler-url-parser/blob/master/LICENSE) and all it's dependencies are MIT or BSD licensed.
```
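The quickstart file referenced in the Test section above carries fuller examples; as quick orientation, a hypothetical usage sketch follows. The `parse(url, baseUrl)` call shape and the `baseurl` result field are inferred from the source diff above and should be verified against the README:

```js
// Hypothetical usage sketch; verify export and field names against the
// package README and quickstart.js before relying on them.
const cup = require('crawler-url-parser');

// Resolve a relative link found on a page against that page's URL.
const result = cup.parse('../list.html#section', 'http://question.com/category/subcategory/');
console.log(result.baseurl); // field populated in the source diff above
```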

