crawler-url-parser
Advanced tools
Comparing version 2.0.4 to 2.0.5
{ | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch Program", | ||
"program": "${workspaceFolder}\\crawler-url-parser.js" | ||
}, | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch QuickStart", | ||
"program": "${workspaceFolder}\\QUICKSTART.js" | ||
}, | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch Test", | ||
"program": "${workspaceFolder}\\node_modules\\mocha\\bin\\_mocha", | ||
"args": ["--recursive"], | ||
"cwd": "${workspaceRoot}" | ||
} | ||
] | ||
} | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch Program", | ||
"program": "${workspaceFolder}\\crawler-url-parser.js" | ||
}, | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch QuickStart", | ||
"program": "${workspaceFolder}\\quickstart.js" | ||
}, | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch Test", | ||
"program": "${workspaceFolder}\\node_modules\\mocha\\bin\\_mocha", | ||
"args": [ | ||
"--recursive" | ||
], | ||
"cwd": "${workspaceRoot}" | ||
} | ||
] | ||
} |
const URL = require('url'); | ||
const psl = require('psl'); | ||
const cleanUrl = require('url-clean'); | ||
const cheerio = require('cheerio'); | ||
@@ -38,9 +37,8 @@ | ||
currentUrlStr = currentUrlStr.replace(/#.*$/, ''); | ||
if(baseUrlStr) { | ||
if (baseUrlStr) { | ||
baseUrlStr = baseUrlStr.replace(/^\/\//, 'http://'); | ||
baseUrlStr = baseUrlStr.replace(/#.*$/, ''); | ||
} | ||
else { | ||
if ( ! /^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)){ | ||
} else { | ||
if (!/^\.*\/|^(?!localhost)\w+:/.test(currentUrlStr)) { | ||
currentUrlStr = currentUrlStr.replace(/^(?!(?:\w+:)?\/\/)/, 'http://'); | ||
@@ -51,3 +49,3 @@ } | ||
let parsedUrl = URL.parse(currentUrlStr, true, true); | ||
delete parsedUrl.hash ; | ||
delete parsedUrl.hash; | ||
@@ -59,3 +57,3 @@ | ||
if (parsedUrl.host == null && baseUrlStr) { | ||
let parsedBaseUrl = URL.parse(baseUrlStr, true,true); | ||
let parsedBaseUrl = URL.parse(baseUrlStr, true, true); | ||
delete parsedUrl.hash; | ||
@@ -151,4 +149,4 @@ ret.baseurl = URL.format(parsedBaseUrl); | ||
let pageurl_path = pageurl.path ? pageurl.path : ""; | ||
linkurl_path = linkurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/'); | ||
pageurl_path = pageurl_path.replace(/\/index\.[a-z]+$/,'/').replace(/\/default\.[a-z]+$/,'/'); | ||
linkurl_path = linkurl_path.replace(/\/index\.[a-z]+$/, '/').replace(/\/default\.[a-z]+$/, '/'); | ||
pageurl_path = pageurl_path.replace(/\/index\.[a-z]+$/, '/').replace(/\/default\.[a-z]+$/, '/'); | ||
@@ -155,0 +153,0 @@ let linkurl_parts = linkurl_path.split("/").filter(function (elem, index, array) { |
{ | ||
"name": "crawler-url-parser", | ||
"version": "2.0.4", | ||
"version": "2.0.5", | ||
"description": "An `URL` parser for crawling purpose.", | ||
@@ -39,4 +39,3 @@ "main": "crawler-url-parser.js", | ||
"psl": "^1.1.20", | ||
"url": "^0.11.0", | ||
"url-clean": "1.0.2" | ||
"url": "^0.11.0" | ||
}, | ||
@@ -43,0 +42,0 @@ "devDependencies": { |
# crawler-url-parser | ||
> **An URL parser for crawling purpose** | ||
![logo](https://assets.gitlab-static.net/uploads/-/system/project/avatar/4809017/crawler-url-parser.png) | ||
**A URL parser for crawling purposes** | ||
@@ -149,6 +148,24 @@ [![version](https://img.shields.io/npm/v/crawler-url-parser.svg)](https://www.npmjs.org/package/crawler-url-parser) | ||
## Test | ||
`mocha` or `npm test` | ||
> more than 200 unit test cases. | ||
> check test folder and QUICKSTART.js for extra usage. | ||
* `mocha` or `npm test` | ||
* More than 200 unit test cases. | ||
* Check the [test folder](https://gitlab.com/autokent/crawler-url-parser/tree/master/test) and [quickstart.js](https://gitlab.com/autokent/crawler-url-parser/blob/master/quickstart.js) for additional usage examples. | ||
## Support | ||
I actively use this package myself, so it has my top priority. You can chat with me on WhatsApp about any questions, ideas, or suggestions. | ||
[![WhatsApp](https://img.shields.io/badge/style-chat-green.svg?style=flat&label=whatsapp)](https://api.whatsapp.com/send?phone=905063042480&text=Hi%2C%0ALet%27s%20talk%20about%20crawler-url-parser) | ||
### Submitting an Issue | ||
If you find a bug or a mistake, you can help by submitting an issue to the [GitLab repository](https://gitlab.com/autokent/crawler-url-parser/issues). | ||
### Creating a Merge Request | ||
GitLab calls it a merge request instead of a pull request. | ||
* [A Guide for First-Timers](https://about.gitlab.com/2016/06/16/fearless-contribution-a-guide-for-first-timers/) | ||
* [How to create a merge request](https://docs.gitlab.com/ee/gitlab-basics/add-merge-request.html) | ||
* Check [Contributing Guide](https://gitlab.com/autokent/crawler-url-parser/blob/master/CONTRIBUTING.md) | ||
## License | ||
[MIT licensed](https://gitlab.com/autokent/crawler-url-parser/blob/master/LICENSE) and all its dependencies are MIT or BSD licensed. |
Sorry, the diff of this file is not supported yet
94664
3
1214
170
- Removed url-clean@1.0.2
- Removed decode-uri-component@0.2.2 (transitive)
- Removed object-assign@4.1.1 (transitive)
- Removed punycode@2.3.1 (transitive)
- Removed query-string@5.1.1 (transitive)
- Removed strict-uri-encode@1.1.0 (transitive)
- Removed url-clean@1.0.2 (transitive)