crawler-url-parser
Advanced tools
Comparing version 1.2.1 to 1.2.2
@@ -85,3 +85,3 @@ const url = require('url'); | ||
if(!urlMap.has(currentUrl.normalized)){ | ||
urlMap.add(currentUrl.normalized,{url:currentUrl,text:text}); | ||
urlMap.set(currentUrl.normalized,{url:currentUrl,text:text}); | ||
} | ||
@@ -88,0 +88,0 @@ else{ |
{ | ||
"name": "crawler-url-parser", | ||
"version": "1.2.1", | ||
"version": "1.2.2", | ||
"description": "An `URL` parser for crawling purpose.", | ||
"main": "crawler-url-parser.js", | ||
"keywords":[ | ||
"keywords": [ | ||
"crawler-url-parser", | ||
@@ -28,3 +28,6 @@ "url-parser", | ||
"trim", | ||
"crawling" | ||
"crawling", | ||
"subdomain", | ||
"external", | ||
"internal" | ||
], | ||
@@ -46,6 +49,6 @@ "dependencies": { | ||
}, | ||
"homepage":"https://gitlab.com/mehmet.kozan/crawler-url-parser", | ||
"bugs" :{ | ||
"url" : "https://gitlab.com/mehmet.kozan/crawler-url-parser/issues", | ||
"email" : "mehmet.kozan@live.com" | ||
"homepage": "https://gitlab.com/mehmet.kozan/crawler-url-parser", | ||
"bugs": { | ||
"url": "https://gitlab.com/mehmet.kozan/crawler-url-parser/issues", | ||
"email": "mehmet.kozan@live.com" | ||
}, | ||
@@ -52,0 +55,0 @@ "repository": { |
@@ -6,2 +6,51 @@ # crawler-url-parser | ||
## crawler-url-parser | ||
**A `URL` parser for crawling purposes.** | ||
## Installation | ||
`npm install crawler-url-parser` | ||
## Usage | ||
### Parse | ||
```js | ||
const cup = require('crawler-url-parser'); | ||
let url = cup.parse("../ddd","http://question.stackoverflow.com/aaa/bbb/ccc/"); | ||
console.log(url.normalized); | ||
console.log(url.host); | ||
console.log(url.domain); | ||
console.log(url.subdomain); | ||
console.log(url.protocol); | ||
console.log(url.path); | ||
``` | ||
### Extract | ||
```js | ||
const cup = require('crawler-url-parser'); | ||
let htmlStr= | ||
'<html> \ | ||
<body> \ | ||
<a href="http://www.stackoverflow.com/internal-1">test-link-4</a><br /> \ | ||
<a href="http://www.stackoverflow.com/internal-2">test-link-5</a><br /> \ | ||
<a href="http://www.stackoverflow.com/internal-2">test-link-6</a><br /> \ | ||
<a href="http://faq.stackoverflow.com/subdomain-1">test-link-7</a><br /> \ | ||
<a href="http://faq.stackoverflow.com/subdomain-2">test-link-8</a><br /> \ | ||
<a href="http://faq.stackoverflow.com/subdomain-2">test-link-9</a><br /> \ | ||
<a href="http://www.google.com/external-1">test-link-10</a><br /> \ | ||
<a href="http://www.google.com/external-2">test-link-11</a><br /> \ | ||
<a href="http://www.google.com/external-2">test-link-12</a><br /> \ | ||
</body> \ | ||
</html>'; | ||
let currentUrl= "http://www.stackoverflow.com/aaa/bbb/ccc"; | ||
let result = cup.extract(htmlStr,currentUrl); | ||
console.log(result.length); | ||
``` | ||
## Test | ||
* Check the test folder for additional usage examples. | ||
`mocha` | ||
or | ||
`npm test` | ||
## API |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
49352
11
817
56
1