@qualweb/crawler
Advanced tools
Comparing version 0.3.4 to 0.3.5
# Changelog | ||
## [0.3.5] - 05/05/2021 | ||
### Removed | ||
- `php` from the list of file extensions excluded during the crawling process | ||
### Updated | ||
- dependencies | ||
## [0.3.4] - 23/04/2021 | ||
@@ -4,0 +14,0 @@ |
@@ -12,2 +12,3 @@ /// <reference types="@qualweb/types" /> | ||
crawl(options?: CrawlOptions): Promise<void>; | ||
private log; | ||
private addUrlsToCrawl; | ||
@@ -14,0 +15,0 @@ private fetchPageLinks; |
@@ -25,7 +25,7 @@ "use strict"; | ||
async crawl(options) { | ||
var _a, _b, _c, _d, _e, _f; | ||
var _a, _b, _c, _d, _e; | ||
const maxDepth = (_a = options === null || options === void 0 ? void 0 : options.maxDepth) !== null && _a !== void 0 ? _a : -1; | ||
const maxUrls = (_b = options === null || options === void 0 ? void 0 : options.maxUrls) !== null && _b !== void 0 ? _b : -1; | ||
const parallel = (_c = options === null || options === void 0 ? void 0 : options.maxParallelCrawls) !== null && _c !== void 0 ? _c : 5; | ||
const timeout = (_d = options === null || options === void 0 ? void 0 : options.timeout) !== null && _d !== void 0 ? _d : -1; | ||
const parallel = (options === null || options === void 0 ? void 0 : options.maxParallelCrawls) || 5; | ||
const timeout = (_c = options === null || options === void 0 ? void 0 : options.timeout) !== null && _c !== void 0 ? _c : -1; | ||
let currentDepth = 0; | ||
@@ -39,3 +39,3 @@ let currentUrlCount = 1; | ||
if (options === null || options === void 0 ? void 0 : options.logging) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
this.log(currentDepth, currentUrlCount, timer); | ||
} | ||
@@ -49,3 +49,3 @@ }, 2000); | ||
if (options === null || options === void 0 ? void 0 : options.logging) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
this.log(currentDepth, currentUrlCount, timer); | ||
} | ||
@@ -60,3 +60,3 @@ const urlsByDepth = {}; | ||
if (options === null || options === void 0 ? void 0 : options.logging) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
this.log(currentDepth, currentUrlCount, timer); | ||
} | ||
@@ -71,3 +71,3 @@ if (maxUrls >= 0 && currentUrlCount >= maxUrls) { | ||
if (options === null || options === void 0 ? void 0 : options.logging) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
this.log(currentDepth, currentUrlCount, timer); | ||
} | ||
@@ -77,3 +77,3 @@ while (!depthCompleted) { | ||
let count = 0; | ||
for (const url of (_e = urlsByDepth[currentDepth - 1]) !== null && _e !== void 0 ? _e : []) { | ||
for (const url of (_d = urlsByDepth[currentDepth - 1]) !== null && _d !== void 0 ? _d : []) { | ||
if (!urlsCrawled[url]) { | ||
@@ -101,3 +101,3 @@ urlsCrawled[url] = true; | ||
if (options === null || options === void 0 ? void 0 : options.logging) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
this.log(currentDepth, currentUrlCount, timer); | ||
} | ||
@@ -116,3 +116,3 @@ if (maxUrls >= 0 && currentUrlCount >= maxUrls) { | ||
} | ||
if (!((_f = urlsByDepth[currentDepth]) === null || _f === void 0 ? void 0 : _f.length)) { | ||
if (!((_e = urlsByDepth[currentDepth]) === null || _e === void 0 ? void 0 : _e.length)) { | ||
continueCrawling = false; | ||
@@ -132,2 +132,5 @@ } | ||
} | ||
log(currentDepth, currentUrlCount, timer) { | ||
log_update_1.default(`Domain: ${this.domain} Current depth: ${currentDepth} Urls found: ${currentUrlCount} Time passed: ${timer} seconds`); | ||
} | ||
addUrlsToCrawl(urlsCrawled, urls) { | ||
@@ -151,3 +154,3 @@ for (const url of urls !== null && urls !== void 0 ? urls : []) { | ||
urls = await page.evaluate((domain) => { | ||
const notHtml = 'css|jpg|jpeg|gif|svg|pdf|docx|js|png|ico|xml|mp4|mp3|mkv|wav|rss|php|json|pptx|txt'.split('|'); | ||
const notHtml = 'css|jpg|jpeg|gif|svg|pdf|docx|js|png|ico|xml|mp4|mp3|mkv|wav|rss|json|pptx|txt'.split('|'); | ||
const links = document.querySelectorAll('body a'); | ||
@@ -154,0 +157,0 @@ const urls = new Array(); |
{ | ||
"name": "@qualweb/crawler", | ||
"version": "0.3.4", | ||
"version": "0.3.5", | ||
"description": "Webpage crawler for qualweb", | ||
@@ -43,17 +43,17 @@ "main": "dist/index.js", | ||
"devDependencies": { | ||
"@qualweb/types": "^0.5.13", | ||
"@qualweb/types": "^0.6.3", | ||
"@tsconfig/recommended": "^1.0.1", | ||
"@types/node": "^14.14.37", | ||
"@types/node": "^15.0.2", | ||
"@types/puppeteer": "^5.4.3", | ||
"@typescript-eslint/eslint-plugin": "^4.22.0", | ||
"@typescript-eslint/parser": "^4.22.0", | ||
"@typescript-eslint/eslint-plugin": "^4.22.1", | ||
"@typescript-eslint/parser": "^4.22.1", | ||
"chai": "^4.3.4", | ||
"eslint": "^7.24.0", | ||
"eslint-config-prettier": "^8.2.0", | ||
"eslint": "^7.25.0", | ||
"eslint-config-prettier": "^8.3.0", | ||
"eslint-plugin-prettier": "^3.3.1", | ||
"eslint-plugin-sonarjs": "^0.6.0", | ||
"eslint-plugin-sonarjs": "^0.7.0", | ||
"esm": "^3.2.25", | ||
"mocha": "^8.3.2", | ||
"prettier": "^2.2.1", | ||
"puppeteer": "^8.0.0", | ||
"puppeteer": "^9.1.1", | ||
"rimraf": "^3.0.2", | ||
@@ -60,0 +60,0 @@ "typescript": "^4.2.4" |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
247
23523