@qualweb/crawler
Advanced tools
Comparing version 0.1.1 to 0.2.0
@@ -6,9 +6,15 @@ 'use strict'; | ||
const simplecrawler_1 = __importDefault(require("simplecrawler")); | ||
const log_update_1 = __importDefault(require("log-update")); | ||
const iohook_1 = __importDefault(require("iohook")); | ||
class Crawl { | ||
constructor(domain) { | ||
this.frames = ['-', '\\', '|', '/']; | ||
this.i = 0; | ||
this.urls = new Array(); | ||
this.crawler = new simplecrawler_1.default(domain); | ||
this.crawledURLS = 0; | ||
} | ||
async start(options) { | ||
return new Promise(resolve => { | ||
console.log('Starting crawler... Press CTRL+X to stop the crawling process at any time'); | ||
if (options) { | ||
@@ -19,2 +25,6 @@ this.crawler.maxConcurrency = 100; | ||
} | ||
let interval = setInterval(() => { | ||
const frame = this.frames[this.i = ++this.i % this.frames.length]; | ||
log_update_1.default('Crawled ' + this.crawledURLS + ' pages ' + `${frame}`); | ||
}, 100); | ||
this.crawler.on('fetchcomplete', (item) => { | ||
@@ -25,8 +35,19 @@ if (item && item['stateData'] && item['stateData']['contentType'] && | ||
this.urls.push(item.url); | ||
const frame = this.frames[this.i = ++this.i % this.frames.length]; | ||
log_update_1.default('Crawled ' + this.crawledURLS++ + ' pages ' + `${frame}`); | ||
} | ||
}); | ||
this.crawler.on('complete', () => { | ||
clearInterval(interval); | ||
this.stop(); | ||
resolve(); | ||
console.log('\nCrawler done!'); | ||
}); | ||
iohook_1.default.on('keydown', event => { | ||
if (event && event.ctrlKey && event.keycode === 45) { | ||
this.crawler.emit('complete'); | ||
iohook_1.default.stop(); | ||
} | ||
}); | ||
iohook_1.default.start(); | ||
this.crawler.start(); | ||
@@ -33,0 +54,0 @@ }); |
{ | ||
"name": "@qualweb/crawler", | ||
"version": "0.1.1", | ||
"version": "0.2.0", | ||
"description": "Webpage crawler for qualweb", | ||
@@ -31,2 +31,4 @@ "main": "dist/index.js", | ||
"dependencies": { | ||
"iohook": "^0.6.5", | ||
"log-update": "^4.0.0", | ||
"simplecrawler": "^1.1.8" | ||
@@ -33,0 +35,0 @@ }, |
'use strict'; | ||
import Crawler from 'simplecrawler'; | ||
import logUpdate from 'log-update'; | ||
import ioHook from 'iohook'; | ||
@@ -9,2 +11,5 @@ class Crawl { | ||
private crawler: Crawler; | ||
private crawledURLS: number; | ||
private frames = ['-', '\\', '|', '/']; | ||
private i = 0; | ||
@@ -14,2 +19,3 @@ constructor(domain: string) { | ||
this.crawler = new Crawler(domain); | ||
this.crawledURLS = 0; | ||
} | ||
@@ -19,2 +25,3 @@ | ||
return new Promise(resolve => { | ||
console.log('Starting crawler... Press CTRL+X to stop the crawling process at any time'); | ||
if (options) { | ||
@@ -26,2 +33,7 @@ this.crawler.maxConcurrency = 100; | ||
let interval = setInterval(() => { | ||
const frame = this.frames[this.i = ++this.i % this.frames.length]; | ||
logUpdate('Crawled ' + this.crawledURLS + ' pages ' + `${frame}` ); | ||
}, 100); | ||
this.crawler.on('fetchcomplete', (item: any) => { | ||
@@ -32,10 +44,23 @@ if (item && item['stateData'] && item['stateData']['contentType'] && | ||
this.urls.push(item.url); | ||
const frame = this.frames[this.i = ++this.i % this.frames.length]; | ||
logUpdate('Crawled ' + this.crawledURLS++ + ' pages ' + `${frame}`); | ||
} | ||
}); | ||
this.crawler.on('complete', () => { | ||
clearInterval(interval); | ||
this.stop(); | ||
resolve(); | ||
console.log('\nCrawler done!'); | ||
}); | ||
ioHook.on('keydown', event => { | ||
if (event && event.ctrlKey && event.keycode === 45) { | ||
this.crawler.emit('complete'); | ||
ioHook.stop(); | ||
} | ||
}); | ||
ioHook.start(); | ||
this.crawler.start(); | ||
@@ -42,0 +67,0 @@ }); |
@@ -7,3 +7,3 @@ const Crawl = require('../dist/index'); | ||
this.timeout(10 * 100000); | ||
const crawler = new Crawl('https://lodash.com'); | ||
const crawler = new Crawl('https://ciencias.ulisboa.pt'); | ||
await crawler.start(); | ||
@@ -10,0 +10,0 @@ const urls = crawler.getResults(); |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
13094
199
3
+ Addediohook@^0.6.5
+ Addedlog-update@^4.0.0
+ Addedajv@6.12.6(transitive)
+ Addedansi-escapes@4.3.2(transitive)
+ Added ansi-regex@2.1.1, 5.0.1 (transitive)
+ Addedansi-styles@4.3.0(transitive)
+ Addedasn1@0.2.6(transitive)
+ Addedassert-plus@1.0.0(transitive)
+ Addedastral-regex@2.0.0(transitive)
+ Addedasynckit@0.4.0(transitive)
+ Addedaws-sign2@0.7.0(transitive)
+ Addedaws4@1.13.2(transitive)
+ Addedbcrypt-pbkdf@1.0.2(transitive)
+ Addedbindings@1.5.0(transitive)
+ Addedbl@1.2.3(transitive)
+ Addedbuffer-alloc@1.2.0(transitive)
+ Addedbuffer-alloc-unsafe@1.1.0(transitive)
+ Addedbuffer-fill@1.0.0(transitive)
+ Addedcaseless@0.12.0(transitive)
+ Addedchownr@1.1.4(transitive)
+ Addedcli-cursor@3.1.0(transitive)
+ Addedcode-point-at@1.1.0(transitive)
+ Addedcolor-convert@2.0.1(transitive)
+ Addedcolor-name@1.1.4(transitive)
+ Addedcombined-stream@1.0.8(transitive)
+ Added core-util-is@1.0.2, 1.0.3 (transitive)
+ Addeddashdash@1.14.1(transitive)
+ Addeddebug@2.6.9(transitive)
+ Addeddeep-extend@0.6.0(transitive)
+ Addeddelayed-stream@1.0.0(transitive)
+ Addedecc-jsbn@0.1.2(transitive)
+ Addedemoji-regex@8.0.0(transitive)
+ Addedend-of-stream@1.4.4(transitive)
+ Addedextend@3.0.2(transitive)
+ Addedextsprintf@1.3.0(transitive)
+ Addedfast-deep-equal@3.1.3(transitive)
+ Addedfast-json-stable-stringify@2.1.0(transitive)
+ Addedfile-uri-to-path@1.0.0(transitive)
+ Addedforever-agent@0.6.1(transitive)
+ Addedform-data@2.3.3(transitive)
+ Addedfs-constants@1.0.0(transitive)
+ Addedgetpass@0.1.7(transitive)
+ Addedhar-schema@2.0.0(transitive)
+ Addedhar-validator@5.1.5(transitive)
+ Addedhttp-signature@1.2.0(transitive)
+ Addedinherits@2.0.4(transitive)
+ Addedini@1.3.8(transitive)
+ Addediohook@0.6.6(transitive)
+ Added is-fullwidth-code-point@1.0.0, 3.0.0 (transitive)
+ Addedis-typedarray@1.0.0(transitive)
+ Added isarray@0.0.1, 1.0.0 (transitive)
+ Addedisstream@0.1.2(transitive)
+ Addedjsbn@0.1.1(transitive)
+ Addedjson-schema@0.4.0(transitive)
+ Addedjson-schema-traverse@0.4.1(transitive)
+ Addedjson-stringify-safe@5.0.1(transitive)
+ Addedjsprim@1.4.2(transitive)
+ Addedlog-update@4.0.0(transitive)
+ Addedmime-db@1.52.0(transitive)
+ Addedmime-types@2.1.35(transitive)
+ Addedmimic-fn@2.1.0(transitive)
+ Addedminimist@1.2.8(transitive)
+ Addedmkdirp@0.5.6(transitive)
+ Addedms@2.0.0(transitive)
+ Addednode-abi@2.30.1(transitive)
+ Addednugget@2.2.0(transitive)
+ Addednumber-is-nan@1.0.1(transitive)
+ Addedoauth-sign@0.9.0(transitive)
+ Addedobject-keys@0.4.0(transitive)
+ Addedonce@1.4.0(transitive)
+ Addedonetime@5.1.2(transitive)
+ Addedperformance-now@2.1.0(transitive)
+ Addedpretty-bytes@4.0.2(transitive)
+ Addedprocess-nextick-args@2.0.1(transitive)
+ Addedprogress-stream@1.2.0(transitive)
+ Addedpsl@1.9.0(transitive)
+ Addedpump@1.0.3(transitive)
+ Addedpunycode@2.3.1(transitive)
+ Addedqs@6.5.3(transitive)
+ Addedrc@1.2.8(transitive)
+ Added readable-stream@1.1.14, 2.3.8 (transitive)
+ Addedrequest@2.88.2(transitive)
+ Addedrestore-cursor@3.1.0(transitive)
+ Added safe-buffer@5.1.2, 5.2.1 (transitive)
+ Addedsemver@5.7.2(transitive)
+ Addedsignal-exit@3.0.7(transitive)
+ Addedsingle-line-log@1.1.2(transitive)
+ Addedslice-ansi@4.0.0(transitive)
+ Addedspeedometer@0.1.4(transitive)
+ Addedsshpk@1.18.0(transitive)
+ Added string-width@1.0.2, 4.2.3 (transitive)
+ Added string_decoder@0.10.31, 1.1.1 (transitive)
+ Added strip-ansi@3.0.1, 6.0.1 (transitive)
+ Addedstrip-json-comments@2.0.1(transitive)
+ Addedtar-fs@1.16.3(transitive)
+ Addedtar-stream@1.6.2(transitive)
+ Addedthrottleit@0.0.2(transitive)
+ Addedthrough2@0.2.3(transitive)
+ Addedto-buffer@1.1.1(transitive)
+ Addedtough-cookie@2.5.0(transitive)
+ Addedtunnel-agent@0.6.0(transitive)
+ Addedtweetnacl@0.14.5(transitive)
+ Addedtype-fest@0.21.3(transitive)
+ Addeduri-js@4.4.1(transitive)
+ Addedutil-deprecate@1.0.2(transitive)
+ Addeduuid@3.4.0(transitive)
+ Addedverror@1.10.0(transitive)
+ Addedwrap-ansi@6.2.0(transitive)
+ Addedwrappy@1.0.2(transitive)
+ Added xtend@2.1.2, 4.0.2 (transitive)