scrape-emails
Advanced tools
Comparing version 1.0.0 to 1.0.1
{ | ||
"name": "scrape-emails", | ||
"version": "1.0.0", | ||
"version": "1.0.1", | ||
"description": "Scrape emails from whole rendered website with puppeteer", | ||
@@ -20,2 +20,3 @@ "main": "src/cli.js", | ||
"husky": "^0.14.3", | ||
"lint-staged": "^6.1.0", | ||
"prettier": "^1.10.2" | ||
@@ -22,0 +23,0 @@ }, |
#!/usr/bin/env node | ||
const url = require("url") | ||
const url = require("url"); | ||
const Scraper = require("./scraper"); | ||
@@ -4,0 +4,0 @@ |
@@ -20,14 +20,4 @@ // Dependencies | ||
this._batchJobs = new BatchJobs(this._options.concurrency); | ||
this._lastPageLoad = 0; | ||
} | ||
_parseUrl(link) { | ||
const parse = url.parse(link); | ||
return { | ||
uri: `${parse.protocol}//${parse.host}`, | ||
pathname: parse.pathname | ||
}; | ||
} | ||
_waitForBatchJobs() { | ||
@@ -51,5 +41,5 @@ return new Promise(resolve => { | ||
async _fetchUrl(pathname, callback) { | ||
async _fetchUrl(link, callback) { | ||
const page = await this._browser.newPage(); | ||
await page.goto(this._uri + pathname); | ||
await page.goto(link); | ||
await page.waitFor(this._options.waitForPageLoad); | ||
@@ -59,2 +49,4 @@ | ||
return { | ||
origin: window.location.origin, | ||
html: document.documentElement.outerHTML, | ||
mailto: [].slice | ||
@@ -65,3 +57,2 @@ .call(document.querySelectorAll('a[href^="mailto:"]')) | ||
}), | ||
html: document.documentElement.outerHTML, | ||
links: Array.from(document.getElementsByTagName("a")) | ||
@@ -87,3 +78,5 @@ .filter(element => { | ||
this._links.add(link); | ||
this._batchJobs.push(done => this._fetchUrl(link, done)); | ||
this._batchJobs.push(done => | ||
this._fetchUrl(url.resolve(data.origin, link), done) | ||
); | ||
} | ||
@@ -99,11 +92,6 @@ }); | ||
async scrape(link) { | ||
const { uri, pathname } = this._parseUrl(link); | ||
this._uri = uri; | ||
this._links.add(pathname); | ||
await this._initBrowser(); | ||
const result = this._waitForBatchJobs(); | ||
this._batchJobs.push(done => this._fetchUrl(pathname, done)); | ||
this._batchJobs.push(done => this._fetchUrl(link, done)); | ||
this._batchJobs.start(); | ||
@@ -110,0 +98,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
6378
7
132