Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

scrape-emails

Package Overview
Dependencies
Maintainers
1
Versions
3
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

scrape-emails - npm Package: Compare versions

Comparing version 1.0.0 to 1.0.1

3

package.json
{
"name": "scrape-emails",
"version": "1.0.0",
"version": "1.0.1",
"description": "Scrape emails from whole rendered website with puppeteer",

@@ -20,2 +20,3 @@ "main": "src/cli.js",

"husky": "^0.14.3",
"lint-staged": "^6.1.0",
"prettier": "^1.10.2"

@@ -22,0 +23,0 @@ },

#!/usr/bin/env node
const url = require("url")
const url = require("url");
const Scraper = require("./scraper");

@@ -4,0 +4,0 @@

@@ -20,14 +20,4 @@ // Dependencies

this._batchJobs = new BatchJobs(this._options.concurrency);
this._lastPageLoad = 0;
}
_parseUrl(link) {
const parse = url.parse(link);
return {
uri: `${parse.protocol}//${parse.host}`,
pathname: parse.pathname
};
}
_waitForBatchJobs() {

@@ -51,5 +41,5 @@ return new Promise(resolve => {

async _fetchUrl(pathname, callback) {
async _fetchUrl(link, callback) {
const page = await this._browser.newPage();
await page.goto(this._uri + pathname);
await page.goto(link);
await page.waitFor(this._options.waitForPageLoad);

@@ -59,2 +49,4 @@

return {
origin: window.location.origin,
html: document.documentElement.outerHTML,
mailto: [].slice

@@ -65,3 +57,2 @@ .call(document.querySelectorAll('a[href^="mailto:"]'))

}),
html: document.documentElement.outerHTML,
links: Array.from(document.getElementsByTagName("a"))

@@ -87,3 +78,5 @@ .filter(element => {

this._links.add(link);
this._batchJobs.push(done => this._fetchUrl(link, done));
this._batchJobs.push(done =>
this._fetchUrl(url.resolve(data.origin, link), done)
);
}

@@ -99,11 +92,6 @@ });

async scrape(link) {
const { uri, pathname } = this._parseUrl(link);
this._uri = uri;
this._links.add(pathname);
await this._initBrowser();
const result = this._waitForBatchJobs();
this._batchJobs.push(done => this._fetchUrl(pathname, done));
this._batchJobs.push(done => this._fetchUrl(link, done));
this._batchJobs.start();

@@ -110,0 +98,0 @@

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc