New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

website-scraper

Package Overview
Dependencies
Maintainers
1
Versions
60
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

website-scraper - npm Package Compare versions

Comparing version 3.2.0 to 3.3.0

1

lib/config/defaults.js

@@ -52,2 +52,3 @@ 'use strict';

},
requestConcurrency: Infinity,
urlFilter: null,

@@ -54,0 +55,0 @@ recursive: false,

40

lib/scraper.js
'use strict';
const Promise = require('bluebird');
const PromiseQueue = require('p-queue');
const _ = require('lodash');

@@ -20,10 +21,8 @@

function Scraper (options) {
const self = this;
this.options = u.extend(defaults, options);
this.options.request = u.extend(defaults.request, options.request);
this.options.urls = Array.isArray(this.options.urls) ? this.options.urls : [this.options.urls];
self.options = u.extend(defaults, options);
self.options.request = u.extend(defaults.request, options.request);
self.options.urls = Array.isArray(self.options.urls) ? self.options.urls : [self.options.urls];
if (self.options.subdirectories) {
self.options.subdirectories.forEach((element) => {
if (this.options.subdirectories) {
this.options.subdirectories.forEach((element) => {
element.extensions = element.extensions.map((ext) => ext.toLowerCase());

@@ -33,23 +32,24 @@ });

self.options.recursiveSources = recursiveSources;
if (self.options.recursive) {
self.options.sources = u.union(self.options.sources, self.options.recursiveSources);
this.options.recursiveSources = recursiveSources;
if (this.options.recursive) {
this.options.sources = u.union(this.options.sources, this.options.recursiveSources);
}
logger.info('init with options', self.options);
logger.info('init with options', this.options);
self.request = new Request(self.options);
self.resourceHandler = new ResourceHandler(self.options, self);
self.filenameGenerator = new FilenameGenerator(self.options);
self.resourceSaver = self.options.resourceSaver ? new self.options.resourceSaver(u.clone(self.options)) : new ResourceSaver(self.options);
this.request = new Request(this.options);
this.resourceHandler = new ResourceHandler(this.options, this);
this.filenameGenerator = new FilenameGenerator(this.options);
this.resourceSaver = this.options.resourceSaver ? new this.options.resourceSaver(u.clone(this.options)) : new ResourceSaver(this.options);
// Array of Resources for downloading
self.resources = self.options.urls.map((obj) => {
this.resources = this.options.urls.map((obj) => {
const url = (obj && obj.url) ? obj.url : obj;
const filename = (obj && obj.filename) ? obj.filename : self.options.defaultFilename;
const filename = (obj && obj.filename) ? obj.filename : this.options.defaultFilename;
return new Resource(url, filename);
});
self.requestedResourcePromises = new NormalizedUrlMap(); // Map url -> request promise
self.loadedResources = new NormalizedUrlMap(); // Map url -> resource
this.requestedResourcePromises = new NormalizedUrlMap(); // Map url -> request promise
this.loadedResources = new NormalizedUrlMap(); // Map url -> resource
this.requestQueue = new PromiseQueue({concurrency: this.options.requestConcurrency});
}

@@ -95,3 +95,3 @@

const referer = resource.parent ? resource.parent.getUrl() : null;
return self.request.get(url, referer);
return self.requestQueue.add(() => self.request.get(url, referer));
}).then(function requestCompleted (responseData) {

@@ -98,0 +98,0 @@

{
"name": "website-scraper",
"version": "3.2.0",
"version": "3.3.0",
"description": "Download website to a local directory (including all css, images, js, etc.)",

@@ -44,2 +44,3 @@ "readmeFilename": "README.md",

"normalize-url": "^1.5.3",
"p-queue": "^1.1.0",
"request": "^2.81.0",

@@ -46,0 +47,0 @@ "srcset": "^1.0.0"

@@ -66,2 +66,3 @@ ## Introduction

* [updateMissingSources](#updatemissingsources) - update url for missing sources with absolute url
* [requestConcurrency](#requestconcurrency) - set maximum concurrent requests

@@ -281,3 +282,6 @@ Default options you can find in [lib/config/defaults.js](https://github.com/website-scraper/node-website-scraper/blob/master/lib/config/defaults.js) or get them using `scrape.defaults`.

#### requestConcurrency
Number, maximum amount of concurrent requests. Defaults to `Infinity`.
## callback

@@ -284,0 +288,0 @@ Callback function, optional, includes following parameters:

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc