
supercrawler - npm package version comparison

Comparing version 0.16.0 to 0.16.1

4 files changed

lib/Crawler.js

@@ -414,4 +414,4 @@ var Crawler,
-      // if robots returns a 404, we assume there are no restrictions.
-      if (robotsStatusCode === 404) {
+      // if robots returns a 404 or 410, we assume there are no restrictions.
+      if (robotsStatusCode === 404 || robotsStatusCode === 410) {
         return Promise.resolve({
           statusCode: 200,
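For orientation, here is a minimal sketch of the idea behind this branch, not supercrawler's verbatim source: a missing robots.txt is normalized into a permissive result. The function name normalizeRobotsResponse and the empty body are assumptions; the resolved { statusCode: 200 } shape follows the context lines above.

// Hypothetical helper illustrating the branch above: a missing robots.txt
// (404 Not Found, and now also 410 Gone) is treated as an empty, fully
// permissive file, so crawling proceeds without restrictions.
function normalizeRobotsResponse(robotsStatusCode, body) {
  if (robotsStatusCode === 404 || robotsStatusCode === 410) {
    return Promise.resolve({
      statusCode: 200,
      body: "" // assumption: an empty robots.txt allows everything
    });
  }

  // Any other status is passed through for the caller to interpret.
  return Promise.resolve({
    statusCode: robotsStatusCode,
    body: body
  });
}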

package.json

 {
   "name": "supercrawler",
   "description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.",
-  "version": "0.16.0",
+  "version": "0.16.1",
   "homepage": "https://github.com/brendonboshell/supercrawler",
   "author": "Brendon Boshell <brendonboshell@gmail.com>",
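The description above sums up the crawler's contract. As a quick orientation, a minimal usage sketch built only from the API visible elsewhere in this diff (a Crawler constructor taking an interval option, plus start() and stop()); the export shape require("supercrawler").Crawler and the millisecond unit are assumptions.

var Crawler = require("supercrawler").Crawler; // assumed export shape

// Interval between queue polls (presumably milliseconds); robots.txt,
// rate limits and concurrency limits are handled by the crawler itself,
// per the package description above.
var crawler = new Crawler({
  interval: 1000
});

crawler.start();

// Stop after ten seconds for a bounded demo run.
setTimeout(function () {
  crawler.stop();
}, 10000);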

README.md

@@ -350,2 +350,6 @@ # Node.js Web Crawler
+### 0.16.1
+
+* [Fixed] Treats 410 the same as 404 for robots.txt requests.
+
 ### 0.16.0

@@ -450,2 +450,17 @@ var proxyquire = require('proxyquire'),
+  it("crawls all pages if robots.txt is 410", function (done) {
+    var crawler = new Crawler({
+      interval: 10
+    });
+
+    crawler.start();
+    robotsStatusCode = 410;
+
+    setTimeout(function () {
+      crawler.stop();
+      expect(numCrawlsOfUrl("https://example.com/index17.html", false)).to.equal(1);
+      done();
+    }, 200);
+  });
+
   it("excludes all pages if robots.txt could not be crawled", function (done) {
     var crawler = new Crawler({
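The hunk's context line shows the suite loads the module under test through proxyquire, which is how a shared robotsStatusCode variable can steer the stubbed HTTP layer between tests. A hedged sketch of that wiring follows; the module path and the request-style stub are assumptions, not the project's actual test setup.

var proxyquire = require("proxyquire");

var robotsStatusCode = 200; // individual tests reassign this, as above

// Replace the HTTP dependency so every robots.txt fetch answers with
// whatever status code the current test has selected.
var Crawler = proxyquire("../lib/Crawler", {
  "request": function (opts, callback) {
    callback(null, { statusCode: robotsStatusCode }, "");
  }
});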
