Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

crawler-ninja

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler-ninja - npm Package Compare versions

Comparing version 0.1.19 to 0.1.20

8

default-lists/domain-black-list.js

@@ -19,3 +19,3 @@ module.exports.list = function () {

"instagram",
"last.fm",
"last",
"wikipedia",

@@ -25,3 +25,3 @@ "baidu",

"feedburner",
"galleria.net",
"galleria",
"socialmediatoday",

@@ -151,5 +151,7 @@ "badoo",

"ovh",
"webadmax.com"
"webadmax",
"digg",
"amazon"
];
}

@@ -22,3 +22,3 @@ var timers = require('timers');

var DEFAULT_RETRY_TIMEOUT = 10000;
var DEFAULT_RETRY_404 = false;
var DEFAULT_RETRY_400 = false;
var DEFAULT_SKIP_DUPLICATES = true;

@@ -277,3 +277,3 @@ var DEFAULT_RATE_LIMITS = 0;

isExternal : false,
retry404 : DEFAULT_RETRY_404
retry400 : DEFAULT_RETRY_400

@@ -280,0 +280,0 @@ };

@@ -187,8 +187,8 @@ /**

// TODO : review this solution
// some sites provide inconsistent response for some urls (status 404 instead of 200).
// In such case, it should be nice to retry (if the option retry404 == true)
if (result.statusCode && result.statusCode == 404 && result.retry404) {
error = new Error("404 - Not Found");
error.code = 404;
// TODO : review this solution/try to understand the origin of this problem
// some sites provide inconsistent response for some urls (status 40* instead of 200).
// In such case, it should be nice to retry (if the option retry400 == true)
if (result.statusCode && result.statusCode >= 400 && result.statusCode <= 499 && result.retry400) {
error = new Error("40* Error");
error.code = result.statusCode;
}

@@ -195,0 +195,0 @@

{
"name": "crawler-ninja",
"version": "0.1.19",
"version": "0.1.20",
"description": "A web crawler made for the SEO based on plugins. Please wait or contribute ... still in beta",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -467,1 +467,5 @@ Crawler Ninja

- Add a new option "retry404": some sites return inconsistent responses for some URLs (status 404 instead of 200). In such cases, it can be useful to retry (this issue needs to be analyzed in more detail).
0.1.20
- Review the default domain blacklist.
- Retry on any 40* status (option "retry400") instead of only on 404.

@@ -137,3 +137,3 @@ var assert = require("assert");

};
crawler.init({retry404 : true, retryTimeout:20}, end);
crawler.init({retry400 : true, retryTimeout:20}, end);
var a = new audit.Plugin();

@@ -140,0 +140,0 @@ //var cons = new cs.Plugin();

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc