crawler-ninja
Advanced tools
Comparing version 0.1.19 to 0.1.20
@@ -19,3 +19,3 @@ module.exports.list = function () { | ||
"instagram", | ||
"last.fm", | ||
"last", | ||
"wikipedia", | ||
@@ -25,3 +25,3 @@ "baidu", | ||
"feedburner", | ||
"galleria.net", | ||
"galleria", | ||
"socialmediatoday", | ||
@@ -151,5 +151,7 @@ "badoo", | ||
"ovh", | ||
"webadmax.com" | ||
"webadmax", | ||
"digg", | ||
"amazon" | ||
]; | ||
} |
@@ -22,3 +22,3 @@ var timers = require('timers'); | ||
var DEFAULT_RETRY_TIMEOUT = 10000; | ||
var DEFAULT_RETRY_404 = false; | ||
var DEFAULT_RETRY_400 = false; | ||
var DEFAULT_SKIP_DUPLICATES = true; | ||
@@ -277,3 +277,3 @@ var DEFAULT_RATE_LIMITS = 0; | ||
isExternal : false, | ||
retry404 : DEFAULT_RETRY_404 | ||
retry400 : DEFAULT_RETRY_400 | ||
@@ -280,0 +280,0 @@ }; |
@@ -187,8 +187,8 @@ /** | ||
// TODO : review this solution | ||
// some sites provide inconsistent response for some urls (status 404 instead of 200). | ||
// In such case, it should be nice to retry (if the option retry404 == true) | ||
if (result.statusCode && result.statusCode == 404 && result.retry404) { | ||
error = new Error("404 - Not Found"); | ||
error.code = 404; | ||
// TODO : review this solution/try to understand the origin of this problem | ||
// some sites provide inconsistent response for some urls (status 40* instead of 200). | ||
// In such case, it should be nice to retry (if the option retry400 == true) | ||
if (result.statusCode && result.statusCode >= 400 && result.statusCode <= 499 && result.retry400) { | ||
error = new Error("40* Error"); | ||
error.code = result.statusCode; | ||
} | ||
@@ -195,0 +195,0 @@ |
{ | ||
"name": "crawler-ninja", | ||
"version": "0.1.19", | ||
"version": "0.1.20", | ||
"description": "A web crawler made for the SEO based on plugins. Please wait or contribute ... still in beta", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -467,1 +467,5 @@ Crawler Ninja | ||
- Add a new option "retry404" : some sites provide inconsistent responses for some urls (status 404 instead of 200). In such a case, it would be nice to retry (this issue needs to be analyzed in more detail). | ||
0.1.20 | ||
- Review the default domain blacklist. | ||
- Replace the option "retry404" with "retry400" : retry on any 40* status (400 to 499) instead of only 404. |
@@ -137,3 +137,3 @@ var assert = require("assert"); | ||
}; | ||
crawler.init({retry404 : true, retryTimeout:20}, end); | ||
crawler.init({retry400 : true, retryTimeout:20}, end); | ||
var a = new audit.Plugin(); | ||
@@ -140,0 +140,0 @@ //var cons = new cs.Plugin(); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
8210277
10865
471