simplecrawler
Comparing version 0.4.2 to 0.5.0
@@ -172,2 +172,5 @@ // Simplecrawler
+    // Whether to allow 'resources' greater than the max depth to be downloaded
+    crawler.fetchWhitelistedMimeTypesBelowMaxDepth = false;
+
     // Ignore invalid ssl certificates
@@ -341,5 +344,8 @@ crawler.ignoreInvalidSSL = false;
            queueItem.depth <= crawler.maxDepth ||
-           mimeTypesWhitelist.reduce(function(prev,mimeCheck) {
-               return prev || !!mimeCheck.exec(queueItem.stateData.contentType);
-           }, false)
+           (
+               crawler.fetchWhitelistedMimeTypesBelowMaxDepth &&
+               mimeTypesWhitelist.reduce(function(prev,mimeCheck) {
+                   return prev || !!mimeCheck.exec(queueItem.stateData.contentType);
+               }, false)
+           )
        );
@@ -346,0 +352,0 @@ };
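The effect of the new flag is easier to see outside the crawler: with it left at its new default of false, items whose MIME type matches the whitelist are treated like any other over-depth item and are not fetched past maxDepth. Below is a minimal standalone sketch of the check added above; maxDepthAllowsFetch and the two whitelist entries are illustrative stand-ins for the crawler's internals, not part of its public API.

    // Illustrative stand-in for the crawler's internal MIME whitelist
    var mimeTypesWhitelist = [/^text\/css/i, /^image\//i];

    // Hypothetical wrapper around the depth condition introduced in 0.5.0
    function maxDepthAllowsFetch(crawler, queueItem) {
        return (
            queueItem.depth <= crawler.maxDepth ||
            (
                crawler.fetchWhitelistedMimeTypesBelowMaxDepth &&
                mimeTypesWhitelist.reduce(function(prev, mimeCheck) {
                    return prev || !!mimeCheck.exec(queueItem.stateData.contentType);
                }, false)
            )
        );
    }

    var image = { depth: 3, stateData: { contentType: "image/png" } };

    // Old behaviour, now opt-in: the image is fetched despite exceeding maxDepth
    console.log(maxDepthAllowsFetch(
        { maxDepth: 1, fetchWhitelistedMimeTypesBelowMaxDepth: true }, image)); // true

    // New default: the over-depth image is skipped
    console.log(maxDepthAllowsFetch(
        { maxDepth: 1, fetchWhitelistedMimeTypesBelowMaxDepth: false }, image)); // false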
 {
     "name": "simplecrawler",
     "description": "Very straigntforward web crawler. Uses EventEmitter. Generates queue statistics and has a basic cache mechanism with extensible backend.",
-    "version": "0.4.2",
+    "version": "0.5.0",
     "homepage": "http://github.com/cgiffard/node-simplecrawler",
@@ -6,0 +6,0 @@ "author": "Christopher Giffard <christopher.giffard@cgiffard.com>",
@@ -11,3 +11,3 @@ // Runs a very simple crawl on an HTTP server with different depth
 // Test the number of links discovered for the given "depth" and compare it to "linksToDiscover"
-var depthTest = function(depth, linksToDiscover) {
+var depthTest = function(depth, linksToDiscover, behaviour) {
     depth = parseInt(depth); // Force depth to be a number
@@ -26,5 +26,7 @@
     crawler.interval = 1;
+    crawler.fetchWhitelistedMimeTypesBelowMaxDepth = !!behaviour;
+
     // Define max depth for this crawl
     crawler.maxDepth = depth;
@@ -56,3 +58,3 @@ linksDiscovered = 0;
-describe("Crawler max depth",function() {
+describe("Crawler max depth with resource override (old default behaviour)",function() {
@@ -68,5 +70,19 @@ // depth: linksToDiscover
     for(var depth in linksToDiscover) {
-        depthTest(depth, linksToDiscover[depth]);
+        depthTest(depth, linksToDiscover[depth], true);
     }
 });
+
+describe("Crawler max depth without fetching resources (new default behaviour)", function() {
+    // depth: linksToDiscover
+    var linksToDiscover = {
+        0: 11, // links for depth 0
+        1: 1,  // links for depth 1
+        2: 3,  // links for depth 2
+        3: 6   // links for depth 3
+    };
+
+    for(var depth in linksToDiscover) {
+        depthTest(depth, linksToDiscover[depth], false);
+    }
+});
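For callers upgrading to 0.5.0, opting back into the pre-0.5.0 behaviour exercised by the first test block is a one-line change. A minimal usage sketch follows; the target host, interval value, and the fetchcomplete handler are illustrative, not part of the change itself.

    var Crawler = require("simplecrawler");

    var crawler = new Crawler("example.com", "/", 80);
    crawler.interval = 250;
    crawler.maxDepth = 2;

    // Restore the old default: keep downloading whitelisted resources
    // (images, CSS, etc.) even when they lie beyond maxDepth
    crawler.fetchWhitelistedMimeTypesBelowMaxDepth = true;

    crawler.on("fetchcomplete", function(queueItem) {
        console.log("Fetched", queueItem.url);
    });

    crawler.start();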