simplecrawler
Advanced tools
Comparing version 0.0.4 to 0.0.5
296
index.js
@@ -18,3 +18,3 @@ // Simplecrawler | ||
this.domain = domain || ""; | ||
// Gotta start crawling *somewhere* | ||
@@ -31,3 +31,3 @@ this.initialPath = initialPath || "/"; | ||
this.maxConcurrency = 5; | ||
// Maximum time we'll wait for headers | ||
@@ -51,12 +51,12 @@ this.timeout = 5 * 60 * 1000; | ||
this.ignoreWWWDomain = true; | ||
// Or go even further and strip WWW subdomain from domains altogether! | ||
this.stripWWWDomain = false; | ||
// Use simplecrawler's internal resource discovery function (switch it off if you'd prefer to discover and queue resources yourself!) | ||
this.discoverResources = true; | ||
// Internal cachestore | ||
this.cache = null; | ||
// Use an HTTP Proxy? | ||
@@ -67,6 +67,11 @@ this.useProxy = false; | ||
// Support for HTTP basic auth | ||
this.needsAuth = false; | ||
this.authUser = ""; | ||
this.authPass = ""; | ||
// Domain Whitelist | ||
// We allow domains to be whitelisted, so cross-domain requests can be made. | ||
this.domainWhitelist = []; | ||
// Supported Protocols | ||
@@ -80,3 +85,3 @@ this.allowedProtocols = [ | ||
this.maxResourceSize = 1024 * 1024 * 16; // 16mb | ||
// Supported MIME-types | ||
@@ -90,3 +95,3 @@ // Matching MIME-types will be scanned for links | ||
]; | ||
// Download linked, but unsupported files (binary - images, documents, etc) | ||
@@ -137,4 +142,4 @@ this.downloadUnsupported = true; | ||
path = "/" + split.slice(1).join("/"); | ||
} else if (URL.match(/^\//)) { | ||
@@ -147,8 +152,8 @@ // Absolute URL. Easy to handle! | ||
// Split into a stack and walk it up and down to calculate the absolute path | ||
var processedPathContext = URLContext.path; | ||
processedPathContext = processedPathContext.split(/\?/).shift(); | ||
processedPathContext = processedPathContext.split(/\#/).shift(); | ||
pathStack = processedPathContext.split("/"); | ||
@@ -184,3 +189,3 @@ | ||
} | ||
// Filter blank path chunks | ||
@@ -193,3 +198,3 @@ pathStack = pathStack.filter(function(item) { | ||
} | ||
// Strip the www subdomain out if required | ||
@@ -199,9 +204,9 @@ if (crawler.stripWWWDomain) { | ||
} | ||
// Replace problem entities... | ||
path = path.replace(/&/ig,"&"); | ||
// Ensure domain is always lower-case | ||
domain = domain.toLowerCase(); | ||
return { | ||
@@ -214,10 +219,10 @@ "protocol": protocol, | ||
} | ||
// Make this function available externally | ||
crawler.processURL = processURL; | ||
// Determines whether the protocol is supported, given a URL | ||
function protocolSupported(URL) { | ||
var supported = false; | ||
if (URL.match(/^[a-z0-9]+\:/i)) { | ||
@@ -229,3 +234,3 @@ crawler.allowedProtocols.forEach(function(protocolCheck) { | ||
}); | ||
return supported; | ||
@@ -236,7 +241,7 @@ } else { | ||
} | ||
// Determines whether the mimetype is supported, given a... mimetype | ||
function mimeTypeSupported(MIMEType) { | ||
var supported = false; | ||
crawler.supportedMimeTypes.forEach(function(mimeCheck) { | ||
@@ -247,7 +252,7 @@ if (!!mimeCheck.exec(MIMEType)) { | ||
}); | ||
return supported; | ||
} | ||
// Input some text/html and this function will return a bunch of URLs for queueing | ||
@@ -257,3 +262,3 @@ // (if there are actually any in the resource, otherwise it'll return an empty array) | ||
var resources = [], resourceText = resourceData.toString("utf8"); | ||
// Clean links | ||
@@ -268,3 +273,3 @@ function cleanAndQueue(urlMatch) { | ||
URL = URL.split(/\s+/g).shift(); | ||
if (URL.match(/^\s*#/)) { | ||
@@ -274,3 +279,3 @@ // Bookmark URL | ||
} | ||
URL = URL.split("#").shift(); | ||
@@ -282,3 +287,3 @@ | ||
},false)) { | ||
resources.push(URL); | ||
@@ -290,3 +295,3 @@ } | ||
} | ||
// Rough scan for URLs | ||
@@ -296,3 +301,3 @@ cleanAndQueue(resourceText.match(/(href\s?=\s?|src\s?=\s?|url\()['"]?([^"'\s>\)]+)/ig)); | ||
cleanAndQueue(resourceText.match(/url\([^)]+/ig)); | ||
// This might be a bit of a gamble... but get hard-coded strings out of javacript: URLs | ||
@@ -304,3 +309,3 @@ // They're often popup-image or preview windows, which would otherwise be unavailable to us | ||
} | ||
// Checks to see whether domain is valid for crawling. | ||
@@ -323,3 +328,3 @@ function domainValid(domain) { | ||
} | ||
// Checks if the first domain is a subdomain of the second | ||
@@ -329,3 +334,3 @@ function isSubdomainOf(subdomain,domain) { | ||
subdomainParts = subdomain.split(/\./g); | ||
// If we're ignoring www, remove it from both (if www is the first domain component...) | ||
@@ -336,6 +341,6 @@ if (crawler.ignoreWWWDomain) { | ||
} | ||
// Can't have a subdomain that's shorter than its parent. | ||
if (subdomain.length < domain.length) return false; | ||
// Loop through subdomain backwards, from TLD to least significant domain, break on first error. | ||
@@ -347,6 +352,6 @@ var index = subdomainParts.length - 1; | ||
} | ||
return true; | ||
} | ||
// If we're not filtering by domain, just return true. | ||
@@ -363,51 +368,66 @@ return (!crawler.filterByDomain || | ||
} | ||
// Make available externally to this scope | ||
crawler.isDomainValid = domainValid; | ||
// Externally accessible function for auditing the number of open requests... | ||
crawler.openRequests = function() { | ||
return openRequests; | ||
}; | ||
// Input some text/html and this function will delegate resource discovery, check link validity | ||
// and queue up resources for downloading! | ||
function queueLinkedItems(resourceData,queueItem) { | ||
var urlList = discoverResources(resourceData,queueItem); | ||
discoverResources(resourceData,queueItem).forEach(function(url){ queueURL(url,queueItem); }); | ||
} | ||
urlList.forEach(function(url) { | ||
var URLData = processURL(url,queueItem); | ||
// Clean and queue a single URL... | ||
function queueURL(url,queueItem) { | ||
var parsedURL = typeof(url) === "object" ? url : processURL(url,queueItem); | ||
// URL Parser decided this URL was junky. Next please! | ||
if (!parsedURL) { | ||
return false; | ||
} | ||
// URL Parser decided this URL was junky. Next please! | ||
if (!URLData) { | ||
return false; | ||
} | ||
// Pass this URL past fetch conditions to ensure the user thinks it's valid | ||
var fetchDenied = false; | ||
fetchDenied = crawler.fetchConditions.reduce(function(prev,callback) { | ||
return fetchDenied || !callback(URLData); | ||
},false); | ||
if (fetchDenied) { | ||
// Fetch Conditions conspired to block URL | ||
return false; | ||
} | ||
// Pass this URL past fetch conditions to ensure the user thinks it's valid | ||
var fetchDenied = false; | ||
fetchDenied = crawler.fetchConditions.reduce(function(prev,callback) { | ||
return fetchDenied || !callback(parsedURL); | ||
},false); | ||
// Check the domain is valid before adding it to the queue | ||
if (domainValid(URLData.domain)) { | ||
try { | ||
if (crawler.queue.add(URLData.protocol,URLData.domain,URLData.port,URLData.path)) { | ||
crawler.queue.last().referrer = queueItem.url; | ||
crawler.emit("queueadd",crawler.queue.last()); | ||
if (fetchDenied) { | ||
// Fetch Conditions conspired to block URL | ||
return false; | ||
} | ||
// Check the domain is valid before adding it to the queue | ||
if (domainValid(parsedURL.domain)) { | ||
try { | ||
crawler.queue.add( | ||
parsedURL.protocol, | ||
parsedURL.domain, | ||
parsedURL.port, | ||
parsedURL.path, | ||
function queueAddCallback(error,newQueueItem) { | ||
if (error) { | ||
// We received an error condition when adding the callback | ||
crawler.emit("queueerror",error,parsedURL); | ||
} else { | ||
crawler.emit("queueadd",newQueueItem,parsedURL); | ||
newQueueItem.referrer = queueItem.url; | ||
} | ||
} | ||
} catch(error) { | ||
crawler.emit("queueerror",error,URLData); | ||
} | ||
); | ||
} catch(error) { | ||
// If we caught an error, emit queueerror | ||
crawler.emit("queueerror",error,parsedURL); | ||
} | ||
}); | ||
} | ||
} | ||
// Fetch a queue item | ||
function fetchQueueItem(index) { | ||
function fetchQueueItem(queueItem) { | ||
openRequests ++; | ||
// Get queue item | ||
var queueItem = crawler.queue.get(index); | ||
// Emit fetchstart event | ||
@@ -419,3 +439,3 @@ crawler.emit("fetchstart",queueItem); | ||
var responseBuffer, responseLength, responseLengthReceived, contentType; | ||
// Mark as spooled | ||
@@ -429,3 +449,3 @@ queueItem.status = "spooled"; | ||
requestPath = queueItem.path; | ||
// Are we passing through an HTTP proxy? | ||
@@ -437,3 +457,3 @@ if (crawler.useProxy) { | ||
} | ||
// Load in request options | ||
@@ -448,3 +468,8 @@ requestOptions = { | ||
}; | ||
if(crawler.needsAuth) { | ||
var auth = 'Basic ' + new Buffer(crawler.authUser + ":" + crawler.authPass).toString('base64'); | ||
requestOptions.headers['Authorization'] = auth; | ||
} | ||
// Record what time we started this request | ||
@@ -457,10 +482,13 @@ timeCommenced = (new Date().getTime()); | ||
responseLengthReceived = 0; | ||
// Record what time we first received the header information | ||
timeHeadersReceived = (new Date().getTime()); | ||
responseLength = parseInt(response.headers["content-length"],10); | ||
responseLength = !isNaN(responseLength) ? responseLength : 0; | ||
// Save timing and content some header information into queue | ||
queueItem.stateData.requestLatency = (timeHeadersReceived - timeCommenced); | ||
queueItem.stateData.requestTime = (timeHeadersReceived - timeCommenced); | ||
queueItem.stateData.contentLength = responseLength = parseInt(response.headers["content-length"],10); | ||
queueItem.stateData.contentLength = responseLength; | ||
queueItem.stateData.contentType = contentType = response.headers["content-type"]; | ||
@@ -471,13 +499,13 @@ queueItem.stateData.code = response.statusCode; | ||
queueItem.stateData.headers = response.headers; | ||
// Emit header receive event | ||
crawler.emit("fetchheaders",queueItem,response); | ||
// Ensure response length is reasonable... | ||
responseLength = responseLength > 0 ? responseLength : crawler.maxResourceSize; | ||
queueItem.stateData.contentLength = responseLength; | ||
// Function for dealing with 200 responses | ||
function processReceivedData() { | ||
if (!crawler.queue[index].fetched) { | ||
if (!queueItem.fetched) { | ||
timeDataReceived = (new Date().getTime()); | ||
@@ -491,5 +519,5 @@ | ||
queueItem.stateData.sentIncorrectSize = responseBuffer.length !== responseLength; | ||
crawler.emit("fetchcomplete",queueItem,responseBuffer,response); | ||
// First, save item to cache (if we're using a cache!) | ||
@@ -499,3 +527,3 @@ if (crawler.cache !== null && crawler.cache.setCacheData instanceof Function) { | ||
} | ||
// We only process the item if it's of a valid mimetype | ||
@@ -510,3 +538,3 @@ // and only if the crawler is set to discover its own resources | ||
} | ||
function receiveData(chunk) { | ||
@@ -519,16 +547,16 @@ if (chunk && chunk.length && !dataReceived) { | ||
// larger than our maximum resource size. | ||
if (responseLengthReceived + chunk.length <= crawler.maxResourceSize) { | ||
// Start by creating a new buffer, which will be our main buffer going forward... | ||
var tmpNewBuffer = new Buffer(responseLengthReceived + chunk.length); | ||
// Copy all our old data into it... | ||
responseBuffer.copy(tmpNewBuffer,0,0,responseBuffer.length); | ||
// And now the new chunk | ||
chunk.copy(tmpNewBuffer,responseBuffer.length,0,chunk.length); | ||
// And now make the response buffer our new buffer, leaving the original for GC | ||
responseBuffer = tmpNewBuffer; | ||
} else { | ||
@@ -540,3 +568,3 @@ // Oh dear oh dear! The response is not only more data than we were initially told, | ||
// We'll then deal with the data that we have. | ||
crawler.emit("fetchdataerror",queueItem,response); | ||
@@ -548,12 +576,12 @@ } | ||
} | ||
// Increment our data received counter | ||
responseLengthReceived += chunk.length; | ||
} | ||
if ((responseLengthReceived >= responseLength || response.complete) && !dataReceived) { | ||
// Slice the buffer to chop off any unused space | ||
responseBuffer = responseBuffer.slice(0,responseLengthReceived); | ||
dataReceived = true; | ||
@@ -563,3 +591,3 @@ processReceivedData(); | ||
} | ||
// If we should just go ahead and get the data | ||
@@ -571,9 +599,9 @@ if (response.statusCode >= 200 && response.statusCode < 300 && responseLength <= crawler.maxResourceSize) { | ||
responseBuffer = new Buffer(responseLength); | ||
response.on("data",receiveData); | ||
response.on("end",receiveData); | ||
// We've got a not-modified response back | ||
} else if (response.statusCode === 304) { | ||
if (crawler.cache !== null && crawler.cache.getCacheData) { | ||
@@ -588,3 +616,3 @@ // We've got access to a cache | ||
} | ||
// If we should queue a redirect | ||
@@ -597,19 +625,11 @@ } else if (response.statusCode >= 300 && response.statusCode < 400 && response.headers.location) { | ||
parsedURL = processURL(response.headers.location,queueItem); | ||
// Emit redirect event | ||
crawler.emit("fetchredirect",queueItem,parsedURL,response); | ||
// If we're permitted to talk to the domain... | ||
if (domainValid(parsedURL.domain)) { | ||
// ...then queue up the new URL! | ||
try { | ||
if (crawler.queue.add(parsedURL.protocol,parsedURL.domain,parsedURL.port,parsedURL.path)) { | ||
crawler.emit("queueadd",crawler.queue.last()); | ||
} | ||
} catch(error) { | ||
crawler.emit("queueerror",error,parsedURL); | ||
} | ||
} | ||
// Clean URL, add to queue... | ||
queueURL(parsedURL,queueItem); | ||
openRequests --; | ||
// Ignore this request, but record that we had a 404 | ||
@@ -619,7 +639,8 @@ } else if (response.statusCode === 404) { | ||
queueItem.status = "notfound"; | ||
// Emit 404 event | ||
crawler.emit("fetch404",queueItem,response); | ||
openRequests --; | ||
// And oh dear. Handle this one as well. (other 400s, 500s, etc) | ||
@@ -629,6 +650,6 @@ } else { | ||
queueItem.status = "failed"; | ||
// Emit 5xx / 4xx event | ||
crawler.emit("fetcherror",queueItem,response); | ||
openRequests --; | ||
@@ -640,6 +661,5 @@ } | ||
openRequests --; | ||
// Emit 5xx / 4xx event | ||
crawler.emit("fetchclienterror",crawler.queue[index],errorData); | ||
crawler.emit("fetchclienterror",queueItem,errorData); | ||
queueItem.fetched = true; | ||
@@ -650,12 +670,18 @@ queueItem.stateData.code = 599; | ||
} | ||
// Crawl init | ||
this.crawl = function() { | ||
var currentFetchIndex = crawler.queue.oldestUnfetchedItem(); | ||
if (currentFetchIndex >= 0 && !isNaN(currentFetchIndex) && openRequests < crawler.maxConcurrency) { | ||
fetchQueueItem(currentFetchIndex); | ||
} else if (openRequests === 0) { | ||
crawler.emit("complete"); | ||
crawler.stop(); | ||
if (openRequests < crawler.maxConcurrency) { | ||
crawler.queue.oldestUnfetchedItem(function(err,queueItem) { | ||
if (queueItem) { | ||
fetchQueueItem(queueItem); | ||
} else if (openRequests === 0) { | ||
crawler.queue.complete(function(err,completeCount) { | ||
if (completeCount === crawler.queue.length) { | ||
crawler.emit("complete"); | ||
crawler.stop(); | ||
} | ||
}); | ||
} | ||
}); | ||
} | ||
@@ -685,3 +711,3 @@ }; | ||
} | ||
} | ||
}; | ||
@@ -692,5 +718,5 @@ Crawler.prototype.removeFetchCondition = function(index) { | ||
tmpArray = this.fetchConditions.length-1 > index ? tmpArray.concat(this.fetchConditions.slice(0,index+1)) : tmpArray; | ||
this.fetchConditions = tmpArray; | ||
return true; | ||
@@ -700,3 +726,3 @@ } else { | ||
} | ||
} | ||
}; | ||
@@ -703,0 +729,0 @@ // EXPORTS |
{ | ||
"name": "simplecrawler", | ||
"description": "Very straigntforward web crawler. Uses EventEmitter. Generates queue statistics and has a basic cache mechanism with extensible backend.", | ||
"version": "0.0.4", | ||
"version": "0.0.5", | ||
"homepage": "http://github.com/cgiffard/node-simplecrawler", | ||
@@ -6,0 +6,0 @@ "author": "Christopher Giffard <christopher.giffard@cgiffard.com>", |
237
queue.js
@@ -19,5 +19,10 @@ // Simplecrawler - queue module | ||
this.oldestUnfetchedIndex = 0; | ||
this.completeCache = 0; | ||
}; | ||
FetchQueue.prototype = []; | ||
FetchQueue.prototype.add = function(protocol,domain,port,path) { | ||
FetchQueue.prototype.add = function(protocol,domain,port,path,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
// Ensure all variables conform to reasonable defaults | ||
@@ -27,32 +32,63 @@ protocol = protocol === "https" ? "https" : "http"; | ||
if (isNaN(port)) { | ||
throw Error("Port must be numeric!"); | ||
return callback(new Error("Port must be numeric!")); | ||
} | ||
var url = protocol + "://" + domain + (port !== 80 ? ":" + port : "") + path; | ||
if (!this.reduce(function(prev, current, index, array) { | ||
return prev || String(current.url).toLowerCase() === String(url).toLowerCase(); | ||
},false)) { | ||
this.push({ | ||
"url": url, | ||
"protocol": protocol, | ||
"domain": domain, | ||
"port": port, | ||
"path": path, | ||
"fetched": false, | ||
"status": "queued", | ||
"stateData": {} | ||
this.exists(protocol,domain,port,path, | ||
function(err,exists) { | ||
if (err) return callback(err); | ||
if (!exists) { | ||
var queueItem = { | ||
"url": url, | ||
"protocol": protocol, | ||
"domain": domain, | ||
"port": port, | ||
"path": path, | ||
"fetched": false, | ||
"status": "queued", | ||
"stateData": {} | ||
}; | ||
self.push(queueItem); | ||
callback(null,queueItem); | ||
} else { | ||
callback(new Error("Resource already exists in queue!")); | ||
} | ||
}); | ||
}; | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
// Check if an item already exists in the queue... | ||
FetchQueue.prototype.exists = function(protocol,domain,port,path,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
var count = self.filter(function(item) { | ||
if (String(item.protocol).toLowerCase() === String(protocol).toLowerCase() && | ||
String(item.domain).toLowerCase() === String(domain).toLowerCase() && | ||
parseInt(item.port,10) === parseInt(port,10) && | ||
item.path === path) return true; | ||
return false; | ||
}).length; | ||
callback(null,count); | ||
}; | ||
// Get last item in queue... | ||
FetchQueue.prototype.last = function(callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
callback(null,self[self.length-1]); | ||
}; | ||
// Get item from queue | ||
FetchQueue.prototype.get = function(id) { | ||
if (!isNaN(id) && this.length > id) { | ||
return this[id]; | ||
FetchQueue.prototype.get = function(id, callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
if (!isNaN(id) && self.length > id) { | ||
return callback(null,self[id]); | ||
} | ||
@@ -62,28 +98,27 @@ } | ||
// Get first unfetched item in the queue (and return its index) | ||
FetchQueue.prototype.oldestUnfetchedItem = function() { | ||
for (var itemIndex = this.oldestUnfetchedIndex; itemIndex < this.length; itemIndex ++) { | ||
if (this[itemIndex].status === "queued") { | ||
this.oldestUnfetchedIndex = itemIndex; | ||
return itemIndex; | ||
FetchQueue.prototype.oldestUnfetchedItem = function(callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
for (var itemIndex = self.oldestUnfetchedIndex; itemIndex < self.length; itemIndex ++) { | ||
if (self[itemIndex].status === "queued") { | ||
self.oldestUnfetchedIndex = itemIndex; | ||
return callback(null,self[itemIndex]); | ||
} | ||
} | ||
return -1; | ||
callback(new Error("No unfetched items remain.")); | ||
} | ||
FetchQueue.prototype.last = function() { | ||
return this[this.length-1]; | ||
} | ||
// Gets the maximum total request time, request latency, or download time | ||
FetchQueue.prototype.max = function(statisticName) { | ||
var maxStatisticValue = 0; | ||
FetchQueue.prototype.max = function(statisticName,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var maxStatisticValue = 0, self = this; | ||
if (allowedStatistics.join().indexOf(statisticName) === -1) { | ||
// Not a recognised statistic! | ||
return false; | ||
return callback(new Error("Invalid statistic.")); | ||
} | ||
this.forEach(function(item) { | ||
self.forEach(function(item) { | ||
if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] > maxStatisticValue) { | ||
@@ -93,16 +128,17 @@ maxStatisticValue = item.stateData[statisticName]; | ||
}); | ||
return maxStatisticValue; | ||
callback(null,maxStatisticValue); | ||
}; | ||
// Gets the minimum total request time, request latency, or download time | ||
FetchQueue.prototype.min = function(statisticName) { | ||
var minStatisticValue = Infinity; | ||
FetchQueue.prototype.min = function(statisticName,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var minStatisticValue = Infinity, self = this; | ||
if (allowedStatistics.join().indexOf(statisticName) === -1) { | ||
// Not a recognised statistic! | ||
return false; | ||
return callback(new Error("Invalid statistic.")); | ||
} | ||
this.forEach(function(item) { | ||
self.forEach(function(item) { | ||
if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] < minStatisticValue) { | ||
@@ -113,15 +149,16 @@ minStatisticValue = item.stateData[statisticName]; | ||
return minStatisticValue === Infinity? 0 : minStatisticValue; | ||
callback(null,minStatisticValue === Infinity? 0 : minStatisticValue); | ||
}; | ||
// Gets the minimum total request time, request latency, or download time | ||
FetchQueue.prototype.avg = function(statisticName) { | ||
var NumberSum = 0, NumberCount = 0; | ||
FetchQueue.prototype.avg = function(statisticName,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var NumberSum = 0, NumberCount = 0, self = this; | ||
if (allowedStatistics.join().indexOf(statisticName) === -1) { | ||
// Not a recognised statistic! | ||
return false; | ||
return callback(new Error("Invalid statistic.")); | ||
} | ||
this.forEach(function(item) { | ||
self.forEach(function(item) { | ||
if (item.fetched && item.stateData[statisticName] !== null && !isNaN(item.stateData[statisticName])) { | ||
@@ -132,11 +169,12 @@ NumberSum += item.stateData[statisticName]; | ||
}); | ||
return NumberSum / NumberCount; | ||
callback(null,NumberSum / NumberCount); | ||
}; | ||
// Gets the number of requests which have been completed. | ||
FetchQueue.prototype.complete = function() { | ||
var NumberComplete = 0; | ||
FetchQueue.prototype.complete = function(callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var NumberComplete = 0, self = this; | ||
this.forEach(function(item) { | ||
self.forEach(function(item) { | ||
if (item.fetched) { | ||
@@ -146,3 +184,4 @@ NumberComplete ++; | ||
}); | ||
callback(null,NumberComplete); | ||
return NumberComplete; | ||
@@ -152,6 +191,7 @@ }; | ||
// Gets the number of queue items with the given status | ||
FetchQueue.prototype.countWithStatus = function(status) { | ||
var queueItemsMatched = 0; | ||
FetchQueue.prototype.countWithStatus = function(status,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var queueItemsMatched = 0, self = this; | ||
this.forEach(function(item) { | ||
self.forEach(function(item) { | ||
if (item.status === status) { | ||
@@ -162,10 +202,11 @@ queueItemsMatched ++; | ||
return queueItemsMatched; | ||
callback(null,queueItemsMatched); | ||
}; | ||
// Gets the number of queue items with the given status | ||
FetchQueue.prototype.getWithStatus = function(status) { | ||
var subqueue = []; | ||
this.forEach(function(item,index) { | ||
FetchQueue.prototype.getWithStatus = function(status,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var subqueue = [], self = this; | ||
self.forEach(function(item,index) { | ||
if (item.status === status) { | ||
@@ -176,15 +217,25 @@ subqueue.push(item); | ||
}); | ||
return subqueue; | ||
callback(null,subqueue); | ||
}; | ||
// Gets the number of requests which have failed for some reason | ||
FetchQueue.prototype.errors = function() { | ||
return this.countWithStatus("failed") + this.countWithStatus("notfound"); | ||
FetchQueue.prototype.errors = function(callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
self.countWithStatus("failed",function(err1,failed) { | ||
self.countWithStatus("notfound",function(err2,notfound) { | ||
callback(null,failed + notfound); | ||
}); | ||
}); | ||
}; | ||
// Writes the queue to disk | ||
FetchQueue.prototype.freeze = function(filename) { | ||
// Re-queue items before freezing... | ||
this.forEach(function(item) { | ||
FetchQueue.prototype.freeze = function(filename,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var self = this; | ||
// Re-queue in-progress items before freezing... | ||
self.forEach(function(item) { | ||
if (item.fetched !== true) { | ||
@@ -195,26 +246,36 @@ item.status = "queued"; | ||
fs.writeFileSync(filename,JSON.stringify(this)); | ||
fs.writeFile(filename,JSON.stringify(self),function(err) { | ||
callback(err,self); | ||
}); | ||
}; | ||
// Reads the queue from disk | ||
FetchQueue.prototype.defrost = function(filename) { | ||
var fileData; | ||
try { | ||
if ((fileData = fs.readFileSync(filename))) { | ||
var defrostedQueue = JSON.parse(fileData.toString("utf8")); | ||
for (var index in defrostedQueue) { | ||
if (defrostedQueue.hasOwnProperty(index) && !isNaN(index)) { | ||
var queueItem = defrostedQueue[index]; | ||
this.push(queueItem); | ||
} | ||
FetchQueue.prototype.defrost = function(filename,callback) { | ||
callback = callback && callback instanceof Function ? callback : function(){}; | ||
var fileData, self = this, defrostedQueue = []; | ||
fs.readFile(filename,function(err,fileData) { | ||
if (err) return callback(err); | ||
if (!fileData.toString("utf8").length) { | ||
return callback(new Error("Failed to defrost queue from zero-length JSON.")); | ||
} | ||
try { | ||
defrostedQueue = JSON.parse(fileData.toString("utf8")); | ||
} catch(error) { | ||
return callback(error); | ||
} | ||
for (var index in defrostedQueue) { | ||
if (defrostedQueue.hasOwnProperty(index) && !isNaN(index)) { | ||
var queueItem = defrostedQueue[index]; | ||
self.push(queueItem); | ||
} | ||
} | ||
return true; | ||
} catch(error) { | ||
return false; | ||
} | ||
callback(null,self) | ||
}); | ||
}; | ||
exports.queue = FetchQueue; |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
53045
982
266
2