simplecrawler - npm Package Compare versions

Comparing version 0.0.4 to 0.0.5

296

index.js

		@@ -18,3 +18,3 @@ // Simplecrawler
		this.domain = domain \|\| "";


		// Gotta start crawling somewhere
		@@ -31,3 +31,3 @@ this.initialPath = initialPath \|\| "/";
		this.maxConcurrency = 5;


		// Maximum time we'll wait for headers
		@@ -51,12 +51,12 @@ this.timeout = 5 * 60 * 1000;
		this.ignoreWWWDomain = true;


		// Or go even further and strip WWW subdomain from domains altogether!
		this.stripWWWDomain = false;


		// Use simplecrawler's internal resource discovery function (switch it off if you'd prefer to discover and queue resources yourself!)
		this.discoverResources = true;


		// Internal cachestore
		this.cache = null;


		// Use an HTTP Proxy?
		@@ -67,6 +67,11 @@ this.useProxy = false;

		// Support for HTTP basic auth
		this.needsAuth = false;
		this.authUser = "";
		this.authPass = "";

		// Domain Whitelist
		// We allow domains to be whitelisted, so cross-domain requests can be made.
		this.domainWhitelist = [];


		// Supported Protocols
		@@ -80,3 +85,3 @@ this.allowedProtocols = [
		this.maxResourceSize = 1024 * 1024 * 16; // 16mb


		// Supported MIME-types
		@@ -90,3 +95,3 @@ // Matching MIME-types will be scanned for links
		];


		// Download linked, but unsupported files (binary - images, documents, etc)
		@@ -137,4 +142,4 @@ this.downloadUnsupported = true;
		path = "/" + split.slice(1).join("/");



		} else if (URL.match(/^\//)) {
		@@ -147,8 +152,8 @@ // Absolute URL. Easy to handle!
		// Split into a stack and walk it up and down to calculate the absolute path


		var processedPathContext = URLContext.path;


		processedPathContext = processedPathContext.split(/\?/).shift();
		processedPathContext = processedPathContext.split(/\#/).shift();


		pathStack = processedPathContext.split("/");
		@@ -184,3 +189,3 @@
		}


		// Filter blank path chunks
		@@ -193,3 +198,3 @@ pathStack = pathStack.filter(function(item) {
		}


		// Strip the www subdomain out if required
		@@ -199,9 +204,9 @@ if (crawler.stripWWWDomain) {
		}


		// Replace problem entities...
		path = path.replace(/&/ig,"&");


		// Ensure domain is always lower-case
		domain = domain.toLowerCase();


		return {
		@@ -214,10 +219,10 @@ "protocol": protocol,
		}


		// Make this function available externally
		crawler.processURL = processURL;


		// Determines whether the protocol is supported, given a URL
		function protocolSupported(URL) {
		var supported = false;


		if (URL.match(/^[a-z0-9]+\:/i)) {
		@@ -229,3 +234,3 @@ crawler.allowedProtocols.forEach(function(protocolCheck) {
		});


		return supported;
		@@ -236,7 +241,7 @@ } else {
		}


		// Determines whether the mimetype is supported, given a... mimetype
		function mimeTypeSupported(MIMEType) {
		var supported = false;


		crawler.supportedMimeTypes.forEach(function(mimeCheck) {
		@@ -247,7 +252,7 @@ if (!!mimeCheck.exec(MIMEType)) {
		});


		return supported;


		}


		// Input some text/html and this function will return a bunch of URLs for queueing
		@@ -257,3 +262,3 @@ // (if there are actually any in the resource, otherwise it'll return an empty array)
		var resources = [], resourceText = resourceData.toString("utf8");


		// Clean links
		@@ -268,3 +273,3 @@ function cleanAndQueue(urlMatch) {
		URL = URL.split(/\s+/g).shift();


		if (URL.match(/^\s*#/)) {
		@@ -274,3 +279,3 @@ // Bookmark URL
		}


		URL = URL.split("#").shift();
		@@ -282,3 +287,3 @@
		},false)) {


		resources.push(URL);
		@@ -290,3 +295,3 @@ }
		}


		// Rough scan for URLs
		@@ -296,3 +301,3 @@ cleanAndQueue(resourceText.match(/(href\s?=\s?\|src\s?=\s?\|url\()['"]?([^"'\s>\)]+)/ig));
		cleanAndQueue(resourceText.match(/url\([^)]+/ig));


		// This might be a bit of a gamble... but get hard-coded strings out of javacript: URLs
		@@ -304,3 +309,3 @@ // They're often popup-image or preview windows, which would otherwise be unavailable to us
		}


		// Checks to see whether domain is valid for crawling.
		@@ -323,3 +328,3 @@ function domainValid(domain) {
		}


		// Checks if the first domain is a subdomain of the second
		@@ -329,3 +334,3 @@ function isSubdomainOf(subdomain,domain) {
		subdomainParts = subdomain.split(/\./g);


		// If we're ignoring www, remove it from both (if www is the first domain component...)
		@@ -336,6 +341,6 @@ if (crawler.ignoreWWWDomain) {
		}


		// Can't have a subdomain that's shorter than its parent.
		if (subdomain.length < domain.length) return false;


		// Loop through subdomain backwards, from TLD to least significant domain, break on first error.
		@@ -347,6 +352,6 @@ var index = subdomainParts.length - 1;
		}


		return true;
		}


		// If we're not filtering by domain, just return true.
		@@ -363,51 +368,66 @@ return (!crawler.filterByDomain \|\|
		}


		// Make available externally to this scope
		crawler.isDomainValid = domainValid;

		// Externally accessible function for auditing the number of open requests...
		crawler.openRequests = function() {
		return openRequests;
		};

		// Input some text/html and this function will delegate resource discovery, check link validity
		// and queue up resources for downloading!
		function queueLinkedItems(resourceData,queueItem) {
		var urlList = discoverResources(resourceData,queueItem);
		discoverResources(resourceData,queueItem).forEach(function(url){ queueURL(url,queueItem); });
		}

		urlList.forEach(function(url) {
		var URLData = processURL(url,queueItem);
		// Clean and queue a single URL...
		function queueURL(url,queueItem) {
		var parsedURL = typeof(url) === "object" ? url : processURL(url,queueItem);

		// URL Parser decided this URL was junky. Next please!
		if (!parsedURL) {
		return false;
		}

		// URL Parser decided this URL was junky. Next please!
		if (!URLData) {
		return false;
		}

		// Pass this URL past fetch conditions to ensure the user thinks it's valid
		var fetchDenied = false;
		fetchDenied = crawler.fetchConditions.reduce(function(prev,callback) {
		return fetchDenied \|\| !callback(URLData);
		},false);

		if (fetchDenied) {
		// Fetch Conditions conspired to block URL
		return false;
		}
		// Pass this URL past fetch conditions to ensure the user thinks it's valid
		var fetchDenied = false;
		fetchDenied = crawler.fetchConditions.reduce(function(prev,callback) {
		return fetchDenied \|\| !callback(parsedURL);
		},false);

		// Check the domain is valid before adding it to the queue
		if (domainValid(URLData.domain)) {
		try {
		if (crawler.queue.add(URLData.protocol,URLData.domain,URLData.port,URLData.path)) {
		crawler.queue.last().referrer = queueItem.url;
		crawler.emit("queueadd",crawler.queue.last());
		if (fetchDenied) {
		// Fetch Conditions conspired to block URL
		return false;
		}

		// Check the domain is valid before adding it to the queue
		if (domainValid(parsedURL.domain)) {
		try {
		crawler.queue.add(
		parsedURL.protocol,
		parsedURL.domain,
		parsedURL.port,
		parsedURL.path,
		function queueAddCallback(error,newQueueItem) {
		if (error) {
		// We received an error condition when adding the callback
		crawler.emit("queueerror",error,parsedURL);
		} else {
		crawler.emit("queueadd",newQueueItem,parsedURL);
		newQueueItem.referrer = queueItem.url;
		}
		}
		} catch(error) {
		crawler.emit("queueerror",error,URLData);
		}
		);
		} catch(error) {
		// If we caught an error, emit queueerror
		crawler.emit("queueerror",error,parsedURL);
		}
		});
		}
		}

		// Fetch a queue item
		function fetchQueueItem(index) {
		function fetchQueueItem(queueItem) {
		openRequests ++;

		// Get queue item
		var queueItem = crawler.queue.get(index);


		// Emit fetchstart event
		@@ -419,3 +439,3 @@ crawler.emit("fetchstart",queueItem);
		var responseBuffer, responseLength, responseLengthReceived, contentType;


		// Mark as spooled
		@@ -429,3 +449,3 @@ queueItem.status = "spooled";
		requestPath = queueItem.path;


		// Are we passing through an HTTP proxy?
		@@ -437,3 +457,3 @@ if (crawler.useProxy) {
		}


		// Load in request options
		@@ -448,3 +468,8 @@ requestOptions = {
		};


		if(crawler.needsAuth) {
		var auth = 'Basic ' + new Buffer(crawler.authUser + ":" + crawler.authPass).toString('base64');
		requestOptions.headers['Authorization'] = auth;
		}

		// Record what time we started this request
		@@ -457,10 +482,13 @@ timeCommenced = (new Date().getTime());
		responseLengthReceived = 0;


		// Record what time we first received the header information
		timeHeadersReceived = (new Date().getTime());

		responseLength = parseInt(response.headers["content-length"],10);
		responseLength = !isNaN(responseLength) ? responseLength : 0;

		// Save timing and content some header information into queue
		queueItem.stateData.requestLatency = (timeHeadersReceived - timeCommenced);
		queueItem.stateData.requestTime = (timeHeadersReceived - timeCommenced);
		queueItem.stateData.contentLength = responseLength = parseInt(response.headers["content-length"],10);
		queueItem.stateData.contentLength = responseLength;
		queueItem.stateData.contentType = contentType = response.headers["content-type"];
		@@ -471,13 +499,13 @@ queueItem.stateData.code = response.statusCode;
		queueItem.stateData.headers = response.headers;


		// Emit header receive event
		crawler.emit("fetchheaders",queueItem,response);


		// Ensure response length is reasonable...
		responseLength = responseLength > 0 ? responseLength : crawler.maxResourceSize;
		queueItem.stateData.contentLength = responseLength;


		// Function for dealing with 200 responses
		function processReceivedData() {
		if (!crawler.queue[index].fetched) {
		if (!queueItem.fetched) {
		timeDataReceived = (new Date().getTime());
		@@ -491,5 +519,5 @@
		queueItem.stateData.sentIncorrectSize = responseBuffer.length !== responseLength;


		crawler.emit("fetchcomplete",queueItem,responseBuffer,response);


		// First, save item to cache (if we're using a cache!)
		@@ -499,3 +527,3 @@ if (crawler.cache !== null && crawler.cache.setCacheData instanceof Function) {
		}


		// We only process the item if it's of a valid mimetype
		@@ -510,3 +538,3 @@ // and only if the crawler is set to discover its own resources
		}


		function receiveData(chunk) {
		@@ -519,16 +547,16 @@ if (chunk && chunk.length && !dataReceived) {
		// larger than our maximum resource size.


		if (responseLengthReceived + chunk.length <= crawler.maxResourceSize) {
		// Start by creating a new buffer, which will be our main buffer going forward...
		var tmpNewBuffer = new Buffer(responseLengthReceived + chunk.length);


		// Copy all our old data into it...
		responseBuffer.copy(tmpNewBuffer,0,0,responseBuffer.length);


		// And now the new chunk
		chunk.copy(tmpNewBuffer,responseBuffer.length,0,chunk.length);


		// And now make the response buffer our new buffer, leaving the original for GC
		responseBuffer = tmpNewBuffer;


		} else {
		@@ -540,3 +568,3 @@ // Oh dear oh dear! The response is not only more data than we were initially told,
		// We'll then deal with the data that we have.


		crawler.emit("fetchdataerror",queueItem,response);
		@@ -548,12 +576,12 @@ }
		}


		// Increment our data received counter
		responseLengthReceived += chunk.length;
		}



		if ((responseLengthReceived >= responseLength \|\| response.complete) && !dataReceived) {
		// Slice the buffer to chop off any unused space
		responseBuffer = responseBuffer.slice(0,responseLengthReceived);


		dataReceived = true;
		@@ -563,3 +591,3 @@ processReceivedData();
		}


		// If we should just go ahead and get the data
		@@ -571,9 +599,9 @@ if (response.statusCode >= 200 && response.statusCode < 300 && responseLength <= crawler.maxResourceSize) {
		responseBuffer = new Buffer(responseLength);


		response.on("data",receiveData);
		response.on("end",receiveData);


		// We've got a not-modified response back
		} else if (response.statusCode === 304) {


		if (crawler.cache !== null && crawler.cache.getCacheData) {
		@@ -588,3 +616,3 @@ // We've got access to a cache
		}


		// If we should queue a redirect
		@@ -597,19 +625,11 @@ } else if (response.statusCode >= 300 && response.statusCode < 400 && response.headers.location) {
		parsedURL = processURL(response.headers.location,queueItem);


		// Emit redirect event
		crawler.emit("fetchredirect",queueItem,parsedURL,response);

		// If we're permitted to talk to the domain...
		if (domainValid(parsedURL.domain)) {
		// ...then queue up the new URL!
		try {
		if (crawler.queue.add(parsedURL.protocol,parsedURL.domain,parsedURL.port,parsedURL.path)) {
		crawler.emit("queueadd",crawler.queue.last());
		}
		} catch(error) {
		crawler.emit("queueerror",error,parsedURL);
		}
		}


		// Clean URL, add to queue...
		queueURL(parsedURL,queueItem);

		openRequests --;

		// Ignore this request, but record that we had a 404
		@@ -619,7 +639,8 @@ } else if (response.statusCode === 404) {
		queueItem.status = "notfound";


		// Emit 404 event
		crawler.emit("fetch404",queueItem,response);


		openRequests --;

		// And oh dear. Handle this one as well. (other 400s, 500s, etc)
		@@ -629,6 +650,6 @@ } else {
		queueItem.status = "failed";


		// Emit 5xx / 4xx event
		crawler.emit("fetcherror",queueItem,response);


		openRequests --;
		@@ -640,6 +661,5 @@ }
		openRequests --;


		// Emit 5xx / 4xx event
		crawler.emit("fetchclienterror",crawler.queue[index],errorData);

		crawler.emit("fetchclienterror",queueItem,errorData);
		queueItem.fetched = true;
		@@ -650,12 +670,18 @@ queueItem.stateData.code = 599;
		}


		// Crawl init
		this.crawl = function() {
		var currentFetchIndex = crawler.queue.oldestUnfetchedItem();

		if (currentFetchIndex >= 0 && !isNaN(currentFetchIndex) && openRequests < crawler.maxConcurrency) {
		fetchQueueItem(currentFetchIndex);
		} else if (openRequests === 0) {
		crawler.emit("complete");
		crawler.stop();
		if (openRequests < crawler.maxConcurrency) {
		crawler.queue.oldestUnfetchedItem(function(err,queueItem) {
		if (queueItem) {
		fetchQueueItem(queueItem);
		} else if (openRequests === 0) {
		crawler.queue.complete(function(err,completeCount) {
		if (completeCount === crawler.queue.length) {
		crawler.emit("complete");
		crawler.stop();
		}
		});
		}
		});
		}
		@@ -685,3 +711,3 @@ };
		}
		}
		};

		@@ -692,5 +718,5 @@ Crawler.prototype.removeFetchCondition = function(index) {
		tmpArray = this.fetchConditions.length-1 > index ? tmpArray.concat(this.fetchConditions.slice(0,index+1)) : tmpArray;


		this.fetchConditions = tmpArray;


		return true;
		@@ -700,3 +726,3 @@ } else {
		}
		}
		};

		@@ -703,0 +729,0 @@ // EXPORTS

package.json

		{
		"name": "simplecrawler",
		"description": "Very straigntforward web crawler. Uses EventEmitter. Generates queue statistics and has a basic cache mechanism with extensible backend.",
		"version": "0.0.4",
		"version": "0.0.5",
		"homepage": "http://github.com/cgiffard/node-simplecrawler",
		@@ -6,0 +6,0 @@ "author": "Christopher Giffard <christopher.giffard@cgiffard.com>",

237

queue.js

		@@ -19,5 +19,10 @@ // Simplecrawler - queue module
		this.oldestUnfetchedIndex = 0;
		this.completeCache = 0;
		};

		FetchQueue.prototype = [];
		FetchQueue.prototype.add = function(protocol,domain,port,path) {
		FetchQueue.prototype.add = function(protocol,domain,port,path,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		// Ensure all variables conform to reasonable defaults
		@@ -27,32 +32,63 @@ protocol = protocol === "https" ? "https" : "http";
		if (isNaN(port)) {
		throw Error("Port must be numeric!");
		return callback(new Error("Port must be numeric!"));
		}


		var url = protocol + "://" + domain + (port !== 80 ? ":" + port : "") + path;

		if (!this.reduce(function(prev, current, index, array) {
		return prev \|\| String(current.url).toLowerCase() === String(url).toLowerCase();
		},false)) {

		this.push({
		"url": url,
		"protocol": protocol,
		"domain": domain,
		"port": port,
		"path": path,
		"fetched": false,
		"status": "queued",
		"stateData": {}

		this.exists(protocol,domain,port,path,
		function(err,exists) {
		if (err) return callback(err);

		if (!exists) {
		var queueItem = {
		"url": url,
		"protocol": protocol,
		"domain": domain,
		"port": port,
		"path": path,
		"fetched": false,
		"status": "queued",
		"stateData": {}
		};

		self.push(queueItem);
		callback(null,queueItem);
		} else {
		callback(new Error("Resource already exists in queue!"));
		}
		});
		};

		return true;
		} else {
		return false;
		}
		// Check if an item already exists in the queue...
		FetchQueue.prototype.exists = function(protocol,domain,port,path,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		var count = self.filter(function(item) {
		if (String(item.protocol).toLowerCase() === String(protocol).toLowerCase() &&
		String(item.domain).toLowerCase() === String(domain).toLowerCase() &&
		parseInt(item.port,10) === parseInt(port,10) &&
		item.path === path) return true;

		return false;
		}).length;

		callback(null,count);
		};

		// Get last item in queue...
		FetchQueue.prototype.last = function(callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		callback(null,self[self.length-1]);
		};

		// Get item from queue
		FetchQueue.prototype.get = function(id) {
		if (!isNaN(id) && this.length > id) {
		return this[id];
		FetchQueue.prototype.get = function(id, callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		if (!isNaN(id) && self.length > id) {
		return callback(null,self[id]);
		}
		@@ -62,28 +98,27 @@ }
		// Get first unfetched item in the queue (and return its index)
		FetchQueue.prototype.oldestUnfetchedItem = function() {
		for (var itemIndex = this.oldestUnfetchedIndex; itemIndex < this.length; itemIndex ++) {
		if (this[itemIndex].status === "queued") {
		this.oldestUnfetchedIndex = itemIndex;
		return itemIndex;
		FetchQueue.prototype.oldestUnfetchedItem = function(callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		for (var itemIndex = self.oldestUnfetchedIndex; itemIndex < self.length; itemIndex ++) {
		if (self[itemIndex].status === "queued") {
		self.oldestUnfetchedIndex = itemIndex;
		return callback(null,self[itemIndex]);
		}
		}

		return -1;
		callback(new Error("No unfetched items remain."));
		}


		FetchQueue.prototype.last = function() {
		return this[this.length-1];
		}

		// Gets the maximum total request time, request latency, or download time
		FetchQueue.prototype.max = function(statisticName) {
		var maxStatisticValue = 0;
		FetchQueue.prototype.max = function(statisticName,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var maxStatisticValue = 0, self = this;

		if (allowedStatistics.join().indexOf(statisticName) === -1) {
		// Not a recognised statistic!
		return false;
		return callback(new Error("Invalid statistic."));
		}

		this.forEach(function(item) {
		self.forEach(function(item) {
		if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] > maxStatisticValue) {
		@@ -93,16 +128,17 @@ maxStatisticValue = item.stateData[statisticName];
		});

		return maxStatisticValue;

		callback(null,maxStatisticValue);
		};

		// Gets the minimum total request time, request latency, or download time
		FetchQueue.prototype.min = function(statisticName) {
		var minStatisticValue = Infinity;
		FetchQueue.prototype.min = function(statisticName,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var minStatisticValue = Infinity, self = this;

		if (allowedStatistics.join().indexOf(statisticName) === -1) {
		// Not a recognised statistic!
		return false;
		return callback(new Error("Invalid statistic."));
		}

		this.forEach(function(item) {
		self.forEach(function(item) {
		if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] < minStatisticValue) {
		@@ -113,15 +149,16 @@ minStatisticValue = item.stateData[statisticName];

		return minStatisticValue === Infinity? 0 : minStatisticValue;
		callback(null,minStatisticValue === Infinity? 0 : minStatisticValue);
		};

		// Gets the minimum total request time, request latency, or download time
		FetchQueue.prototype.avg = function(statisticName) {
		var NumberSum = 0, NumberCount = 0;
		FetchQueue.prototype.avg = function(statisticName,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var NumberSum = 0, NumberCount = 0, self = this;

		if (allowedStatistics.join().indexOf(statisticName) === -1) {
		// Not a recognised statistic!
		return false;
		return callback(new Error("Invalid statistic."));
		}

		this.forEach(function(item) {
		self.forEach(function(item) {
		if (item.fetched && item.stateData[statisticName] !== null && !isNaN(item.stateData[statisticName])) {
		@@ -132,11 +169,12 @@ NumberSum += item.stateData[statisticName];
		});

		return NumberSum / NumberCount;

		callback(null,NumberSum / NumberCount);
		};

		// Gets the number of requests which have been completed.
		FetchQueue.prototype.complete = function() {
		var NumberComplete = 0;
		FetchQueue.prototype.complete = function(callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var NumberComplete = 0, self = this;

		this.forEach(function(item) {
		self.forEach(function(item) {
		if (item.fetched) {
		@@ -146,3 +184,4 @@ NumberComplete ++;
		});


		callback(null,NumberComplete);
		return NumberComplete;
		@@ -152,6 +191,7 @@ };
		// Gets the number of queue items with the given status
		FetchQueue.prototype.countWithStatus = function(status) {
		var queueItemsMatched = 0;
		FetchQueue.prototype.countWithStatus = function(status,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var queueItemsMatched = 0, self = this;

		this.forEach(function(item) {
		self.forEach(function(item) {
		if (item.status === status) {
		@@ -162,10 +202,11 @@ queueItemsMatched ++;

		return queueItemsMatched;
		callback(null,queueItemsMatched);
		};

		// Gets the number of queue items with the given status
		FetchQueue.prototype.getWithStatus = function(status) {
		var subqueue = [];

		this.forEach(function(item,index) {
		FetchQueue.prototype.getWithStatus = function(status,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var subqueue = [], self = this;

		self.forEach(function(item,index) {
		if (item.status === status) {
		@@ -176,15 +217,25 @@ subqueue.push(item);
		});

		return subqueue;

		callback(null,subqueue);
		};

		// Gets the number of requests which have failed for some reason
		FetchQueue.prototype.errors = function() {
		return this.countWithStatus("failed") + this.countWithStatus("notfound");
		FetchQueue.prototype.errors = function(callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		self.countWithStatus("failed",function(err1,failed) {
		self.countWithStatus("notfound",function(err2,notfound) {
		callback(null,failed + notfound);
		});
		});
		};

		// Writes the queue to disk
		FetchQueue.prototype.freeze = function(filename) {
		// Re-queue items before freezing...
		this.forEach(function(item) {
		FetchQueue.prototype.freeze = function(filename,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var self = this;

		// Re-queue in-progress items before freezing...
		self.forEach(function(item) {
		if (item.fetched !== true) {
		@@ -195,26 +246,36 @@ item.status = "queued";

		fs.writeFileSync(filename,JSON.stringify(this));
		fs.writeFile(filename,JSON.stringify(self),function(err) {
		callback(err,self);
		});
		};

		// Reads the queue from disk
		FetchQueue.prototype.defrost = function(filename) {
		var fileData;
		try {
		if ((fileData = fs.readFileSync(filename))) {
		var defrostedQueue = JSON.parse(fileData.toString("utf8"));

		for (var index in defrostedQueue) {
		if (defrostedQueue.hasOwnProperty(index) && !isNaN(index)) {
		var queueItem = defrostedQueue[index];
		this.push(queueItem);
		}
		FetchQueue.prototype.defrost = function(filename,callback) {
		callback = callback && callback instanceof Function ? callback : function(){};
		var fileData, self = this, defrostedQueue = [];

		fs.readFile(filename,function(err,fileData) {
		if (err) return callback(err);

		if (!fileData.toString("utf8").length) {
		return callback(new Error("Failed to defrost queue from zero-length JSON."));
		}

		try {
		defrostedQueue = JSON.parse(fileData.toString("utf8"));
		} catch(error) {
		return callback(error);
		}

		for (var index in defrostedQueue) {
		if (defrostedQueue.hasOwnProperty(index) && !isNaN(index)) {
		var queueItem = defrostedQueue[index];
		self.push(queueItem);
		}
		}

		return true;
		} catch(error) {
		return false;
		}

		callback(null,self)
		});
		};

		exports.queue = FetchQueue;

README.markdown

Sorry, the diff of this file is not supported yet

simplecrawler - npm Package Compare versions

Fixed alerts

Improved metrics