Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

crawler

Package Overview
Dependencies
Maintainers
2
Versions
40
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler - npm Package Compare versions

Comparing version 0.4.0 to 0.4.1

25

lib/crawler.js

@@ -200,3 +200,3 @@ 'use strict';

});
} else {
} else if (options) {
self._pushToQueue(options);

@@ -266,18 +266,15 @@ }

var self = this;
var cacheData = self.cache[options.uri];
if (useCache(options)) {
//If a query has already been made to self URL, don't callback again
if (useCache(options) && cacheData) {
var cacheData = self.cache[options.uri];
//If a query has already been made to self URL, don't callback again
if (cacheData) {
// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}
// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}
} else {

@@ -284,0 +281,0 @@ self._buildHttpRequest(options);

{
"name": "crawler",
"version": "0.4.0",
"version": "0.4.1",
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!",

@@ -5,0 +5,0 @@ "keywords": [

[![Build Status](https://travis-ci.org/sylvinus/node-crawler.svg?branch=master)](https://travis-ci.org/sylvinus/node-crawler)
Current Goal
------
Refactoring the code to be more maintainable — it's spaghetti code in there!
node-crawler

@@ -200,2 +204,4 @@ ------------

* Same for the Pool
* Proxy feature
* This issue: https://github.com/sylvinus/node-crawler/issues/118
* Make Sizzle tests pass (jsdom bug? https://github.com/tmpvar/jsdom/issues#issue/81)

@@ -202,0 +208,0 @@ * More crawling tests

@@ -34,10 +34,24 @@ 'use strict';

//describe('Skip Duplicate', function() {
// afterEach(function () {
// c = {};
// });
// it('should skip previous crawled urls', function (done) {});
// it('should not skip one single url', function (done) {});
//});
// Suite: with skipDuplicates enabled, a single never-before-seen URL must
// still be fetched (dedup only applies to repeats, not first requests).
describe('Skip Duplicate active', function() {
    afterEach(function () {
        // Drop the crawler instance so state never leaks across tests.
        c = {};
    });
    it('should not skip one single url', function (done) {
        var crawlerConfig = {
            jquery: false,
            skipDuplicates: true,
            callback: function (error, result) {
                // The lone URL must complete successfully.
                expect(error).to.be.null;
                expect(result.statusCode).to.equal(200);
                done();
            },
        };
        c = new Crawler(crawlerConfig);
        c.queue('http://' + httpbinHost + '/status/200');
    });
    //it('should skip previous crawled urls', function (done) {});
});
});

@@ -51,2 +51,32 @@ 'use strict';

});
// 4xx client errors are valid HTTP responses: the crawler must deliver them
// to the callback with a null error, not treat them as failures.
// Data-driven registration keeps the three cases in one place; titles,
// request URIs, and assertions are identical to spelling each test out.
[
    { code: 400, reason: 'Bad Request' },
    { code: 401, reason: 'Unauthorized' },
    { code: 403, reason: 'Forbidden' },
].forEach(function (status) {
    it('should not return an error on status code ' + status.code + ' (' + status.reason + ')', function (done) {
        c.queue({
            uri: 'http://' + httpbinHost + '/status/' + status.code,
            callback: function (error, response, $) {
                expect(error).to.be.null;
                expect(response.statusCode).to.equal(status.code);
                done();
            }
        });
    });
});
it('should not return an error on a 404', function(done) {

@@ -53,0 +83,0 @@ c.queue({

@@ -5,8 +5,9 @@ 'use strict';

var expect = require('chai').expect;
var sinon = require('sinon');
var httpbinHost = 'localhost:8000';
var c;
var c, spy;
describe('Uri Options', function() {
afterEach(function() {
c = {};
c = spy = {};
});

@@ -51,2 +52,16 @@ it('should work if uri is a function', function(done) {

});
it('should skip if the uri is undefined or an empty string', function(done) {
    // Queue one invalid entry (undefined) and one valid URL; only the valid
    // one should be pushed to the internal queue, hence calledOnce.
    var crawler = new Crawler({
        onDrain: function() {
            expect(spy.calledOnce).to.be.true;
            done();
        },
        callback: function(error, result) {
            // The surviving request must resolve normally.
            expect(typeof result.statusCode).to.equal('number');
            expect(result.statusCode).to.equal(200);
        }
    });
    c = crawler;
    // Spy must be attached before queueing so every push is observed.
    spy = sinon.spy(c, '_pushToQueue');
    c.queue([undefined, 'http://'+httpbinHost]);
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc