Comparing version 0.4.0 to 0.4.1
@@ -200,3 +200,3 @@ 'use strict';
         });
-    } else {
+    } else if (options) {
         self._pushToQueue(options);
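The new else-if guard means queue() now ignores falsy entries instead of forwarding them to _pushToQueue. A minimal sketch of the effect, assuming the public API shown elsewhere in this diff (the URL is illustrative, not from the diff):

    var Crawler = require('crawler');

    var c = new Crawler({
        callback: function (error, result) { /* handle response */ }
    });

    // Before 0.4.1 a falsy entry could reach _pushToQueue;
    // with the guard it is simply dropped.
    c.queue([undefined, 'http://example.com/']);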
@@ -266,18 +266,15 @@ }
     var self = this;
-    if (useCache(options)) {
-        var cacheData = self.cache[options.uri];
-        //If a query has already been made to self URL, don't callback again
-        if (cacheData) {
-            // Make sure we actually have cached data, and not just a note
-            // that the page was already crawled
-            if (_.isArray(cacheData)) {
-                self._onContent(null, options, cacheData[0], true);
-            } else {
-                self.emit('pool:release', options);
-            }
+    var cacheData = self.cache[options.uri];
+    //If a query has already been made to self URL, don't callback again
+    if (useCache(options) && cacheData) {
+        // Make sure we actually have cached data, and not just a note
+        // that the page was already crawled
+        if (_.isArray(cacheData)) {
+            self._onContent(null, options, cacheData[0], true);
+        } else {
+            self.emit('pool:release', options);
+        }
     } else {
@@ -284,0 +281,0 @@ self._buildHttpRequest(options);
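Hoisting cacheData and folding the check into useCache(options) && cacheData removes one level of nesting without changing behavior: a cached array replays the stored body through _onContent, while a bare already-crawled marker just releases the pool slot. A hedged usage sketch, assuming the cache option is what useCache() inspects (URL illustrative):

    var Crawler = require('crawler');

    var c = new Crawler({
        cache: true, // responses stored keyed by options.uri
        callback: function (error, result, $) { /* ... */ }
    });

    // The second queue() of the same URI can be answered from
    // self.cache[options.uri] instead of issuing a new request.
    c.queue('http://example.com/page');
    c.queue('http://example.com/page');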
 {
     "name": "crawler",
-    "version": "0.4.0",
+    "version": "0.4.1",
     "description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!",
@@ -5,0 +5,0 @@ "keywords": [
 [![Build Status](https://travis-ci.org/sylvinus/node-crawler.svg?branch=master)](https://travis-ci.org/sylvinus/node-crawler)
+Current Goal
+------
+Refactoring the code to be more maintainable; it's spaghetti code in there!
 node-crawler
@@ -200,2 +204,4 @@ ------------
 * Same for the Pool
+* Proxy feature
+* This issue: https://github.com/sylvinus/node-crawler/issues/118
 * Make Sizzle tests pass (jsdom bug? https://github.com/tmpvar/jsdom/issues#issue/81)
@@ -202,0 +208,0 @@ * More crawling tests
@@ -34,10 +34,24 @@ 'use strict';
-    //describe('Skip Duplicate', function() {
-    //    afterEach(function () {
-    //        c = {};
-    //    });
-    //    it('should skip previous crawled urls', function (done) {});
-    //    it('should not skip one single url', function (done) {});
-    //});
+    describe('Skip Duplicate active', function() {
+        afterEach(function () {
+            c = {};
+        });
+        it('should not skip one single url', function (done) {
+            c = new Crawler({
+                jquery: false,
+                skipDuplicates: true,
+                callback: function (error, result) {
+                    expect(error).to.be.null;
+                    expect(result.statusCode).to.equal(200);
+                    done();
+                },
+            });
+            c.queue('http://' + httpbinHost + '/status/200');
+        });
+        //it('should skip previous crawled urls', function (done) {});
+    });
 });
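The reactivated suite only covers the single-URL case; the duplicate-skipping test itself remains commented out. A sketch of what that pending test would assert, assuming skipDuplicates suppresses repeat callbacks for the same URI (host illustrative):

    var Crawler = require('crawler');

    var seen = 0;
    var c = new Crawler({
        jquery: false,
        skipDuplicates: true,
        callback: function (error, result) {
            seen++;
        },
        onDrain: function () {
            // Expectation: the repeated URI was crawled only once.
            console.log('callbacks fired: ' + seen); // 1
        }
    });
    c.queue(['http://example.com/a', 'http://example.com/a']);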
@@ -51,2 +51,32 @@ 'use strict';
     });
+    it('should not return an error on status code 400 (Bad Request)', function(done) {
+        c.queue({
+            uri: 'http://' + httpbinHost + '/status/400',
+            callback: function(error, response, $) {
+                expect(error).to.be.null;
+                expect(response.statusCode).to.equal(400);
+                done();
+            }
+        });
+    });
+    it('should not return an error on status code 401 (Unauthorized)', function(done) {
+        c.queue({
+            uri: 'http://' + httpbinHost + '/status/401',
+            callback: function(error, response, $) {
+                expect(error).to.be.null;
+                expect(response.statusCode).to.equal(401);
+                done();
+            }
+        });
+    });
+    it('should not return an error on status code 403 (Forbidden)', function(done) {
+        c.queue({
+            uri: 'http://' + httpbinHost + '/status/403',
+            callback: function(error, response, $) {
+                expect(error).to.be.null;
+                expect(response.statusCode).to.equal(403);
+                done();
+            }
+        });
+    });
     it('should not return an error on a 404', function(done) {
@@ -53,0 +83,0 @@ c.queue({
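These tests pin down the convention that HTTP-level failures are not Node-level errors: the callback receives error === null and the status sits on response.statusCode. Client code should therefore branch on the status itself; an illustrative sketch (URL not from this diff):

    var Crawler = require('crawler');

    var c = new Crawler({ jquery: false });

    c.queue({
        uri: 'http://example.com/maybe-missing',
        callback: function (error, response) {
            if (error) {
                // transport problem: DNS failure, timeout, reset socket...
                return console.error(error);
            }
            if (response.statusCode >= 400) {
                // HTTP failure: 400/401/403/404 all land here, error-free
                return console.warn('HTTP ' + response.statusCode);
            }
            // 2xx/3xx: safe to use response.body
        }
    });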
@@ -5,8 +5,9 @@ 'use strict';
 var expect = require('chai').expect;
+var sinon = require('sinon');
 var httpbinHost = 'localhost:8000';
-var c;
+var c, spy;
 describe('Uri Options', function() {
     afterEach(function() {
-        c = {};
+        c = spy = {};
     });
@@ -51,2 +52,16 @@ it('should work if uri is a function', function(done) {
     });
+    it('should skip if the uri is undefined or an empty string', function(done) {
+        c = new Crawler({
+            onDrain: function() {
+                expect(spy.calledOnce).to.be.true;
+                done();
+            },
+            callback: function(error, result) {
+                expect(typeof result.statusCode).to.equal('number');
+                expect(result.statusCode).to.equal(200);
+            }
+        });
+        spy = sinon.spy(c, '_pushToQueue');
+        c.queue([undefined, 'http://' + httpbinHost]);
+    });
 });
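The sinon spy on _pushToQueue verifies that falsy URIs never reach the queue, tying back to the else-if guard in the first hunk. In practice a mixed list can be queued without pre-filtering; a short sketch (URL illustrative):

    var Crawler = require('crawler');

    var c = new Crawler({
        callback: function (error, result) { /* only fires for real URIs */ }
    });

    // undefined and '' are falsy, so the guard drops them;
    // only the well-formed URL is pushed to the queue.
    c.queue([undefined, '', 'http://example.com/']);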
Major refactor
Supply chain risk: this package has recently undergone a major refactor, which may make it unstable or signal significant internal changes. Use caution when updating.
Found 1 instance in 1 package