Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

crawler

Package Overview
Dependencies
Maintainers
2
Versions
40
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler - npm Package Compare versions

Comparing version 0.4.0 to 0.4.1

25

lib/crawler.js

@@ -200,3 +200,3 @@ 'use strict';

});
} else {
} else if (options) {
self._pushToQueue(options);

@@ -266,18 +266,15 @@ }

var self = this;
var cacheData = self.cache[options.uri];
if (useCache(options)) {
//If a query has already been made to self URL, don't callback again
if (useCache(options) && cacheData) {
var cacheData = self.cache[options.uri];
//If a query has already been made to self URL, don't callback again
if (cacheData) {
// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}
// Make sure we actually have cached data, and not just a note
// that the page was already crawled
if (_.isArray(cacheData)) {
self._onContent(null, options, cacheData[0], true);
} else {
self.emit('pool:release', options);
}
} else {

@@ -284,0 +281,0 @@ self._buildHttpRequest(options);

{
"name": "crawler",
"version": "0.4.0",
"version": "0.4.1",
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!",

@@ -5,0 +5,0 @@ "keywords": [

[![Build Status](https://travis-ci.org/sylvinus/node-crawler.svg?branch=master)](https://travis-ci.org/sylvinus/node-crawler)
Current Goal
------
Refactoring the code to be more maintainable — it's spaghetti code in there!
node-crawler

@@ -200,2 +204,4 @@ ------------

* Same for the Pool
* Proxy feature
* This issue: https://github.com/sylvinus/node-crawler/issues/118
* Make Sizzle tests pass (jsdom bug? https://github.com/tmpvar/jsdom/issues#issue/81)

@@ -202,0 +208,0 @@ * More crawling tests

@@ -34,10 +34,24 @@ 'use strict';

//describe('Skip Duplicate', function() {
// afterEach(function () {
// c = {};
// });
// it('should skip previous crawled urls', function (done) {});
// it('should not skip one single url', function (done) {});
//});
// Suite: with skipDuplicates enabled, a single never-before-seen URL must
// still be fetched (dedup only applies to repeats, not first requests).
describe('Skip Duplicate active', function() {
    afterEach(function () {
        // Drop the crawler instance so state never leaks across tests.
        c = {};
    });
    it('should not skip one single url', function (done) {
        var crawlerConfig = {
            jquery: false,
            skipDuplicates: true,
            callback: function (error, result) {
                // The lone URL must complete successfully.
                expect(error).to.be.null;
                expect(result.statusCode).to.equal(200);
                done();
            },
        };
        c = new Crawler(crawlerConfig);
        c.queue('http://' + httpbinHost + '/status/200');
    });
    //it('should skip previous crawled urls', function (done) {});
});
});

@@ -51,2 +51,32 @@ 'use strict';

});
// 4xx client errors are valid HTTP responses: the crawler must deliver them
// to the callback with a null error, not treat them as failures.
// Data-driven registration keeps the three cases in one place; titles,
// request URIs, and assertions are identical to spelling each test out.
[
    { code: 400, reason: 'Bad Request' },
    { code: 401, reason: 'Unauthorized' },
    { code: 403, reason: 'Forbidden' },
].forEach(function (status) {
    it('should not return an error on status code ' + status.code + ' (' + status.reason + ')', function (done) {
        c.queue({
            uri: 'http://' + httpbinHost + '/status/' + status.code,
            callback: function (error, response, $) {
                expect(error).to.be.null;
                expect(response.statusCode).to.equal(status.code);
                done();
            }
        });
    });
});
it('should not return an error on a 404', function(done) {

@@ -53,0 +83,0 @@ c.queue({

@@ -5,8 +5,9 @@ 'use strict';

var expect = require('chai').expect;
var sinon = require('sinon');
var httpbinHost = 'localhost:8000';
var c;
var c, spy;
describe('Uri Options', function() {
afterEach(function() {
c = {};
c = spy = {};
});

@@ -51,2 +52,16 @@ it('should work if uri is a function', function(done) {

});
it('should skip if the uri is undefined or an empty string', function(done) {
    // Queue one invalid entry (undefined) and one valid URL; only the valid
    // one should be pushed to the internal queue, hence calledOnce.
    var crawler = new Crawler({
        onDrain: function() {
            expect(spy.calledOnce).to.be.true;
            done();
        },
        callback: function(error, result) {
            // The surviving request must resolve normally.
            expect(typeof result.statusCode).to.equal('number');
            expect(result.statusCode).to.equal(200);
        }
    });
    c = crawler;
    // Spy must be attached before queueing so every push is observed.
    spy = sinon.spy(c, '_pushToQueue');
    c.queue([undefined, 'http://'+httpbinHost]);
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc