Comparing version 0.4.1 to 0.4.2
@@ -189,15 +189,14 @@ 'use strict'; | ||
//Did I get a list ? Queue all the URLs. | ||
if (_.isArray(options)) { | ||
var queueLength = options.length; | ||
for (var i = 0; i < queueLength; i++) { | ||
self.queue(options[i]); | ||
} | ||
// Did I get a simple string instead, convert to object and queue | ||
} else if (_.isString(options)) { | ||
self._pushToQueue({ | ||
uri: options | ||
}); | ||
} else if (options) { | ||
self._pushToQueue(options); | ||
// Did you get a single object or string? Make it compatible. | ||
options = _.isString(options) || _.isPlainObject(options) ? [ options ] : options; | ||
if (options !== undefined && options.length == 1) { | ||
self._pushToQueue( | ||
_.isString(options[0]) ? { uri: options[0] } : options[0] | ||
); | ||
// Did you get multiple requests? Queue the URLs. | ||
} else if (options !== undefined) { | ||
self.queue( | ||
_.isString(options[0]) ? { uri: options[0] } : options[0] | ||
); | ||
self.queue(options.slice(1)) | ||
} | ||
@@ -261,3 +260,3 @@ }; | ||
self._executeCrawlerRequest(options); | ||
} | ||
} | ||
}; | ||
@@ -279,3 +278,3 @@ | ||
} | ||
} else { | ||
@@ -478,2 +477,2 @@ self._buildHttpRequest(options); | ||
module.exports = Crawler; | ||
module.exports.VERSION = '0.3.1'; | ||
module.exports.VERSION = '0.3.1'; |
{ | ||
"name": "crawler", | ||
"version": "0.4.1", | ||
"version": "0.4.2", | ||
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!", | ||
@@ -57,3 +57,3 @@ "keywords": [ | ||
"sinon": "1.11.1", | ||
"jsdom": "0.11.1" | ||
"jsdom": "3.1.1" | ||
}, | ||
@@ -60,0 +60,0 @@ "scripts": { |
[![Build Status](https://travis-ci.org/sylvinus/node-crawler.svg?branch=master)](https://travis-ci.org/sylvinus/node-crawler) | ||
Current Goal | ||
Node Crawler is no longer maintained | ||
------ | ||
Refactoring the code to be more maintenable, it's spaghetti code in there ! | ||
Have a look at alternative modules: | ||
* [node-spider](https://github.com/mikeal/spider) | ||
* [node-simplecrawler](https://github.com/cgiffard/node-simplecrawler) | ||
* [phantomJS](http://phantomjs.org/) | ||
node-crawler | ||
@@ -19,3 +23,3 @@ ------------ | ||
* A local cache | ||
* node 0.8 and 0.10 support | ||
* node 0.10 and 0.12 support | ||
@@ -81,3 +85,3 @@ The argument for creating this package was made at ParisJS #2 in 2010 ( [lightning talk slides](http://www.slideshare.net/sylvinus/web-crawling-with-nodejs) ) | ||
``` | ||
For more examples, look at the [tests](https://github.com/sylvinus/node-crawler/tree/master/test). | ||
For more examples, look at the [tests](https://github.com/sylvinus/node-crawler/tree/master/tests). | ||
@@ -117,3 +121,3 @@ Options reference | ||
* `jQuery`: true, false or ConfObject (Default true) | ||
* `jQuery`: true, false or ConfObject (Default true) | ||
see below [Working with Cheerio or JSDOM](https://github.com/paulvalla/node-crawler/blob/master/README.md#working-with-cheerio-or-jsdom) | ||
@@ -141,3 +145,3 @@ | ||
Crawler by default use [Cheerio](https://github.com/cheeriojs/cheerio) instead of [Jsdom](https://github.com/tmpvar/jsdom). Jsdom is more robust but can be hard to install (espacially on windows) because of [contextify](https://github.com/tmpvar/jsdom#contextify). | ||
Crawler by default uses [Cheerio](https://github.com/cheeriojs/cheerio) instead of [Jsdom](https://github.com/tmpvar/jsdom). Jsdom is more robust but can be hard to install (especially on Windows) because of [contextify](https://github.com/tmpvar/jsdom#contextify). | ||
This is why, if you want to use jsdom, you will have to build it and `require('jsdom')` in your own script before passing it to crawler. This spares users who only need Cheerio from having to build jsdom when installing crawler. | ||
@@ -155,3 +159,3 @@ | ||
normalizeWhitespace: true, | ||
xmlMode: true | ||
xmlMode: true | ||
} | ||
@@ -171,3 +175,3 @@ } | ||
For a full list of options and their effects, see [this](https://github.com/fb55/DomHandler) and | ||
[htmlparser2's options](https://github.com/fb55/htmlparser2/wiki/Parser-options). | ||
[htmlparser2's options](https://github.com/fb55/htmlparser2/wiki/Parser-options). | ||
[source](https://github.com/cheeriojs/cheerio#loading) | ||
@@ -201,3 +205,15 @@ | ||
### Alternative: Docker | ||
After [installing Docker](http://docs.docker.com/), you can run: | ||
// Builds the local test environment | ||
$ docker build -t node-crawler . | ||
// Runs tests | ||
$ docker run node-crawler sh -c "gunicorn httpbin:app -b 127.0.0.1:8000 -w 6 --daemon && npm install && npm test" | ||
// You can also ssh into the container for easier debugging | ||
$ docker run -i -t node-crawler bash | ||
[![build status](https://secure.travis-ci.org/sylvinus/node-crawler.png)](http://travis-ci.org/sylvinus/node-crawler) | ||
@@ -204,0 +220,0 @@ |
@@ -120,3 +120,3 @@ 'use strict'; | ||
}); | ||
it('should return an error on a malformed html if jQuery is jsdom', function(done) { | ||
it('should not return an error on a malformed html if jQuery is jsdom', function(done) { | ||
c.queue({ | ||
@@ -126,4 +126,4 @@ html : '<html><p>hello <div>dude</p></html>', | ||
callback : function(error, response) { | ||
expect(error).not.to.be.null; | ||
expect(response).to.be.undefined; | ||
expect(error).to.be.null; | ||
expect(response).not.to.be.undefined; | ||
done(); | ||
@@ -130,0 +130,0 @@ } |
Sorry, the diff of this file is not supported yet
233777
22
232