Comparing versions 1.2.1 and 1.2.2
@@ -1,4 +0,11 @@
node-webcrawler ChangeLog
node-crawler ChangeLog
-------------------------
1.2.2
- [#353](https://github.com/bda-research/node-crawler/pull/353) Release automate (@mike442144)
- [#338](https://github.com/bda-research/node-crawler/pull/338) #comment Adding support for Https socks5. Agent is imported directly … (@djpavlovic)
- [#336](https://github.com/bda-research/node-crawler/pull/336) Update README.md (@DanielHabenicht)
- [#329](https://github.com/bda-research/node-crawler/pull/329) add support for removeRefererHeader request option to preserve referer during redirects (@petskratt)
- [#314](https://github.com/bda-research/node-crawler/pull/314) docs: fix typo (@Jason-Cooke)
1.2.1
@@ -5,0 +12,0 @@ * [#310](https://github.com/bda-research/node-crawler/issues/310) Upgrade dependencies' version(@mike442144)
@@ -112,3 +112,3 @@
self.seen.initialize().then(()=> log('debug', 'seenreq is initialized.')).catch(e => log('error', e));
self.on('_release', function(){
@@ -345,3 +345,3 @@ log('debug','Queue size: %d',this.queueSize);
var requestArgs = ['uri','url','qs','method','headers','body','form','formData','json','multipart','followRedirect','followAllRedirects', 'maxRedirects','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever', 'agent'];
var requestArgs = ['uri','url','qs','method','headers','body','form','formData','json','multipart','followRedirect','followAllRedirects','maxRedirects','removeRefererHeader','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever', 'agent', 'strictSSL', 'agentOptions', 'agentClass'];
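The `requestArgs` whitelist above is the set of options Crawler hands to request; 1.2.2 adds `removeRefererHeader` (plus `agentOptions` and `agentClass`), so it can be set like any other crawler option. A minimal sketch of how a caller might use it, assuming crawler 1.2.2 and the usual `(error, res, done)` callback:

```js
var Crawler = require("crawler");

var c = new Crawler({
    // Forwarded straight to request because it is in the whitelist above:
    removeRefererHeader: true, // keep the referer set per task across redirects
    callback: function (error, res, done) {
        if (error) {
            console.error(error);
        } else {
            console.log(res.statusCode);
        }
        done();
    }
});

c.queue({
    uri: "https://example.com/page",
    referer: "https://example.com/" // sets the initial Referer header
});
```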
@@ -371,6 +371,5 @@ request(_.pick.apply(self,[ropts].concat(requestArgs)), function(error,response) {
if (options.retries) {
self.options.skipDuplicates = false;
setTimeout(function() {
options.retries--;
self.queue(options);
self._schedule(options);
options.release();
@@ -377,0 +376,0 @@ },options.retryTimeout);
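The hunk above routes a failed task back through the internal scheduler (`_schedule`) instead of `queue()`. From the caller's side the behaviour is still controlled by the `retries` and `retryTimeout` options; a minimal sketch, assuming the defaults documented in the README (3 retries, 10000 ms):

```js
var Crawler = require("crawler");

var c = new Crawler({
    retries: 3,          // how many times a failed request is retried
    retryTimeout: 10000, // delay in ms before each retry (the setTimeout above)
    callback: function (error, res, done) {
        if (error) {
            // reached only after all retries have been used up
            console.error("giving up:", error);
        } else {
            console.log("fetched, status", res.statusCode);
        }
        done();
    }
});

c.queue("https://example.com/");
```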
{
"name": "crawler",
"version": "1.2.1",
"version": "1.2.2",
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously",
@@ -5,0 +5,0 @@ "main": "./lib/crawler.js",
@@ -167,3 +167,3 @@
Crawler picks options only needed by request, so dont't worry about the redundance.
Crawler picks options only needed by request, so don't worry about the redundance.
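Put differently, extra keys in a queued options object are harmless: only the whitelisted request options are forwarded, and the full object comes back in the callback. A small sketch, assuming the documented behaviour that queued options are exposed as `res.options`; the `label` key is purely illustrative:

```js
var Crawler = require("crawler");

var c = new Crawler({
    callback: function (error, res, done) {
        if (error) {
            console.error(error);
        } else {
            // Keys request does not understand are ignored by it,
            // but they travel with the task and come back on res.options.
            console.log(res.options.label, res.statusCode);
        }
        done();
    }
});

c.queue({
    uri: "https://example.com/",
    label: "homepage" // hypothetical extra key, not a request option
});
```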
@@ -175,2 +175,3 @@ ## Raw body
```js
var Crawler = require("crawler");
var fs = require('fs');
@@ -422,4 +423,30 @@
* `options.referer`: [String](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#String_type) If truthy sets the HTTP referer header
* `options.removeRefererHeader`: [Boolean](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Boolean_type) If true preserves the set referer during redirects
* `options.headers`: [Object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object) Raw key-value of http headers
### Https socks5
```js
const Agent = require('socks5-https-client/lib/Agent');
//...
var c = new Crawler({
    // rateLimit: 2000,
    maxConnections: 20,
    agentClass: Agent, // adding socks5 https agent
    method: 'GET',
    strictSSL: true,
    agentOptions: {
        socksHost: 'localhost',
        socksPort: 9050
    },
    // debug: true,
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
        } else {
            //
        }
        done();
    }
});
```
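The example works because `agentClass`, `agentOptions`, and `strictSSL` are now in the option whitelist shown earlier, so Crawler hands them to request unchanged; the `socks5-https-client` agent then opens each HTTPS connection through the SOCKS5 proxy at `socksHost:socksPort` (9050 is, for example, a local Tor daemon's default SOCKS port). The agent package is not bundled and would need to be installed separately, e.g. `npm install socks5-https-client`.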
@@ -426,0 +453,0 @@