crawler - npm Package Compare versions

Comparing version 1.2.1 to 1.2.2


CHANGELOG.md

@@ -1,4 +0,11 @@

node-webcrawler ChangeLog
node-crawler ChangeLog
-------------------------
1.2.2
- [#353](https://github.com/bda-research/node-crawler/pull/353) Release automate (@mike442144)
- [#338](https://github.com/bda-research/node-crawler/pull/338) #comment Adding support for Https socks5. Agent is imported directly … (@djpavlovic)
- [#336](https://github.com/bda-research/node-crawler/pull/336) Update README.md (@DanielHabenicht)
- [#329](https://github.com/bda-research/node-crawler/pull/329) add support for removeRefererHeader request option to preserve referer during redirects (@petskratt)
- [#314](https://github.com/bda-research/node-crawler/pull/314) docs: fix typo (@Jason-Cooke)
1.2.1

@@ -5,0 +12,0 @@ * [#310](https://github.com/bda-research/node-crawler/issues/310) Upgrade dependencies' version(@mike442144)


lib/crawler.js

@@ -112,3 +112,3 @@

self.seen.initialize().then(()=> log('debug', 'seenreq is initialized.')).catch(e => log('error', e));
self.on('_release', function(){

@@ -345,3 +345,3 @@ log('debug','Queue size: %d',this.queueSize);

var requestArgs = ['uri','url','qs','method','headers','body','form','formData','json','multipart','followRedirect','followAllRedirects', 'maxRedirects','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever', 'agent'];
var requestArgs = ['uri','url','qs','method','headers','body','form','formData','json','multipart','followRedirect','followAllRedirects','maxRedirects','removeRefererHeader','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever', 'agent', 'strictSSL', 'agentOptions', 'agentClass'];

@@ -371,6 +371,5 @@ request(_.pick.apply(self,[ropts].concat(requestArgs)), function(error,response) {

if (options.retries) {
self.options.skipDuplicates = false;
setTimeout(function() {
options.retries--;
self.queue(options);
self._schedule(options);
options.release();

@@ -377,0 +376,0 @@ },options.retryTimeout);
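The retry hunk above changes how failed requests are re-queued: when `options.retries` is still positive, the task is re-scheduled internally through `_schedule()` instead of going back through the public `queue()`. A minimal usage sketch of the options that drive this path, using the documented `retries` and `retryTimeout` options and an illustrative URL:

```js
var Crawler = require("crawler");

var c = new Crawler({
    maxConnections: 10,
    retries: 3,          // number of times a failed request is retried
    retryTimeout: 10000, // delay (ms) before each retry, used by the setTimeout above
    callback: function (error, res, done) {
        if (error) {
            // reached only once the retries above are exhausted
            console.error(error);
        } else {
            console.log(res.statusCode);
        }
        done();
    }
});

c.queue('http://www.example.com/');
```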

package.json

{
"name": "crawler",
"version": "1.2.1",
"version": "1.2.2",
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously",

@@ -5,0 +5,0 @@ "main": "./lib/crawler.js",
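The package description above refers to "the full power of jQuery on the server": each downloaded page is handed to the callback with a cheerio-backed `res.$` selector. A minimal sketch of that workflow, with an illustrative URL and selector:

```js
var Crawler = require("crawler");

var c = new Crawler({
    maxConnections: 10,
    callback: function (error, res, done) {
        if (error) {
            console.error(error);
        } else {
            // res.$ is a jQuery-like (cheerio) interface over the downloaded HTML
            console.log(res.$("title").text());
        }
        done();
    }
});

c.queue('http://www.example.com/');
```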

README.md

@@ -167,3 +167,3 @@

Crawler picks options only needed by request, so dont't worry about the redundance.
Crawler picks options only needed by request, so don't worry about the redundance.
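In practice this means a task object passed to `queue()` can freely mix Crawler-level options with request-level ones; only the keys in the `requestArgs` whitelist shown earlier are forwarded to `request`. A hedged sketch, assuming a `Crawler` instance `c` as in the examples above and illustrative values:

```js
c.queue({
    uri: 'http://www.example.com/',
    // request-level options: whitelisted in requestArgs, passed straight to request
    headers: { 'User-Agent': 'my-crawler/1.0' },
    timeout: 15000,
    // crawler-level options: consumed by Crawler itself, never sent to request
    jQuery: false,
    priority: 1,
    callback: function (error, res, done) {
        if (error) { console.error(error); }
        done();
    }
});
```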

@@ -175,2 +175,3 @@ ## Raw body

```js
var Crawler = require("crawler");
var fs = require('fs');

@@ -422,4 +423,30 @@

* `options.referer`: [String](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#String_type) If truthy sets the HTTP referer header
* `options.removeRefererHeader`: [Boolean](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Boolean_type) If true preserves the set referer during redirects
* `options.headers`: [Object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object) Raw key-value of http headers
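A short sketch of the referer-related options documented above, assuming a `Crawler` instance `c` and an illustrative redirecting URL: `referer` seeds the header, and `removeRefererHeader: true` keeps that value from being replaced while redirects are followed.

```js
c.queue({
    uri: 'http://www.example.com/old-page',      // responds with a redirect
    referer: 'http://www.example.com/landing',   // explicit referer for the first request
    removeRefererHeader: true,                   // preserve the referer above across redirects
    headers: { 'Accept-Language': 'en' },        // raw headers merged into the request
    callback: function (error, res, done) {
        if (error) { console.error(error); }
        done();
    }
});
```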
### Https socks5
```js
const Agent = require('socks5-https-client/lib/Agent');
//...
var c = new Crawler({
// rateLimit: 2000,
maxConnections: 20,
agentClass: Agent, //adding socks5 https agent
method: 'GET',
strictSSL: true,
agentOptions: {
socksHost: 'localhost',
socksPort: 9050
},
// debug: true,
callback: function (error, res, done) {
if (error) {
console.log(error);
} else {
//
}
done();
}
});
```

@@ -426,0 +453,0 @@
