Comparing version 1.2.2 to 1.3.0
node-crawler ChangeLog | ||
------------------------- | ||
1.3.0 | ||
- [#367](https://github.com/bda-research/node-crawler/pull/367) add http2 functionality (@BeijingProtoHuman) | ||
- [#364](https://github.com/bda-research/node-crawler/pull/364) Fix some typos (@pzmarzly) | ||
- [#363](https://github.com/bda-research/node-crawler/pull/363) Remove stale vendored jQuery version (@pzmarzly) | ||
1.2.2 | ||
@@ -5,0 +10,0 @@ - [#353](https://github.com/bda-research/node-crawler/pull/353) Release automate (@mike442144) |
@@ -11,3 +11,3 @@ # Node Crawler Examples | ||
### Use Proxy with Crawler | ||
Most large scale webscraping tasks requires us to perform countless amounts of access to a specific website. This could be higly risky using only one IP address since the website could permanately or temporarily block our IP address. Instead, we can use a proxy that gives us the freedom to access websites using multiple different IPs. **Below is an example of how to use a proxy with Crawler:** | ||
Most large-scale web scraping tasks require us to access a specific website countless times. This could be very risky when using only one IP address, since the website could permanently or temporarily block it. Instead, we can use a proxy, which gives us the freedom to access websites from multiple different IPs. **Below is an example of how to use a proxy with Crawler:** | ||
```javascript | ||
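// Illustrative sketch only (the repository's full example is collapsed in this diff);
// the proxy address and target URI below are placeholder assumptions.
const Crawler = require("crawler");

const crawler = new Crawler({
    maxConnections: 10,
    callback: (error, res, done) => {
        if (error) {
            console.error(error);
        } else {
            console.log(res.$("title").text()); // res.$ is the injected cheerio instance
        }
        done();
    }
});

// Each queued task can carry its own `proxy` option, so different tasks may go out via different IPs.
crawler.queue({
    uri: "http://www.example.com",                  // hypothetical target
    proxy: "http://user:password@203.0.113.10:8080" // hypothetical proxy
});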
@@ -32,3 +32,3 @@ const Crawler = require("crawler"); | ||
Some of our web scraping tasks involve downloading images or other file types, such as grabbing images to train image recognition algorithms. | ||
With crawler, a few settings will do the trick; simply set ```encoding``` and ```jQurey``` options to ```null``` and ```false``` respectively when queuing a task. **Below is an example of downloading images with Crawler:** | ||
With crawler, a few settings will do the trick; simply set ```encoding``` and ```jQuery``` options to ```null``` and ```false``` respectively when queuing a task. **Below is an example of downloading images with Crawler:** | ||
```javascript | ||
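// Illustrative sketch only (the repository's full example is collapsed in this diff);
// the image URL and output filename below are placeholder assumptions.
const Crawler = require("crawler");
const fs = require("fs");

const crawler = new Crawler();

crawler.queue({
    uri: "https://example.com/some-image.png", // hypothetical image URL
    encoding: null, // keep response.body as a raw Buffer instead of a decoded string
    jQuery: false,  // skip cheerio injection, the body is binary data
    callback: (error, res, done) => {
        if (error) {
            console.error(error);
            return done();
        }
        // res.body is a Buffer because encoding is null
        fs.writeFile("image.png", res.body, (err) => {
            if (err) console.error(err);
            done();
        });
    }
});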
@@ -35,0 +35,0 @@ const Crawler = require("crawler"); |
@@ -1,2 +0,1 @@ | ||
'use strict'; | ||
@@ -14,17 +13,27 @@ | ||
, iconvLite = require('iconv-lite') | ||
, typeis = require('type-is').is; | ||
, typeis = require('type-is').is | ||
, qs = require('querystring'), | ||
URL = require('url').URL; | ||
var whacko=null, level, levels = ['silly','debug','verbose','info','warn','error','critical']; | ||
try{ | ||
//NOTE for polyfill purposes, because http2 is only stable since Node 10.0 | ||
let http2; | ||
try { | ||
http2 = require('http2'); | ||
} catch (e) { | ||
//NOTE left empty to pass eslint | ||
} | ||
var whacko = null, level, levels = ['silly', 'debug', 'verbose', 'info', 'warn', 'error', 'critical']; | ||
try { | ||
whacko = require('whacko'); | ||
}catch(e){ | ||
} catch (e) { | ||
e.code; | ||
} | ||
function defaultLog(){ //2016-11-24T12:22:55.639Z - debug: | ||
if( levels.indexOf(arguments[0]) >= levels.indexOf(level) ) | ||
console.log(new Date().toJSON()+' - '+ arguments[0] +': CRAWLER %s', util.format.apply(util, Array.prototype.slice.call(arguments, 1))); | ||
function defaultLog() { //2016-11-24T12:22:55.639Z - debug: | ||
if (levels.indexOf(arguments[0]) >= levels.indexOf(level)) | ||
console.log(new Date().toJSON() + ' - ' + arguments[0] + ': CRAWLER %s', util.format.apply(util, Array.prototype.slice.call(arguments, 1))); | ||
} | ||
function checkJQueryNaming (options) { | ||
function checkJQueryNaming(options) { | ||
if ('jquery' in options) { | ||
@@ -37,5 +46,5 @@ options.jQuery = options.jquery; | ||
function readJqueryUrl (url, callback) { | ||
function readJqueryUrl(url, callback) { | ||
if (url.match(/^(file:\/\/|\w+:|\/)/)) { | ||
fs.readFile(url.replace(/^file:\/\//,''),'utf-8', function(err,jq) { | ||
fs.readFile(url.replace(/^file:\/\//, ''), 'utf-8', function (err, jq) { | ||
callback(err, jq); | ||
@@ -48,7 +57,7 @@ }); | ||
function contentType(res){ | ||
return get(res,'content-type').split(';').filter(item => item.trim().length !== 0).join(';'); | ||
function contentType(res) { | ||
return get(res, 'content-type').split(';').filter(item => item.trim().length !== 0).join(';'); | ||
} | ||
function get(res,field){ | ||
function get(res, field) { | ||
return res.headers[field.toLowerCase()] || ''; | ||
@@ -59,7 +68,7 @@ } | ||
function Crawler (options) { | ||
function Crawler(options) { | ||
var self = this; | ||
options = options||{}; | ||
if(['onDrain','cache'].some(key => key in options)){ | ||
options = options || {}; | ||
if (['onDrain', 'cache'].some(key => key in options)) { | ||
throw new Error('Support for "onDrain", "cache" has been removed! For more details, see https://github.com/bda-research/node-crawler'); | ||
@@ -73,26 +82,27 @@ } | ||
Crawler.prototype.init = function init (options) { | ||
Crawler.prototype.init = function init(options) { | ||
var self = this; | ||
var defaultOptions = { | ||
autoWindowClose: true, | ||
forceUTF8: true, | ||
gzip: true, | ||
incomingEncoding: null, | ||
jQuery: true, | ||
maxConnections: 10, | ||
method: 'GET', | ||
priority: 5, | ||
priorityRange: 10, | ||
rateLimit: 0, | ||
referer: false, | ||
retries: 3, | ||
retryTimeout: 10000, | ||
timeout: 15000, | ||
skipDuplicates: false, | ||
rotateUA: false, | ||
homogeneous: false | ||
autoWindowClose: true, | ||
forceUTF8: true, | ||
gzip: true, | ||
incomingEncoding: null, | ||
jQuery: true, | ||
maxConnections: 10, | ||
method: 'GET', | ||
priority: 5, | ||
priorityRange: 10, | ||
rateLimit: 0, | ||
referer: false, | ||
retries: 3, | ||
retryTimeout: 10000, | ||
timeout: 15000, | ||
skipDuplicates: false, | ||
rotateUA: false, | ||
homogeneous: false, | ||
http2: false | ||
}; | ||
//return defaultOptions with overriden properties from options. | ||
// return defaultOptions with overridden properties from options. | ||
self.options = _.extend(defaultOptions, options); | ||
@@ -106,7 +116,10 @@ | ||
self.limiters = new Bottleneck.Cluster(self.options.maxConnections,self.options.rateLimit,self.options.priorityRange, self.options.priority, self.options.homogeneous); | ||
self.limiters = new Bottleneck.Cluster(self.options.maxConnections, self.options.rateLimit, self.options.priorityRange, self.options.priority, self.options.homogeneous); | ||
//maintain the http2 sessions | ||
self.http2Connections = {}; | ||
level = self.options.debug ? 'debug' : 'info'; | ||
if(self.options.logger) | ||
if (self.options.logger) | ||
log = self.options.logger.log.bind(self.options.logger); | ||
@@ -117,17 +130,20 @@ | ||
self.seen = new seenreq(self.options.seenreq); | ||
self.seen.initialize().then(()=> log('debug', 'seenreq is initialized.')).catch(e => log('error', e)); | ||
self.seen.initialize().then(() => log('debug', 'seenreq is initialized.')).catch(e => log('error', e)); | ||
self.on('_release', function(){ | ||
log('debug','Queue size: %d',this.queueSize); | ||
self.on('_release', function () { | ||
log('debug', 'Queue size: %d', this.queueSize); | ||
if(this.limiters.empty) | ||
if (this.limiters.empty) { | ||
if (Object.keys(self.http2Connections).length > 0) self._clearHttp2Session(); | ||
return this.emit('drain'); | ||
} | ||
}); | ||
}; | ||
Crawler.prototype.setLimiterProperty = function setLimiterProperty (limiter, property, value) { | ||
Crawler.prototype.setLimiterProperty = function setLimiterProperty(limiter, property, value) { | ||
var self = this; | ||
switch(property) { | ||
case 'rateLimit': self.limiters.key(limiter).setRateLimit(value);break; | ||
switch (property) { | ||
case 'rateLimit': self.limiters.key(limiter).setRateLimit(value); break; | ||
default: break; | ||
@@ -137,7 +153,44 @@ } | ||
Crawler.prototype._inject = function _inject (response, options, callback) { | ||
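//Derive the http2 pseudo-headers (:method, :path, :scheme, :authority) from the task's uri and method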
Crawler.prototype.generateHttp2RequestLine = function (options) { | ||
const urlObj = new URL(options.uri); | ||
const requestLine = { | ||
':method': options.method || 'GET', | ||
':path': urlObj.pathname, | ||
':scheme': urlObj.protocol.replace(':', ''), | ||
':authority': urlObj.hostname | ||
}; | ||
return requestLine; | ||
}; | ||
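//Serialize the form / json / raw body options into an http2 request body, setting the content-type header when it is missing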
Crawler.prototype.generateHttp2RequestBody = function (options) { | ||
let data = null; | ||
if (options.form) { | ||
if (!/^application\/x-www-form-urlencoded\b/.test(options.headers['content-type'])) { | ||
options.headers['content-type'] = 'application/x-www-form-urlencoded'; | ||
} | ||
data = (typeof options.form === 'string') ? encodeURIComponent(options.form) : qs.stringify(options.form); | ||
} else if (options.json) { | ||
if (!/^application\/x-www-form-urlencoded\b/.test(options.headers['content-type'])) { | ||
data = JSON.stringify(options.body); | ||
} | ||
if (!options.headers['content-type']) options.headers['content-type'] = 'application/json'; | ||
} else if (options.body !== undefined) { | ||
data = options.body; | ||
} | ||
//NOTE in the default case, nothing is done to the request body | ||
return data; | ||
}; | ||
Crawler.prototype._inject = function _inject(response, options, callback) { | ||
var $; | ||
if (options.jQuery === 'whacko') { | ||
if(!whacko){ | ||
if (!whacko) { | ||
throw new Error('Please install whacko yourself, since `crawler` detected that you specified it explicitly'); | ||
@@ -148,3 +201,3 @@ } | ||
callback(null, response, options, $); | ||
}else if (options.jQuery === 'cheerio' || options.jQuery.name === 'cheerio' || options.jQuery === true) { | ||
} else if (options.jQuery === 'cheerio' || options.jQuery.name === 'cheerio' || options.jQuery === true) { | ||
var defaultCheerioOptions = { | ||
@@ -159,3 +212,3 @@ normalizeWhitespace: false, | ||
callback(null, response, options, $); | ||
}else if (options.jQuery.jsdom) { | ||
} else if (options.jQuery.jsdom) { | ||
var jsdom = options.jQuery.jsdom; | ||
@@ -165,3 +218,3 @@ var scriptLocation = path.resolve(__dirname, '../vendor/jquery-2.1.1.min.js'); | ||
//Use promises | ||
readJqueryUrl(scriptLocation, function(err, jquery) { | ||
readJqueryUrl(scriptLocation, function (err, jquery) { | ||
try { | ||
@@ -180,3 +233,3 @@ jsdom.env({ | ||
} catch (err) { | ||
log('error',err); | ||
log('error', err); | ||
} | ||
@@ -187,3 +240,3 @@ | ||
} catch (e) { | ||
options.callback(e,{options}, options.release); | ||
options.callback(e, { options }, options.release); | ||
} | ||
@@ -198,15 +251,15 @@ }); | ||
Crawler.prototype.isIllegal = function isIllegal (options) { | ||
Crawler.prototype.isIllegal = function isIllegal(options) { | ||
return (_.isNull(options) || _.isUndefined(options) || (!_.isString(options) && !_.isPlainObject(options))); | ||
}; | ||
Crawler.prototype.direct = function direct (options) { | ||
Crawler.prototype.direct = function direct(options) { | ||
var self = this; | ||
if(self.isIllegal(options) || !_.isPlainObject(options)) { | ||
return log('warn','Illegal queue option: ', JSON.stringify(options)); | ||
if (self.isIllegal(options) || !_.isPlainObject(options)) { | ||
return log('warn', 'Illegal queue option: ', JSON.stringify(options)); | ||
} | ||
if(!('callback' in options) || !_.isFunction(options.callback)) { | ||
return log('warn','must specify callback function when using sending direct request with crawler'); | ||
if (!('callback' in options) || !_.isFunction(options.callback)) { | ||
return log('warn', 'must specify a callback function when sending a direct request with crawler'); | ||
} | ||
@@ -232,3 +285,3 @@ | ||
Crawler.prototype.queue = function queue (options) { | ||
Crawler.prototype.queue = function queue(options) { | ||
var self = this; | ||
@@ -241,9 +294,9 @@ | ||
for(var i = 0; i < options.length; ++i) { | ||
if(self.isIllegal(options[i])) { | ||
log('warn','Illegal queue option: ', JSON.stringify(options[i])); | ||
for (var i = 0; i < options.length; ++i) { | ||
if (self.isIllegal(options[i])) { | ||
log('warn', 'Illegal queue option: ', JSON.stringify(options[i])); | ||
continue; | ||
} | ||
self._pushToQueue( | ||
_.isString(options[i]) ? {uri: options[i]} : options[i] | ||
_.isString(options[i]) ? { uri: options[i] } : options[i] | ||
); | ||
@@ -253,3 +306,3 @@ } | ||
Crawler.prototype._pushToQueue = function _pushToQueue (options) { | ||
Crawler.prototype._pushToQueue = function _pushToQueue(options) { | ||
var self = this; | ||
@@ -268,3 +321,3 @@ | ||
// If duplicate skipping is enabled, avoid queueing entirely for URLs we already crawled | ||
if (!self.options.skipDuplicates){ | ||
if (!self.options.skipDuplicates) { | ||
self._schedule(options); | ||
@@ -275,3 +328,3 @@ return; | ||
self.seen.exists(options, options.seenreq).then(rst => { | ||
if(!rst){ | ||
if (!rst) { | ||
self._schedule(options); | ||
@@ -282,9 +335,10 @@ } | ||
Crawler.prototype._schedule = function _scheduler(options){ | ||
Crawler.prototype._schedule = function _scheduler(options) { | ||
var self = this; | ||
self.emit('schedule',options); | ||
//NOTE this event can be used to add a proxy outside the class | ||
self.emit('schedule', options); | ||
self.limiters.key(options.limiter||'default').submit(options.priority,function(done, limiter){ | ||
options.release = function(){ done();self.emit('_release'); }; | ||
if(!options.callback) | ||
self.limiters.key(options.limiter || 'default').submit(options.priority, function (done, limiter) { | ||
options.release = function () { done(); self.emit('_release'); }; | ||
if (!options.callback) | ||
options.callback = options.release; | ||
@@ -297,5 +351,5 @@ | ||
if (options.html) { | ||
self._onContent(null, options, {body:options.html,headers:{'content-type':'text/html'}}); | ||
self._onContent(null, options, { body: options.html, headers: { 'content-type': 'text/html' } }); | ||
} else if (typeof options.uri === 'function') { | ||
options.uri(function(uri) { | ||
options.uri(function (uri) { | ||
options.uri = uri; | ||
@@ -308,10 +362,26 @@ self._buildHttpRequest(options); | ||
}); | ||
}; | ||
Crawler.prototype._buildHttpRequest = function _buildHTTPRequest (options) { | ||
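//Close and forget every cached http2 session (called once the limiter cluster drains)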
Crawler.prototype._clearHttp2Session = function _clearHttp2Session() { | ||
log('debug', `Crawler clear all ${Object.keys(this.http2Connections).length} http2 connections`); | ||
Object.keys(this.http2Connections).forEach(hostName => { | ||
this._closeAndDeleteHttp2Session(hostName); | ||
log('debug', `http2 connection to ${hostName} closed`); | ||
}); | ||
}; | ||
Crawler.prototype._closeAndDeleteHttp2Session = function _closeAndDeleteHttp2Session(targetHost) { | ||
if (this.http2Connections[targetHost]) { | ||
this.http2Connections[targetHost].close(); | ||
delete this.http2Connections[targetHost]; | ||
} | ||
}; | ||
Crawler.prototype._buildHttpRequest = function _buildHTTPRequest(options) { | ||
var self = this; | ||
log('debug',options.method+' '+options.uri); | ||
if(options.proxy) | ||
log('debug','Use proxy: %s', options.proxy); | ||
log('debug', options.method + ' ' + options.uri); | ||
if (options.proxy) | ||
log('debug', 'Use proxy: %s', options.proxy); | ||
@@ -323,16 +393,16 @@ // Cloning keeps the opts parameter clean: | ||
var ropts = _.assign({},options); | ||
var ropts = _.assign({}, options); | ||
if (!ropts.headers) { ropts.headers={}; } | ||
if (ropts.forceUTF8) {ropts.encoding=null;} | ||
if (!ropts.headers) { ropts.headers = {}; } | ||
if (ropts.forceUTF8) { ropts.encoding = null; } | ||
// specifying json in request will have request sets body to JSON representation of value and | ||
// adds Content-type: application/json header. Additionally, parses the response body as JSON | ||
// so the response will be JSON object, no need to deal with encoding | ||
if (ropts.json) {options.encoding=null;} | ||
if (ropts.json) { options.encoding = null; } | ||
if (ropts.userAgent) { | ||
if(self.options.rotateUA && _.isArray(ropts.userAgent)){ | ||
if (self.options.rotateUA && _.isArray(ropts.userAgent)) { | ||
ropts.headers['User-Agent'] = ropts.userAgent[0]; | ||
// If "rotateUA" is true, rotate User-Agent | ||
options.userAgent.push(options.userAgent.shift()); | ||
}else{ | ||
} else { | ||
ropts.headers['User-Agent'] = ropts.userAgent; | ||
@@ -350,11 +420,11 @@ } | ||
var doRequest = function(err) { | ||
if(err) { | ||
err.message = 'Error in preRequest' + (err.message ? ', '+err.message : err.message); | ||
switch(err.op) { | ||
case 'retry': log('debug', err.message + ', retry ' + options.uri);self._onContent(err,options);break; | ||
case 'fail': log('debug', err.message + ', fail ' + options.uri);options.callback(err,{options:options},options.release);break; | ||
case 'abort': log('debug', err.message + ', abort ' + options.uri);options.release();break; | ||
case 'queue': log('debug', err.message + ', queue ' + options.uri);self.queue(options);options.release();break; | ||
default: log('debug', err.message + ', retry ' + options.uri);self._onContent(err,options);break; | ||
var doRequest = function (err) { | ||
if (err) { | ||
err.message = 'Error in preRequest' + (err.message ? ', ' + err.message : err.message); | ||
switch (err.op) { | ||
case 'retry': log('debug', err.message + ', retry ' + options.uri); self._onContent(err, options); break; | ||
case 'fail': log('debug', err.message + ', fail ' + options.uri); options.callback(err, { options: options }, options.release); break; | ||
case 'abort': log('debug', err.message + ', abort ' + options.uri); options.release(); break; | ||
case 'queue': log('debug', err.message + ', queue ' + options.uri); self.queue(options); options.release(); break; | ||
default: log('debug', err.message + ', retry ' + options.uri); self._onContent(err, options); break; | ||
} | ||
@@ -364,15 +434,28 @@ return; | ||
if(ropts.skipEventRequest !== true) { | ||
self.emit('request',ropts); | ||
} | ||
//do http2.* request | ||
if (ropts.http2) { | ||
if (!http2) { | ||
process.nextTick(() => { | ||
const notSupportedHttp2Error = new Error('you are trying to use http2 API which may not be supported for your current environment or node version'); | ||
notSupportedHttp2Error.code = 'NOHTTP2SUPPORT'; | ||
self._onContent(notSupportedHttp2Error, options); | ||
}); | ||
return; | ||
} | ||
self._http2request(ropts, options); | ||
} else { | ||
if (ropts.skipEventRequest !== true) { | ||
self.emit('request', ropts); | ||
} | ||
var requestArgs = ['uri','url','qs','method','headers','body','form','formData','json','multipart','followRedirect','followAllRedirects','maxRedirects','removeRefererHeader','encoding','pool','timeout','proxy','auth','oauth','strictSSL','jar','aws','gzip','time','tunnel','proxyHeaderWhiteList','proxyHeaderExclusiveList','localAddress','forever', 'agent', 'strictSSL', 'agentOptions', 'agentClass']; | ||
var requestArgs = ['uri', 'url', 'qs', 'method', 'headers', 'body', 'form', 'formData', 'json', 'multipart', 'followRedirect', 'followAllRedirects', 'maxRedirects', 'removeRefererHeader', 'encoding', 'pool', 'timeout', 'proxy', 'auth', 'oauth', 'strictSSL', 'jar', 'aws', 'gzip', 'time', 'tunnel', 'proxyHeaderWhiteList', 'proxyHeaderExclusiveList', 'localAddress', 'forever', 'agent', 'strictSSL', 'agentOptions', 'agentClass']; | ||
request(_.pick.apply(self,[ropts].concat(requestArgs)), function(error,response) { | ||
if (error) { | ||
return self._onContent(error, options); | ||
} | ||
request(_.pick.apply(self, [ropts].concat(requestArgs)), function (error, response) { | ||
if (error) { | ||
return self._onContent(error, options); | ||
} | ||
self._onContent(error,options,response); | ||
}); | ||
self._onContent(error, options, response); | ||
}); | ||
} | ||
}; | ||
@@ -387,30 +470,130 @@ | ||
Crawler.prototype._onContent = function _onContent (error, options, response) { | ||
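//Open an http2 session to the target origin, cache it on the crawler, and log its lifecycle events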
Crawler.prototype._buildHttp2Session = function _buildHttp2Session(targetHost) { | ||
const self = this; | ||
const newHttp2Connection = self.http2Connections[targetHost] = http2.connect(targetHost); | ||
log('debug', `connect to a new ${targetHost}`); | ||
newHttp2Connection.on('error', (err) => { | ||
log('warn', `Http2 session error ${targetHost}, got error ${err}`); | ||
}).on('goaway', () => { | ||
log('debug', `Http2 session ${targetHost} connection goaway`); | ||
}).on('connect', () => { | ||
log('debug', `Http2 session ${targetHost} connection init`); | ||
}).once('close', () => { | ||
log('debug', `Http2 session ${targetHost} connection closed`); | ||
}); | ||
}; | ||
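//Perform a single request over http2: reuse (or build) the session for the origin, stream the response into a buffer, then hand the assembled response to _onContent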
Crawler.prototype._http2request = function _http2request(ropts, options) { | ||
const self = this; | ||
const targetHost = new URL(ropts.uri).origin; | ||
ropts.headers = Object.assign(ropts.headers, self.generateHttp2RequestLine(ropts)); | ||
const requestBody = ropts.headers[':method'] === 'GET' ? null : self.generateHttp2RequestBody(ropts); | ||
const response = { | ||
headers: {} | ||
}; | ||
const chunks = []; | ||
let http2Error = null; | ||
if (!self.http2Connections[targetHost] || self.http2Connections[targetHost].destroyed) { | ||
self._buildHttp2Session(targetHost); | ||
} | ||
let req = null; | ||
try { | ||
req = self.http2Connections[targetHost].request(ropts.headers); | ||
} catch (e) { | ||
//to handle the goaway issue: a goaway makes the session impossible to establish, | ||
//but this cannot be detected at the moment the stream is initialized, | ||
//so try/catch seems to be the way to solve it | ||
self._onContent(e, options, response); | ||
return; | ||
} | ||
req.on('response', headers => { | ||
//Build the response object here | ||
response.statusCode = headers[':status']; | ||
response.request = { | ||
uri: `${req.sentHeaders[':scheme']}://${req.sentHeaders[':authority']}${req.sentHeaders[':path']}`, | ||
method: req.sentHeaders[':method'], | ||
headers: Object.assign({}, req.sentHeaders, req.sentInfoHeaders) | ||
}; | ||
for (const name in headers) { | ||
response.headers[name] = headers[name]; | ||
} | ||
}); | ||
req.on('error', (err) => { | ||
log('debug', `Http2 stream error ${ropts.uri}, got error ${err}`); | ||
http2Error = err; | ||
}); | ||
req.on('data', chunk => { | ||
chunks.push(chunk); | ||
}); | ||
req.setTimeout(self.options.timeout); | ||
req.on('timeout', () => { | ||
const error = new Error('ESOCKETTIMEDOUT'); | ||
error.code = 'ESOCKETTIMEDOUT'; | ||
http2Error = error; | ||
req.close(); | ||
}); | ||
req.once('close', () => { | ||
if (http2Error) self._onContent(http2Error, options, response); | ||
else { | ||
response.body = Buffer.concat(chunks); | ||
self._onContent(null, options, response); | ||
} | ||
}); | ||
req.on('end', () => { | ||
log('debug', `${ropts.uri} stream ends`); | ||
}); | ||
//set request body | ||
req.end(requestBody); | ||
}; | ||
Crawler.prototype._onContent = function _onContent(error, options, response) { | ||
var self = this; | ||
if (error) { | ||
log('error','Error '+error+' when fetching '+ (options.uri||options.url)+(options.retries ? ' ('+options.retries+' retries left)' : '')); | ||
if (options.retries) { | ||
setTimeout(function() { | ||
options.retries--; | ||
self._schedule(options); | ||
options.release(); | ||
},options.retryTimeout); | ||
} else{ | ||
options.callback(error,{options:options},options.release); | ||
switch (error.code) { | ||
case 'NOHTTP2SUPPORT': | ||
//if the environment does not support the http2 API, any request relying on the http2 protocol | ||
//is aborted immediately, no matter how many retries are left | ||
log('error', 'Error ' + error + ' when fetching ' + (options.uri || options.url) + ', skipping all retries'); | ||
break; | ||
default: | ||
log('error', 'Error ' + error + ' when fetching ' + (options.uri || options.url) + (options.retries ? ' (' + options.retries + ' retries left)' : '')); | ||
if (options.retries) { | ||
setTimeout(function () { | ||
options.retries--; | ||
self._schedule(options); | ||
options.release(); | ||
}, options.retryTimeout); | ||
return; | ||
} | ||
break; | ||
} | ||
options.callback(error, { options: options }, options.release); | ||
return; | ||
} | ||
if (!response.body) { response.body=''; } | ||
if (!response.body) { response.body = ''; } | ||
log('debug','Got '+(options.uri||'html')+' ('+response.body.length+' bytes)...'); | ||
log('debug', 'Got ' + (options.uri || 'html') + ' (' + response.body.length + ' bytes)...'); | ||
try{ | ||
self._doEncoding(options,response); | ||
}catch(e){ | ||
log('error',e); | ||
return options.callback(e,{options:options},options.release); | ||
try { | ||
self._doEncoding(options, response); | ||
} catch (e) { | ||
return options.callback(e, { options: options }, options.release); | ||
} | ||
@@ -420,13 +603,13 @@ | ||
if(options.method === 'HEAD' || !options.jQuery){ | ||
return options.callback(null,response,options.release); | ||
if (options.method === 'HEAD' || !options.jQuery) { | ||
return options.callback(null, response, options.release); | ||
} | ||
var injectableTypes = ['html','xhtml','text/xml', 'application/xml', '+xml']; | ||
if (!options.html && !typeis(contentType(response), injectableTypes)){ | ||
log('warn','response body is not HTML, skip injecting. Set jQuery to false to suppress this message'); | ||
return options.callback(null,response,options.release); | ||
var injectableTypes = ['html', 'xhtml', 'text/xml', 'application/xml', '+xml']; | ||
if (!options.html && !typeis(contentType(response), injectableTypes)) { | ||
log('warn', 'response body is not HTML, skip injecting. Set jQuery to false to suppress this message'); | ||
return options.callback(null, response, options.release); | ||
} | ||
log('debug','Injecting'); | ||
log('debug', 'Injecting'); | ||
@@ -436,4 +619,4 @@ self._inject(response, options, self._injected.bind(self)); | ||
Crawler.prototype._injected = function(errors, response, options, $){ | ||
log('debug','Injected'); | ||
Crawler.prototype._injected = function (errors, response, options, $) { | ||
log('debug', 'Injected'); | ||
@@ -444,6 +627,6 @@ response.$ = $; | ||
Crawler.prototype._doEncoding = function(options,response){ | ||
Crawler.prototype._doEncoding = function (options, response) { | ||
var self = this; | ||
if(options.encoding === null){ | ||
if (options.encoding === null) { | ||
return; | ||
@@ -455,3 +638,3 @@ } | ||
response.charset = charset; | ||
log('debug','Charset ' + charset); | ||
log('debug', 'Charset ' + charset); | ||
@@ -466,6 +649,6 @@ if (charset !== 'utf-8' && charset !== 'ascii') {// convert response.body into 'utf-8' encoded buffer | ||
Crawler.prototype._parseCharset = function(res){ | ||
Crawler.prototype._parseCharset = function (res) { | ||
//Browsers treat gb2312 as gbk, but iconv-lite not. | ||
//Replace gb2312 with gbk, in order to parse the pages which say gb2312 but actually are gbk. | ||
function getCharset(str){ | ||
function getCharset(str) { | ||
var charset = (str && str.match(/charset=['"]?([\w.-]+)/i) || [0, null])[1]; | ||
@@ -479,7 +662,7 @@ return charset && charset.replace(/:\d{4}$|[^0-9a-z]/g, '') == 'gb2312' ? 'gbk' : charset; | ||
var charset = charsetParser(contentType(res)); | ||
if(charset) | ||
if (charset) | ||
return charset; | ||
if(!typeis(contentType(res), ['html'])){ | ||
log('debug','Charset not detected in response headers, please specify using `incomingEncoding`, use `utf-8` by default'); | ||
if (!typeis(contentType(res), ['html'])) { | ||
log('debug', 'Charset not detected in response headers, please specify using `incomingEncoding`, use `utf-8` by default'); | ||
return 'utf-8'; | ||
@@ -489,3 +672,3 @@ } | ||
var body = res.body instanceof Buffer ? res.body.toString() : res.body; | ||
charset = charsetParser(contentType(res),body,'utf-8'); | ||
charset = charsetParser(contentType(res), body, 'utf-8'); | ||
@@ -495,4 +678,4 @@ return charset; | ||
Object.defineProperty(Crawler.prototype,'queueSize',{ | ||
get:function(){ | ||
Object.defineProperty(Crawler.prototype, 'queueSize', { | ||
get: function () { | ||
return this.limiters.unfinishedClients; | ||
@@ -499,0 +682,0 @@ } |
{ | ||
"name": "crawler", | ||
"version": "1.2.2", | ||
"version": "1.3.0", | ||
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously", | ||
@@ -12,2 +12,3 @@ "main": "./lib/crawler.js", | ||
"test": "mocha --timeout=15000 tests/*.test.js", | ||
"http2test": "mocha --timeout=15000 tests/http2*.test.js", | ||
"cover": "nyc --reporter=lcovonly --reporter=text --reporter=text-summary mocha --timeout=15000 --reporter spec tests/*.test.js" | ||
@@ -20,3 +21,3 @@ }, | ||
"engine-strict": { | ||
"node": ">=4.0.0" | ||
"node": ">=10.0.0" | ||
}, | ||
@@ -38,4 +39,4 @@ "dependencies": { | ||
"mocha": "^6.1.0", | ||
"nock": "^13.0.5", | ||
"mocha-testdata": "^1.2.0", | ||
"nock": "^10.0.6", | ||
"nyc": "^13.1.0", | ||
@@ -42,0 +43,0 @@ "sinon": "^7.0.0", |
@@ -249,2 +249,25 @@ | ||
## Work with Http2 | ||
Node-crawler now supports http2 requests. Proxy functionality for http2 requests is not included yet; it will be added in the future. | ||
```js | ||
crawler.queue({ | ||
//the unit tests work against the httpbin http2 server, which can also be used for testing | ||
uri: 'https://nghttp2.org/httpbin/status/200', | ||
method: 'GET', | ||
http2: true, //setting http2 to true makes the request use the http2 protocol | ||
callback: (error, response, done) => { | ||
if(error) { | ||
console.error(error); | ||
return done(); | ||
} | ||
console.log(`inside callback`); | ||
console.log(response.body); | ||
return done(); | ||
} | ||
}) | ||
``` | ||
## Work with bottleneck | ||
@@ -425,2 +448,6 @@ | ||
### Http2 | ||
* `options.http2`: [Boolean](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Boolean_type) If true, the request will be sent over the http2 protocol (Default false) | ||
### Https socks5 | ||
@@ -427,0 +454,0 @@ ```js |