mip-page-crawler
Advanced tools
+45
-24
@@ -7,10 +7,10 @@ var request = require('request'); | ||
| function Crawler(options) { | ||
| function Crawler(options) { | ||
| var defaultOptions = { | ||
| method: 'GET', | ||
| userAgent: DEFAULT_USERAGENT | ||
| } | ||
| var defaultOptions = { | ||
| method: 'GET', | ||
| userAgent: DEFAULT_USERAGENT | ||
| } | ||
| this.options = _.extend(defaultOptions, options); | ||
| this.options = _.extend(defaultOptions, options); | ||
@@ -20,21 +20,42 @@ } | ||
| Crawler.prototype.doCrawl = function(url) { | ||
| var ropt = { | ||
| url: url, | ||
| headers: { | ||
| 'User-Agent': this.options.userAgent | ||
| } | ||
| } | ||
| return new Promise(function(resolve, reject) { | ||
| request(ropt, function(error, response, body) { | ||
| if(!error && (response.statusCode == 200)) { | ||
| resolve(body); | ||
| } else { | ||
| reject(error); | ||
| } | ||
| }); | ||
| }); | ||
| var ropt = { | ||
| url: url, | ||
| headers: { | ||
| 'User-Agent': this.options.userAgent | ||
| }, | ||
| timeout: 5000 | ||
| } | ||
| return new Promise(function(resolve, reject) { | ||
| try { | ||
| request(ropt, function(err, res, body) { | ||
| if (!err && res.statusCode == 200) { | ||
| resolve(body); | ||
| } else { | ||
| if(!err){ | ||
| if(res.statusCode == 404) { | ||
| reject({message: '抓取网页不存在', code: 2001}) | ||
| } else if(res.statusCode == 403) { | ||
| reject({message: '抓取网页访问禁止', code: 2002}) | ||
| } else if(res.statusCode == 503) { | ||
| reject({message: '抓取网页服务器错误', code: 2003}) | ||
| } else { | ||
| reject({message: '抓取网页 HTTP 错误[' + res.statusCode + ']', code: 2000}) | ||
| } | ||
| } else { | ||
| if(err.code === 'ENOTFOUND') { | ||
| reject({message: '提交的地址不存在', code: 1001}) | ||
| } else if(err.code === 'ETIMEDOUT' || err.code === 'ESOCKETTIMEDOUT') { | ||
| reject({message: '抓取连接超时', code: 1002}) | ||
| } else { | ||
| reject({message: '其他连接错误', code: 1000}) | ||
| } | ||
| } | ||
| } | ||
| }); | ||
| } catch(e) { | ||
| reject({message: '其他抓取错误', code: 3000}) | ||
| } | ||
| }); | ||
| } | ||
| module.exports = Crawler; | ||
| module.exports = Crawler; |
+1
-1
| { | ||
| "name": "mip-page-crawler", | ||
| "version": "1.0.0", | ||
| "version": "1.0.1", | ||
| "description": "crawler mip page", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
2699
82%52
73.33%