mip-page-crawler
Advanced tools
Comparing version 1.0.0 to 1.0.1
@@ -7,10 +7,10 @@ var request = require('request'); | ||
function Crawler(options) { | ||
function Crawler(options) { | ||
var defaultOptions = { | ||
method: 'GET', | ||
userAgent: DEFAULT_USERAGENT | ||
} | ||
var defaultOptions = { | ||
method: 'GET', | ||
userAgent: DEFAULT_USERAGENT | ||
} | ||
this.options = _.extend(defaultOptions, options); | ||
this.options = _.extend(defaultOptions, options); | ||
@@ -20,21 +20,42 @@ } | ||
Crawler.prototype.doCrawl = function(url) { | ||
var ropt = { | ||
url: url, | ||
headers: { | ||
'User-Agent': this.options.userAgent | ||
} | ||
} | ||
return new Promise(function(resolve, reject) { | ||
request(ropt, function(error, response, body) { | ||
if(!error && (response.statusCode == 200)) { | ||
resolve(body); | ||
} else { | ||
reject(error); | ||
} | ||
}); | ||
}); | ||
var ropt = { | ||
url: url, | ||
headers: { | ||
'User-Agent': this.options.userAgent | ||
}, | ||
timeout: 5000 | ||
} | ||
return new Promise(function(resolve, reject) { | ||
try { | ||
request(ropt, function(err, res, body) { | ||
if (!err && res.statusCode == 200) { | ||
resolve(body); | ||
} else { | ||
if(!err){ | ||
if(res.statusCode == 404) { | ||
reject({message: '抓取网页不存在', code: 2001}) | ||
} else if(res.statusCode == 403) { | ||
reject({message: '抓取网页访问禁止', code: 2002}) | ||
} else if(res.statusCode == 503) { | ||
reject({message: '抓取网页服务器错误', code: 2003}) | ||
} else { | ||
reject({message: '抓取网页 HTTP 错误[' + res.statusCode + ']', code: 2000}) | ||
} | ||
} else { | ||
if(err.code === 'ENOTFOUND') { | ||
reject({message: '提交的地址不存在', code: 1001}) | ||
} else if(err.code === 'ETIMEDOUT' || err.code === 'ESOCKETTIMEDOUT') { | ||
reject({message: '抓取连接超时', code: 1002}) | ||
} else { | ||
reject({message: '其他连接错误', code: 1000}) | ||
} | ||
} | ||
} | ||
}); | ||
} catch(e) { | ||
reject({message: '其他抓取错误', code: 3000}) | ||
} | ||
}); | ||
} | ||
module.exports = Crawler; | ||
module.exports = Crawler; |
{ | ||
"name": "mip-page-crawler", | ||
"version": "1.0.0", | ||
"version": "1.0.1", | ||
"description": "crawler mip page", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2699
52