crawler.plugins.downloader
Advanced tools
Comparing version 0.1.13 to 0.1.14
export declare const Types: { | ||
engine: string; | ||
}; | ||
export declare const pluginName = "crawler.plugin.downloader"; |
@@ -6,2 +6,3 @@ "use strict"; | ||
}; | ||
exports.pluginName = "crawler.plugin.downloader"; | ||
//# sourceMappingURL=constants.js.map |
@@ -97,2 +97,3 @@ "use strict"; | ||
body: data, | ||
charset: "auto", | ||
// json: true, | ||
@@ -99,0 +100,0 @@ headers: headers, |
@@ -60,2 +60,3 @@ "use strict"; | ||
var inversify_1 = require("inversify"); | ||
require('superagent-charset')(request); | ||
var SuperAgentEngine = (function (_super) { | ||
@@ -78,13 +79,13 @@ __extends(SuperAgentEngine, _super); | ||
this.use(function (ctx, next) { return __awaiter(_this, void 0, void 0, function () { | ||
var path, _a, method, _b, _c, data, _d, settings, _e, params, _f, _g, timeout, _h, header, curReq, _j, e_1; | ||
return __generator(this, function (_k) { | ||
switch (_k.label) { | ||
var path, _a, method, _b, _c, data, _d, settings, _e, params, _f, _g, timeout, _h, header, _j, charset, curReq, _k, e_1; | ||
return __generator(this, function (_l) { | ||
switch (_l.label) { | ||
case 0: | ||
path = this.getFullPath(ctx.instance || {}, ctx.executeInfo || {}); | ||
_a = (ctx.instance || {}).method, method = _a === void 0 ? "" : _a; | ||
_b = ctx.executeInfo || {}, _c = _b.data, data = _c === void 0 ? null : _c, _d = _b.settings, settings = _d === void 0 ? {} : _d, _e = _b.params, params = _e === void 0 ? {} : _e; | ||
_f = settings || {}, _g = _f.timeout, timeout = _g === void 0 ? 5000 : _g, _h = _f.header, header = _h === void 0 ? {} : _h; | ||
_k.label = 1; | ||
_b = ctx.executeInfo || {}, _c = _b.data, data = _c === void 0 ? null : _c, _d = _b.settings, settings = _d === void 0 ? {} : _d, _e = _b.params, params = _e === void 0 ? null : _e; | ||
_f = settings || {}, _g = _f.timeout, timeout = _g === void 0 ? 5000 : _g, _h = _f.header, header = _h === void 0 ? {} : _h, _j = _f.charset, charset = _j === void 0 ? "utf-8" : _j; | ||
_l.label = 1; | ||
case 1: | ||
_k.trys.push([1, 3, , 4]); | ||
_l.trys.push([1, 3, , 4]); | ||
curReq = request(method.toString(), path); | ||
@@ -98,10 +99,11 @@ params && curReq.query(params); | ||
}); | ||
_j = ctx; | ||
charset && curReq.charset(charset); | ||
_k = ctx; | ||
return [4 /*yield*/, curReq]; | ||
case 2: | ||
_j.result = _k.sent(); | ||
_k.result = _l.sent(); | ||
ctx.result.body = ctx.result.text; | ||
return [3 /*break*/, 4]; | ||
case 3: | ||
e_1 = _k.sent(); | ||
e_1 = _l.sent(); | ||
ctx.err = e_1; | ||
@@ -112,3 +114,3 @@ ctx.isError = true; | ||
case 5: | ||
_k.sent(); | ||
_l.sent(); | ||
return [2 /*return*/]; | ||
@@ -115,0 +117,0 @@ } |
@@ -48,2 +48,17 @@ "use strict"; | ||
return __generator(this, function (_a) { | ||
seneca.seneca.act("role:crawler.plugin.downloader,cmd:html", { | ||
"queueItem": { | ||
"protocol": "https", | ||
"host": "item.jd.com", | ||
"query": "", | ||
"port": 80, | ||
"path": "/10468590470.html", | ||
"depth": 2, | ||
"url": "https://item.jd.com/10468590470.html", | ||
"_id": "14ca64908864d9a0fbc173eed901b289" | ||
}, | ||
"engine": "superagent", | ||
"charset": "gbk", | ||
"header": {} | ||
}, console.log); | ||
console.log("crawler.plugins.downloader plugin ready!"); | ||
@@ -50,0 +65,0 @@ return [2 /*return*/]; |
@@ -7,3 +7,6 @@ export declare class DownloadPlugin { | ||
*/ | ||
html({queueItem, proxyInfo, engine}: { | ||
html({queueItem, proxyInfo, save, header, charset, engine}: { | ||
charset: string; | ||
save: boolean; | ||
header: any; | ||
queueItem: any; | ||
@@ -14,5 +17,6 @@ proxyInfo: any; | ||
statusCode: number | undefined; | ||
responseBody: any; | ||
crawlerCount: number; | ||
}>; | ||
inter({url, path, params, data, header, method, engine}: any): any; | ||
inter({url, path, params, data, header, method, engine, _id}: any): any; | ||
} |
@@ -58,3 +58,3 @@ "use strict"; | ||
var proxy_1 = require("../proxy"); | ||
// import * as bluebird from 'bluebird'; | ||
var constants_1 = require("../constants"); | ||
var DownloadPlugin = (function () { | ||
@@ -68,3 +68,3 @@ function DownloadPlugin() { | ||
DownloadPlugin.prototype.html = function (_a, options) { | ||
var queueItem = _a.queueItem, proxyInfo = _a.proxyInfo, _b = _a.engine, engine = _b === void 0 ? "superagent" : _b; | ||
var queueItem = _a.queueItem, proxyInfo = _a.proxyInfo, _b = _a.save, save = _b === void 0 ? true : _b, _c = _a.header, header = _c === void 0 ? {} : _c, charset = _a.charset, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d; | ||
return __awaiter(this, void 0, void 0, function () { | ||
@@ -87,3 +87,3 @@ var res, expireSeneca, download; | ||
"interfaces": [{ | ||
"path": "/", | ||
"path": "", | ||
"method": "get", | ||
@@ -94,6 +94,12 @@ "key": "download", | ||
}); | ||
return [4 /*yield*/, this.proxy.proxy.execute("/download/download", proxyInfo || {})]; | ||
return [4 /*yield*/, this.proxy.proxy.execute("/download/download", { | ||
settings: { | ||
header: header, | ||
charset: charset | ||
} | ||
})]; | ||
case 1: | ||
res = _a.sent(); | ||
expireSeneca = options.seneca.delegate({ expire$: 15 }); | ||
if (!save) return [3 /*break*/, 3]; | ||
expireSeneca = options.seneca.delegate({ expire$: 60 }); | ||
download = expireSeneca.make$('downloads', __assign({ id: queueItem._id, data: res.statusCode }, queueItem, { responseBody: res.body })); | ||
@@ -103,9 +109,8 @@ return [4 /*yield*/, download.saveAsync()]; | ||
_a.sent(); | ||
// download = expireSeneca.make$('downloads'); | ||
// console.log(await download.loadAsync({ id: queueItem._id })); | ||
return [2 /*return*/, { | ||
statusCode: res.statusCode, | ||
// responseBody: res.body, | ||
crawlerCount: ~~queueItem.crawlerCount + 1 | ||
}]; | ||
_a.label = 3; | ||
case 3: return [2 /*return*/, { | ||
statusCode: res.statusCode, | ||
responseBody: save ? null : res.body, | ||
crawlerCount: ~~queueItem.crawlerCount + 1 | ||
}]; | ||
} | ||
@@ -116,3 +121,3 @@ }); | ||
DownloadPlugin.prototype.inter = function (_a) { | ||
var url = _a.url, _b = _a.path, path = _b === void 0 ? "" : _b, params = _a.params, data = _a.data, header = _a.header, _c = _a.method, method = _c === void 0 ? "get" : _c, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d; | ||
var url = _a.url, _b = _a.path, path = _b === void 0 ? "" : _b, params = _a.params, data = _a.data, header = _a.header, _c = _a.method, method = _c === void 0 ? "get" : _c, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d, _e = _a._id, _id = _e === void 0 ? "" : _e; | ||
/** | ||
@@ -157,3 +162,3 @@ * 添加接口信息 | ||
__decorate([ | ||
crawler_plugins_common_1.Add("role:crawler.plugin.downloader,cmd:html"), | ||
crawler_plugins_common_1.Add("role:" + constants_1.pluginName + ",cmd:html"), | ||
__metadata("design:type", Function), | ||
@@ -164,3 +169,3 @@ __metadata("design:paramtypes", [Object, Object]), | ||
__decorate([ | ||
crawler_plugins_common_1.Add("role:crawler.plugin.downloader,cmd:interface"), | ||
crawler_plugins_common_1.Add("role:" + constants_1.pluginName + ",cmd:interfaces"), | ||
__metadata("design:type", Function), | ||
@@ -171,3 +176,3 @@ __metadata("design:paramtypes", [Object]), | ||
DownloadPlugin = __decorate([ | ||
crawler_plugins_common_1.Plugin("crawler.plugin.downloader"), | ||
crawler_plugins_common_1.Plugin(constants_1.pluginName), | ||
inversify_1.injectable() | ||
@@ -174,0 +179,0 @@ ], DownloadPlugin); |
{ | ||
"name": "crawler.plugins.downloader", | ||
"version": "0.1.13", | ||
"version": "0.1.14", | ||
"description": "爬虫下载插件", | ||
@@ -9,3 +9,3 @@ "main": "out/index.js", | ||
"start": "node out/index.js", | ||
"dock": "docker run -d --network=host --name crawler.plugins.downloader -e CONFIG_PATH=/app/config.json crawler.plugins.downloader", | ||
"dock": "docker run -it --rm --network=bridge --name crawler.plugins.downloader -e HOST=192.168.31.153 -e CONFIG_PATH=/app/config.js crawler.plugins.downloader", | ||
"dockfile": "docker build -t crawler.plugins.downloader .", | ||
@@ -22,3 +22,3 @@ "curl": "curl -d '{\"role\":\"crawler.plugin.downloader\",\"cmd\":\"download\",\"id\":3}' http://0.0.0.0:9999/act" | ||
"dependencies": { | ||
"crawler.plugins.common": "^0.1.13", | ||
"crawler.plugins.common": "^0.1.14", | ||
"inversify": "^4.2.0", | ||
@@ -39,3 +39,4 @@ "modelproxy": "^0.5.19", | ||
"seneca-transport": "^2.1.1", | ||
"superagent": "^3.5.2" | ||
"superagent": "^3.5.2", | ||
"superagent-charset": "^1.2.0" | ||
}, | ||
@@ -42,0 +43,0 @@ "devDependencies": { |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
68314
35
1023
18
+ Added superagent-charset@^1.2.0
+ Added iconv-lite@0.4.24 (transitive)
+ Added superagent-charset@1.2.0 (transitive)