Socket
Socket
Sign in | Demo | Install

crawler.plugins.downloader

Package Overview
Dependencies
311
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.1.13 to 0.1.14

config.js

1

out/constants.d.ts
export declare const Types: {
engine: string;
};
export declare const pluginName = "crawler.plugin.downloader";

@@ -6,2 +6,3 @@ "use strict";

};
exports.pluginName = "crawler.plugin.downloader";
//# sourceMappingURL=constants.js.map

@@ -97,2 +97,3 @@ "use strict";

body: data,
charset: "auto",
// json: true,

@@ -99,0 +100,0 @@ headers: headers,

24

out/engines/superagent.js

@@ -60,2 +60,3 @@ "use strict";

var inversify_1 = require("inversify");
require('superagent-charset')(request);
var SuperAgentEngine = (function (_super) {

@@ -78,13 +79,13 @@ __extends(SuperAgentEngine, _super);

this.use(function (ctx, next) { return __awaiter(_this, void 0, void 0, function () {
var path, _a, method, _b, _c, data, _d, settings, _e, params, _f, _g, timeout, _h, header, curReq, _j, e_1;
return __generator(this, function (_k) {
switch (_k.label) {
var path, _a, method, _b, _c, data, _d, settings, _e, params, _f, _g, timeout, _h, header, _j, charset, curReq, _k, e_1;
return __generator(this, function (_l) {
switch (_l.label) {
case 0:
path = this.getFullPath(ctx.instance || {}, ctx.executeInfo || {});
_a = (ctx.instance || {}).method, method = _a === void 0 ? "" : _a;
_b = ctx.executeInfo || {}, _c = _b.data, data = _c === void 0 ? null : _c, _d = _b.settings, settings = _d === void 0 ? {} : _d, _e = _b.params, params = _e === void 0 ? {} : _e;
_f = settings || {}, _g = _f.timeout, timeout = _g === void 0 ? 5000 : _g, _h = _f.header, header = _h === void 0 ? {} : _h;
_k.label = 1;
_b = ctx.executeInfo || {}, _c = _b.data, data = _c === void 0 ? null : _c, _d = _b.settings, settings = _d === void 0 ? {} : _d, _e = _b.params, params = _e === void 0 ? null : _e;
_f = settings || {}, _g = _f.timeout, timeout = _g === void 0 ? 5000 : _g, _h = _f.header, header = _h === void 0 ? {} : _h, _j = _f.charset, charset = _j === void 0 ? "utf-8" : _j;
_l.label = 1;
case 1:
_k.trys.push([1, 3, , 4]);
_l.trys.push([1, 3, , 4]);
curReq = request(method.toString(), path);

@@ -98,10 +99,11 @@ params && curReq.query(params);

});
_j = ctx;
charset && curReq.charset(charset);
_k = ctx;
return [4 /*yield*/, curReq];
case 2:
_j.result = _k.sent();
_k.result = _l.sent();
ctx.result.body = ctx.result.text;
return [3 /*break*/, 4];
case 3:
e_1 = _k.sent();
e_1 = _l.sent();
ctx.err = e_1;

@@ -112,3 +114,3 @@ ctx.isError = true;

case 5:
_k.sent();
_l.sent();
return [2 /*return*/];

@@ -115,0 +117,0 @@ }

@@ -48,2 +48,17 @@ "use strict";

return __generator(this, function (_a) {
seneca.seneca.act("role:crawler.plugin.downloader,cmd:html", {
"queueItem": {
"protocol": "https",
"host": "item.jd.com",
"query": "",
"port": 80,
"path": "/10468590470.html",
"depth": 2,
"url": "https://item.jd.com/10468590470.html",
"_id": "14ca64908864d9a0fbc173eed901b289"
},
"engine": "superagent",
"charset": "gbk",
"header": {}
}, console.log);
console.log("crawler.plugins.downloader plugin ready!");

@@ -50,0 +65,0 @@ return [2 /*return*/];

@@ -7,3 +7,6 @@ export declare class DownloadPlugin {

*/
html({queueItem, proxyInfo, engine}: {
html({queueItem, proxyInfo, save, header, charset, engine}: {
charset: string;
save: boolean;
header: any;
queueItem: any;

@@ -14,5 +17,6 @@ proxyInfo: any;

statusCode: number | undefined;
responseBody: any;
crawlerCount: number;
}>;
inter({url, path, params, data, header, method, engine}: any): any;
inter({url, path, params, data, header, method, engine, _id}: any): any;
}

@@ -58,3 +58,3 @@ "use strict";

var proxy_1 = require("../proxy");
// import * as bluebird from 'bluebird';
var constants_1 = require("../constants");
var DownloadPlugin = (function () {

@@ -68,3 +68,3 @@ function DownloadPlugin() {

DownloadPlugin.prototype.html = function (_a, options) {
var queueItem = _a.queueItem, proxyInfo = _a.proxyInfo, _b = _a.engine, engine = _b === void 0 ? "superagent" : _b;
var queueItem = _a.queueItem, proxyInfo = _a.proxyInfo, _b = _a.save, save = _b === void 0 ? true : _b, _c = _a.header, header = _c === void 0 ? {} : _c, charset = _a.charset, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d;
return __awaiter(this, void 0, void 0, function () {

@@ -87,3 +87,3 @@ var res, expireSeneca, download;

"interfaces": [{
"path": "/",
"path": "",
"method": "get",

@@ -94,6 +94,12 @@ "key": "download",

});
return [4 /*yield*/, this.proxy.proxy.execute("/download/download", proxyInfo || {})];
return [4 /*yield*/, this.proxy.proxy.execute("/download/download", {
settings: {
header: header,
charset: charset
}
})];
case 1:
res = _a.sent();
expireSeneca = options.seneca.delegate({ expire$: 15 });
if (!save) return [3 /*break*/, 3];
expireSeneca = options.seneca.delegate({ expire$: 60 });
download = expireSeneca.make$('downloads', __assign({ id: queueItem._id, data: res.statusCode }, queueItem, { responseBody: res.body }));

@@ -103,9 +109,8 @@ return [4 /*yield*/, download.saveAsync()];

_a.sent();
// download = expireSeneca.make$('downloads');
// console.log(await download.loadAsync({ id: queueItem._id }));
return [2 /*return*/, {
statusCode: res.statusCode,
// responseBody: res.body,
crawlerCount: ~~queueItem.crawlerCount + 1
}];
_a.label = 3;
case 3: return [2 /*return*/, {
statusCode: res.statusCode,
responseBody: save ? null : res.body,
crawlerCount: ~~queueItem.crawlerCount + 1
}];
}

@@ -116,3 +121,3 @@ });

DownloadPlugin.prototype.inter = function (_a) {
var url = _a.url, _b = _a.path, path = _b === void 0 ? "" : _b, params = _a.params, data = _a.data, header = _a.header, _c = _a.method, method = _c === void 0 ? "get" : _c, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d;
var url = _a.url, _b = _a.path, path = _b === void 0 ? "" : _b, params = _a.params, data = _a.data, header = _a.header, _c = _a.method, method = _c === void 0 ? "get" : _c, _d = _a.engine, engine = _d === void 0 ? "superagent" : _d, _e = _a._id, _id = _e === void 0 ? "" : _e;
/**

@@ -157,3 +162,3 @@ * 添加接口信息

__decorate([
crawler_plugins_common_1.Add("role:crawler.plugin.downloader,cmd:html"),
crawler_plugins_common_1.Add("role:" + constants_1.pluginName + ",cmd:html"),
__metadata("design:type", Function),

@@ -164,3 +169,3 @@ __metadata("design:paramtypes", [Object, Object]),

__decorate([
crawler_plugins_common_1.Add("role:crawler.plugin.downloader,cmd:interface"),
crawler_plugins_common_1.Add("role:" + constants_1.pluginName + ",cmd:interfaces"),
__metadata("design:type", Function),

@@ -171,3 +176,3 @@ __metadata("design:paramtypes", [Object]),

DownloadPlugin = __decorate([
crawler_plugins_common_1.Plugin("crawler.plugin.downloader"),
crawler_plugins_common_1.Plugin(constants_1.pluginName),
inversify_1.injectable()

@@ -174,0 +179,0 @@ ], DownloadPlugin);

{
"name": "crawler.plugins.downloader",
"version": "0.1.13",
"version": "0.1.14",
"description": "爬虫下载插件",

@@ -9,3 +9,3 @@ "main": "out/index.js",

"start": "node out/index.js",
"dock": "docker run -d --network=host --name crawler.plugins.downloader -e CONFIG_PATH=/app/config.json crawler.plugins.downloader",
"dock": "docker run -it --rm --network=bridge --name crawler.plugins.downloader -e HOST=192.168.31.153 -e CONFIG_PATH=/app/config.js crawler.plugins.downloader",
"dockfile": "docker build -t crawler.plugins.downloader .",

@@ -22,3 +22,3 @@ "curl": "curl -d '{\"role\":\"crawler.plugin.downloader\",\"cmd\":\"download\",\"id\":3}' http://0.0.0.0:9999/act"

"dependencies": {
"crawler.plugins.common": "^0.1.13",
"crawler.plugins.common": "^0.1.14",
"inversify": "^4.2.0",

@@ -39,3 +39,4 @@ "modelproxy": "^0.5.19",

"seneca-transport": "^2.1.1",
"superagent": "^3.5.2"
"superagent": "^3.5.2",
"superagent-charset": "^1.2.0"
},

@@ -42,0 +43,0 @@ "devDependencies": {

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc