crawler.plugins.downloader
Advanced tools
Comparing version 0.1.16 to 0.1.17
@@ -10,6 +10,5 @@ module.exports = { | ||
"redis-store-expires": { | ||
"uri": "redis://123.59.44.152:6379", | ||
"host": "123.59.44.152", | ||
"host": "47.92.126.120", | ||
"port": 6379, | ||
"options": {}, | ||
"auth": "crawler", | ||
"expire": 20, | ||
@@ -23,3 +22,3 @@ "entityspec": { | ||
"consul-registry": { | ||
"host": "123.59.44.152" | ||
"host": "47.92.126.120" | ||
}, | ||
@@ -29,3 +28,2 @@ "mesh": { | ||
"auto": true, | ||
// "stop": false, | ||
"host": process.env.HOST, | ||
@@ -38,4 +36,3 @@ "discover": { | ||
"listen": [{ | ||
"pin": "role:crawler.plugin.downloader,cmd:*", | ||
"port": 9001 | ||
"pin": "role:crawler.plugin.downloader,cmd:*" | ||
}] | ||
@@ -42,0 +39,0 @@ } |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.Types = { | ||
engine: 'modelproxy-engine' | ||
engine: "modelproxy-engine" | ||
}; | ||
exports.pluginName = "crawler.plugin.downloader"; | ||
//# sourceMappingURL=constants.js.map |
@@ -1,3 +0,3 @@ | ||
import * as inversify from 'inversify'; | ||
import * as inversify from "inversify"; | ||
export declare const container: inversify.interfaces.Container; | ||
export declare const provider: any; |
@@ -1,2 +0,2 @@ | ||
import { modelProxy, IInterfaceModel, IExecute } from 'modelproxy'; | ||
import { modelProxy, IInterfaceModel, IExecute } from "modelproxy"; | ||
export declare class RequestEngine extends modelProxy.BaseEngine { | ||
@@ -3,0 +3,0 @@ engineName: string; |
@@ -1,2 +0,2 @@ | ||
import { modelProxy, IInterfaceModel, IExecute } from 'modelproxy'; | ||
import { modelProxy, IInterfaceModel, IExecute } from "modelproxy"; | ||
export declare class SuperAgentEngine extends modelProxy.BaseEngine { | ||
@@ -3,0 +3,0 @@ engineName: string; |
@@ -60,3 +60,3 @@ "use strict"; | ||
var inversify_1 = require("inversify"); | ||
require('superagent-charset')(request); | ||
require("superagent-charset")(request); | ||
var SuperAgentEngine = (function (_super) { | ||
@@ -63,0 +63,0 @@ __extends(SuperAgentEngine, _super); |
@@ -1,1 +0,1 @@ | ||
import 'reflect-metadata'; | ||
import "reflect-metadata"; |
@@ -48,18 +48,3 @@ "use strict"; | ||
return __generator(this, function (_a) { | ||
seneca.seneca.act("role:crawler.plugin.downloader,cmd:html", { | ||
"queueItem": { | ||
"protocol": "https", | ||
"host": "item.jd.com", | ||
"query": "", | ||
"port": 80, | ||
"path": "/10468590470.html", | ||
"depth": 2, | ||
"url": "https://item.jd.com/10468590470.html", | ||
"_id": "14ca64908864d9a0fbc173eed901b289" | ||
}, | ||
"engine": "superagent", | ||
"charset": "gbk", | ||
"header": {} | ||
}, console.log); | ||
console.log("crawler.plugins.downloader plugin ready!"); | ||
console.log("crawler.plugins.downloader ready!"); | ||
return [2 /*return*/]; | ||
@@ -66,0 +51,0 @@ }); |
@@ -15,7 +15,7 @@ export declare class DownloadPlugin { | ||
}, options: any): Promise<{ | ||
crawlerCount: number; | ||
responseBody: any; | ||
statusCode: number | undefined; | ||
responseBody: any; | ||
crawlerCount: number; | ||
}>; | ||
inter({url, path, params, data, header, method, engine, _id}: any): any; | ||
} |
@@ -69,6 +69,7 @@ "use strict"; | ||
return __awaiter(this, void 0, void 0, function () { | ||
var res, expireSeneca, download; | ||
var start, res, expireSeneca, download; | ||
return __generator(this, function (_a) { | ||
switch (_a.label) { | ||
case 0: | ||
start = Date.now(); | ||
/** | ||
@@ -78,15 +79,15 @@ * 添加接口信息 | ||
this.proxy.proxy.loadConfig({ | ||
"engine": engine, | ||
"interfaces": [{ | ||
"key": "download", | ||
"method": "get", | ||
"path": "", | ||
"title": "" | ||
}], | ||
"key": "download", | ||
"title": "download下载接口", | ||
"state": "html", | ||
"engine": engine, | ||
"states": { | ||
"html": queueItem.url | ||
}, | ||
"interfaces": [{ | ||
"path": "", | ||
"method": "get", | ||
"key": "download", | ||
"title": "" | ||
}] | ||
"title": "download下载接口", | ||
}); | ||
@@ -103,3 +104,3 @@ return [4 /*yield*/, this.proxy.proxy.execute("/download/download", { | ||
expireSeneca = options.seneca.delegate({ expire$: 60 }); | ||
download = expireSeneca.make$('downloads', __assign({ id: queueItem._id, data: res.statusCode }, queueItem, { responseBody: res.body })); | ||
download = expireSeneca.make$("downloads", __assign({ data: res.statusCode, id: queueItem._id }, queueItem, { responseBody: res.body })); | ||
return [4 /*yield*/, download.saveAsync()]; | ||
@@ -109,7 +110,9 @@ case 2: | ||
_a.label = 3; | ||
case 3: return [2 /*return*/, { | ||
statusCode: res.statusCode, | ||
responseBody: save ? null : res.body, | ||
crawlerCount: ~~queueItem.crawlerCount + 1 | ||
}]; | ||
case 3: | ||
console.log(queueItem.url, "-----downloader 成功;耗时:", Date.now() - start, "ms"); | ||
return [2 /*return*/, { | ||
crawlerCount: 1 * queueItem.crawlerCount + 1, | ||
responseBody: save ? null : res.body, | ||
statusCode: res.statusCode, | ||
}]; | ||
} | ||
@@ -125,15 +128,15 @@ }); | ||
this.proxy.proxy.loadConfig({ | ||
"engine": engine, | ||
"interfaces": [{ | ||
"key": "interface", | ||
"method": method, | ||
"path": path, | ||
"title": "" | ||
}], | ||
"key": "download", | ||
"title": "download下载接口", | ||
"state": "interface", | ||
"engine": engine, | ||
"states": { | ||
"interface": url | ||
}, | ||
"interfaces": [{ | ||
"path": path, | ||
"method": method, | ||
"key": "interface", | ||
"title": "" | ||
}] | ||
"title": "download下载接口", | ||
}); | ||
@@ -149,4 +152,4 @@ /** | ||
return { | ||
responseBody: res.body, | ||
statusCode: res.statusCode, | ||
responseBody: res.body | ||
}; | ||
@@ -153,0 +156,0 @@ }); |
@@ -1,1 +0,1 @@ | ||
import 'reflect-metadata'; | ||
import "reflect-metadata"; |
@@ -49,7 +49,2 @@ "use strict"; | ||
}; | ||
var HOST = process.env.HOST || process.argv[2] || "0.0.0.0"; | ||
var BASES = (process.env.BASES || process.argv[3] || '').split(','); | ||
var PORT = process.env.PORT; | ||
var BROADCAST = process.env.BROADCAST; | ||
var REGISTRY = JSON.parse(process.env.REGISTRY || '{"active":true}'); | ||
var seneca = new crawler_plugins_common_1.Seneca(container_1.container, { | ||
@@ -69,22 +64,2 @@ tag: pluginName | ||
}); }); | ||
// request1.get("https://search.jd.com/search?keyword=%E6%B2%99%E5%8F%91&enc=utf-8&ev=exbrand_%E8%8A%9D%E5%8D%8E%E4%BB%95%EF%BC%88CHEERS%EF%BC%89/").end(console.log) | ||
// request("https://search.jd.com/search?keyword=%E6%B2%99%E5%8F%91&enc=utf-8&ev=exbrand_%E8%8A%9D%E5%8D%8E%E4%BB%95%EF%BC%88CHEERS%EF%BC%89/", { | ||
// method: "get", | ||
// resolveWithFullResponse: true, | ||
// rejectUnauthorized: false, | ||
// port: 443, | ||
// // headers: { | ||
// // Host: "search.jd.com", | ||
// // Referrer:"", | ||
// // Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", | ||
// // "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6" | ||
// // }, | ||
// // agentOptions: { | ||
// // secureProtocol: 'SSLv3_method' | ||
// // }, | ||
// userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", | ||
// timeout: 5000 | ||
// }, undefined).then((res) => { | ||
// console.log(res, /1999.00/i.test(res.body)); | ||
// }); | ||
//# sourceMappingURL=test.js.map |
{ | ||
"name": "crawler.plugins.downloader", | ||
"version": "0.1.16", | ||
"version": "0.1.17", | ||
"description": "爬虫下载插件", | ||
@@ -21,3 +21,3 @@ "main": "out/index.js", | ||
"dependencies": { | ||
"crawler.plugins.common": "^0.1.16", | ||
"crawler.plugins.common": "^0.1.17", | ||
"inversify": "^4.2.0", | ||
@@ -24,0 +24,0 @@ "modelproxy": "^0.5.19", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 5 instances in 1 package
1
56532
32
827