crawler.plugins.mq
Advanced tools
Comparing version 0.1.14 to 0.1.15
@@ -25,3 +25,3 @@ module.exports = { | ||
"mesh": { | ||
"isbase": true, | ||
"isbase": false, | ||
"auto": true, | ||
@@ -31,3 +31,2 @@ "host": process.env.HOST, | ||
"sneeze": { | ||
// "silent": JSON.parse(SILENT), | ||
"silent": true, | ||
@@ -40,3 +39,6 @@ "swim": { interval: 1111 } | ||
} | ||
} | ||
}, | ||
"listen": [{ | ||
"pin": "role:crawler.plugin.task,cmd:*" | ||
}] | ||
} | ||
@@ -43,0 +45,0 @@ }, |
export declare const Types: { | ||
engine: string; | ||
}; | ||
export declare const pluginMqName = "crawler.plugin.mq"; | ||
export declare const pluginTaskName = "crawler.plugin.task"; | ||
export declare const pluginResultName = "crawler.plugin.result"; |
@@ -6,2 +6,5 @@ "use strict"; | ||
}; | ||
exports.pluginMqName = "crawler.plugin.mq"; | ||
exports.pluginTaskName = "crawler.plugin.task"; | ||
exports.pluginResultName = "crawler.plugin.result"; | ||
//# sourceMappingURL=constants.js.map |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var inversify_1 = require("inversify"); | ||
// import { MQueuePlugin } from "./plugins/mq"; | ||
// import { TaskPlugin } from "./plugins/task"; | ||
// import { MQueueService } from "./libs/mq"; | ||
var crawler_plugins_common_1 = require("crawler.plugins.common"); | ||
var mq_1 = require("./plugins/mq"); | ||
var task_1 = require("./plugins/task"); | ||
var result_1 = require("./plugins/result"); | ||
var mq_2 = require("./libs/mq"); | ||
exports.container = new inversify_1.Container(); | ||
// container.bind<PluginBase>(CommonTypes._plugin).to(MQueuePlugin).inSingletonScope().whenAnyAncestorNamed("MQueuePlugin"); | ||
// container.bind<PluginBase>(CommonTypes._plugin).to(TaskPlugin).inSingletonScope().whenAnyAncestorNamed("TaskPlugin"); | ||
// container.bind<MQueueService>(MQueueService).toSelf(); | ||
exports.container.bind(crawler_plugins_common_1.Types._plugin).to(mq_1.MQueuePlugin).inSingletonScope().whenAnyAncestorNamed("MQueuePlugin"); | ||
exports.container.bind(crawler_plugins_common_1.Types._plugin).to(task_1.TaskPlugin).inSingletonScope().whenAnyAncestorNamed("TaskPlugin"); | ||
exports.container.bind(crawler_plugins_common_1.Types._plugin).to(result_1.ResultPlugin).inSingletonScope().whenAnyAncestorNamed("ResultPlugin"); | ||
exports.container.bind(mq_2.MQueueService).toSelf(); | ||
//# sourceMappingURL=container.js.map |
156
out/index.js
@@ -42,9 +42,157 @@ "use strict"; | ||
var container_1 = require("./container"); | ||
var constants_1 = require("./constants"); | ||
// import config from './config/test'; | ||
var seneca = new crawler_plugins_common_1.Seneca(container_1.container, { | ||
tag: "crawler.plugins.base" | ||
tag: "crawler.plugins.schedule" | ||
}); | ||
seneca.seneca | ||
.ready(function () { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) { | ||
return [2 /*return*/]; | ||
}); }); }); | ||
.ready(function () { return __awaiter(_this, void 0, void 0, function () { | ||
return __generator(this, function (_a) { | ||
switch (_a.label) { | ||
case 0: | ||
console.log("ready"); | ||
/** | ||
* 1. 地址queue化 | ||
* 2. 下载页面 | ||
* 3. 分析结果 | ||
* 4. url存入elasticsearch | ||
* 5. url放入queue | ||
*/ | ||
return [4 /*yield*/, seneca.seneca.actAsync("role:" + constants_1.pluginTaskName + ",cmd:add", { | ||
key: "testplugin", | ||
plugins: [{ | ||
"key": "queue", | ||
"partten": "role:crawler.plugin.queue,cmd:queue", | ||
"title": "把地址queue化", | ||
"data": { | ||
"queueConfig": { | ||
"ignoreWWWDomain": false, | ||
"stripWWWDomain": false, | ||
"scanSubdomains": true, | ||
"host": "www.jd.com", | ||
"initialProtocol": "https", | ||
"initialPort": 80, | ||
"stripQuerystring": false, | ||
"fetchConditions": [], | ||
"domainWhiteList": ["(.*?).jd.com"], | ||
"filterByDomain": true | ||
}, | ||
"urls": ["https://search.jd.com/search?keyword=%E6%B2%99%E5%8F%91&enc=utf-8&ev=exbrand_%E8%8A%9D%E5%8D%8E%E4%BB%95%EF%BC%88CHEERS%EF%BC%89/"] | ||
} | ||
}, { | ||
"partten": "role:crawler.plugin.downloader,cmd:html", | ||
"title": "下载页面", | ||
"result": { | ||
"move": { | ||
"/data/queueItem": "/queueItem" | ||
} | ||
}, | ||
"data": {} | ||
}, { | ||
"key": "result", | ||
"partten": "role:crawler.plugin.html,cmd:html", | ||
"title": "分析页面", | ||
"result": { | ||
"move": { | ||
"/data/queueItem": "/queueItem" | ||
} | ||
}, | ||
"data": { | ||
"pages": [{ | ||
"key": "brandlist", | ||
"path": "*", | ||
"enabled": 1, | ||
"fields": { | ||
"none": { | ||
"data": [{ | ||
"key": "totalPage", | ||
"dealStrategy": "normal", | ||
"selector": [ | ||
"#J_topPage .fp-text i" | ||
], | ||
"methodInfo": { | ||
"text": [] | ||
} | ||
}, | ||
{ | ||
"key": "skus", | ||
"selector": ["#J_goodsList ul:eq(0) > li"], | ||
"dealStrategy": "array", | ||
"data": [{ | ||
"key": "sku", | ||
"selector": [], | ||
"dealStrategy": "normal", | ||
"methodInfo": { | ||
"attr": ["data-sku"] | ||
} | ||
}, | ||
{ | ||
"key": "price", | ||
"selector": [".p-price i"], | ||
"dealStrategy": "normal", | ||
"methodInfo": { | ||
"text": [] | ||
} | ||
}, | ||
{ | ||
"key": "comment", | ||
"selector": [".p-commit strong a"], | ||
"dealStrategy": "normal", | ||
"methodInfo": { | ||
"text": [] | ||
}, | ||
"formats": [{ | ||
"key": "regexp", | ||
"settings": { | ||
"regexp": "/\\d+/", | ||
"index": 0 | ||
} | ||
}, { | ||
"key": "num" | ||
}] | ||
} | ||
] | ||
} | ||
] | ||
} | ||
} | ||
}] | ||
} | ||
}, { | ||
"key": "urls", | ||
"partten": "role:crawler.plugin.store.es,cmd:saveUrls", | ||
"title": "存储爬取到的urls", | ||
"result": { | ||
"move": { | ||
"/data/urls": "/queue" | ||
} | ||
}, | ||
"data": { | ||
"esIndex": "testplugin", | ||
"esType": "url" | ||
} | ||
}, { | ||
"partten": "role:" + constants_1.pluginMqName + ",cmd:addItemToQueue", | ||
"title": "把存储的url放入queue", | ||
"result": { | ||
"/data/items": "/urls" | ||
}, | ||
"data": { | ||
"key": "testplugin", | ||
} | ||
}] | ||
})]; | ||
case 1: | ||
/** | ||
* 1. 地址queue化 | ||
* 2. 下载页面 | ||
* 3. 分析结果 | ||
* 4. url存入elasticsearch | ||
* 5. url放入queue | ||
*/ | ||
_a.sent(); | ||
return [2 /*return*/]; | ||
} | ||
}); | ||
}); }); | ||
//# sourceMappingURL=index.js.map |
{ | ||
"name": "crawler.plugins.mq", | ||
"version": "0.1.14", | ||
"version": "0.1.15", | ||
"description": "", | ||
@@ -9,4 +9,5 @@ "main": "index.js", | ||
"start": "node out/index.js", | ||
"dock": "docker run --net=host --name crawler.plugins.base -p 39999:39999/udp -e HOST=0.0.0.0 crawler.plugin.base", | ||
"dockfile": "docker build -t crawler.plugin.base ." | ||
"dock": "docker run --net=host --name crawler.plugins.schedule -p 39999:39999/udp -e HOST=0.0.0.0 crawler.plugin.schedule", | ||
"dockfile": "docker build -t crawler.plugin.schedule .", | ||
"curl": "curl -d '{\"role\":\"crawler.plugin.schedule\",\"cmd\":\"download\",\"id\":3}' http://0.0.0.0:9999/act" | ||
}, | ||
@@ -17,4 +18,5 @@ "author": "NICK", | ||
"amqplib": "^0.5.1", | ||
"crawler.plugins.common": "^0.1.14", | ||
"crawler.plugins.common": "^0.1.15", | ||
"inversify": "^4.2.0", | ||
"json-pointer": "^0.6.0", | ||
"lodash": "^4.17.4", | ||
@@ -32,2 +34,3 @@ "reflect-metadata": "^0.1.10", | ||
"@types/inversify": "^2.0.33", | ||
"@types/json-pointer": "^1.0.30", | ||
"@types/lodash": "^4.14.71", | ||
@@ -34,0 +37,0 @@ "@types/node": "^8.0.10", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
85160
32
1419
12
7
+ Added json-pointer@^0.6.0
+ Added foreach@2.0.6 (transitive)
+ Added json-pointer@0.6.2 (transitive)