declarative-crawler
Advanced tools
Comparing version 0.0.7 to 0.0.8
@@ -110,6 +110,15 @@ "use strict"; | ||
// 这里不需要等待启动返回,因此直接使用 Promise 异步执行 | ||
router.get("/start", function (ctx, next) { | ||
// 启动整个爬虫 | ||
_this.crawlerScheduler.run().then(); | ||
router.get("/start/:crawlerName", function (ctx, next) { | ||
// 获取到路径参数 | ||
var crawlerName = ctx.params.crawlerName; | ||
if (crawlerName === "all") { | ||
// 启动整个爬虫 | ||
_this.crawlerScheduler.run().then(); | ||
} else { | ||
// 启动指定名爬虫 | ||
_this.crawlerScheduler.run(crawlerName).then(); | ||
} | ||
// 返回结构 | ||
@@ -116,0 +125,0 @@ ctx.body = { |
@@ -14,2 +14,4 @@ "use strict"; | ||
var _logger = require("../../utils/logger"); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
@@ -126,3 +128,4 @@ | ||
console.error(_context.t1); | ||
_logger.errorLogger.log(_context.t1.message); | ||
return _context.abrupt("return", false); | ||
@@ -129,0 +132,0 @@ |
@@ -109,3 +109,3 @@ "use strict"; | ||
value: function () { | ||
var _ref2 = _asyncToGenerator(regeneratorRuntime.mark(function _callee2() { | ||
var _ref2 = _asyncToGenerator(regeneratorRuntime.mark(function _callee2(crawlerNameToRun) { | ||
var _this2 = this; | ||
@@ -119,3 +119,11 @@ | ||
case 0: | ||
crawlerNames = Object.keys(this.crawlers); | ||
crawlerNames = Object.keys(this.crawlers).filter(function (crawlerName) { | ||
// 如果没有设置过滤值,则默认全部运行 | ||
if (!crawlerNameToRun) { | ||
return true; | ||
} else { | ||
// 否则仅运行指定爬虫 | ||
return crawlerName === crawlerNameToRun; | ||
} | ||
}); | ||
_iteratorNormalCompletion = true; | ||
@@ -218,3 +226,3 @@ _didIteratorError = false; | ||
function run() { | ||
function run(_x2) { | ||
return _ref2.apply(this, arguments); | ||
@@ -221,0 +229,0 @@ } |
@@ -22,2 +22,4 @@ "use strict"; | ||
var _logger = require("../../utils/logger"); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
@@ -307,3 +309,3 @@ | ||
// 如果这一步发生异常,则报错 | ||
console.error(_context6.t0); | ||
_logger.errorLogger.log(_context6.t0.message); | ||
return _context6.abrupt("return"); | ||
@@ -378,3 +380,4 @@ | ||
console.log(_context6.t1.message); | ||
_logger.errorLogger.log(_context6.t1.message); | ||
this.crawler && _supervisor.dcEmitter.emit("Spider", new _SpiderMessage2.default(_SpiderMessage2.default.VALIDATE_FAILURE, this, _context6.t1.message)); | ||
@@ -381,0 +384,0 @@ |
@@ -18,2 +18,4 @@ "use strict"; | ||
var _logger = require("../../../utils/logger"); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
@@ -178,3 +180,3 @@ | ||
}).on("error", function (err) { | ||
console.error(err); | ||
_logger.errorLogger.log(err.message); | ||
}); | ||
@@ -181,0 +183,0 @@ |
{ | ||
"name": "declarative-crawler", | ||
"version": "0.0.7", | ||
"version": "0.0.8", | ||
"description": "Declarative and Observable Distributed Crawler For Web, RDB, OS, also can act as a Monitor or ETL for your system", | ||
@@ -5,0 +5,0 @@ "scripts": { |
@@ -68,6 +68,14 @@ // @flow | ||
// 这里不需要等待启动返回,因此直接使用 Promise 异步执行 | ||
router.get("/start", (ctx, next) => { | ||
// 启动整个爬虫 | ||
this.crawlerScheduler.run().then(); | ||
router.get("/start/:crawlerName", (ctx, next) => { | ||
// 获取到路径参数 | ||
const { crawlerName } = ctx.params; | ||
if (crawlerName === "all") { | ||
// 启动整个爬虫 | ||
this.crawlerScheduler.run().then(); | ||
} else { | ||
// 启动指定名爬虫 | ||
this.crawlerScheduler.run(crawlerName).then(); | ||
} | ||
// 返回结构 | ||
@@ -74,0 +82,0 @@ ctx.body = { |
// @flow | ||
import Persistor from "./Persistor"; | ||
import { errorLogger } from "../../utils/logger"; | ||
const download = require("image-downloader"); | ||
@@ -23,3 +24,4 @@ | ||
} catch (e) { | ||
console.error(e); | ||
errorLogger.log(e.message); | ||
return false; | ||
@@ -26,0 +28,0 @@ } |
@@ -56,4 +56,14 @@ // @flow | ||
*/ | ||
async run() { | ||
let crawlerNames = Object.keys(this.crawlers); | ||
async run(crawlerNameToRun: string) { | ||
let crawlerNames = Object.keys( | ||
this.crawlers | ||
).filter((crawlerName: string) => { | ||
// 如果没有设置过滤值,则默认全部运行 | ||
if (!crawlerNameToRun) { | ||
return true; | ||
} else { | ||
// 否则仅运行指定爬虫 | ||
return crawlerName === crawlerNameToRun; | ||
} | ||
}); | ||
@@ -60,0 +70,0 @@ for (let crawlerName of crawlerNames) { |
@@ -8,2 +8,3 @@ // @flow | ||
import SpiderMessage from "../crawler/store/entity/SpiderMessage"; | ||
import { errorLogger } from "../../utils/logger"; | ||
@@ -144,3 +145,3 @@ type ModelType = { | ||
// 如果这一步发生异常,则报错 | ||
console.error(e); | ||
errorLogger.log(e.message); | ||
return; | ||
@@ -205,3 +206,4 @@ } | ||
} catch (e) { | ||
console.log(e.message); | ||
errorLogger.log(e.message); | ||
this.crawler && | ||
@@ -208,0 +210,0 @@ dcEmitter.emit( |
// @flow | ||
import HTMLSpider from "./HTMLSpider"; | ||
import { override } from "core-decorators"; | ||
import { errorLogger } from "../../../utils/logger"; | ||
const CDP = require("chrome-remote-interface"); | ||
@@ -45,3 +46,2 @@ | ||
@override async fetch(url: string, option: Object): Promise<any> { | ||
return new Promise(async (resolve, reject) => { | ||
@@ -98,3 +98,3 @@ // 设置抓取过时,最多 1 分钟 | ||
).on("error", err => { | ||
console.error(err); | ||
errorLogger.log(err.message); | ||
}); | ||
@@ -101,0 +101,0 @@ }); |
253454
103
4217