declarative-crawler - npm package version comparison

Comparing version 0.0.7 to 0.0.8

dist/utils/logger.js


dist/server/CrawlerServer.js

@@ -110,6 +110,15 @@ "use strict";

// No need to wait for the crawl to start before returning, so fire it off asynchronously via a Promise
router.get("/start", function (ctx, next) {
// Start every registered crawler
_this.crawlerScheduler.run().then();
router.get("/start/:crawlerName", function (ctx, next) {
// Read the path parameter
var crawlerName = ctx.params.crawlerName;
if (crawlerName === "all") {
// Start every registered crawler
_this.crawlerScheduler.run().then();
} else {
// Start only the crawler with the given name
_this.crawlerScheduler.run(crawlerName).then();
}
// Response structure

@@ -116,0 +125,0 @@ ctx.body = {

@@ -14,2 +14,4 @@ "use strict";

var _logger = require("../../utils/logger");
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

@@ -126,3 +128,4 @@

console.error(_context.t1);
_logger.errorLogger.log(_context.t1.message);
return _context.abrupt("return", false);

@@ -129,0 +132,0 @@


dist/source/crawler/CrawlerScheduler.js

@@ -109,3 +109,3 @@ "use strict";

value: function () {
var _ref2 = _asyncToGenerator(regeneratorRuntime.mark(function _callee2() {
var _ref2 = _asyncToGenerator(regeneratorRuntime.mark(function _callee2(crawlerNameToRun) {
var _this2 = this;

@@ -119,3 +119,11 @@

case 0:
crawlerNames = Object.keys(this.crawlers);
crawlerNames = Object.keys(this.crawlers).filter(function (crawlerName) {
// If no filter value is set, run all crawlers by default
if (!crawlerNameToRun) {
return true;
} else {
// Otherwise run only the specified crawler
return crawlerName === crawlerNameToRun;
}
});
_iteratorNormalCompletion = true;

@@ -218,3 +226,3 @@ _didIteratorError = false;

function run() {
function run(_x2) {
return _ref2.apply(this, arguments);

@@ -221,0 +229,0 @@ }

@@ -22,2 +22,4 @@ "use strict";

var _logger = require("../../utils/logger");
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

@@ -307,3 +309,3 @@

// If this step throws, report the error
console.error(_context6.t0);
_logger.errorLogger.log(_context6.t0.message);
return _context6.abrupt("return");

@@ -378,3 +380,4 @@

console.log(_context6.t1.message);
_logger.errorLogger.log(_context6.t1.message);
this.crawler && _supervisor.dcEmitter.emit("Spider", new _SpiderMessage2.default(_SpiderMessage2.default.VALIDATE_FAILURE, this, _context6.t1.message));

@@ -381,0 +384,0 @@

@@ -18,2 +18,4 @@ "use strict";

var _logger = require("../../../utils/logger");
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

@@ -178,3 +180,3 @@

}).on("error", function (err) {
console.error(err);
_logger.errorLogger.log(err.message);
});

@@ -181,0 +183,0 @@

package.json

{
"name": "declarative-crawler",
"version": "0.0.7",
"version": "0.0.8",
"description": "Declarative and Observable Distributed Crawler For Web, RDB, OS, also can act as a Monitor or ETL for your system",

@@ -5,0 +5,0 @@ "scripts": {

@@ -68,6 +68,14 @@ // @flow

// No need to wait for the crawl to start before returning, so fire it off asynchronously via a Promise
router.get("/start", (ctx, next) => {
// Start every registered crawler
this.crawlerScheduler.run().then();
router.get("/start/:crawlerName", (ctx, next) => {
// Read the path parameter
const { crawlerName } = ctx.params;
if (crawlerName === "all") {
// Start every registered crawler
this.crawlerScheduler.run().then();
} else {
// Start only the crawler with the given name
this.crawlerScheduler.run(crawlerName).then();
}
// Response structure

@@ -74,0 +82,0 @@ ctx.body = {
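
For illustration, here is how the new /start/:crawlerName route might be exercised once a CrawlerServer is listening. The host, port, and crawler name below are assumptions made for the example, not values fixed by this diff.

// Hypothetical client for the /start/:crawlerName route added in 0.0.8.
// "localhost:3001" and "mySpiderCrawler" are placeholder assumptions.
const http = require("http");

// Passing "all" behaves like the old /start route and runs every crawler
http.get("http://localhost:3001/start/all", res => {
  console.log("start all ->", res.statusCode);
});

// Any other value is forwarded to crawlerScheduler.run(crawlerName)
http.get("http://localhost:3001/start/mySpiderCrawler", res => {
  console.log("start one ->", res.statusCode);
});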

// @flow
import Persistor from "./Persistor";
import { errorLogger } from "../../utils/logger";
const download = require("image-downloader");

@@ -23,3 +24,4 @@

} catch (e) {
console.error(e);
errorLogger.log(e.message);
return false;

@@ -26,0 +28,0 @@ }

@@ -56,4 +56,14 @@ // @flow

*/
async run() {
let crawlerNames = Object.keys(this.crawlers);
async run(crawlerNameToRun: string) {
let crawlerNames = Object.keys(
this.crawlers
).filter((crawlerName: string) => {
// If no filter value is set, run all crawlers by default
if (!crawlerNameToRun) {
return true;
} else {
// Otherwise run only the specified crawler
return crawlerName === crawlerNameToRun;
}
});

@@ -60,0 +70,0 @@ for (let crawlerName of crawlerNames) {
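
The filter above is the whole behavioural change to run(): with no argument every registered crawler name passes, otherwise only the matching one does. Below is a minimal standalone sketch of that selection logic; the registry contents are made up for the example.

// Standalone sketch of the name filtering introduced in run(crawlerNameToRun).
// The crawler registry below is a made-up stand-in for this.crawlers.
const crawlers = { newsCrawler: {}, imageCrawler: {} };

function selectCrawlerNames(crawlerNameToRun) {
  return Object.keys(crawlers).filter(crawlerName => {
    // If no filter value is given, run everything by default
    if (!crawlerNameToRun) {
      return true;
    }
    // Otherwise keep only the requested crawler
    return crawlerName === crawlerNameToRun;
  });
}

console.log(selectCrawlerNames()); // ["newsCrawler", "imageCrawler"]
console.log(selectCrawlerNames("imageCrawler")); // ["imageCrawler"]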

@@ -8,2 +8,3 @@ // @flow

import SpiderMessage from "../crawler/store/entity/SpiderMessage";
import { errorLogger } from "../../utils/logger";

@@ -144,3 +145,3 @@ type ModelType = {

// If this step throws, report the error
console.error(e);
errorLogger.log(e.message);
return;

@@ -205,3 +206,4 @@ }

} catch (e) {
console.log(e.message);
errorLogger.log(e.message);
this.crawler &&

@@ -208,0 +210,0 @@ dcEmitter.emit(

// @flow
import HTMLSpider from "./HTMLSpider";
import { override } from "core-decorators";
import { errorLogger } from "../../../utils/logger";
const CDP = require("chrome-remote-interface");

@@ -45,3 +46,2 @@

@override async fetch(url: string, option: Object): Promise<any> {
return new Promise(async (resolve, reject) => {

@@ -98,3 +98,3 @@ // Set the fetch timeout to at most 1 minute

).on("error", err => {
console.error(err);
errorLogger.log(err.message);
});

@@ -101,0 +101,0 @@ });
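
Every error path above now calls errorLogger.log(message) from the new utils/logger module, whose own contents are not expanded in this comparison. The sketch below is therefore only a hypothetical module with a compatible surface; appending to a local error.log file is an assumption, not the package's actual implementation.

// Hypothetical utils/logger sketch, NOT the actual dist/utils/logger.js from 0.0.8.
// Only the errorLogger.log(message) surface used in the diffs above is assumed.
const fs = require("fs");
const path = require("path");

class FileLogger {
  constructor(fileName) {
    this.filePath = path.resolve(process.cwd(), fileName);
  }

  log(message) {
    const line = `[${new Date().toISOString()}] ${message}\n`;
    // Append asynchronously and ignore secondary failures so logging never throws
    fs.appendFile(this.filePath, line, () => {});
  }
}

const errorLogger = new FileLogger("error.log");

module.exports = { errorLogger };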
