New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

declarative-crawler

Package Overview
Dependencies
Maintainers
1
Versions
20
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

declarative-crawler - npm Package Compare versions

Comparing version 0.0.4 to 0.0.5

3

__demo__/hackernews/crawler/NewsCrawler.js

@@ -5,2 +5,5 @@ // @flow

/**
* Description 新闻爬虫
*/
export default class NewsCrawler extends Crawler {

@@ -7,0 +10,0 @@ initialize() {

9

__demo__/zhihu/crawler/BeautyTopicCrawler.js

@@ -31,5 +31,8 @@ // @flow

// 存在有效二级链接
return href.indexOf("zhihu.com") > -1
? href
: `https://www.zhihu.com${href}`;
return {
url: href.indexOf("zhihu.com") > -1
? href
: `https://www.zhihu.com${href}`,
extra: feedItem
};
}

@@ -36,0 +39,0 @@ });

// @flow
import { $ } from "../../../src/utils/parser/HTMLParser";
import HeadlessChromeSpider from "../../../src/spider/web/HeadlessChromeSpider";
import HeadlessChromeSpider
from "../../../src/source/spider/web/HeadlessChromeSpider";
/**

@@ -9,2 +9,4 @@ * @function 知乎某个话题答案的爬虫

export default class TopicSpider extends HeadlessChromeSpider {
static displayName = "话题蜘蛛";
// 定义模型

@@ -11,0 +13,0 @@ model = {

@@ -10,2 +10,6 @@ "use strict";

/**
* Description 获取操作系统信息
* @returns {Promise}
*/
var getOSInfo = function () {

@@ -69,3 +73,2 @@ var _ref = _asyncToGenerator(regeneratorRuntime.mark(function _callee() {

var router = new Router();
var CrawlerServer = function () {

@@ -108,2 +111,3 @@

// 启动整个爬虫
// 这里不需要等待启动返回,因此直接使用 Promise 异步执行
router.get("/start", function (ctx, next) {

@@ -170,3 +174,5 @@ // 启动整个爬虫

app.listen(this.httpOption.port, this.httpOption.host, function () {
console.log("服务端开始运行");
var baseUrl = _this.httpOption.host + ":" + _this.httpOption.port;
console.log("\n \u722C\u866B\u670D\u52A1\u7AEF\u5F00\u59CB\u8FD0\u884C\uFF1A\n " + baseUrl + "/ - \u67E5\u770B\u722C\u866B\u5217\u8868\n " + baseUrl + "/:crawlerName - \u67E5\u770B\u67D0\u4E2A\u722C\u866B\u8BE6\u60C5\n " + baseUrl + "/start - \u542F\u52A8\u6240\u6709\u722C\u866B\n " + baseUrl + "/status - \u67E5\u770B\u7CFB\u7EDF\u72B6\u6001\n ");
});

@@ -173,0 +179,0 @@

@@ -184,3 +184,3 @@ "use strict";

if (spiderTask.request.extra) {
spiderTask.nextSpiderInstance.setExtra(spiderTask.request.extra);
spiderTask.spiderInstance.setExtra(spiderTask.request.extra);
}

@@ -187,0 +187,0 @@

{
"name": "declarative-crawler",
"version": "0.0.4",
"version": "0.0.5",
"description": "Declarative and Observable Distributed Crawler For Web, RDB, OS, also can act as a Monitor or ETL for your system",

@@ -26,21 +26,21 @@ "scripts": {

"cheerio": "^0.22.0",
"chrome-remote-interface": "^0.20.0",
"core-decorators": "^0.17.0",
"es6-promise": "^3.2.1",
"fluent-fetcher": "0.2.4",
"image-downloader": "^3.2.1",
"chrome-remote-interface": "^0.24.0",
"core-decorators": "^0.19.0",
"es6-promise": "^4.1.1",
"fluent-fetcher": "^0.3.0",
"image-downloader": "^3.2.2",
"isomorphic-fetch": "^2.2.1",
"isomorphic-urlencode": "0.0.9",
"koa": "^2.2.0",
"koa": "^2.3.0",
"koa-router": "next",
"md5": "^2.2.1",
"mysql": "^2.13.0",
"pidusage": "^1.1.1",
"pidusage": "^1.1.5",
"qrcode-terminal": "^0.11.0",
"wolfy87-eventemitter": "^5.1.0"
"wolfy87-eventemitter": "^5.2.1"
},
"devDependencies": {
"babel-cli": "^6.14.0",
"babel-core": "^6.14.0",
"babel-jest": "^19.0.0",
"babel-core": "^6.25.0",
"babel-jest": "^20.0.3",
"babel-plugin-async-to-promises": "^1.0.5",

@@ -47,0 +47,0 @@ "babel-plugin-transform-class-properties": "^6.24.1",

@@ -7,3 +7,4 @@ // @flow

import CrawlerScheduler from "../source/crawler/CrawlerScheduler";
import CrawlerStatistics from "../source/crawler/store/entity/CrawlerStatistics";
import CrawlerStatistics
from "../source/crawler/store/entity/CrawlerStatistics";
const pusage = require("pidusage");

@@ -16,2 +17,6 @@ const os = require("os");

/**
* Description 获取操作系统信息
* @returns {Promise}
*/
async function getOSInfo() {

@@ -64,2 +69,3 @@ return new Promise(resolve => {

// 启动整个爬虫
// 这里不需要等待启动返回,因此直接使用 Promise 异步执行
router.get("/start", (ctx, next) => {

@@ -105,5 +111,15 @@ // 启动整个爬虫

app.listen(this.httpOption.port, this.httpOption.host, () => {
console.log("服务端开始运行");
const baseUrl = `${this.httpOption.host}:${this.httpOption.port}`;
console.log(
`
爬虫服务端开始运行:
${baseUrl}/ - 查看爬虫列表
${baseUrl}/:crawlerName - 查看某个爬虫详情
${baseUrl}/start - 启动所有爬虫
${baseUrl}/status - 查看系统状态
`
);
});
}
}

@@ -136,3 +136,3 @@ // @flow

// 取出某个任务实例
let spiderTask = this._spiderTasks.shift();
let spiderTask:SpiderTask = this._spiderTasks.shift();

@@ -147,3 +147,3 @@ // 设置爬虫的请求

if (spiderTask.request.extra) {
spiderTask.nextSpiderInstance.setExtra(spiderTask.request.extra);
spiderTask.spiderInstance.setExtra(spiderTask.request.extra);
}

@@ -150,0 +150,0 @@

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc