Comparing version 3.2.7 to 3.2.8
import { XCrawlBaseConfig, XCrawlInstance } from './types' | ||
export default function xCrawl(baseConfig?: XCrawlBaseConfig): XCrawlInstance | ||
export * from './types' | ||
export * from './types/api' | ||
export * from './types/common' | ||
export * from './types/request' | ||
export type * from './types' | ||
export type * from './types/api' | ||
export type * from './types/common' | ||
export type * from './types/request' |
@@ -513,11 +513,27 @@ 'use strict'; | ||
case 19: | ||
_context.next = 21; | ||
if (!requestConfig.headers) { | ||
_context.next = 22; | ||
break; | ||
} | ||
_context.next = 22; | ||
return page.setExtraHTTPHeaders(Headers); | ||
case 22: | ||
httpResponse = null; | ||
_context.prev = 23; | ||
_context.next = 26; | ||
return page["goto"](requestConfig.url, { | ||
timeout: requestConfig.timeout | ||
}); | ||
case 21: | ||
case 26: | ||
httpResponse = _context.sent; | ||
_context.next = 24; | ||
_context.next = 32; | ||
break; | ||
case 29: | ||
_context.prev = 29; | ||
_context.t0 = _context["catch"](23); | ||
console.log("error: ".concat(logError(_context.t0.message))); | ||
case 32: | ||
_context.next = 34; | ||
return page.content(); | ||
case 24: | ||
case 34: | ||
content = _context.sent; | ||
@@ -534,7 +550,7 @@ res = { | ||
return _context.abrupt("return", res); | ||
case 28: | ||
case 38: | ||
case "end": | ||
return _context.stop(); | ||
} | ||
}, _callee); | ||
}, _callee, null, [[23, 29]]); | ||
})); | ||
@@ -541,0 +557,0 @@ return _crawlPage.apply(this, arguments); |
@@ -5,3 +5,3 @@ /// <reference types="node" /> | ||
import { JSDOM } from 'jsdom' | ||
import { RequestConfigObject } from './request' | ||
import { RequestConfigObjectV1, RequestConfigObjectV2 } from './request' | ||
import { AnyObject, MapTypeObject } from './common' | ||
@@ -14,9 +14,10 @@ export type IntervalTime = | ||
} | ||
export type RequestConfig = string | RequestConfigObject | ||
export interface MergeRequestConfigObject { | ||
url: string | ||
timeout?: number | ||
proxy?: string | ||
} | ||
type MergeRequestConfig = string | MergeRequestConfigObject | ||
export type RequestConfig = string | RequestConfigObjectV2 | ||
type MergeRequestConfig = | ||
| string | ||
| { | ||
url: string | ||
timeout?: number | ||
proxy?: string | ||
} | ||
export type MergeConfigRawConfig = { | ||
@@ -26,13 +27,14 @@ requestConfig: MergeRequestConfig | MergeRequestConfig[] | ||
} | ||
export type MergeConfigV1<T extends AnyObject> = MapTypeObject< | ||
export type MergeConfigV1 = { | ||
requestConfig: RequestConfigObjectV1[] | ||
intervalTime?: IntervalTime | ||
} | ||
export type MergeConfigV2<T extends AnyObject> = MapTypeObject< | ||
T, | ||
'requestConfig' | ||
> & { | ||
requestConfig: RequestConfigObject[] | ||
requestConfig: RequestConfigObjectV2[] | ||
intervalTime?: IntervalTime | ||
} | ||
export type MergeConfigV2 = { | ||
requestConfig: MergeRequestConfigObject[] | ||
intervalTime?: IntervalTime | ||
} | ||
export type CrawlPageConfig = string | RequestConfigObjectV1 | ||
export interface CrawlBaseConfigV1 { | ||
@@ -42,3 +44,2 @@ requestConfig: RequestConfig | RequestConfig[] | ||
} | ||
export type CrawlPageConfig = string | MergeRequestConfigObject | ||
export interface CrawlDataConfig extends CrawlBaseConfigV1 {} | ||
@@ -63,2 +64,8 @@ export interface CrawlFileConfig extends CrawlBaseConfigV1 { | ||
export type CrawlResCommonArrV1<T> = CrawlResCommonV1<T>[] | ||
export interface CrawlPage { | ||
httpResponse: HTTPResponse | null | ||
browser: Browser | ||
page: Page | ||
jsdom: JSDOM | ||
} | ||
export interface FileInfo { | ||
@@ -70,8 +77,2 @@ fileName: string | ||
} | ||
export interface CrawlPage { | ||
httpResponse: HTTPResponse | null | ||
browser: Browser | ||
page: Page | ||
jsdom: JSDOM | ||
} | ||
export {} |
@@ -26,4 +26,10 @@ /// <reference types="node" /> | ||
| 'UNLINK' | ||
export interface RequestConfigObject { | ||
export interface RequestConfigObjectV1 { | ||
url: string | ||
headers?: AnyObject | ||
timeout?: number | ||
proxy?: string | ||
} | ||
export interface RequestConfigObjectV2 { | ||
url: string | ||
method?: Method | ||
@@ -30,0 +36,0 @@ headers?: AnyObject |
{ | ||
"name": "x-crawl", | ||
"version": "3.2.7", | ||
"version": "3.2.8", | ||
"author": "coderHXL", | ||
@@ -5,0 +5,0 @@ "description": "x-crawl is a flexible nodejs crawler library.", |
@@ -12,3 +12,3 @@ # x-crawl [![npm](https://img.shields.io/npm/v/x-crawl.svg)](https://www.npmjs.com/package/x-crawl) [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/coder-hxl/x-crawl/blob/main/LICENSE) | ||
- Support asynchronous/synchronous way to crawl data. | ||
- The writing method is very flexible and supports multiple ways to write request configuration and obtain crawling results. | ||
- Flexible usage: supports a variety of ways to write request configurations and obtain crawl results. | ||
- Flexible crawling interval, up to you to use/avoid high concurrent crawling. | ||
@@ -18,3 +18,3 @@ - With simple configuration, operations such as crawling pages, batch network requests, and batch download of file resources can be performed. | ||
- The built-in puppeteer crawls the page, and uses the jsdom library to analyze the content of the page, and also supports self-analysis. | ||
- Capture and record the success and failure of batch crawling, and highlight the reminders. | ||
- Capture the success and failure of each crawl and highlight the reminders. | ||
- Written in TypeScript, has types, provides generics. | ||
@@ -70,17 +70,16 @@ | ||
* [Method](#Method) | ||
* [RequestConfigObject](#RequestConfigObject) | ||
* [RequestConfigObjectV1](#RequestConfigObjectV1) | ||
* [RequestConfigObjectV2](#RequestConfigObjectV2) | ||
* [RequestConfig](#RequestConfig) | ||
* [MergeRequestConfigObject](#MergeRequestConfigObject) | ||
* [IntervalTime](#IntervalTime) | ||
* [XCrawlBaseConfig](#XCrawlBaseConfig) | ||
* [CrawlPageConfig](#CrawlPageConfig ) | ||
* [CrawlBaseConfigV1](#CrawlBaseConfigV1) | ||
* [CrawlPageConfig](#CrawlPageConfig ) | ||
* [CrawlDataConfig](#CrawlDataConfig) | ||
* [CrawlFileConfig](#CrawlFileConfig) | ||
* [StartPollingConfig](#StartPollingConfig) | ||
* [XCrawlInstance](#XCrawlInstance) | ||
* [CrawlResCommonV1](#CrawlResCommonV1) | ||
* [CrawlResCommonArrV1](#CrawlResCommonArrV1) | ||
* [CrawlPage](#CrawlPage-2) | ||
* [FileInfo](#FileInfo) | ||
* [CrawlPage](#CrawlPage) | ||
- [More](#More) | ||
@@ -688,7 +687,18 @@ | ||
### RequestConfigObject | ||
### RequestConfigObjectV1 | ||
```ts | ||
interface RequestConfigObject { | ||
interface RequestConfigObjectV1 { | ||
url: string | ||
headers?: AnyObject | ||
timeout?: number | ||
proxy?: string | ||
} | ||
``` | ||
### RequestConfigObjectV2 | ||
```ts | ||
interface RequestConfigObjectV2 { | ||
url: string | ||
method?: Method | ||
@@ -706,15 +716,5 @@ headers?: AnyObject | ||
```ts | ||
type RequestConfig = string | RequestConfigObject | ||
type RequestConfig = string | RequestConfigObjectV2 | ||
``` | ||
### MergeRequestConfigObject | ||
```ts | ||
interface MergeRequestConfigObject { | ||
url: string | ||
timeout?: number | ||
proxy?: string | ||
} | ||
``` | ||
### IntervalTime | ||
@@ -741,2 +741,8 @@ | ||
### CrawlPageConfig | ||
```ts | ||
type CrawlPageConfig = string | RequestConfigObjectV1 | ||
``` | ||
### CrawlBaseConfigV1 | ||
@@ -751,8 +757,2 @@ | ||
### CrawlPageConfig | ||
```ts | ||
type CrawlPageConfig = string | MergeRequestConfigObject | ||
``` | ||
### CrawlDataConfig | ||
@@ -815,3 +815,3 @@ | ||
```ts | ||
interface CrawlCommon<T> { | ||
interface CrawlResCommonV1<T> { | ||
id: number | ||
@@ -830,2 +830,13 @@ statusCode: number | undefined | ||
### CrawlPage | ||
```ts | ||
interface CrawlPage { | ||
httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library | ||
browser: Browser // The Browser type of the puppeteer library | ||
page: Page // The Page type of the puppeteer library | ||
jsdom: JSDOM // jsdom type of the JSDOM library | ||
} | ||
``` | ||
### FileInfo | ||
@@ -842,15 +853,4 @@ | ||
### CrawlPage | ||
```ts | ||
interface CrawlPage { | ||
httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library | ||
browser // The type of Browser in the puppeteer library | ||
page: Page // The type of Page in the puppeteer library | ||
jsdom: JSDOM // The type of JSDOM in the jsdom library | ||
} | ||
``` | ||
## More | ||
If you have any **questions** or **needs**, please submit them as **issues** at https://github.com/coder-hxl/x-crawl/issues . |
Sorry, the diff of this file is not supported yet
112483
1733