
@web-master/node-web-crawler

Dependencies: 74 · Maintainers: 1 · Versions: 17
Comparing version 0.4.0 to 0.5.0
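In short: 0.5.0 replaces the single `CrawlerConfig` with a `CrawlConfig | CrawlConfigPuppeteer` union, renames the `crawl` field of `CrawlLinkOptions` to `iterator`, and adds a puppeteer-backed `waitFor` option so client-rendered pages (e.g. single page apps) can finish loading before they are scraped.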

dist/interfaces/index.d.ts — 18 changed lines (diff not rendered by the viewer)

dist/index.d.ts

```diff
@@ -1,16 +1,4 @@
-import { ScraperConfig, ScrapeOptions, ScrapeOptionElement, ScrapeOptionList, ScrapeResult } from '@web-master/node-web-scraper';
-interface CrawlLinkOptions {
-    url: string;
-    crawl: string | {
-        selector: string;
-        convert?: (link: string) => string;
-    };
-    fetch?: ScrapeOptions;
-}
-interface CrawlerConfig {
-    target: string[] | CrawlLinkOptions;
-    fetch: (data?: any, index?: number) => ScrapeOptions;
-}
-declare function crawl<T>(config: CrawlerConfig): Promise<T[]>;
-export { crawl, CrawlerConfig, CrawlLinkOptions, ScraperConfig, ScrapeOptions, ScrapeOptionElement, ScrapeOptionList, ScrapeResult, };
+import { CrawlConfig, CrawlConfigPuppeteer, CrawlLinkOptions } from './interfaces';
+declare function crawl<T>(config: CrawlConfig | CrawlConfigPuppeteer): Promise<T[]>;
+export { crawl, CrawlConfig, CrawlConfigPuppeteer, CrawlLinkOptions, };
 export default crawl;
```
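The 18-line diff of `dist/interfaces/index.d.ts` is not rendered above, so the exact declarations are not visible here. A hedged reconstruction from the compiled output and the README (field names beyond `iterator` and `waitFor` are inferred, not confirmed):

```ts
// Sketch only: reconstructed from dist/index.js and the README examples,
// not the actual contents of dist/interfaces/index.d.ts.
import { ScrapeOptions } from '@web-master/node-web-scraper';

export interface CrawlLinkOptions {
  url: string;
  // renamed from `crawl` in 0.4.0
  iterator: string | {
    selector: string;
    convert?: (link: string) => string;
  };
  fetch?: ScrapeOptions;
}

export interface CrawlConfig {
  target: string[] | CrawlLinkOptions;
  fetch: (data?: any, index?: number) => ScrapeOptions;
}

// Same shape plus the puppeteer-backed wait, in milliseconds.
export interface CrawlConfigPuppeteer extends CrawlConfig {
  waitFor: number;
}
```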

dist/index.js

```diff
@@ -34,5 +34,5 @@ "use strict";
     }
-    const { url, crawl, fetch } = possibleUrls;
+    const { url, iterator, fetch } = possibleUrls;
     let holder;
-    if (typeof crawl === 'string') {
+    if (typeof iterator === 'string') {
         holder = await node_web_scraper_1.scrape({
@@ -42,3 +42,3 @@ target: url,
             urls: {
-                listItem: crawl,
+                listItem: iterator,
                 data: {
@@ -57,3 +57,3 @@ url: { attr: 'href' },
     else {
-        const { selector, convert } = crawl;
+        const { selector, convert } = iterator;
         holder = await node_web_scraper_1.scrape({
@@ -78,15 +78,27 @@ target: url,
 async function crawl(config) {
-    const { target, fetch } = config;
-    const [urls, data] = await resolve(target);
-    return crawlAll(urls, fetch, data);
+    if (node_web_scraper_1.isScrapeConfigDefault(config)) {
+        const { target, fetch } = config;
+        const [urls, data] = await resolve(target);
+        return crawlAll(urls, fetch, data);
+    }
+    if (node_web_scraper_1.isScrapeConfigPuppeteer(config)) {
+        const { target, fetch, waitFor } = config;
+        const [urls, data] = await resolve(target);
+        return crawlAll(urls, fetch, data, waitFor);
+    }
+    throw new Error('InvalidProgramException');
 }
 exports.crawl = crawl;
-async function crawlAll(urls, fetch, data) {
+async function crawlAll(urls, fetch, data, waitFor) {
     const results = [];
     for (let i = 0; i < urls.length; i++) {
-        results.push(await node_web_scraper_1.scrape({
+        let config = {
             target: urls[i],
             fetch: fetch(data, i),
-        }));
+        };
+        if (waitFor) {
+            config = Object.assign(config, { waitFor });
+        }
+        results.push(await node_web_scraper_1.scrape(config));
     }
     return results;
```
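`isScrapeConfigDefault` and `isScrapeConfigPuppeteer` are imported from `@web-master/node-web-scraper`, and their implementations are not part of this diff. A minimal sketch of how such guards could discriminate the union, assuming they key off the presence of a numeric `waitFor`:

```ts
// Hypothetical re-implementation of the guards; the real ones live in
// @web-master/node-web-scraper and may check differently.
type CrawlConfig = { target: unknown; fetch: unknown };
type CrawlConfigPuppeteer = CrawlConfig & { waitFor: number };

function isScrapeConfigPuppeteer(
  config: CrawlConfig | CrawlConfigPuppeteer,
): config is CrawlConfigPuppeteer {
  // Assumption: the puppeteer variant is the one carrying `waitFor`.
  return typeof (config as CrawlConfigPuppeteer).waitFor === 'number';
}

function isScrapeConfigDefault(
  config: CrawlConfig | CrawlConfigPuppeteer,
): config is CrawlConfig {
  return !isScrapeConfigPuppeteer(config);
}
```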

index.ts

```diff
 import {
   crawl,
-  CrawlerConfig,
+  CrawlConfig,
+  CrawlConfigPuppeteer,
   CrawlLinkOptions,
   ScraperConfig,
   ScrapeOptions,
   ScrapeOptionElement,
   ScrapeOptionList,
   ScrapeResult,
 } from './dist';
 export {
   crawl,
-  CrawlerConfig,
+  CrawlConfig,
+  CrawlConfigPuppeteer,
   CrawlLinkOptions,
   ScraperConfig,
   ScrapeOptions,
   ScrapeOptionElement,
   ScrapeOptionList,
   ScrapeResult,
 };
 export default crawl;
```
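For consumers of the typings, the visible breaking change is the `CrawlerConfig` → `CrawlConfig` rename (plus the new `CrawlConfigPuppeteer`). A rough migration sketch; the target URLs here are hypothetical:

```ts
// Before (0.4.0):
// import { crawl, CrawlerConfig } from '@web-master/node-web-crawler';

// After (0.5.0):
import { crawl, CrawlConfig } from '@web-master/node-web-crawler';

// Hypothetical config; the target/fetch shape itself is unchanged.
const config: CrawlConfig = {
  target: ['https://example.com/a', 'https://example.com/b'],
  fetch: () => ({ title: 'h1' }),
};

crawl(config).then(console.log);
```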
package.json

```diff
@@ -1,4 +1,4 @@
 {
   "name": "@web-master/node-web-crawler",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "description": "Crawl web as easy as possible",
@@ -8,3 +8,3 @@ "repository": "git@github.com:saltyshiomix/web-master.git",
   "license": "MIT",
-  "homepage": "https://github.com/saltyshiomix/web-master/tree/master/packages/node-web-crawler",
+  "homepage": "https://github.com/saltyshiomix/web-master/tree/master/packages/node-web-crawler/README.md",
   "keywords": [
@@ -37,3 +37,3 @@ "crawler",
   "dependencies": {
-    "@web-master/node-web-scraper": "^0.4.0"
+    "@web-master/node-web-scraper": "^0.5.0"
   },
@@ -56,3 +56,3 @@ "devDependencies": {
   },
-  "gitHead": "8b5b10b85e10637a2e80a89a3a214003c923833b"
+  "gitHead": "3330c23e8934e241f32c906c9b94d26a12500b18"
 }
```

README.md

````diff
@@ -25,2 +25,4 @@ <p align="center">😎 @web-master/node-web-crawler 😎</p>
 ### Basic
 ```js
@@ -53,2 +55,32 @@ import crawl from '@web-master/node-web-crawler';
+
+### Waitable (by using `puppeteer`)
+```js
+import crawl from '@web-master/node-web-crawler';
+
+// crawl data on each link
+const data = await crawl({
+  target: {
+    url: 'https://news.ycombinator.com',
+    iterator: {
+      selector: 'span.age > a',
+      convert: (path) => `https://news.ycombinator.com/${path}`,
+    },
+  },
+  waitFor: 3 * 1000, // wait for the content loaded! (like single page apps)
+  fetch: () => ({
+    title: '.title',
+  }),
+});
+
+console.log(data);
+// [
+//   { title: 'An easiest crawling and scraping module for NestJS' },
+//   { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
+//   ...
+//   ...
+//   { title: '[Experimental] React SSR as a view template engine' }
+// ]
+```
+
 ## TypeScript Support
````
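Since `crawl` is generic (`crawl<T>(config): Promise<T[]>` per the typings above), the new waitable example can also be written with an explicit result type. A sketch; the `Story` interface is ours, not the package's:

```ts
import crawl from '@web-master/node-web-crawler';

interface Story {
  title: string;
}

(async () => {
  // crawl<T> resolves to T[]; waitFor gives client-rendered pages time to load.
  const data = await crawl<Story>({
    target: {
      url: 'https://news.ycombinator.com',
      iterator: {
        selector: 'span.age > a',
        convert: (path) => `https://news.ycombinator.com/${path}`,
      },
    },
    waitFor: 3 * 1000,
    fetch: () => ({
      title: '.title',
    }),
  });
  console.log(data); // Story[]
})();
```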

