😎 @web-master/node-web-crawler 😎
Crawl the web as easily as possible
Description
It crawls the target page, collects links, and scrapes data from each page :)
Installation
$ npm install --save @web-master/node-web-crawler
Usage
Basic
import crawl from '@web-master/node-web-crawler';
const data = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
fetch: () => ({
title: '.title > a',
}),
});
console.log(data);
Waitable (by using puppeteer)
import crawl from '@web-master/node-web-crawler';
const data = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
waitFor: 3 * 1000,
fetch: () => ({
title: '.title > a',
}),
});
console.log(data);
TypeScript Support
import crawl from '@web-master/node-web-crawler';
interface HackerNewsPage {
title: string;
}
const pages: HackerNewsPage[] = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
fetch: () => ({
title: '.title > a',
}),
});
console.log(pages);
Related