nodejs-web-scraper
Advanced tools
Comparing version 5.1.0 to 6.0.0
const Operation = require('./Operation') | ||
var cheerio = require('cheerio'); | ||
var cheerioAdv = require('cheerio-advanced-selectors') | ||
cheerio = cheerioAdv.wrap(cheerio) | ||
// var cheerioAdv = require('cheerio-advanced-selectors') | ||
// cheerio = cheerioAdv.wrap(cheerio) | ||
// const fs = require('fs') | ||
@@ -6,0 +6,0 @@ const { createElementList, getNodeContent } = require('../utils/cheerio') |
const HttpOperation = require('./HttpOperation'); | ||
var cheerio = require('cheerio'); | ||
var cheerioAdv = require('cheerio-advanced-selectors'); | ||
cheerio = cheerioAdv.wrap(cheerio); | ||
var cheerio = require('cheerio') | ||
// var cheerioAdv = require('cheerio-advanced-selectors'); | ||
// cheerio = cheerioAdv.wrap(cheerio); | ||
const fs = require('fs'); | ||
@@ -6,0 +6,0 @@ const { promisify } = require('util'); |
const Operation = require('./Operation'); | ||
var cheerio = require('cheerio'); | ||
var cheerioAdv = require('cheerio-advanced-selectors'); | ||
cheerio = cheerioAdv.wrap(cheerio); | ||
var cheerio = require('cheerio') | ||
// var cheerioAdv = require('cheerio-advanced-selectors'); | ||
// cheerio = cheerioAdv.wrap(cheerio); | ||
const { createDelay } = require('../utils/delay'); | ||
@@ -6,0 +6,0 @@ const rpur = require('../utils/rpur') |
@@ -5,5 +5,5 @@ const HttpOperation = require('./HttpOperation'); | ||
// const Operation = require('./Operation')//For jsdoc | ||
var cheerio = require('cheerio'); | ||
var cheerioAdv = require('cheerio-advanced-selectors'); | ||
cheerio = cheerioAdv.wrap(cheerio); | ||
var cheerio = require('cheerio') | ||
// var cheerioAdv = require('cheerio-advanced-selectors'); | ||
// cheerio = cheerioAdv.wrap(cheerio); | ||
const { getBaseUrlFromBaseTag, createElementList } = require('../utils/cheerio'); | ||
@@ -10,0 +10,0 @@ const { getAbsoluteUrl } = require('../utils/url'); |
{ | ||
"name": "nodejs-web-scraper", | ||
"version": "5.1.0", | ||
"version": "6.0.0", | ||
"description": "A web scraper for NodeJs", | ||
@@ -23,4 +23,3 @@ "main": "index.js", | ||
"dependencies": { | ||
"cheerio": "^1.0.0-rc.2", | ||
"cheerio-advanced-selectors": "^2.0.1", | ||
"cheerio": "^1.0.0-rc.9", | ||
"https-proxy-agent": "^5.0.0", | ||
@@ -27,0 +26,0 @@ "mime-types": "^2.1.20", |
nodejs-web-scraper is a simple tool for scraping/crawling server-side rendered pages. | ||
It supports features like recursive scraping (pages that "open" other pages), file download and handling, automatic retries of failed requests, concurrency limitation, pagination, request delay, etc. Tested on Node 10 and 12 (Windows 7, Linux Mint). | ||
The API uses cheerio-advanced-selectors. [Click here for reference](https://www.npmjs.com/package/cheerio-advanced-selectors) | ||
The API uses Cheerio selectors. [Click here for reference](https://www.npmjs.com/package/cheerio) | ||
@@ -65,3 +65,3 @@ For any questions or suggestions, please open a Github issue or contact me via https://nodejs-web-scraper.ibrod83.com/about | ||
//Any valid cheerio-advanced-selectors selector can be passed. For further reference: https://cheerio.js.org/ | ||
//Any valid cheerio selector can be passed. For further reference: https://cheerio.js.org/ | ||
const category = new OpenLinks('.category',{name:'category'});//Opens each category page. | ||
@@ -406,3 +406,3 @@ | ||
In some cases, using the cheerio-advanced-selectors isn't enough to properly filter the DOM nodes. This is where the "condition" hook comes in. Both OpenLinks and DownloadContent can register a function with this hook, allowing you to decide if this DOM node should be scraped, by returning true or false. | ||
In some cases, using the cheerio selectors isn't enough to properly filter the DOM nodes. This is where the "condition" hook comes in. Both OpenLinks and DownloadContent can register a function with this hook, allowing you to decide if this DOM node should be scraped, by returning true or false. | ||
@@ -409,0 +409,0 @@ ```javascript |
6
115685
- Removed cheerio-advanced-selectors@^2.0.1
- Removed cheerio-advanced-selectors@2.0.1 (transitive)
Updated cheerio@^1.0.0-rc.9