@@ -10,2 +10,3 @@ const Bottleneck = require('bottleneck')
		const defaults = {
		parseDOM: true,
		json: false,
		@@ -35,2 +36,4 @@ maxConcurrent: 5,
		async function getPage (page, emitter, opts) {
		emitter.emit('beforePageLoad', page)

		if (opts.json) {
		@@ -47,4 +50,4 @@ try {
		const body = (await got(page.url)).body
		const $ = cheerio.load(body)
		const pageCopy = Object.assign({}, page, { body, $ })
		const pageCopy = Object.assign({}, page, { body })
		if (opts.parseDOM) pageCopy.$ = cheerio.load(body)
		emitter.emit('page', pageCopy)
		@@ -51,0 +54,0 @@ } catch (err) {

package.json

		{
		"name": "domwaiter",
		"description": "A well-behaved URL scraper that brings you delicious DOM objects",
		"version": "1.1.0",
		"version": "1.4.0",
		"repository": "https://github.com/zeke/domwaiter",
		@@ -14,3 +14,3 @@ "main": "index.js",
		"cheerio": "^1.0.0-rc.3",
		"got": "^10.6.0"
		"got": "^11.8.5"
		},
		@@ -27,3 +27,8 @@ "devDependencies": {
		}
		},
		"release": {
		"branches": [
		"main"
		]
		}
		}

readme.md

		@@ -13,3 +13,3 @@ # domwaiter
		- Rate limiting powered by [bottleneck](https://ghub.io/bottleneck)
		- DOM parsing powered by [cheerio](https://ghub.io/cheerio)
		- DOM parsing powered by [cheerio](https://ghub.io/cheerio) (optional; can be disabled)
		- HTTP requests powered by [got](https://ghub.io/got)
		@@ -54,3 +54,4 @@
		- `opts` Object (optional)
		- `json` Boolean - Set to `true` if you're fetching JSON instead of HTML. If `true`, a `json` property will be present on each emitted `page` object (and the `$` and `body` properties will NOT be present).
		- `parseDOM` Boolean - Defaults to `true`. Set to `false` if you don't need the parsed `page.$` DOM object. Disabling DOM parsing will boost performance.
		- `json` Boolean - Defaults to `false`. Set to `true` if you're fetching JSON instead of HTML. If `true`, a `json` property will be present on each emitted `page` object (and the `$` and `body` properties will NOT be present).
		- `maxConcurrent` Number - How many jobs can be executing at the same time. Defaults to `5`. This option is passed to the underlying [bottleneck](https://ghub.io/bottleneck#docs) instance.
		@@ -63,3 +64,4 @@ - `minTime`: Number - How long to wait after launching a job before launching another one. Defaults to `500` (milliseconds). This option is passed to the underlying [bottleneck](https://ghub.io/bottleneck#docs) instance.

		- `page` - Emitted as each page has been requested and parsed. Returns an object which is a shallow clone of the original `page` object you provided, but with two added properties:
		- `beforePageLoad` - Emitted with `page` object for any optional prehandling you want to do, e.g. setting up a request timer.
		- `page` - Emitted after the page has been requested and its response has been parsed. Listeners receive an object which is a shallow clone of the original `page` object you provided, but with two added properties (`$` is omitted when `parseDOM` is `false`):
		- `body`: the raw HTTP response body text
		@@ -66,0 +68,0 @@ - `$`: The body parsed into a jQuery-like [cheerio](https://ghub.io/cheerio) DOM object.

test.js

		@@ -12,3 +12,3 @@ const domwaiter = require('.')

		test('emits events', (done) => {
		test('emits `page` and `done` events', (done) => {
		const mock = nock('https://example.com')
		@@ -47,2 +47,21 @@ .get('/foo')

		// Verifies that `beforePageLoad` is emitted with the page object.
		// The payloads are recorded as they arrive and asserted once `done` fires,
		// so the mocked request has completed before the test finishes and every
		// `expect` uses a real matcher (a bare `expect(value)` asserts nothing).
		test('emits a `beforePageLoad` event with page object', (done) => {
		  const mock = nock('https://example.com')
		    .get('/foo')
		    .reply(200)

		  const pages = [
		    { url: 'https://example.com/foo' }
		  ]

		  // Every payload received by the `beforePageLoad` listener, in order.
		  const beforeLoadPages = []

		  const waiter = domwaiter(pages, { minTime: 10 })

		  waiter
		    .on('beforePageLoad', (page) => {
		      beforeLoadPages.push(page)
		    })
		    .on('done', () => {
		      // Exactly one page was queued, so exactly one event is expected.
		      expect(beforeLoadPages.length).toBe(1)
		      expect(beforeLoadPages[0].url).toBe('https://example.com/foo')
		      // The interceptor must have been consumed by the time `done` fires.
		      expect(mock.isDone()).toBe(true)
		      done()
		    })
		})

		test('emits errors for failed requests', (done) => {
		@@ -78,2 +97,31 @@ const mock = nock('https://example.com')

		// With `parseDOM: false`, each emitted page should still carry the raw
		// response `body`, but no cheerio `$` object.
		test('allows `parseDOM` option to skip cheerio parsing', (done) => {
		  const mock = nock('https://example.com')
		    .get('/foo')
		    .reply(200, '<html><title>Hello, foo</title></html>')

		  // Pages collected from the `page` event, inspected once `done` fires.
		  const emittedPages = []

		  domwaiter([{ url: 'https://example.com/foo' }], { minTime: 10, parseDOM: false })
		    .on('error', (err) => {
		      console.error('domwaiter error')
		      console.error(err)
		    })
		    .on('page', (page) => {
		      emittedPages.push(page)
		    })
		    .on('done', () => {
		      expect(mock.isDone()).toBe(true)
		      expect(emittedPages).toHaveLength(1)

		      const [onlyPage] = emittedPages
		      expect(onlyPage.body).toContain('Hello, foo')
		      expect(onlyPage.$).toBeUndefined()
		      done()
		    })
		})

		test('supports json responses', (done) => {
		@@ -80,0 +128,0 @@ const mock = nock('https://example.com')

.github/workflows/release.yml

Sorry, the diff of this file is not supported yet

domwaiter - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Dependency changes