🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
Book a DemoInstallSign in
Socket

domwaiter

Package Overview
Dependencies
Maintainers
1
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

domwaiter - npm Package Compare versions

Comparing version

to
1.4.0

LICENSE

7

index.js

@@ -10,2 +10,3 @@ const Bottleneck = require('bottleneck')

const defaults = {
parseDOM: true,
json: false,

@@ -35,2 +36,4 @@ maxConcurrent: 5,

async function getPage (page, emitter, opts) {
emitter.emit('beforePageLoad', page)
if (opts.json) {

@@ -47,4 +50,4 @@ try {

const body = (await got(page.url)).body
const $ = cheerio.load(body)
const pageCopy = Object.assign({}, page, { body, $ })
const pageCopy = Object.assign({}, page, { body })
if (opts.parseDOM) pageCopy.$ = cheerio.load(body)
emitter.emit('page', pageCopy)

@@ -51,0 +54,0 @@ } catch (err) {

{
"name": "domwaiter",
"description": "A well-behaved URL scraper that brings you delicious DOM objects",
"version": "1.1.0",
"version": "1.4.0",
"repository": "https://github.com/zeke/domwaiter",

@@ -14,3 +14,3 @@ "main": "index.js",

"cheerio": "^1.0.0-rc.3",
"got": "^10.6.0"
"got": "^11.8.5"
},

@@ -27,3 +27,8 @@ "devDependencies": {

}
},
"release": {
"branches": [
"main"
]
}
}

@@ -13,3 +13,3 @@ # domwaiter

- Rate limiting powered by [bottleneck](https://ghub.io/bottleneck)
- DOM parsing powered by [cheerio](https://ghub.io/cheerio)
- DOM parsing powered by [cheerio](https://ghub.io/cheerio) (optional; can be disabled)
- HTTP requests powered by [got](https://ghub.io/got)

@@ -54,3 +54,4 @@

- `opts` Object (optional)
- `json` Boolean - Set to `true` if you're fetching JSON instead of HTML. If `true`, a `json` property will be present on each emitted `page` object (and the `$` and `body` properties will NOT be present).
- `parseDOM` Boolean - Defaults to `true`. Set to `false` if you don't need the parsed `page.$` DOM object. Disabling DOM parsing will boost performance.
- `json` Boolean - Defaults to `false`. Set to `true` if you're fetching JSON instead of HTML. If `true`, a `json` property will be present on each emitted `page` object (and the `$` and `body` properties will NOT be present).
- `maxConcurrent` Number - How many jobs can be executing at the same time. Defaults to `5`. This option is passed to the underlying [bottleneck](https://ghub.io/bottleneck#docs) instance.

@@ -63,3 +64,4 @@ - `minTime`: Number - How long to wait after launching a job before launching another one. Defaults to `500` (milliseconds). This option is passed to the underlying [bottleneck](https://ghub.io/bottleneck#docs) instance.

- `page` - Emitted as each page has been requested and parsed. Returns an object which is a shallow clone of the original `page` object you provided, but with two added properties:
- `beforePageLoad` - Emitted with `page` object for any optional prehandling you want to do, e.g. setting up a request timer.
- `page` - Emitted after the page has been requested and the response is parsed. The listener receives a shallow clone of the original `page` object you provided, with the following added properties:
- `body`: the raw HTTP response body text

@@ -66,0 +68,0 @@ - `$`: The body parsed into a jQuery-like [cheerio](https://ghub.io/cheerio) DOM object.

@@ -12,3 +12,3 @@ const domwaiter = require('.')

test('emits events', (done) => {
test('emits `page` and `done` events', (done) => {
const mock = nock('https://example.com')

@@ -47,2 +47,21 @@ .get('/foo')

// Verifies that `beforePageLoad` is emitted with the caller's page object.
// The original expectations had no matcher (`expect(x)` alone asserts
// nothing), so the test could never fail on a wrong payload.
test('emits a `beforePageLoad` event with page object', (done) => {
  const mock = nock('https://example.com')
    .get('/foo')
    .reply(200)
  const pages = [
    { url: 'https://example.com/foo' }
  ]
  const waiter = domwaiter(pages, { minTime: 10 })
  waiter
    .on('beforePageLoad', (page) => {
      // NOTE(review): the event is emitted before the request is issued, so
      // the nock interceptor should not have been consumed yet — confirm.
      expect(mock.isDone()).toBe(false)
      expect(page.url).toBe('https://example.com/foo')
      done()
    })
})
test('emits errors for failed requests', (done) => {

@@ -78,2 +97,31 @@ const mock = nock('https://example.com')

// With `parseDOM: false`, each emitted page should carry the raw `body`
// but no cheerio `$` object.
test('allows `parseDOM` option to skip cheerio parsing', (done) => {
  const html = '<html><title>Hello, foo</title></html>'
  const mock = nock('https://example.com').get('/foo').reply(200, html)
  const options = { minTime: 10, parseDOM: false }
  const emittedPages = []

  const collectPage = (page) => {
    emittedPages.push(page)
  }

  // All assertions run once the waiter reports that every page is finished.
  const verifyResults = () => {
    expect(mock.isDone()).toBe(true)
    expect(emittedPages.length).toBe(1)
    const [firstPage] = emittedPages
    expect(firstPage.body).toContain('Hello, foo')
    expect(firstPage.$).toBe(undefined)
    done()
  }

  // Errors are only logged here; they do not fail the test directly.
  const logError = (err) => {
    console.error('domwaiter error')
    console.error(err)
  }

  domwaiter([{ url: 'https://example.com/foo' }], options)
    .on('page', collectPage)
    .on('done', verifyResults)
    .on('error', logError)
})
test('supports json responses', (done) => {

@@ -80,0 +128,0 @@ const mock = nock('https://example.com')

Sorry, the diff of this file is not supported yet