Comparing version 1.3.2 to 2.0.0
@@ -24,3 +24,3 @@ 'use strict'; | ||
(0, _async.waterfall)([function (next) { | ||
_async2.default.waterfall([function (next) { | ||
return handleSelectorString((0, _lodash.merge)(args, { selector: sourceSelector }), next); | ||
@@ -42,3 +42,3 @@ }, function (result, next) { | ||
(0, _async.mapLimit)(result, concurrency, function (source, mapNext) { | ||
_async2.default.mapLimit(result, concurrency, function (source, mapNext) { | ||
var crawlArgs = (0, _lodash.merge)(args, { | ||
@@ -74,2 +74,4 @@ selector: targetSelector, | ||
var _async2 = _interopRequireDefault(_async); | ||
var _debug = require('debug'); | ||
@@ -124,10 +126,11 @@ | ||
done(new Error('no element found')); | ||
done(); | ||
} | ||
function handleSelectorObject(args, done) { | ||
var selector = args.selector; | ||
var concurrency = args.concurrency, | ||
selector = args.selector; | ||
(0, _async.mapValuesLimit)(selector, 1, function (value, key, next) { | ||
_async2.default.mapValuesLimit(selector, concurrency, function (value, key, next) { | ||
return handleSelector((0, _lodash.merge)(args, { selector: value }), next); | ||
@@ -156,3 +159,5 @@ }, done); | ||
var engine = args.engine, | ||
source = args.source; | ||
retry = args.retry, | ||
url = args.url, | ||
waitForSelector = args.waitForSelector; | ||
@@ -163,11 +168,11 @@ var context = (0, _lodash.merge)({}, args.context); | ||
if ((0, _utils.isUrl)(source)) { | ||
context.url = source; | ||
context.baseUrl = (0, _utils.getBaseUrl)(source); | ||
if ((0, _utils.isUrl)(url)) { | ||
context.url = url; | ||
context.baseUrl = (0, _utils.getBaseUrl)(url); | ||
debug('crawl absolute link: ' + source); | ||
debug('crawl absolute link: ' + url); | ||
link = source; | ||
} else if ((0, _utils.isRelativeUrl)(source)) { | ||
link = _url2.default.resolve(context.baseUrl, source); | ||
link = url; | ||
} else if ((0, _utils.isRelativeUrl)(url)) { | ||
link = _url2.default.resolve(context.baseUrl, url); | ||
@@ -179,8 +184,32 @@ context.url = link; | ||
engine.retrieveContent(link).catch(done).then(function (content) { | ||
handleSelector((0, _lodash.merge)(args, { | ||
content: content, | ||
context: context | ||
}), done); | ||
var attempt = 0; | ||
var interval = retry.interval, | ||
times = retry.times; | ||
_async2.default.retry({ | ||
interval: interval, | ||
times: times | ||
}, function (callback) { | ||
debug('retrieveContent attempt ' + attempt + ' => ' + link); | ||
attempt += 1; | ||
engine.retrieveContent(link, waitForSelector).then(function (content) { | ||
if (!(0, _lodash.isString)(content) || content.length < 100) { | ||
callback(new Error('invalid content')); | ||
return; | ||
} | ||
debug('content', content.length); | ||
handleSelector((0, _lodash.merge)(args, { | ||
content: content, | ||
context: context | ||
}), callback); | ||
}, callback); | ||
}, function (err, res) { | ||
return done(null, res); | ||
}); | ||
} |
@@ -42,3 +42,3 @@ 'use strict'; | ||
var _ref = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee(url) { | ||
var selector = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'body'; | ||
var waitForSelector = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'body'; | ||
var page, content; | ||
@@ -49,35 +49,37 @@ return regeneratorRuntime.wrap(function _callee$(_context) { | ||
case 0: | ||
_context.next = 2; | ||
_context.prev = 0; | ||
_context.next = 3; | ||
return this.browser.newPage(); | ||
case 2: | ||
case 3: | ||
page = _context.sent; | ||
_context.next = 5; | ||
_context.next = 6; | ||
return page.setViewport(this.viewportOptions); | ||
case 5: | ||
_context.next = 7; | ||
case 6: | ||
_context.next = 8; | ||
return page.goto(url, this.gotoOptions); | ||
case 7: | ||
_context.next = 9; | ||
return page.waitFor(selector); | ||
case 8: | ||
_context.next = 10; | ||
return page.waitForSelector(waitForSelector); | ||
case 9: | ||
_context.next = 11; | ||
return page.evaluate(function (sel) { | ||
var element = document.querySelector(sel); // eslint-disable-line no-undef | ||
case 10: | ||
_context.next = 12; | ||
return page.content(); | ||
return element ? element.innerHTML : null; | ||
}, selector); | ||
case 11: | ||
case 12: | ||
content = _context.sent; | ||
_context.next = 14; | ||
_context.next = 15; | ||
return page.close(); | ||
case 14: | ||
case 15: | ||
return _context.abrupt('return', content); | ||
case 15: | ||
case 18: | ||
_context.prev = 18; | ||
_context.t0 = _context['catch'](0); | ||
return _context.abrupt('return', _context.t0); | ||
case 21: | ||
case 'end': | ||
@@ -87,3 +89,3 @@ return _context.stop(); | ||
} | ||
}, _callee, this); | ||
}, _callee, this, [[0, 18]]); | ||
})); | ||
@@ -105,10 +107,16 @@ | ||
case 0: | ||
_context2.next = 2; | ||
_context2.prev = 0; | ||
_context2.next = 3; | ||
return _puppeteer2.default.launch(this.launchOptions); | ||
case 2: | ||
case 3: | ||
this.browser = _context2.sent; | ||
return _context2.abrupt('return', this.browser); | ||
case 4: | ||
case 7: | ||
_context2.prev = 7; | ||
_context2.t0 = _context2['catch'](0); | ||
return _context2.abrupt('return', _context2.t0); | ||
case 10: | ||
case 'end': | ||
@@ -118,3 +126,3 @@ return _context2.stop(); | ||
} | ||
}, _callee2, this); | ||
}, _callee2, this, [[0, 7]]); | ||
})); | ||
@@ -121,0 +129,0 @@ |
284
lib/index.js
@@ -44,2 +44,3 @@ 'use strict'; | ||
var concurrency = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : _os2.default.cpus().length; | ||
var retry = arguments[1]; | ||
@@ -56,2 +57,6 @@ _classCallCheck(this, OhScrap); | ||
}; | ||
_this.retry = retry || { | ||
interval: 1500, | ||
times: 5 | ||
}; | ||
return _this; | ||
@@ -131,9 +136,7 @@ } | ||
value: function () { | ||
var _ref3 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee5(getSource, selector) { | ||
var _ref3 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee5(args, done) { | ||
var _this2 = this; | ||
var keepGoing = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : function () { | ||
return false; | ||
}; | ||
var count; | ||
var getUrl, selector, _args$keepGoing, keepGoing, waitForSelector, count; | ||
return regeneratorRuntime.wrap(function _callee5$(_context5) { | ||
@@ -143,86 +146,120 @@ while (1) { | ||
case 0: | ||
count = 0; | ||
_context5.next = 3; | ||
getUrl = args.getUrl, selector = args.selector, _args$keepGoing = args.keepGoing, keepGoing = _args$keepGoing === undefined ? function () { | ||
return false; | ||
} : _args$keepGoing, waitForSelector = args.waitForSelector; | ||
_context5.prev = 1; | ||
_context5.next = 4; | ||
return this.init(); | ||
case 3: | ||
case 4: | ||
_context5.next = 10; | ||
break; | ||
debug('started'); | ||
case 6: | ||
_context5.prev = 6; | ||
_context5.t0 = _context5['catch'](1); | ||
return _context5.abrupt('return', new Promise(function (resolve) { | ||
(0, _async.forever)(function (next) { | ||
var source = getSource(count); | ||
done(_context5.t0); | ||
return _context5.abrupt('return'); | ||
(0, _crawl.crawl)({ | ||
engine: _this2.engine, | ||
selector: selector, | ||
source: source | ||
}, function () { | ||
var _ref4 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3(err, result) { | ||
var flag; | ||
return regeneratorRuntime.wrap(function _callee3$(_context3) { | ||
while (1) { | ||
switch (_context3.prev = _context3.next) { | ||
case 0: | ||
if (!err) { | ||
_context3.next = 3; | ||
break; | ||
} | ||
case 10: | ||
next(err); | ||
return _context3.abrupt('return'); | ||
debug('until'); | ||
case 3: | ||
count = 0; | ||
_this2.emit('data', { count: count, result: result, source: source }); | ||
_context3.next = 6; | ||
return keepGoing({ count: count, result: result, source: source }); | ||
(0, _async.forever)(function (next) { | ||
debug('count ' + count); | ||
case 6: | ||
flag = _context3.sent; | ||
var url = getUrl(count); | ||
_this2.start({ | ||
selector: selector, | ||
url: url, | ||
waitForSelector: waitForSelector | ||
}, function () { | ||
var _ref4 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3(err, result) { | ||
var flag; | ||
return regeneratorRuntime.wrap(function _callee3$(_context3) { | ||
while (1) { | ||
switch (_context3.prev = _context3.next) { | ||
case 0: | ||
if (!err) { | ||
_context3.next = 3; | ||
break; | ||
} | ||
if (flag) { | ||
count += 1; | ||
next(err); | ||
return _context3.abrupt('return'); | ||
next(); | ||
} else { | ||
next(count); | ||
} | ||
case 3: | ||
_context3.next = 5; | ||
return keepGoing({ count: count, result: result, url: url }); | ||
case 8: | ||
case 'end': | ||
return _context3.stop(); | ||
} | ||
case 5: | ||
flag = _context3.sent; | ||
debug('keepGoing', flag); | ||
if (!(flag !== true)) { | ||
_context3.next = 10; | ||
break; | ||
} | ||
next(true); | ||
return _context3.abrupt('return'); | ||
case 10: | ||
_this2.emit('data', { count: count, result: result, url: url }); | ||
count += 1; | ||
next(); | ||
case 13: | ||
case 'end': | ||
return _context3.stop(); | ||
} | ||
}, _callee3, _this2); | ||
})); | ||
} | ||
}, _callee3, _this2); | ||
})); | ||
return function (_x5, _x6) { | ||
return _ref4.apply(this, arguments); | ||
}; | ||
}()); | ||
}, _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee4() { | ||
return regeneratorRuntime.wrap(function _callee4$(_context4) { | ||
while (1) { | ||
switch (_context4.prev = _context4.next) { | ||
case 0: | ||
_context4.next = 2; | ||
return _this2.teardown(); | ||
return function (_x4, _x5) { | ||
return _ref4.apply(this, arguments); | ||
}; | ||
}()); | ||
}, _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee4() { | ||
return regeneratorRuntime.wrap(function _callee4$(_context4) { | ||
while (1) { | ||
switch (_context4.prev = _context4.next) { | ||
case 0: | ||
_context4.prev = 0; | ||
_context4.next = 3; | ||
return _this2.teardown(); | ||
case 2: | ||
case 3: | ||
_context4.next = 9; | ||
break; | ||
resolve(count); | ||
case 5: | ||
_context4.prev = 5; | ||
_context4.t0 = _context4['catch'](0); | ||
case 3: | ||
case 'end': | ||
return _context4.stop(); | ||
} | ||
done(_context4.t0); | ||
return _context4.abrupt('return'); | ||
case 9: | ||
done(null, count); | ||
case 10: | ||
case 'end': | ||
return _context4.stop(); | ||
} | ||
}, _callee4, _this2); | ||
}))); | ||
})); | ||
} | ||
}, _callee4, _this2, [[0, 5]]); | ||
}))); | ||
case 5: | ||
case 13: | ||
case 'end': | ||
@@ -232,6 +269,6 @@ return _context5.stop(); | ||
} | ||
}, _callee5, this); | ||
}, _callee5, this, [[1, 6]]); | ||
})); | ||
function until(_x3, _x4) { | ||
function until(_x2, _x3) { | ||
return _ref3.apply(this, arguments); | ||
@@ -245,5 +282,7 @@ } | ||
value: function () { | ||
var _ref6 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee7(source, selector) { | ||
var _ref6 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee7(args, done) { | ||
var _this3 = this; | ||
var selector, url, _args$waitForSelector, waitForSelector; | ||
return regeneratorRuntime.wrap(function _callee7$(_context7) { | ||
@@ -253,52 +292,77 @@ while (1) { | ||
case 0: | ||
_context7.next = 2; | ||
selector = args.selector, url = args.url, _args$waitForSelector = args.waitForSelector, waitForSelector = _args$waitForSelector === undefined ? 'body' : _args$waitForSelector; | ||
_context7.prev = 1; | ||
_context7.next = 4; | ||
return this.init(); | ||
case 2: | ||
case 4: | ||
_context7.next = 10; | ||
break; | ||
case 6: | ||
_context7.prev = 6; | ||
_context7.t0 = _context7['catch'](1); | ||
done(_context7.t0); | ||
return _context7.abrupt('return'); | ||
case 10: | ||
debug('started'); | ||
return _context7.abrupt('return', new Promise(function (resolve, reject) { | ||
(0, _crawl.crawl)({ | ||
concurrency: _this3.concurrency, | ||
engine: _this3.engine, | ||
selector: selector, | ||
source: source | ||
}, function () { | ||
var _ref7 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee6(err, res) { | ||
return regeneratorRuntime.wrap(function _callee6$(_context6) { | ||
while (1) { | ||
switch (_context6.prev = _context6.next) { | ||
case 0: | ||
_context6.next = 2; | ||
return _this3.teardown(); | ||
(0, _crawl.crawl)({ | ||
concurrency: this.concurrency, | ||
engine: this.engine, | ||
retry: this.retry, | ||
selector: selector, | ||
url: url, | ||
waitForSelector: waitForSelector | ||
}, function () { | ||
var _ref7 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee6(err, res) { | ||
return regeneratorRuntime.wrap(function _callee6$(_context6) { | ||
while (1) { | ||
switch (_context6.prev = _context6.next) { | ||
case 0: | ||
_context6.prev = 0; | ||
_context6.next = 3; | ||
return _this3.teardown(); | ||
case 2: | ||
if (!err) { | ||
_context6.next = 5; | ||
break; | ||
} | ||
case 3: | ||
_context6.next = 9; | ||
break; | ||
reject(err); | ||
return _context6.abrupt('return'); | ||
case 5: | ||
_context6.prev = 5; | ||
_context6.t0 = _context6['catch'](0); | ||
case 5: | ||
done(_context6.t0); | ||
return _context6.abrupt('return'); | ||
resolve(res); | ||
case 9: | ||
if (!err) { | ||
_context6.next = 12; | ||
break; | ||
} | ||
case 6: | ||
case 'end': | ||
return _context6.stop(); | ||
} | ||
done(err); | ||
return _context6.abrupt('return'); | ||
case 12: | ||
done(null, res); | ||
case 13: | ||
case 'end': | ||
return _context6.stop(); | ||
} | ||
}, _callee6, _this3); | ||
})); | ||
} | ||
}, _callee6, _this3, [[0, 5]]); | ||
})); | ||
return function (_x9, _x10) { | ||
return _ref7.apply(this, arguments); | ||
}; | ||
}()); | ||
})); | ||
return function (_x8, _x9) { | ||
return _ref7.apply(this, arguments); | ||
}; | ||
}()); | ||
case 4: | ||
case 12: | ||
case 'end': | ||
@@ -308,6 +372,6 @@ return _context7.stop(); | ||
} | ||
}, _callee7, this); | ||
}, _callee7, this, [[1, 6]]); | ||
})); | ||
function start(_x7, _x8) { | ||
function start(_x6, _x7) { | ||
return _ref6.apply(this, arguments); | ||
@@ -314,0 +378,0 @@ } |
{ | ||
"name": "oh-scrap", | ||
"version": "1.3.2", | ||
"version": "2.0.0", | ||
"description": "Node Module skeleton", | ||
@@ -5,0 +5,0 @@ "main": "lib/index.js", |
/* eslint-disable no-use-before-define */ | ||
import { mapLimit, mapValuesLimit, waterfall } from 'async'; | ||
import async from 'async'; | ||
import Debug from 'debug'; | ||
@@ -41,11 +41,11 @@ import { | ||
done(new Error('no element found')); | ||
done(); | ||
} | ||
export function handleSelectorObject(args, done) { | ||
const { selector } = args; | ||
const { concurrency, selector } = args; | ||
mapValuesLimit( | ||
async.mapValuesLimit( | ||
selector, | ||
1, | ||
concurrency, | ||
(value, key, next) => handleSelector(merge(args, { selector: value }), next), | ||
@@ -62,3 +62,3 @@ done, | ||
waterfall([ | ||
async.waterfall([ | ||
next => handleSelectorString(merge(args, { selector: sourceSelector }), next), | ||
@@ -80,3 +80,3 @@ (result, next) => { | ||
mapLimit(result, concurrency, (source, mapNext) => { | ||
async.mapLimit(result, concurrency, (source, mapNext) => { | ||
const crawlArgs = merge(args, { | ||
@@ -111,3 +111,5 @@ selector: targetSelector, | ||
export function crawl(args, done) { | ||
const { engine, source } = args; | ||
const { | ||
engine, retry, url, waitForSelector, | ||
} = args; | ||
const context = merge({}, args.context); | ||
@@ -117,11 +119,11 @@ | ||
if (isUrl(source)) { | ||
context.url = source; | ||
context.baseUrl = getBaseUrl(source); | ||
if (isUrl(url)) { | ||
context.url = url; | ||
context.baseUrl = getBaseUrl(url); | ||
debug(`crawl absolute link: ${source}`); | ||
debug(`crawl absolute link: ${url}`); | ||
link = source; | ||
} else if (isRelativeUrl(source)) { | ||
link = URL.resolve(context.baseUrl, source); | ||
link = url; | ||
} else if (isRelativeUrl(url)) { | ||
link = URL.resolve(context.baseUrl, url); | ||
@@ -133,10 +135,29 @@ context.url = link; | ||
engine.retrieveContent(link) | ||
.catch(done) | ||
.then((content) => { | ||
handleSelector(merge(args, { | ||
content, | ||
context, | ||
}), done); | ||
}); | ||
let attempt = 0; | ||
const { interval, times } = retry; | ||
async.retry({ | ||
interval, | ||
times, | ||
}, (callback) => { | ||
debug(`retrieveContent attempt ${attempt} => ${link}`); | ||
attempt += 1; | ||
engine.retrieveContent(link, waitForSelector) | ||
.then((content) => { | ||
if (!isString(content) || content.length < 100) { | ||
callback(new Error('invalid content')); | ||
return; | ||
} | ||
debug('content', content.length); | ||
handleSelector(merge(args, { | ||
content, | ||
context, | ||
}), callback); | ||
}, callback); | ||
}, (err, res) => done(null, res)); | ||
} |
@@ -20,26 +20,30 @@ import puppeteer from 'puppeteer'; | ||
async retrieveContent(url, selector = 'body') { | ||
const page = await this.browser.newPage(); | ||
async retrieveContent(url, waitForSelector = 'body') { | ||
try { | ||
const page = await this.browser.newPage(); | ||
await page.setViewport(this.viewportOptions); | ||
await page.setViewport(this.viewportOptions); | ||
await page.goto(url, this.gotoOptions); | ||
await page.goto(url, this.gotoOptions); | ||
await page.waitFor(selector); | ||
await page.waitForSelector(waitForSelector); | ||
const content = await page.evaluate((sel) => { | ||
const element = document.querySelector(sel); // eslint-disable-line no-undef | ||
const content = await page.content(); | ||
return element ? element.innerHTML : null; | ||
}, selector); | ||
await page.close(); | ||
await page.close(); | ||
return content; | ||
return content; | ||
} catch (e) { | ||
return e; | ||
} | ||
} | ||
async init() { | ||
this.browser = await puppeteer.launch(this.launchOptions); | ||
try { | ||
this.browser = await puppeteer.launch(this.launchOptions); | ||
return this.browser; | ||
return this.browser; | ||
} catch (e) { | ||
return e; | ||
} | ||
} | ||
@@ -46,0 +50,0 @@ |
122
src/index.js
@@ -15,3 +15,3 @@ import 'babel-polyfill'; | ||
class OhScrap extends EventEmitter { | ||
constructor(concurrency = os.cpus().length) { | ||
constructor(concurrency = os.cpus().length, retry) { | ||
super(); | ||
@@ -25,2 +25,6 @@ | ||
}; | ||
this.retry = retry || { | ||
interval: 1500, | ||
times: 5, | ||
}; | ||
} | ||
@@ -50,64 +54,94 @@ | ||
async until(getSource, selector, keepGoing = () => false) { | ||
async until(args, done) { | ||
const { | ||
getUrl, | ||
selector, | ||
keepGoing = () => false, | ||
waitForSelector, | ||
} = args; | ||
try { | ||
await this.init(); | ||
} catch (e) { | ||
done(e); | ||
return; | ||
} | ||
debug('until'); | ||
let count = 0; | ||
await this.init(); | ||
forever((next) => { | ||
debug(`count ${count}`); | ||
debug('started'); | ||
const url = getUrl(count); | ||
return new Promise((resolve) => { | ||
forever((next) => { | ||
const source = getSource(count); | ||
this.start({ | ||
selector, | ||
url, | ||
waitForSelector, | ||
}, async (err, result) => { | ||
if (err) { | ||
next(err); | ||
return; | ||
} | ||
crawl({ | ||
engine: this.engine, | ||
selector, | ||
source, | ||
}, async (err, result) => { | ||
if (err) { | ||
next(err); | ||
return; | ||
} | ||
const flag = await keepGoing({ count, result, url }); | ||
this.emit('data', { count, result, source }); | ||
debug('keepGoing', flag); | ||
const flag = await keepGoing({ count, result, source }); | ||
if (flag !== true) { | ||
next(true); | ||
return; | ||
} | ||
if (flag) { | ||
count += 1; | ||
next(); | ||
} else { | ||
next(count); | ||
} | ||
}); | ||
}, async () => { | ||
this.emit('data', { count, result, url }); | ||
count += 1; | ||
next(); | ||
}); | ||
}, async () => { | ||
try { | ||
await this.teardown(); | ||
} catch (e) { | ||
done(e); | ||
return; | ||
} | ||
resolve(count); | ||
}); | ||
done(null, count); | ||
}); | ||
} | ||
async start(source, selector) { | ||
await this.init(); | ||
async start(args, done) { | ||
const { selector, url, waitForSelector = 'body' } = args; | ||
try { | ||
await this.init(); | ||
} catch (e) { | ||
done(e); | ||
return; | ||
} | ||
debug('started'); | ||
return new Promise((resolve, reject) => { | ||
crawl({ | ||
concurrency: this.concurrency, | ||
engine: this.engine, | ||
selector, | ||
source, | ||
}, async (err, res) => { | ||
crawl({ | ||
concurrency: this.concurrency, | ||
engine: this.engine, | ||
retry: this.retry, | ||
selector, | ||
url, | ||
waitForSelector, | ||
}, async (err, res) => { | ||
try { | ||
await this.teardown(); | ||
} catch (e) { | ||
done(e); | ||
return; | ||
} | ||
if (err) { | ||
reject(err); | ||
return; | ||
} | ||
if (err) { | ||
done(err); | ||
return; | ||
} | ||
resolve(res); | ||
}); | ||
done(null, res); | ||
}); | ||
@@ -114,0 +148,0 @@ } |
@@ -37,2 +37,3 @@ import os from 'os'; | ||
<ul>no items</ul> | ||
<p>something else to reach 100 characters</p> | ||
</body> | ||
@@ -93,6 +94,7 @@ `; | ||
it('should return a string result', async () => { | ||
const result = await ohscrap.start(PAGE_1_URL, selector); | ||
expect(result).to.equal('TITLE PAGE 1'); | ||
it('should return a string result', (done) => { | ||
ohscrap.start({ url: PAGE_1_URL, selector }, (err, result) => { | ||
expect(result).to.equal('TITLE PAGE 1'); | ||
done(); | ||
}); | ||
}); | ||
@@ -108,11 +110,12 @@ }); | ||
it('should return the same object structure populated with results', async () => { | ||
const result = await ohscrap.start(PAGE_1_URL, selector); | ||
expect(result).to.deep.equal({ | ||
title: 'TITLE PAGE 1', | ||
items: [ | ||
'test1', | ||
'test2', | ||
], | ||
it('should return the same object structure populated with results', (done) => { | ||
ohscrap.start({ url: PAGE_1_URL, selector }, (err, result) => { | ||
expect(result).to.deep.equal({ | ||
title: 'TITLE PAGE 1', | ||
items: [ | ||
'test1', | ||
'test2', | ||
], | ||
}); | ||
done(); | ||
}); | ||
@@ -128,11 +131,12 @@ }); | ||
it('should return the same object structure populated with results', async () => { | ||
const result = await ohscrap.start(PAGE_1_URL, selector); | ||
expect(result).to.deep.equal({ | ||
title: 'TITLE PAGE 1', | ||
items: [ | ||
'item1', | ||
'item2', | ||
], | ||
it('should return the same object structure populated with results', (done) => { | ||
ohscrap.start({ url: PAGE_1_URL, selector }, (err, result) => { | ||
expect(result).to.deep.equal({ | ||
title: 'TITLE PAGE 1', | ||
items: [ | ||
'item1', | ||
'item2', | ||
], | ||
}); | ||
done(); | ||
}); | ||
@@ -142,3 +146,3 @@ }); | ||
describe('and it does contain deep links', () => { | ||
describe.skip('and it does contain deep links', () => { | ||
const selector = { | ||
@@ -155,4 +159,7 @@ title: 'h1', | ||
beforeEach(async () => { | ||
result = await ohscrap.start(PAGE_1_URL, selector); | ||
beforeEach((done) => { | ||
ohscrap.start({ url: PAGE_1_URL, selector }, (err, data) => { | ||
result = data; | ||
done(); | ||
}); | ||
}); | ||
@@ -191,14 +198,13 @@ | ||
}; | ||
const getUrl = count => `http://page${count + 1}.com/`; | ||
const keepGoing = ({ result }) => { | ||
const flag = isArray(result.items) && result.items.length > 0; | ||
return Promise.resolve(flag); | ||
}; | ||
let emitStub; | ||
let totalCount; | ||
let emitStub; | ||
beforeEach(async () => { | ||
const getSource = count => `http://page${count + 1}.com/`; | ||
const keepGoing = ({ result }) => { | ||
const flag = isArray(result.items) && result.items.length > 0; | ||
return Promise.resolve(flag); | ||
}; | ||
beforeEach((done) => { | ||
emitStub = sandbox.stub(); | ||
@@ -208,3 +214,6 @@ | ||
totalCount = await ohscrap.until(getSource, selector, keepGoing); | ||
ohscrap.until({ getUrl, selector, keepGoing }, (err, res) => { | ||
totalCount = res; | ||
done(); | ||
}); | ||
}); | ||
@@ -211,0 +220,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
170981
1159