scraper-http-client-request
Advanced tools
Comparing version 1.0.1-beta.5 to 1.0.1
@@ -6,3 +6,3 @@ const STAGES_CONSTANTS = { | ||
PROXIED_CLOUDFLARE: 3, | ||
}; | ||
} | ||
const STAGE_DESCRIPTION = { | ||
@@ -13,3 +13,3 @@ [STAGES_CONSTANTS.RAW]: 'direct connection', | ||
[STAGES_CONSTANTS.PROXIED_CLOUDFLARE]: 'proxied cloudflare bypasser', | ||
}; | ||
} | ||
const RETRY_OPTIONS = { | ||
@@ -19,6 +19,6 @@ retries: 10, | ||
minTimeout: 0, | ||
}; | ||
} | ||
module.exports = { | ||
STAGES_CONSTANTS, STAGE_DESCRIPTION, RETRY_OPTIONS | ||
STAGES_CONSTANTS, STAGE_DESCRIPTION, RETRY_OPTIONS, | ||
} |
class EntityError extends Error { | ||
constructor(message, retryable = true) { | ||
super(message); | ||
this.name = 'EntityError'; | ||
this.retryable = retryable; | ||
super(message) | ||
this.name = 'EntityError' | ||
this.retryable = retryable | ||
} | ||
} | ||
module.exports = EntityError; | ||
module.exports = EntityError |
class NotRetryableError extends Error { | ||
constructor(message) { | ||
super(message); | ||
this.name = 'NotRetryableError'; | ||
super(message) | ||
this.name = 'NotRetryableError' | ||
} | ||
} | ||
module.exports = NotRetryableError; | ||
module.exports = NotRetryableError |
class ResponseError extends Error { | ||
constructor(message, response) { | ||
super(message); | ||
this.response = response; | ||
super(message) | ||
this.response = response | ||
} | ||
@@ -6,0 +6,0 @@ } |
124
index.js
@@ -6,2 +6,3 @@ const merge = require('deepmerge') | ||
const { ResponseError, NotRetryableError } = require('./errors') | ||
const { STAGE_DESCRIPTION, STAGES_CONSTANTS, RETRY_OPTIONS } = require('./constants') | ||
@@ -30,78 +31,87 @@ const DEFAULT_OPTIONS = { | ||
}, | ||
}; | ||
const { STAGE_DESCRIPTION, STAGES_CONSTANTS, RETRY_OPTIONS } = require('./constants') | ||
} | ||
const clientTypes = [ | ||
options => request.defaults(options), | ||
(options) => { | ||
const cf = new CloudflareBypasser(options); | ||
const cf = new CloudflareBypasser(options) | ||
return async (...args) => { | ||
const result = await cf.request(...args); | ||
if ((result.statusCode !== 200) && ((result.statusCode !== 301))) | ||
throw new ResponseError('Wrong status code', result) | ||
const result = await cf.request(...args) | ||
if ((result.statusCode !== 200) && ((result.statusCode !== 301))) throw new ResponseError('Wrong status code', result) | ||
return result | ||
} | ||
}, | ||
]; | ||
] | ||
function getClients(proxyWrapper, jar) { | ||
const options = merge({}, DEFAULT_OPTIONS); | ||
options.jar = jar; | ||
const clients = clientTypes.map(clientType => clientType(options)); | ||
for (let i = 0; i < clientTypes.length; i++) clients.push(proxyWrapper(clients[i])); | ||
return clients; | ||
const options = merge({}, DEFAULT_OPTIONS) | ||
options.jar = jar | ||
const clients = clientTypes.map(clientType => clientType(options)) | ||
for (let i = 0; i < clientTypes.length; i++) clients.push(proxyWrapper(clients[i])) | ||
return clients | ||
} | ||
const clientConstructor = (limiter, promiseRetryOptions = null) => { | ||
const jar = request.jar(); | ||
let proxy; | ||
let additionalOptions; | ||
let initialStage = 0; | ||
let shouldUseProxyNextTime = false; | ||
const getClient = (limiter, promiseRetryOptions = null) => { | ||
const jar = request.jar() | ||
let proxy | ||
let additionalOptions | ||
let initialStage = 0 | ||
let shouldUseProxyNextTime = false | ||
const proxyWrapper = client => (options) => { | ||
if (!proxy) throw new NotRetryableError('Proxy is not set'); | ||
if (!proxy) throw new NotRetryableError('Proxy is not set') | ||
const optionsCopy = (typeof options === 'string') | ||
? { url: options, proxy } : Object.assign({ proxy }, options); | ||
return client(optionsCopy); | ||
}; | ||
const clients = getClients(proxyWrapper, jar); | ||
const promiseRetry = limiter.wrap(promiseRetryOrigin); | ||
? { url: options, proxy } : ({ proxy, ...options }) | ||
return client(optionsCopy) | ||
} | ||
const clients = getClients(proxyWrapper, jar) | ||
const promiseRetry = limiter.wrap(promiseRetryOrigin) | ||
const requestWrapper = (options, checkResult) => { | ||
if (additionalOptions) { | ||
options = (typeof options === 'string') | ||
? merge({ url: options }, additionalOptions) : merge(options, additionalOptions); | ||
} | ||
const priority = options.priority || 5; | ||
delete options.priority; | ||
let currentStage = shouldUseProxyNextTime ? STAGES_CONSTANTS.PROXIED_RAW : initialStage; | ||
let currentRetry = 0; | ||
if (additionalOptions) options = (typeof options === 'string') | ||
? merge({ url: options }, additionalOptions) : merge(options, additionalOptions) | ||
const priority = options.priority || 5 | ||
delete options.priority | ||
let currentStage = shouldUseProxyNextTime ? STAGES_CONSTANTS.PROXIED_RAW : initialStage | ||
let currentRetry = 0 | ||
return promiseRetry.withOptions({ priority }, (retry) => { | ||
requestWrapper.usedProxy = (currentStage >= 2); | ||
requestWrapper.headers = Object.assign({}, DEFAULT_OPTIONS.headers, options.headers); | ||
//log.info(`fetching ${options.url || options} via ${STAGE_DESCRIPTION[currentStage]}, priority ${priority}, try #${currentRetry}`); | ||
requestWrapper.usedProxy = (currentStage >= 2) | ||
requestWrapper.headers = { ...DEFAULT_OPTIONS.headers, ...options.headers } | ||
return clients[currentStage](options) | ||
.then((result) => { | ||
if (!checkResult || checkResult(result)) { | ||
shouldUseProxyNextTime = false; | ||
return result.body; | ||
shouldUseProxyNextTime = false | ||
return result.body | ||
} | ||
throw new Error('result did not pass check'); | ||
throw new Error('result did not pass check') | ||
}) | ||
.catch((e) => { | ||
if (e instanceof NotRetryableError) throw e; | ||
const code = (e && e.response && e.response.statusCode) || 1000; | ||
const rawMessage = e.message || e.toString(); | ||
const errorMessage = rawMessage.length < 50 ? rawMessage : `server responded with code ${code}, try #${currentRetry}`; | ||
currentRetry++; | ||
if (e instanceof NotRetryableError) throw e | ||
const code = (e && e.response && e.response.statusCode) || 1000 | ||
const rawMessage = e.message || e.toString() | ||
const errorMessage = rawMessage.length < 50 ? rawMessage : `server responded with code ${code}, try #${currentRetry}` | ||
console.log(errorMessage) | ||
currentRetry++ | ||
if (currentRetry > 2) { | ||
currentStage += (code !== 503) && ((currentStage === STAGES_CONSTANTS.RAW) | ||
|| (currentStage === STAGES_CONSTANTS.RAW)) ? 2 : 1; | ||
currentRetry = 0; | ||
|| (currentStage === STAGES_CONSTANTS.RAW)) ? 2 : 1 | ||
currentRetry = 0 | ||
} | ||
if (currentStage > (clients.length - 1)) throw e; | ||
//log.error(`retrying ${options.url || options} because ${errorMessage}`); | ||
retry(e); | ||
}); | ||
}, promiseRetryOptions ? promiseRetryOptions : RETRY_OPTIONS); | ||
}; | ||
if (currentStage > (clients.length - 1)) throw e | ||
// log.error(`retrying ${options.url || options} because ${errorMessage}`); | ||
retry(e) | ||
}) | ||
}, promiseRetryOptions || RETRY_OPTIONS) | ||
} | ||
requestWrapper.jar = jar | ||
@@ -123,6 +133,8 @@ requestWrapper.addOptions = (options) => { | ||
} | ||
return requestWrapper; | ||
}; | ||
clientConstructor.stages = STAGES_CONSTANTS; | ||
module.exports = clientConstructor; | ||
return requestWrapper | ||
} | ||
getClient.stages = STAGES_CONSTANTS | ||
getClient.stages.descriptions = STAGE_DESCRIPTION | ||
module.exports = getClient |
{ | ||
"name": "scraper-http-client-request", | ||
"version": "1.0.1-beta.5", | ||
"description": "Powerful HTTP client for scrapers with cookies jar, cloudflare bypasser, rate limiter, concurrency and retrys", | ||
"version": "1.0.1", | ||
"description": "HTTP client for scrapers with guaranted delivery (retry), concurrency, cookies jar, zlib, Cloudflare bypasser, rate limit (throttle)", | ||
"main": "index.js", | ||
@@ -32,3 +32,10 @@ "scripts": { | ||
"request-promise-native": "^1.0.9" | ||
}, | ||
"devDependencies": { | ||
"eslint": "^7.6.0", | ||
"eslint-config-airbnb": "^18.2.0", | ||
"eslint-config-airbnb-base": "^14.2.0", | ||
"eslint-plugin-import": "^2.22.0", | ||
"eslint-plugin-jest": "^23.20.0" | ||
} | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
14098
11
1
5
164