cloudflare-scraper
Advanced tools
Comparing version 1.0.8 to 2.0.0
49
index.js
@@ -1,47 +0,2 @@ | ||
const request = require('request-promise-native'); | ||
const { isProtectedByStormwall, getStormwallCookie } = require('stormwall-bypass'); | ||
const { getUserAgent } = require('./src/utils'); | ||
const fillCookiesJar = require('./src/fillCookiesJar'); | ||
const { isCloudflareJSChallenge, isCloudflareCaptchaChallenge } = require('./src/utils'); | ||
function isCloudflareIUAMError(error) { | ||
if (error.response) { | ||
const { body } = error.response; | ||
return isCloudflareJSChallenge(body) || isCloudflareCaptchaChallenge(body); | ||
} | ||
return false; | ||
} | ||
async function handleError(error) { | ||
if (isCloudflareIUAMError(error)) { | ||
const { options } = error; | ||
await fillCookiesJar(request, options); | ||
return request(options); | ||
} | ||
throw error; | ||
} | ||
function handleResponse(response, options) { | ||
const { jar, url, uri } = options; | ||
const targetUrl = uri || url; | ||
const body = response.body || response; | ||
if (isProtectedByStormwall(body)) { | ||
const cookie = getStormwallCookie(body); | ||
jar.setCookie(cookie, targetUrl); | ||
return request(options); | ||
} | ||
return response; | ||
} | ||
async function cloudflareScraper(options) { | ||
const response = await request({ ...options }).catch(handleError); | ||
return handleResponse(response, options); | ||
} | ||
const defaultParams = { | ||
jar: request.jar(), | ||
headers: { 'User-Agent': getUserAgent() }, | ||
gzip: true | ||
}; | ||
module.exports = request.defaults(defaultParams, cloudflareScraper); | ||
import scraper from './src/lib.js'; | ||
export default scraper; |
The MIT License (MIT) | ||
===================== | ||
Copyright © `2020` `Jimmy Laurent` | ||
Copyright © `2023` `Jimmy Laurent` | ||
@@ -6,0 +6,0 @@ Permission is hereby granted, free of charge, to any person |
{ | ||
"name": "cloudflare-scraper", | ||
"version": "1.0.8", | ||
"version": "2.0.0", | ||
"description": "A package to bypass Cloudflare's protection", | ||
@@ -11,2 +11,3 @@ "author": "Jimmy Laurent", | ||
"main": "index.js", | ||
"type": "module", | ||
"scripts": {}, | ||
@@ -17,3 +18,3 @@ "keywords": [ | ||
"bypass", | ||
"puppeteer", | ||
"chrome", | ||
"request", | ||
@@ -24,12 +25,9 @@ "anti-bot" | ||
"dependencies": { | ||
"hcaptcha-solver": "^1.0.1", | ||
"puppeteer-extra": "^3.1.18", | ||
"puppeteer-extra-plugin-stealth": "^2.7.8", | ||
"request": "^2.88.2", | ||
"request-promise-native": "^1.0.8", | ||
"stormwall-bypass": "^1.0.1" | ||
"chrome-launcher": "^0.15.1", | ||
"chrome-remote-interface": "^0.31.3", | ||
"chromium": "^3.0.3", | ||
"got": "^12.5.3", | ||
"tough-cookie": "^4.1.2", | ||
"xvfb": "^0.4.0" | ||
}, | ||
"devDependencies": { | ||
"puppeteer": "^5.2.1" | ||
}, | ||
"prettier": { | ||
@@ -36,0 +34,0 @@ "printWidth": 100, |
# cloudflare-scraper | ||
Puppeteer (chromium headless) is used to retrieve cloudflare cookies then request module is used to perform requests making this solution reliable but also pretty fast. | ||
Chrome is used to retrieve cloudflare cookies then **got** is used to perform requests making this solution reliable but also pretty fast. | ||
> Version 2 is a complete rewrite: | ||
> - it doesn't use puppeteer but vanilla chromium, | ||
> - **request** package was replaced by **got** , | ||
> - headless support only works on **linux** out of the box but should be doable on windows or mac os with the help of docker or wsl. | ||
> - extra features were removed (captcha bypass, etc..) | ||
## Install | ||
```bash | ||
npm install cloudflare-scraper puppeteer | ||
npm install cloudflare-scraper | ||
``` | ||
## Extra Features | ||
Make sure you also have the **xvfb** Linux package installed | ||
- **hCaptcha bypass** | ||
```bash | ||
# for ubuntu users | ||
sudo apt-get install xvfb | ||
``` | ||
- **stormwall bypass** | ||
## Quick Example | ||
```js | ||
const cloudflareScraper = require('cloudflare-scraper'); | ||
import got from 'cloudflare-scraper'; | ||
(async () => { | ||
try { | ||
const response = await cloudflareScraper.get('https://cloudflare-url.com'); | ||
console.log(response); | ||
const response = await got.get('https://nowsecure.nl'); | ||
console.log(response.body); | ||
} catch (error) { | ||
@@ -34,6 +41,35 @@ console.log(error); | ||
TODO (same api as request package) | ||
Check **got** [documentation](https://github.com/sindresorhus/got#documentation) | ||
## TODO list | ||
## Env variables | ||
- documentation | ||
### NODE_CHROMIUM_SKIP_INSTALL (boolean) | ||
By default, chromium is downloaded by the `npm install` command, but you can skip the installation by enabling this variable. | ||
```bash | ||
export NODE_CHROMIUM_SKIP_INSTALL=true | ||
``` | ||
### CHROME_EXECUTABLE_PATH (string) | ||
Specify a chrome executable | ||
```bash | ||
export CHROME_EXECUTABLE_PATH=/path/to/chrome | ||
``` | ||
### CF_SCRAPER_HEADLESS (boolean) | ||
Enable/disable headless mode (enabled by default) | ||
Note: headless mode uses "xvfb" and is only available on Linux | ||
```bash | ||
export CF_SCRAPER_HEADLESS=false | ||
``` | ||
## TODO: | ||
- add proxy support | ||
- docker example |
@@ -1,37 +0,20 @@ | ||
const USER_AGENT_MAC = | ||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'; | ||
const USER_AGENT_WINDOWS = | ||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'; | ||
const USER_AGENT_LINUX = | ||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'; | ||
import { Cookie } from 'tough-cookie'; | ||
function getUserAgent() { | ||
const { platform } = process; | ||
if (platform === 'darwin') { | ||
return USER_AGENT_MAC; | ||
} | ||
if (platform === 'win32') { | ||
return USER_AGENT_WINDOWS; | ||
} | ||
return USER_AGENT_LINUX; | ||
} | ||
function convertCookieToTough(cookie) { | ||
const { name, value, expires, domain, path } = cookie; | ||
const isExpiresValid = expires && typeof expires === 'number'; | ||
function extract(string, regexp, errorMessage) { | ||
const match = string.match(regexp); | ||
if (match) { | ||
return match[1]; | ||
} | ||
if (errorMessage) { | ||
throw new Error(errorMessage); | ||
} | ||
} | ||
const expiresDate = isExpiresValid | ||
? new Date(expires * 1000) | ||
: new Date(Date.now() + DEFAULT_EXPIRATION_TIME_IN_SECONDS * 1000); | ||
function isCloudflareJSChallenge(body) { | ||
return body.includes('managed_checking_msg'); | ||
return new Cookie({ | ||
key: name, | ||
value, | ||
expires: expiresDate, | ||
domain: domain.startsWith('.') ? domain.substring(1) : domain, | ||
path | ||
}); | ||
} | ||
function isCloudflareCaptchaChallenge(body) { | ||
return body.includes('cf_captcha_kind'); | ||
} | ||
module.exports = { extract, isCloudflareJSChallenge, isCloudflareCaptchaChallenge, getUserAgent }; | ||
export { convertCookieToTough }; |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
0
74
Yes
9456
7
209
2
1
+ Addedchrome-launcher@^0.15.1
+ Addedchromium@^3.0.3
+ Addedgot@^12.5.3
+ Addedtough-cookie@^4.1.2
+ Addedxvfb@^0.4.0
+ Added@sindresorhus/is@5.6.0(transitive)
+ Added@szmarczak/http-timer@5.0.1(transitive)
+ Added@types/http-cache-semantics@4.0.4(transitive)
+ Added@types/node@22.13.1(transitive)
+ Addedcacheable-lookup@7.0.0(transitive)
+ Addedcacheable-request@10.2.14(transitive)
+ Addedchrome-launcher@0.15.2(transitive)
+ Addedchrome-remote-interface@0.31.3(transitive)
+ Addedchromium@3.2171.3008(transitive)
+ Addedcommander@2.11.0(transitive)
+ Addeddebug@2.6.9(transitive)
+ Addeddecompress-response@6.0.0(transitive)
+ Addeddefer-to-connect@2.0.1(transitive)
+ Addedescape-string-regexp@4.0.0(transitive)
+ Addedform-data-encoder@2.1.4(transitive)
+ Addedget-stream@6.0.1(transitive)
+ Addedgot@12.6.1(transitive)
+ Addedhttp-cache-semantics@4.1.1(transitive)
+ Addedhttp2-wrapper@2.2.1(transitive)
+ Addedis-docker@2.2.1(transitive)
+ Addedis-wsl@2.2.0(transitive)
+ Addedjson-buffer@3.0.1(transitive)
+ Addedkeyv@4.5.4(transitive)
+ Addedlighthouse-logger@1.4.2(transitive)
+ Addedlowercase-keys@3.0.0(transitive)
+ Addedmarky@1.2.5(transitive)
+ Addedmimic-response@3.1.0, 4.0.0(transitive)
+ Addedms@2.0.0(transitive)
+ Addednan@2.22.0(transitive)
+ Addednormalize-url@8.0.1(transitive)
+ Addedp-cancelable@3.0.0(transitive)
+ Addedquerystringify@2.2.0(transitive)
+ Addedquick-lru@5.1.1(transitive)
+ Addedrequires-port@1.0.0(transitive)
+ Addedresolve-alpn@1.2.1(transitive)
+ Addedresponselike@3.0.0(transitive)
+ Addedsleep@6.1.0(transitive)
+ Addedtough-cookie@4.1.4(transitive)
+ Addedundici-types@6.20.0(transitive)
+ Addeduniversalify@0.2.0(transitive)
+ Addedurl-parse@1.5.10(transitive)
+ Addedws@7.5.10(transitive)
+ Addedxvfb@0.4.0(transitive)
- Removedhcaptcha-solver@^1.0.1
- Removedpuppeteer-extra@^3.1.18
- Removedrequest@^2.88.2
- Removedrequest-promise-native@^1.0.8
- Removedstormwall-bypass@^1.0.1
- Removed@types/debug@4.1.12(transitive)
- Removed@types/ms@2.1.0(transitive)
- Removedajv@6.12.6(transitive)
- Removedarr-union@3.1.0(transitive)
- Removedasn1@0.2.6(transitive)
- Removedassert-plus@1.0.0(transitive)
- Removedasynckit@0.4.0(transitive)
- Removedaws-sign2@0.7.0(transitive)
- Removedaws4@1.13.2(transitive)
- Removedbalanced-match@1.0.2(transitive)
- Removedbcrypt-pbkdf@1.0.2(transitive)
- Removedbrace-expansion@1.1.11(transitive)
- Removedcaseless@0.12.0(transitive)
- Removedclone-deep@0.2.4(transitive)
- Removedcombined-stream@1.0.8(transitive)
- Removedconcat-map@0.0.1(transitive)
- Removedcore-util-is@1.0.2(transitive)
- Removeddashdash@1.14.1(transitive)
- Removeddebug@4.4.0(transitive)
- Removeddeepmerge@4.3.1(transitive)
- Removeddelayed-stream@1.0.0(transitive)
- Removedecc-jsbn@0.1.2(transitive)
- Removedextend@3.0.2(transitive)
- Removedextsprintf@1.3.0(transitive)
- Removedfast-deep-equal@3.1.3(transitive)
- Removedfast-json-stable-stringify@2.1.0(transitive)
- Removedfor-in@0.1.8, 1.0.2(transitive)
- Removedfor-own@0.1.5(transitive)
- Removedforever-agent@0.6.1(transitive)
- Removedform-data@2.3.3(transitive)
- Removedfs-extra@10.1.0(transitive)
- Removedfs.realpath@1.0.0(transitive)
- Removedgetpass@0.1.7(transitive)
- Removedglob@7.2.3(transitive)
- Removedgraceful-fs@4.2.11(transitive)
- Removedhar-schema@2.0.0(transitive)
- Removedhar-validator@5.1.5(transitive)
- Removedhcaptcha-solver@1.0.2(transitive)
- Removedhttp-signature@1.2.0(transitive)
- Removedinflight@1.0.6(transitive)
- Removedinherits@2.0.4(transitive)
- Removedis-buffer@1.1.6(transitive)
- Removedis-extendable@0.1.1(transitive)
- Removedis-plain-object@2.0.4(transitive)
- Removedis-typedarray@1.0.0(transitive)
- Removedisobject@3.0.1(transitive)
- Removedisstream@0.1.2(transitive)
- Removedjsbn@0.1.1(transitive)
- Removedjson-schema@0.4.0(transitive)
- Removedjson-schema-traverse@0.4.1(transitive)
- Removedjson-stringify-safe@5.0.1(transitive)
- Removedjsonfile@6.1.0(transitive)
- Removedjsprim@1.4.2(transitive)
- Removedkind-of@2.0.1, 3.2.2(transitive)
- Removedlazy-cache@0.2.7, 1.0.4(transitive)
- Removedlodash@4.17.21(transitive)
- Removedmerge-deep@3.0.3(transitive)
- Removedmime-db@1.52.0(transitive)
- Removedmime-types@2.1.35(transitive)
- Removedminimatch@3.1.2(transitive)
- Removedmixin-object@2.0.1(transitive)
- Removedms@2.1.3(transitive)
- Removedoauth-sign@0.9.0(transitive)
- Removedonce@1.4.0(transitive)
- Removedpath-is-absolute@1.0.1(transitive)
- Removedperformance-now@2.1.0(transitive)
- Removedpuppeteer-extra@3.3.6(transitive)
- Removedpuppeteer-extra-plugin@3.2.3(transitive)
- Removedpuppeteer-extra-plugin-stealth@2.11.2(transitive)
- Removedpuppeteer-extra-plugin-user-data-dir@2.4.1(transitive)
- Removedpuppeteer-extra-plugin-user-preferences@2.4.1(transitive)
- Removedqs@6.5.3(transitive)
- Removedrequest@2.88.2(transitive)
- Removedrequest-promise-core@1.1.4(transitive)
- Removedrequest-promise-native@1.0.9(transitive)
- Removedrimraf@3.0.2(transitive)
- Removedsafe-buffer@5.2.1(transitive)
- Removedsafer-buffer@2.1.2(transitive)
- Removedshallow-clone@0.1.2(transitive)
- Removedsshpk@1.18.0(transitive)
- Removedstealthy-require@1.1.1(transitive)
- Removedstormwall-bypass@1.0.1(transitive)
- Removedtough-cookie@2.5.0(transitive)
- Removedtunnel-agent@0.6.0(transitive)
- Removedtweetnacl@0.14.5(transitive)
- Removeduniversalify@2.0.1(transitive)
- Removeduri-js@4.4.1(transitive)
- Removeduuid@3.4.0(transitive)
- Removedverror@1.10.0(transitive)
- Removedwrappy@1.0.2(transitive)