New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

cloudflare-scraper

Package Overview
Dependencies
Maintainers
1
Versions
16
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

cloudflare-scraper - npm Package Compare versions

Comparing version 1.0.8 to 2.0.0

src/Browser.js

49

index.js

@@ -1,47 +0,2 @@

const request = require('request-promise-native');
const { isProtectedByStormwall, getStormwallCookie } = require('stormwall-bypass');
const { getUserAgent } = require('./src/utils');
const fillCookiesJar = require('./src/fillCookiesJar');
const { isCloudflareJSChallenge, isCloudflareCaptchaChallenge } = require('./src/utils');
/**
 * Tells whether a failed request was rejected by a Cloudflare challenge page.
 *
 * @param {object} error - Rejection value of the request call; its
 *   `response.body` is inspected when a response is attached.
 * @returns {boolean} `true` when the response body matches the JS challenge
 *   or the captcha challenge detector, `false` otherwise.
 */
function isCloudflareIUAMError(error) {
  // No response attached: nothing to inspect.
  if (!error.response) {
    return false;
  }
  const { body } = error.response;
  // Both detectors call `body.includes(...)`. A missing body or a body without
  // that method (e.g. an already-parsed object) would make them throw a
  // TypeError that hides the original request error, so it is reported as
  // "not a challenge" instead.
  if (body == null || typeof body.includes !== 'function') {
    return false;
  }
  return isCloudflareJSChallenge(body) || isCloudflareCaptchaChallenge(body);
}
/**
 * Recovery hook for a rejected request.
 *
 * When the rejection is recognised as a Cloudflare challenge, the cookie jar
 * is filled via `fillCookiesJar` and the request is issued again with the
 * options carried by the error. Any other error is rethrown unchanged.
 *
 * @param {object} error - Rejection value of the request call.
 * @returns {Promise<*>} Result of the retried request.
 * @throws The original error when it is not a Cloudflare challenge.
 */
async function handleError(error) {
  if (!isCloudflareIUAMError(error)) {
    throw error;
  }
  const retryOptions = error.options;
  await fillCookiesJar(request, retryOptions);
  return request(retryOptions);
}
/**
 * Post-processes a successful response.
 *
 * If the body is detected as Stormwall-protected, the cookie derived from the
 * body is stored in the jar for the target URL and the request is issued
 * again; otherwise the response is handed back untouched.
 *
 * @param {*} response - Value the request resolved with; `response.body` is
 *   used when truthy, else the response itself is treated as the body.
 * @param {object} options - Request options; `jar`, `uri` and `url` are read.
 * @returns {*} The original response, or the retried request.
 */
function handleResponse(response, options) {
  const cookieJar = options.jar;
  // `uri` takes precedence over `url` when both are present.
  const destination = options.uri || options.url;
  const payload = response.body || response;
  if (!isProtectedByStormwall(payload)) {
    return response;
  }
  const stormwallCookie = getStormwallCookie(payload);
  cookieJar.setCookie(stormwallCookie, destination);
  return request(options);
}
/**
 * Performs a request, giving `handleError` a chance to recover from a
 * rejection and `handleResponse` a chance to post-process the result.
 *
 * @param {object} options - Request options; a shallow copy is sent with the
 *   first request, the original object is passed on to `handleResponse`.
 * @returns {Promise<*>} Whatever `handleResponse` returns.
 */
async function cloudflareScraper(options) {
  const pendingRequest = request({ ...options });
  const response = await pendingRequest.catch(handleError);
  return handleResponse(response, options);
}
// A single cookie jar is created here and passed as a default option, so it is
// shared by the requests made through the exported function.
const sharedCookieJar = request.jar();
// Default User-Agent header, taken from getUserAgent().
const defaultHeaders = { 'User-Agent': getUserAgent() };
const defaultParams = { jar: sharedCookieJar, headers: defaultHeaders, gzip: true };

// Export `request` with the defaults above and `cloudflareScraper` as requester.
module.exports = request.defaults(defaultParams, cloudflareScraper);
import scraper from './src/lib.js';
export default scraper;
The MIT License (MIT)
=====================
Copyright © `2020` `Jimmy Laurent`
Copyright © `2023` `Jimmy Laurent`

@@ -6,0 +6,0 @@ Permission is hereby granted, free of charge, to any person

{
"name": "cloudflare-scraper",
"version": "1.0.8",
"version": "2.0.0",
"description": "A package to bypass Cloudflare's protection",

@@ -11,2 +11,3 @@ "author": "Jimmy Laurent",

"main": "index.js",
"type": "module",
"scripts": {},

@@ -17,3 +18,3 @@ "keywords": [

"bypass",
"puppeteer",
"chrome",
"request",

@@ -24,12 +25,9 @@ "anti-bot"

"dependencies": {
"hcaptcha-solver": "^1.0.1",
"puppeteer-extra": "^3.1.18",
"puppeteer-extra-plugin-stealth": "^2.7.8",
"request": "^2.88.2",
"request-promise-native": "^1.0.8",
"stormwall-bypass": "^1.0.1"
"chrome-launcher": "^0.15.1",
"chrome-remote-interface": "^0.31.3",
"chromium": "^3.0.3",
"got": "^12.5.3",
"tough-cookie": "^4.1.2",
"xvfb": "^0.4.0"
},
"devDependencies": {
"puppeteer": "^5.2.1"
},
"prettier": {

@@ -36,0 +34,0 @@ "printWidth": 100,

# cloudflare-scraper
Puppeteer (chromium headless) is used to retrieve cloudflare cookies then request module is used to perform requests making this solution reliable but also pretty fast.
Chrome is used to retrieve cloudflare cookies then **got** is used to perform requests making this solution reliable but also pretty fast.
> Version 2 is a complete rewrite:
> - it doesn't use puppeteer but vanilla chromium,
> - **request** package was replaced by **got** ,
> - headless support only works on **linux** out of the box but should be doable on windows or mac os with the help of docker or wsl.
> - extra features were removed (captcha bypass, etc..)
## Install
```bash
npm install cloudflare-scraper puppeteer
npm install cloudflare-scraper
```
## Extra Features
Make sure you also have the **xvfb** Linux package installed
- **hCaptcha bypass**
```bash
# for ubuntu users
sudo apt-get install xvfb
```
- **stormwall bypass**
## Quick Example
```js
const cloudflareScraper = require('cloudflare-scraper');
import got from 'cloudflare-scraper';
(async () => {
try {
const response = await cloudflareScraper.get('https://cloudflare-url.com');
console.log(response);
const response = await got.get('https://nowsecure.nl');
console.log(response.body);
} catch (error) {

@@ -34,6 +41,35 @@ console.log(error);

TODO (same api as request package)
Check **got** [documentation](https://github.com/sindresorhus/got#documentation)
## TODO list
## Env variables
- documentation
### NODE_CHROMIUM_SKIP_INSTALL (boolean)
By default, chromium is downloaded when `npm install` runs, but you can skip the download by enabling this variable.
```bash
export NODE_CHROMIUM_SKIP_INSTALL=true
```
### CHROME_EXECUTABLE_PATH (string)
Specify a chrome executable
```bash
export CHROME_EXECUTABLE_PATH=/path/to/chrome
```
### CF_SCRAPER_HEADLESS (boolean)
Enable/disable headless mode (enabled by default)
Note: headless mode uses "xvfb" and is only available on linux
```bash
export CF_SCRAPER_HEADLESS=false
```
## TODO:
- add proxy support
- docker example

@@ -1,37 +0,20 @@

const USER_AGENT_MAC =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36';
const USER_AGENT_WINDOWS =
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36';
const USER_AGENT_LINUX =
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36';
import { Cookie } from 'tough-cookie';
/**
 * Picks the User-Agent string for the platform the process runs on.
 *
 * @returns {string} `USER_AGENT_MAC` for `'darwin'`, `USER_AGENT_WINDOWS` for
 *   `'win32'`, and `USER_AGENT_LINUX` for every other platform value.
 */
function getUserAgent() {
  switch (process.platform) {
    case 'darwin':
      return USER_AGENT_MAC;
    case 'win32':
      return USER_AGENT_WINDOWS;
    default:
      return USER_AGENT_LINUX;
  }
}
function convertCookieToTough(cookie) {
const { name, value, expires, domain, path } = cookie;
const isExpiresValid = expires && typeof expires === 'number';
/**
 * Returns element 1 of `string.match(regexp)`.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} regexp - Pattern passed to `String.prototype.match`.
 * @param {string} [errorMessage] - When truthy and nothing matches, an Error
 *   with this message is thrown.
 * @returns {string|undefined} Element 1 of the match result, or `undefined`
 *   when nothing matches and no error message was given.
 * @throws {Error} When nothing matches and `errorMessage` is truthy.
 */
function extract(string, regexp, errorMessage) {
  const found = string.match(regexp);
  if (!found) {
    if (errorMessage) {
      throw new Error(errorMessage);
    }
    return undefined;
  }
  return found[1];
}
const expiresDate = isExpiresValid
? new Date(expires * 1000)
: new Date(Date.now() + DEFAULT_EXPIRATION_TIME_IN_SECONDS * 1000);
function isCloudflareJSChallenge(body) {
return body.includes('managed_checking_msg');
return new Cookie({
key: name,
value,
expires: expiresDate,
domain: domain.startsWith('.') ? domain.substring(1) : domain,
path
});
}
/**
 * Checks a response body for the `cf_captcha_kind` marker.
 *
 * @param {string} body - Response body; its `includes` method is called.
 * @returns {boolean} `true` when the marker is present in the body.
 */
function isCloudflareCaptchaChallenge(body) {
  const captchaMarker = 'cf_captcha_kind';
  return body.includes(captchaMarker);
}
module.exports = { extract, isCloudflareJSChallenge, isCloudflareCaptchaChallenge, getUserAgent };
export { convertCookieToTough };
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc