Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development.Announcement
Sign In

python-proxy-headers

Package Overview
Dependencies
Maintainers
1
Versions
4
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

python-proxy-headers - pypi Package Compare versions

Comparing version
0.1.0
to
0.2.0
+344
python_proxy_headers/autoscraper_proxy.py
"""
AutoScraper extension for sending and receiving proxy headers.
This module provides an AutoScraper subclass that enables:
1. Sending custom headers to proxy servers during CONNECT
2. Using our ProxySession for all HTTP requests
Example usage:
from python_proxy_headers.autoscraper_proxy import ProxyAutoScraper
scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
# Build with proxy
result = scraper.build(
url='https://example.com',
wanted_list=['Example Domain'],
request_args={'proxies': {'https': 'http://proxy:8080'}}
)
# Get results with proxy
result = scraper.get_result_similar(
url='https://other-example.com',
request_args={'proxies': {'https': 'http://proxy:8080'}}
)
"""
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse
try:
from autoscraper import AutoScraper
except ImportError:
raise ImportError(
"autoscraper is required for this module. "
"Install it with: pip install autoscraper"
)
from .requests_adapter import ProxySession
class ProxyAutoScraper(AutoScraper):
    """
    AutoScraper with proxy header support.

    This class extends AutoScraper to use our ProxySession for HTTP requests,
    enabling custom proxy headers to be sent during CONNECT tunneling.

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        stack_list: Initial stack list (rules) for the scraper

    Example:
        scraper = ProxyAutoScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
        result = scraper.build(
            url='https://finance.yahoo.com/quote/AAPL/',
            wanted_list=['Apple Inc.'],
            request_args={'proxies': {'https': 'http://proxy:8080'}}
        )
        # Use the learned rules on another page
        result = scraper.get_result_similar(
            url='https://finance.yahoo.com/quote/GOOG/',
            request_args={'proxies': {'https': 'http://proxy:8080'}}
        )
    """

    def __init__(
        self,
        proxy_headers: Optional[Dict[str, str]] = None,
        stack_list: Optional[List] = None
    ):
        super().__init__(stack_list=stack_list)
        self._proxy_headers = proxy_headers or {}
        # Created lazily by _get_session() and dropped by close() /
        # set_proxy_headers().
        self._session: Optional[ProxySession] = None

    def _get_session(self) -> ProxySession:
        """Return the cached ProxySession, creating it on first use."""
        if self._session is None:
            self._session = ProxySession(proxy_headers=self._proxy_headers)
        return self._session

    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
        """
        Update the proxy headers.

        This closes the current session (if any); a new one carrying the
        updated headers is created on the next request.

        Args:
            proxy_headers: New proxy headers to use
        """
        self._proxy_headers = proxy_headers
        self.close()

    def close(self):
        """Close the underlying session, if one has been created."""
        if self._session is not None:
            self._session.close()
            self._session = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @classmethod
    def _fetch_html(cls, url, request_args=None):
        """
        Fetch HTML from URL by delegating to the parent implementation.

        Note: This class method does not use the ProxySession. For proxy
        header support, use the instance methods, which go through
        _fetch_html_with_proxy().
        """
        return super()._fetch_html(url, request_args)

    def _fetch_html_with_proxy(self, url: str, request_args: Optional[Dict] = None) -> str:
        """
        Fetch HTML from URL using ProxySession with proxy header support.

        The caller's ``request_args`` dict is never modified.

        Args:
            url: URL to fetch
            request_args: Additional request arguments (proxies, headers, etc.)

        Returns:
            HTML content as string
        """
        # Work on a shallow copy: the keys popped below must not vanish from
        # the dict the caller handed in, which is commonly reused across
        # build()/get_result_*() calls.
        request_args = dict(request_args or {})

        # Start from the scraper's default headers, then layer on the Host
        # header and any user-supplied headers (user values win).
        headers = dict(self.request_headers)
        if url:
            headers["Host"] = urlparse(url).netloc
        headers.update(request_args.pop("headers", None) or {})

        session = self._get_session()

        # Proxies are stored on the session rather than passed per request.
        proxies = request_args.pop("proxies", None)
        if proxies:
            session.proxies.update(proxies)

        res = session.get(url, headers=headers, **request_args)

        # If the encoding is reported as ISO-8859-1 but the Content-Type
        # header does not actually mention it, use the detected encoding.
        if res.encoding == "ISO-8859-1" and "ISO-8859-1" not in res.headers.get(
            "Content-Type", ""
        ):
            res.encoding = res.apparent_encoding
        return res.text

    def _get_soup_with_proxy(self, url=None, html=None, request_args=None):
        """
        Get BeautifulSoup object using ProxySession.

        Args:
            url: URL to fetch (optional if html is provided)
            html: HTML string (optional if url is provided); when given,
                no request is made
            request_args: Additional request arguments

        Returns:
            BeautifulSoup object
        """
        from html import unescape
        from bs4 import BeautifulSoup
        from autoscraper.utils import normalize

        if not html:
            html = self._fetch_html_with_proxy(url, request_args)
        return BeautifulSoup(normalize(unescape(html)), "lxml")

    def build(
        self,
        url: Optional[str] = None,
        wanted_list: Optional[List] = None,
        wanted_dict: Optional[Dict] = None,
        html: Optional[str] = None,
        request_args: Optional[Dict] = None,
        update: bool = False,
        text_fuzz_ratio: float = 1.0,
    ) -> List:
        """
        Build scraping rules with proxy header support.

        Same as AutoScraper.build() but uses ProxySession for requests.

        Parameters:
            url: URL of the target web page
            wanted_list: List of needed contents to be scraped
            wanted_dict: Dict of needed contents (keys are aliases)
            html: HTML string (alternative to URL)
            request_args: Request arguments including proxies
            update: If True, add to existing rules
            text_fuzz_ratio: Fuzziness ratio for matching

        Returns:
            List of similar results

        Raises:
            ValueError: If neither wanted_list nor a non-empty wanted_dict
                is supplied
        """
        from autoscraper.utils import normalize, unique_hashable, unique_stack_list

        if not wanted_list and not (wanted_dict and any(wanted_dict.values())):
            raise ValueError("No targets were supplied")

        # Use our proxy-aware soup getter
        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)

        result_list = []
        if update is False:
            self.stack_list = []

        # A plain wanted_list is treated as a wanted_dict with an empty alias.
        if wanted_list:
            wanted_dict = {"": wanted_list}
        wanted_list = []

        for alias, wanted_items in wanted_dict.items():
            wanted_items = [normalize(w) for w in wanted_items]
            wanted_list += wanted_items
            for wanted in wanted_items:
                children = self._get_children(soup, wanted, url, text_fuzz_ratio)
                for child in children:
                    result, stack = self._get_result_for_child(child, soup, url)
                    stack["alias"] = alias
                    result_list += result
                    self.stack_list.append(stack)

        result_list = [item.text for item in result_list]
        result_list = unique_hashable(result_list)
        self.stack_list = unique_stack_list(self.stack_list)
        return result_list

    def get_result_similar(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        soup=None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
        keep_blank: bool = False,
        keep_order: bool = False,
        contain_sibling_leaves: bool = False,
    ):
        """
        Get similar results with proxy header support.

        Same as AutoScraper.get_result_similar() but, when no soup is given
        and a url is, the page is obtained through ProxySession first and the
        resulting soup is handed to the parent implementation.
        """
        if soup is None and url is not None:
            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
        return super().get_result_similar(
            url=url,
            html=html,
            soup=soup,
            request_args=None,  # Already fetched
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
            keep_blank=keep_blank,
            keep_order=keep_order,
            contain_sibling_leaves=contain_sibling_leaves,
        )

    def get_result_exact(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        soup=None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
        keep_blank: bool = False,
    ):
        """
        Get exact results with proxy header support.

        Same as AutoScraper.get_result_exact() but, when no soup is given
        and a url is, the page is obtained through ProxySession first and the
        resulting soup is handed to the parent implementation.
        """
        if soup is None and url is not None:
            soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
        return super().get_result_exact(
            url=url,
            html=html,
            soup=soup,
            request_args=None,  # Already fetched
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
            keep_blank=keep_blank,
        )

    def get_result(
        self,
        url: Optional[str] = None,
        html: Optional[str] = None,
        request_args: Optional[Dict] = None,
        grouped: bool = False,
        group_by_alias: bool = False,
        unique: Optional[bool] = None,
        attr_fuzz_ratio: float = 1.0,
    ):
        """
        Get similar and exact results with proxy header support.

        Same as AutoScraper.get_result() but uses ProxySession. The page is
        fetched (or the html parsed) once and the same soup is used for both
        lookups.

        Returns:
            Tuple of (similar results, exact results)
        """
        soup = self._get_soup_with_proxy(url=url, html=html, request_args=request_args)
        args = dict(
            url=url,
            soup=soup,
            grouped=grouped,
            group_by_alias=group_by_alias,
            unique=unique,
            attr_fuzz_ratio=attr_fuzz_ratio,
        )
        similar = self.get_result_similar(**args)
        exact = self.get_result_exact(**args)
        return similar, exact
"""
CloudScraper extension for sending and receiving proxy headers.
This module provides a CloudScraper subclass that enables:
1. Sending custom headers to proxy servers during CONNECT
2. Capturing response headers from proxy servers
Example usage:
from python_proxy_headers.cloudscraper_proxy import create_scraper
scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
scraper.proxies = {'https': 'http://proxy:8080'}
response = scraper.get('https://example.com')
# Access proxy response headers (stored on the response object)
print(response.proxy_headers)
"""
from typing import Dict, Optional, Any
try:
import cloudscraper
from cloudscraper import CipherSuiteAdapter
except ImportError:
raise ImportError(
"cloudscraper is required for this module. "
"Install it with: pip install cloudscraper"
)
from .urllib3_proxy_manager import proxy_from_url
class CipherSuiteProxyHeaderAdapter(CipherSuiteAdapter):
    """
    CloudScraper's CipherSuiteAdapter extended with proxy header support.

    Proxy managers for non-SOCKS proxies are built with this package's
    ``proxy_from_url`` and carry the custom proxy headers given at
    construction time; everything else is left to CipherSuiteAdapter.
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        """
        Args:
            proxy_headers: Dict of extra headers to send to proxy servers
            **kwargs: Passed through unchanged to CipherSuiteAdapter
        """
        # Stored before delegating to the parent initializer.
        self._proxy_headers = proxy_headers or {}
        super().__init__(**kwargs)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """
        Return the proxy manager to use for ``proxy``.

        A manager already cached in ``self.proxy_manager`` is reused. SOCKS
        proxies are delegated to the parent class. Otherwise a new manager is
        created via ``proxy_from_url``, cached, and returned.

        Args:
            proxy: Proxy URL
            **proxy_kwargs: Extra keyword arguments for the proxy manager

        Returns:
            The proxy manager for ``proxy``
        """
        # Reuse a previously built manager for this proxy URL.
        if proxy in self.proxy_manager:
            return self.proxy_manager[proxy]

        # SOCKS proxies don't support custom headers
        if proxy.lower().startswith("socks"):
            return super().proxy_manager_for(proxy, **proxy_kwargs)

        # Standard proxy headers (e.g., Proxy-Authorization) first, then our
        # custom headers on top so they win on conflicts.
        connect_headers = self.proxy_headers(proxy)
        if self._proxy_headers:
            connect_headers.update(self._proxy_headers)

        # Forward the adapter's SSL context / source address when it has a
        # truthy value for them.
        for option in ('ssl_context', 'source_address'):
            value = getattr(self, option, None)
            if value:
                proxy_kwargs[option] = value

        new_manager = proxy_from_url(
            proxy,
            proxy_headers=connect_headers,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs,
        )
        self.proxy_manager[proxy] = new_manager
        return new_manager
class ProxyCloudScraper(cloudscraper.CloudScraper):
    """
    CloudScraper with proxy header support.

    This class extends CloudScraper to add the ability to:
    - Send custom headers to proxy servers during CONNECT tunneling
    - Receive and access headers from proxy server responses

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        **kwargs: All other arguments passed to CloudScraper

    Example:
        scraper = ProxyCloudScraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
        scraper.proxies = {'https': 'http://proxy.example.com:8080'}
        response = scraper.get('https://httpbin.org/ip')
        print(response.proxy_headers)  # Headers from proxy CONNECT response
    """

    def __init__(self, proxy_headers: Optional[Dict[str, str]] = None, **kwargs):
        self._proxy_headers = proxy_headers or {}
        # Call parent init
        super().__init__(**kwargs)
        # Replace the adapters installed by the parent with our
        # proxy-header-aware version.
        self._mount_proxy_header_adapters()

    def set_proxy_headers(self, proxy_headers: Dict[str, str]):
        """
        Update the proxy headers and remount adapters.

        Args:
            proxy_headers: New proxy headers to use
        """
        self._proxy_headers = proxy_headers
        # Fresh adapters are mounted so the new headers take effect.
        self._mount_proxy_header_adapters()

    def _mount_proxy_header_adapters(self):
        """Mount a new CipherSuiteProxyHeaderAdapter for https:// and http://.

        Each scheme gets its own adapter instance, built from the current
        proxy headers and the scraper's cipher-suite related settings.
        """
        # HTTPS first, then HTTP (proxy headers are mainly for HTTPS CONNECT).
        for prefix in ('https://', 'http://'):
            self.mount(
                prefix,
                CipherSuiteProxyHeaderAdapter(
                    proxy_headers=self._proxy_headers,
                    cipherSuite=self.cipherSuite,
                    ecdhCurve=getattr(self, 'ecdhCurve', 'prime256v1'),
                    server_hostname=getattr(self, 'server_hostname', None),
                    source_address=getattr(self, 'source_address', None),
                    ssl_context=getattr(self, 'ssl_context', None),
                ),
            )
def create_scraper(
    proxy_headers: Optional[Dict[str, str]] = None,
    sess: Optional[Any] = None,
    **kwargs
) -> ProxyCloudScraper:
    """
    Build a ProxyCloudScraper, optionally seeded from an existing session.

    Intended as a drop-in replacement for cloudscraper.create_scraper() that
    adds proxy header capabilities.

    Args:
        proxy_headers: Dict of headers to send to proxy servers
        sess: Existing session to copy attributes from
        **kwargs: All other arguments passed to CloudScraper

    Returns:
        ProxyCloudScraper instance

    Example:
        from python_proxy_headers.cloudscraper_proxy import create_scraper
        scraper = create_scraper(
            proxy_headers={'X-ProxyMesh-Country': 'US'},
            browser='chrome'
        )
        scraper.proxies = {'https': 'http://proxy:8080'}
        response = scraper.get('https://example.com')
    """
    scraper = ProxyCloudScraper(proxy_headers=proxy_headers, **kwargs)
    if not sess:
        return scraper

    # Session attributes carried over when present (not None) on ``sess``.
    copied_attrs = ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']
    for name in copied_attrs:
        value = getattr(sess, name, None)
        if value is None:
            continue
        setattr(scraper, name, value)
    return scraper


# Convenience alias
session = create_scraper
"""
PycURL extension for sending and receiving proxy headers.
This module provides helper functions and classes for working with proxy headers
in pycurl. It can be used in two ways:
1. Low-level helpers for existing pycurl code:
import pycurl
from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
c = pycurl.Curl()
c.setopt(pycurl.URL, 'https://example.com')
c.setopt(pycurl.PROXY, 'http://proxy:8080')
# Add proxy headers
set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
# Capture response headers (including proxy CONNECT headers)
capture = HeaderCapture(c)
c.perform()
print(capture.proxy_headers) # Headers from proxy CONNECT response
print(capture.origin_headers) # Headers from origin server
2. High-level convenience functions:
from python_proxy_headers.pycurl_proxy import get
response = get('https://example.com',
proxy='http://proxy:8080',
proxy_headers={'X-ProxyMesh-Country': 'US'})
print(response.proxy_headers)
"""
from io import BytesIO
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
try:
import pycurl
except ImportError:
raise ImportError(
"pycurl is required for this module. "
"Install it with: pip install pycurl"
)
# =============================================================================
# Low-level helper functions
# =============================================================================
def set_proxy_headers(curl, headers: Dict[str, str]) -> None:
    """
    Configure a curl handle to send custom headers to the proxy server.

    Does nothing when ``headers`` is empty or None.

    Args:
        curl: A pycurl.Curl instance
        headers: Dict of headers to send to the proxy

    Example:
        c = pycurl.Curl()
        c.setopt(pycurl.PROXY, 'http://proxy:8080')
        set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
        c.perform()
    """
    if not headers:
        return

    formatted = [f"{name}: {value}" for name, value in headers.items()]

    # CURLOPT_PROXYHEADER; fall back to the raw option number (10228) when
    # pycurl does not expose the constant.
    try:
        curl.setopt(pycurl.PROXYHEADER, formatted)
    except AttributeError:
        curl.setopt(10228, formatted)

    # CURLOPT_HEADEROPT = CURLHEADER_SEPARATE so the proxy headers are only
    # sent to the proxy, not the origin.
    try:
        curl.setopt(pycurl.HEADEROPT, pycurl.HEADER_SEPARATE)
    except AttributeError:
        # Constants not exposed: use raw numbers
        # (CURLOPT_HEADEROPT = 229, CURLHEADER_SEPARATE = 1).
        try:
            curl.setopt(229, 1)
        except pycurl.error:
            # Best effort: the option may not be available in older
            # libcurl versions.
            pass
class HeaderCapture:
    """
    Collects raw header lines from pycurl and groups them per HTTP response.

    Every line starting with ``HTTP/`` opens a new section; the ``Name: value``
    lines that follow belong to that section. When at least two sections were
    captured, the first is exposed as the proxy's CONNECT response
    (``proxy_headers`` / ``proxy_status``); the last section is always exposed
    as the origin response (``origin_headers`` / ``origin_status``).

    NOTE(review): any additional response on the same transfer (e.g. a
    redirect or an interim response) would presumably also create a section
    and be reported as the "proxy" section - confirm against real transfers.

    Example:
        c = pycurl.Curl()
        c.setopt(pycurl.URL, 'https://example.com')
        c.setopt(pycurl.PROXY, 'http://proxy:8080')
        capture = HeaderCapture(c)  # Installs HEADERFUNCTION callback
        c.perform()
        print(capture.proxy_headers)   # {'X-ProxyMesh-IP': '1.2.3.4', ...}
        print(capture.origin_headers)  # {'Content-Type': 'text/html', ...}
        print(capture.proxy_status)    # 200
    """

    def __init__(self, curl=None):
        """
        Create an empty capture.

        Args:
            curl: Optional pycurl.Curl instance. If provided, the
                HEADERFUNCTION callback is installed on it immediately.
        """
        # Raw lines exactly as delivered to the callback.
        self._header_lines: List[bytes] = []
        # Parsed (status, headers) pairs, one per response; rebuilt lazily.
        self._sections: List[Tuple[Optional[int], Dict[str, str]]] = []
        # True while _sections reflects _header_lines.
        self._parsed = False
        if curl is not None:
            self.install(curl)

    def install(self, curl) -> 'HeaderCapture':
        """
        Register this object's callback as the handle's HEADERFUNCTION.

        Args:
            curl: A pycurl.Curl instance

        Returns:
            self, for chaining
        """
        curl.setopt(pycurl.HEADERFUNCTION, self._header_callback)
        return self

    def _header_callback(self, header_line: bytes) -> int:
        """Store one raw header line and report its length back to pycurl."""
        self._header_lines.append(header_line)
        # New data arrived, so any previously parsed sections are stale.
        self._parsed = False
        return len(header_line)

    def _parse(self) -> None:
        """Rebuild the (status, headers) sections from the stored lines.

        No-op when the sections are already up to date.
        """
        if self._parsed:
            return

        sections: List[Tuple[Optional[int], Dict[str, str]]] = []
        self._sections = sections
        status: Optional[int] = None
        headers: Dict[str, str] = {}

        for raw_line in self._header_lines:
            text = raw_line.decode('utf-8', errors='replace').strip()

            if not text.startswith('HTTP/'):
                # Ordinary header line; lines without a colon (such as the
                # blank separator) are ignored.
                if ':' in text:
                    name, _sep, value = text.partition(':')
                    headers[name.strip()] = value.strip()
                continue

            # Status line: close the section collected so far, if any.
            if headers or status is not None:
                sections.append((status, headers))
                headers = {}

            # "HTTP/1.1 200 OK" -> 200; anything unparsable leaves None.
            fields = text.split(' ', 2)
            status = None
            if len(fields) >= 2:
                try:
                    status = int(fields[1])
                except ValueError:
                    pass

        # Flush the final section.
        if headers or status is not None:
            sections.append((status, headers))
        self._parsed = True

    def reset(self) -> None:
        """Discard all captured lines and parsed sections so the object can be reused."""
        self._header_lines.clear()
        self._sections.clear()
        self._parsed = False

    @property
    def proxy_headers(self) -> Dict[str, str]:
        """
        Headers of the first captured response, treated as the proxy's
        CONNECT response.

        Returns an empty dict when fewer than two responses were captured.
        """
        self._parse()
        has_proxy_section = len(self._sections) >= 2
        return self._sections[0][1] if has_proxy_section else {}

    @property
    def proxy_status(self) -> Optional[int]:
        """
        Status code of the first captured response, treated as the proxy's
        CONNECT response.

        Returns None when fewer than two responses were captured.
        """
        self._parse()
        has_proxy_section = len(self._sections) >= 2
        return self._sections[0][0] if has_proxy_section else None

    @property
    def origin_headers(self) -> Dict[str, str]:
        """Headers of the last captured response (empty dict if none)."""
        self._parse()
        return self._sections[-1][1] if self._sections else {}

    @property
    def origin_status(self) -> Optional[int]:
        """Status code of the last captured response (None if none)."""
        self._parse()
        return self._sections[-1][0] if self._sections else None

    @property
    def all_headers(self) -> Dict[str, str]:
        """All captured headers merged into one dict.

        Sections are merged in capture order, so on a name conflict the
        value from the later response replaces the earlier one.
        """
        self._parse()
        return {
            name: value
            for _status, section_headers in self._sections
            for name, value in section_headers.items()
        }
# =============================================================================
# High-level convenience API
# =============================================================================
@dataclass
class Response:
    """Result of a request made through the high-level API."""

    # Status code of the response.
    status_code: int
    # Response headers.
    headers: Dict[str, str]
    # Raw response body.
    content: bytes
    # Headers from the proxy response; empty dict by default.
    proxy_headers: Dict[str, str] = field(default_factory=dict)
    # Status code from the proxy response; None by default.
    proxy_status: Optional[int] = None

    @property
    def text(self) -> str:
        """The body decoded as UTF-8, with undecodable bytes replaced."""
        raw = self.content
        return raw.decode('utf-8', errors='replace')

    def raise_for_status(self) -> None:
        """Raise ``Exception`` when the status code is 400 or above."""
        if self.status_code >= 400:
            message = f"HTTP Error {self.status_code}"
            raise Exception(message)
def request(
    method: str,
    url: str,
    proxy: Optional[str] = None,
    proxy_headers: Optional[Dict[str, str]] = None,
    headers: Optional[Dict[str, str]] = None,
    data: Optional[bytes] = None,
    timeout: Optional[int] = None,
    verify: bool = True,
) -> Response:
    """
    Make an HTTP request with proxy header support.

    Args:
        method: HTTP method (GET, POST, etc.); case-insensitive
        url: Target URL
        proxy: Proxy URL (e.g., 'http://user:pass@proxy:8080')
        proxy_headers: Headers to send to the proxy
        headers: Headers to send to the origin server
        data: Request body for POST/PUT/PATCH (ignored for other methods)
        timeout: Request timeout in seconds
        verify: Whether to verify SSL certificates

    Returns:
        Response object with body, headers, and proxy_headers

    Raises:
        pycurl.error: Propagated from pycurl if the transfer fails
    """
    c = pycurl.Curl()
    body = BytesIO()
    try:
        # Installed inside the try block so the handle is closed even if
        # installing the header callback fails.
        capture = HeaderCapture(c)

        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, body.write)

        # HTTP method
        method = method.upper()
        if method == 'GET':
            c.setopt(pycurl.HTTPGET, 1)
        elif method == 'POST':
            c.setopt(pycurl.POST, 1)
            if data:
                c.setopt(pycurl.POSTFIELDS, data)
            else:
                # A POST without POSTFIELDS makes libcurl pull the body from
                # its read callback; declare a zero-length body explicitly so
                # an empty POST is sent instead.
                c.setopt(pycurl.POSTFIELDSIZE, 0)
        elif method == 'HEAD':
            c.setopt(pycurl.NOBODY, 1)
        else:
            # PUT, PATCH, DELETE and any other verb go out as a custom
            # request; only PUT and PATCH carry the supplied body.
            c.setopt(pycurl.CUSTOMREQUEST, method)
            if data and method in ('PUT', 'PATCH'):
                c.setopt(pycurl.POSTFIELDS, data)

        # Request headers
        if headers:
            c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()])

        # Proxy (proxy headers are only applied when a proxy is configured)
        if proxy:
            c.setopt(pycurl.PROXY, proxy)
            if proxy_headers:
                set_proxy_headers(c, proxy_headers)

        # Timeout
        if timeout:
            c.setopt(pycurl.TIMEOUT, timeout)

        # SSL
        if not verify:
            c.setopt(pycurl.SSL_VERIFYPEER, 0)
            c.setopt(pycurl.SSL_VERIFYHOST, 0)

        c.perform()

        return Response(
            status_code=c.getinfo(pycurl.RESPONSE_CODE),
            headers=capture.origin_headers,
            content=body.getvalue(),
            proxy_headers=capture.proxy_headers,
            proxy_status=capture.proxy_status,
        )
    finally:
        c.close()
def get(url: str, **kwargs) -> Response:
    """Send a GET request to ``url``; keyword arguments are forwarded to request()."""
    response = request('GET', url, **kwargs)
    return response
def post(url: str, **kwargs) -> Response:
    """Send a POST request to ``url``; keyword arguments are forwarded to request()."""
    response = request('POST', url, **kwargs)
    return response
def put(url: str, **kwargs) -> Response:
    """Send a PUT request to ``url``; keyword arguments are forwarded to request()."""
    response = request('PUT', url, **kwargs)
    return response
def delete(url: str, **kwargs) -> Response:
    """Send a DELETE request to ``url``; keyword arguments are forwarded to request()."""
    response = request('DELETE', url, **kwargs)
    return response
def head(url: str, **kwargs) -> Response:
    """Send a HEAD request to ``url``; keyword arguments are forwarded to request()."""
    response = request('HEAD', url, **kwargs)
    return response
def patch(url: str, **kwargs) -> Response:
    """Send a PATCH request to ``url``; keyword arguments are forwarded to request()."""
    response = request('PATCH', url, **kwargs)
    return response
+114
-89

@@ -1,5 +0,5 @@

Metadata-Version: 2.2
Metadata-Version: 2.4
Name: python-proxy-headers
Version: 0.1.0
Summary: Handle custom proxy headers for http requests in various python libraries
Version: 0.2.0
Summary: Handle custom proxy headers for http & https requests in various python libraries
Author-email: ProxyMesh <support@proxymesh.com>

@@ -9,2 +9,4 @@ Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers

Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
Project-URL: Documentation, https://python-proxy-headers.readthedocs.io/en/latest/
Project-URL: ProxyMesh, https://proxymesh.com
Classifier: Programming Language :: Python :: 3

@@ -19,135 +21,158 @@ Classifier: Operating System :: OS Independent

License-File: LICENSE
Dynamic: license-file
# Python Proxy Headers
The `python-proxy-headers` package provides support for handling custom proxy headers when making HTTPS requests in various python modules.
[![Documentation Status](https://readthedocs.org/projects/python-proxy-headers/badge/?version=latest)](https://python-proxy-headers.readthedocs.io/en/latest/?badge=latest)
[![PyPI version](https://badge.fury.io/py/python-proxy-headers.svg)](https://badge.fury.io/py/python-proxy-headers)
We currently provide extensions to the following packages:
Extensions for Python HTTP libraries to support **sending and receiving custom proxy headers** during HTTPS CONNECT tunneling.
* [urllib3](https://urllib3.readthedocs.io/en/stable/)
* [requests](https://docs.python-requests.org/en/latest/index.html)
* [aiohttp](https://docs.aiohttp.org/en/stable/index.html)
* [httpx](https://www.python-httpx.org/)
## The Problem
None of these modules provide good support for parsing custom response headers from proxy servers. And some of them make it hard to send custom headers to proxy servers. So we at [ProxyMesh](https://proxymesh.com) made these extension modules to support our customers that use Python and want to use custom headers to control our proxy behavior. But these modules can work for handling custom headers with any proxy.
When making HTTPS requests through a proxy, the connection is established via a CONNECT tunnel. During this process:
*If you are looking for [Scrapy](https://scrapy.org/) support, please see our [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers) project.*
1. **Sending headers to the proxy** - Most Python HTTP libraries don't provide an easy way to send custom headers (like `X-ProxyMesh-Country`) to the proxy server during the CONNECT handshake.
2. **Receiving headers from the proxy** - The proxy's response headers from the CONNECT request are typically discarded, making it impossible to read custom headers (like `X-ProxyMesh-IP`) that the proxy sends back.
This library solves both problems for popular Python HTTP libraries.
## Supported Libraries
| Library | Module | Use Case |
|---------|--------|----------|
| [urllib3](https://python-proxy-headers.readthedocs.io/en/latest/urllib3.html) | `urllib3_proxy_manager` | Low-level HTTP client |
| [requests](https://python-proxy-headers.readthedocs.io/en/latest/requests.html) | `requests_adapter` | Simple HTTP requests |
| [aiohttp](https://python-proxy-headers.readthedocs.io/en/latest/aiohttp.html) | `aiohttp_proxy` | Async HTTP client |
| [httpx](https://python-proxy-headers.readthedocs.io/en/latest/httpx.html) | `httpx_proxy` | Modern HTTP client |
| [pycurl](https://python-proxy-headers.readthedocs.io/en/latest/pycurl.html) | `pycurl_proxy` | libcurl bindings |
| [cloudscraper](https://python-proxy-headers.readthedocs.io/en/latest/cloudscraper.html) | `cloudscraper_proxy` | Cloudflare bypass |
| [autoscraper](https://python-proxy-headers.readthedocs.io/en/latest/autoscraper.html) | `autoscraper_proxy` | Automatic web scraping |
## Installation
Examples for how to use these extension modules are described below. You must first do the following:
```bash
pip install python-proxy-headers
```
1. `pip install python-proxy-headers`
2. Install the appropriate package based on the python module you want to use.
Then install the HTTP library you want to use (e.g., `pip install requests`).
This package does not have any dependencies because we don't know which module you want to use.
> **Note:** This package has no dependencies by default - install only what you need.
You can also find more example code in our [proxy-examples for python](https://github.com/proxymesh/proxy-examples/tree/main/python).
## Quick Start
## urllib3
### requests
If you just want to send custom proxy headers, but don't need to receive proxy response headers, then you can use [urllib3.ProxyManager](https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html#urllib3.ProxyManager), like so:
```python
from python_proxy_headers.requests_adapter import ProxySession
``` python
import urllib3
proxy = urllib3.ProxyManager('http://PROXYHOST:PORT', proxy_headers={'X-ProxyMesh-Country': 'US'})
r = proxy.request('GET', 'https://api.ipify.org?format=json')
with ProxySession(proxy_headers={'X-ProxyMesh-Country': 'US'}) as session:
session.proxies = {'https': 'http://user:pass@proxy.example.com:8080'}
response = session.get('https://httpbin.org/ip')
# Proxy headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
Note that when using this method, if you keep reusing the same `ProxyManager` instance, you may be re-using the proxy connection, which may have different behavior than if you create a new proxy connection for each request. For example, with ProxyMesh you may keep getting the same IP address if you reuse the proxy connection.
### httpx
To get proxy response headers, use our extension module like this:
```python
from python_proxy_headers.httpx_proxy import get
``` python
from python_proxy_headers import urllib3_proxy_manager
proxy = urllib3_proxy_manager.ProxyHeaderManager('http://PROXYHOST:PORT')
r = proxy.request('GET', 'https://api.ipify.org?format=json')
r.headers['X-ProxyMesh-IP']
response = get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
)
# Proxy CONNECT response headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
You can also pass `proxy_headers` into our `ProxyHeaderManager` as well. For example, you can pass back the same `X-ProxyMesh-IP` header to ensure you get the same IP address on subsequent requests.
### aiohttp
## requests
```python
import asyncio
from python_proxy_headers.aiohttp_proxy import ProxyClientSession
The requests adapter builds on our `urllib3_proxy_manager` module to make it easy to pass in proxy headers and receive proxy response headers.
async def main():
async with ProxyClientSession() as session:
async with session.get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
) as response:
# Proxy headers merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
``` python
from python_proxy_headers import requests_adapter
r = requests_adapter.get('https://api.ipify.org?format=json', proxies={'http': 'http://PROXYHOST:PORT', 'https': 'http://PROXYHOST:PORT'}, proxy_headers={'X-ProxyMesh-Country': 'US'})
r.headers['X-ProxyMesh-IP']
asyncio.run(main())
```
The `requests_adapter` module supports all the standard requests methods: `get`, `post`, `put`, `delete`, etc.
### pycurl (low-level)
## aiohttp
```python
import pycurl
from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
While it's not documented, aiohttp does support passing in custom proxy headers by default.
c = pycurl.Curl()
c.setopt(pycurl.URL, 'https://httpbin.org/ip')
c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080')
``` python
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
# Add these two lines to any existing pycurl code
set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
capture = HeaderCapture(c)
c.perform()
print(capture.proxy_headers) # Headers from proxy CONNECT response
c.close()
```
However, if you want to get proxy response headers, you should use our extension module:
### cloudscraper
``` python
from python_proxy_headers import aiohttp_proxy
async with aiohttp_proxy.ProxyClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
```python
from python_proxy_headers.cloudscraper_proxy import create_scraper
r.headers['X-ProxyMesh-IP']
# Drop-in replacement for cloudscraper.create_scraper()
scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
scraper.proxies = {'https': 'http://proxy.example.com:8080'}
response = scraper.get('https://example.com')
# All CloudScraper features (Cloudflare bypass) preserved
```
## httpx
## Testing
httpx also supports proxy headers by default, though it's not documented:
A test harness is included to verify proxy header functionality:
``` python
import httpx
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
```
```bash
# Set your proxy
export PROXY_URL='http://user:pass@proxy.example.com:8080'
But to get the response headers, you need to use our extension module:
# Test all modules
python test_proxy_headers.py
``` python
import httpx
from python_proxy_headers.httpx_proxy import HTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
# Test specific modules
python test_proxy_headers.py requests httpx
r.headers['X-ProxyMesh-IP']
# Verbose output (show header values)
python test_proxy_headers.py -v
```
This module also provides helper methods similar to requests:
## Documentation
``` python
import httpx
from python_proxy_headers import httpx_proxy
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
r = httpx_proxy.get('https://api.ipify.org?format=json', proxy=proxy)
r.headers['X-ProxyMesh-IP']
```
For detailed documentation, API reference, and more examples:
And finally, httpx supports async requests, so we provide an async extension too:
- **Full Documentation:** [python-proxy-headers.readthedocs.io](https://python-proxy-headers.readthedocs.io/en/latest/)
- **Example Code:** [proxy-examples for Python](https://github.com/proxymesh/proxy-examples/tree/main/python)
``` python
import httpx
from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = AsyncHTTPProxyTransport(proxy=proxy)
async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client:
r = await client.get('https://api.ipify.org?format=json')
## Related Projects
r.headers['X-ProxyMesh-IP']
```
- **[scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers)** - Proxy header support for Scrapy
Our httpx helper module internally provides extension classes for [httpcore](https://www.encode.io/httpcore/), for handling proxy headers over tunnel connections.
You can use those classes if you're building on top of httpcore.
## About
Created by [ProxyMesh](https://proxymesh.com) to help our customers use custom headers to control proxy behavior. Works with any proxy that supports custom headers.
## License
MIT License

@@ -7,7 +7,7 @@ [build-system]

name = "python-proxy-headers"
version = "0.1.0"
version = "0.2.0"
authors = [
{ name="ProxyMesh", email="support@proxymesh.com" },
]
description = "Handle custom proxy headers for http requests in various python libraries"
description = "Handle custom proxy headers for http & https requests in various python libraries"
readme = "README.md"

@@ -28,1 +28,3 @@ requires-python = ">=3.8"

Issues = "https://github.com/proxymesh/python-proxy-headers/issues"
Documentation = "https://python-proxy-headers.readthedocs.io/en/latest/"
ProxyMesh = "https://proxymesh.com"

@@ -1,5 +0,5 @@

Metadata-Version: 2.2
Metadata-Version: 2.4
Name: python-proxy-headers
Version: 0.1.0
Summary: Handle custom proxy headers for http requests in various python libraries
Version: 0.2.0
Summary: Handle custom proxy headers for http & https requests in various python libraries
Author-email: ProxyMesh <support@proxymesh.com>

@@ -9,2 +9,4 @@ Project-URL: Homepage, https://github.com/proxymesh/python-proxy-headers

Project-URL: Issues, https://github.com/proxymesh/python-proxy-headers/issues
Project-URL: Documentation, https://python-proxy-headers.readthedocs.io/en/latest/
Project-URL: ProxyMesh, https://proxymesh.com
Classifier: Programming Language :: Python :: 3

@@ -19,135 +21,158 @@ Classifier: Operating System :: OS Independent

License-File: LICENSE
Dynamic: license-file
# Python Proxy Headers
The `python-proxy-headers` package provides support for handling custom proxy headers when making HTTPS requests in various python modules.
[![Documentation Status](https://readthedocs.org/projects/python-proxy-headers/badge/?version=latest)](https://python-proxy-headers.readthedocs.io/en/latest/?badge=latest)
[![PyPI version](https://badge.fury.io/py/python-proxy-headers.svg)](https://badge.fury.io/py/python-proxy-headers)
We currently provide extensions to the following packages:
Extensions for Python HTTP libraries to support **sending and receiving custom proxy headers** during HTTPS CONNECT tunneling.
* [urllib3](https://urllib3.readthedocs.io/en/stable/)
* [requests](https://docs.python-requests.org/en/latest/index.html)
* [aiohttp](https://docs.aiohttp.org/en/stable/index.html)
* [httpx](https://www.python-httpx.org/)
## The Problem
None of these modules provide good support for parsing custom response headers from proxy servers. And some of them make it hard to send custom headers to proxy servers. So we at [ProxyMesh](https://proxymesh.com) made these extension modules to support our customers that use Python and want to use custom headers to control our proxy behavior. But these modules can work for handling custom headers with any proxy.
When making HTTPS requests through a proxy, the connection is established via a CONNECT tunnel. During this process:
*If you are looking for [Scrapy](https://scrapy.org/) support, please see our [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers) project.*
1. **Sending headers to the proxy** - Most Python HTTP libraries don't provide an easy way to send custom headers (like `X-ProxyMesh-Country`) to the proxy server during the CONNECT handshake.
2. **Receiving headers from the proxy** - The proxy's response headers from the CONNECT request are typically discarded, making it impossible to read custom headers (like `X-ProxyMesh-IP`) that the proxy sends back.
This library solves both problems for popular Python HTTP libraries.
## Supported Libraries
| Library | Module | Use Case |
|---------|--------|----------|
| [urllib3](https://python-proxy-headers.readthedocs.io/en/latest/urllib3.html) | `urllib3_proxy_manager` | Low-level HTTP client |
| [requests](https://python-proxy-headers.readthedocs.io/en/latest/requests.html) | `requests_adapter` | Simple HTTP requests |
| [aiohttp](https://python-proxy-headers.readthedocs.io/en/latest/aiohttp.html) | `aiohttp_proxy` | Async HTTP client |
| [httpx](https://python-proxy-headers.readthedocs.io/en/latest/httpx.html) | `httpx_proxy` | Modern HTTP client |
| [pycurl](https://python-proxy-headers.readthedocs.io/en/latest/pycurl.html) | `pycurl_proxy` | libcurl bindings |
| [cloudscraper](https://python-proxy-headers.readthedocs.io/en/latest/cloudscraper.html) | `cloudscraper_proxy` | Cloudflare bypass |
| [autoscraper](https://python-proxy-headers.readthedocs.io/en/latest/autoscraper.html) | `autoscraper_proxy` | Automatic web scraping |
## Installation
Examples for how to use these extension modules are described below. You must first do the following:
```bash
pip install python-proxy-headers
```
1. `pip install python-proxy-headers`
2. Install the appropriate package based on the python module you want to use.
Then install the HTTP library you want to use (e.g., `pip install requests`).
This package does not have any dependencies because we don't know which module you want to use.
> **Note:** This package has no dependencies by default - install only what you need.
You can also find more example code in our [proxy-examples for python](https://github.com/proxymesh/proxy-examples/tree/main/python).
## Quick Start
## urllib3
### requests
If you just want to send custom proxy headers, but don't need to receive proxy response headers, then you can use [urllib3.ProxyManager](https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html#urllib3.ProxyManager), like so:
```python
from python_proxy_headers.requests_adapter import ProxySession
``` python
import urllib3
proxy = urllib3.ProxyManager('http://PROXYHOST:PORT', proxy_headers={'X-ProxyMesh-Country': 'US'})
r = proxy.request('GET', 'https://api.ipify.org?format=json')
with ProxySession(proxy_headers={'X-ProxyMesh-Country': 'US'}) as session:
session.proxies = {'https': 'http://user:pass@proxy.example.com:8080'}
response = session.get('https://httpbin.org/ip')
# Proxy headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
Note that when using this method, if you keep reusing the same `ProxyManager` instance, you may be re-using the proxy connection, which may have different behavior than if you create a new proxy connection for each request. For example, with ProxyMesh you may keep getting the same IP address if you reuse the proxy connection.
### httpx
To get proxy response headers, use our extension module like this:
```python
from python_proxy_headers.httpx_proxy import get
``` python
from python_proxy_headers import urllib3_proxy_manager
proxy = urllib3_proxy_manager.ProxyHeaderManager('http://PROXYHOST:PORT')
r = proxy.request('GET', 'https://api.ipify.org?format=json')
r.headers['X-ProxyMesh-IP']
response = get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
)
# Proxy CONNECT response headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
You can also pass `proxy_headers` into our `ProxyHeaderManager`. For example, you can pass back the same `X-ProxyMesh-IP` header to ensure you get the same IP address on subsequent requests.
### aiohttp
## requests
```python
import asyncio
from python_proxy_headers.aiohttp_proxy import ProxyClientSession
The requests adapter builds on our `urllib3_proxy_manager` module to make it easy to pass in proxy headers and receive proxy response headers.
async def main():
async with ProxyClientSession() as session:
async with session.get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
) as response:
# Proxy headers merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
``` python
from python_proxy_headers import requests_adapter
r = requests_adapter.get('https://api.ipify.org?format=json', proxies={'http': 'http://PROXYHOST:PORT', 'https': 'http://PROXYHOST:PORT'}, proxy_headers={'X-ProxyMesh-Country': 'US'})
r.headers['X-ProxyMesh-IP']
asyncio.run(main())
```
The `requests_adapter` module supports all the standard requests methods: `get`, `post`, `put`, `delete`, etc.
### pycurl (low-level)
## aiohttp
```python
import pycurl
from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
While it's not documented, aiohttp does support passing in custom proxy headers by default.
c = pycurl.Curl()
c.setopt(pycurl.URL, 'https://httpbin.org/ip')
c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080')
``` python
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
# Add these two lines to any existing pycurl code
set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
capture = HeaderCapture(c)
c.perform()
print(capture.proxy_headers) # Headers from proxy CONNECT response
c.close()
```
However, if you want to get proxy response headers, you should use our extension module:
### cloudscraper
``` python
from python_proxy_headers import aiohttp_proxy
async with aiohttp_proxy.ProxyClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
```python
from python_proxy_headers.cloudscraper_proxy import create_scraper
r.headers['X-ProxyMesh-IP']
# Drop-in replacement for cloudscraper.create_scraper()
scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
scraper.proxies = {'https': 'http://proxy.example.com:8080'}
response = scraper.get('https://example.com')
# All CloudScraper features (Cloudflare bypass) preserved
```
## httpx
## Testing
httpx also supports proxy headers by default, though it's not documented:
A test harness is included to verify proxy header functionality:
``` python
import httpx
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
```
```bash
# Set your proxy
export PROXY_URL='http://user:pass@proxy.example.com:8080'
But to get the response headers, you need to use our extension module:
# Test all modules
python test_proxy_headers.py
``` python
import httpx
from python_proxy_headers.httpx_proxy import HTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
# Test specific modules
python test_proxy_headers.py requests httpx
r.headers['X-ProxyMesh-IP']
# Verbose output (show header values)
python test_proxy_headers.py -v
```
This module also provides helper methods similar to requests:
## Documentation
``` python
import httpx
from python_proxy_headers import httpx_proxy
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
r = httpx_proxy.get('https://api.ipify.org?format=json', proxy=proxy)
r.headers['X-ProxyMesh-IP']
```
For detailed documentation, API reference, and more examples:
And finally, httpx supports async requests, so we provide an async extension too:
- **Full Documentation:** [python-proxy-headers.readthedocs.io](https://python-proxy-headers.readthedocs.io/en/latest/)
- **Example Code:** [proxy-examples for Python](https://github.com/proxymesh/proxy-examples/tree/main/python)
``` python
import httpx
from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = AsyncHTTPProxyTransport(proxy=proxy)
async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client:
r = await client.get('https://api.ipify.org?format=json')
## Related Projects
r.headers['X-ProxyMesh-IP']
```
- **[scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers)** - Proxy header support for Scrapy
Our httpx helper module internally provides extension classes for [httpcore](https://www.encode.io/httpcore/), for handling proxy headers over tunnel connections.
You can use those classes if you're building on top of httpcore.
## About
Created by [ProxyMesh](https://proxymesh.com) to help our customers use custom headers to control proxy behavior. Works with any proxy that supports custom headers.
## License
MIT License

@@ -6,3 +6,6 @@ LICENSE

python_proxy_headers/aiohttp_proxy.py
python_proxy_headers/autoscraper_proxy.py
python_proxy_headers/cloudscraper_proxy.py
python_proxy_headers/httpx_proxy.py
python_proxy_headers/pycurl_proxy.py
python_proxy_headers/requests_adapter.py

@@ -9,0 +12,0 @@ python_proxy_headers/urllib3_proxy_manager.py

+108
-86
# Python Proxy Headers
The `python-proxy-headers` package provides support for handling custom proxy headers when making HTTPS requests in various python modules.
[![Documentation Status](https://readthedocs.org/projects/python-proxy-headers/badge/?version=latest)](https://python-proxy-headers.readthedocs.io/en/latest/?badge=latest)
[![PyPI version](https://badge.fury.io/py/python-proxy-headers.svg)](https://badge.fury.io/py/python-proxy-headers)
We currently provide extensions to the following packages:
Extensions for Python HTTP libraries to support **sending and receiving custom proxy headers** during HTTPS CONNECT tunneling.
* [urllib3](https://urllib3.readthedocs.io/en/stable/)
* [requests](https://docs.python-requests.org/en/latest/index.html)
* [aiohttp](https://docs.aiohttp.org/en/stable/index.html)
* [httpx](https://www.python-httpx.org/)
## The Problem
None of these modules provide good support for parsing custom response headers from proxy servers. And some of them make it hard to send custom headers to proxy servers. So we at [ProxyMesh](https://proxymesh.com) made these extension modules to support our customers that use Python and want to use custom headers to control our proxy behavior. But these modules can work for handling custom headers with any proxy.
When making HTTPS requests through a proxy, the connection is established via a CONNECT tunnel. During this process:
*If you are looking for [Scrapy](https://scrapy.org/) support, please see our [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers) project.*
1. **Sending headers to the proxy** - Most Python HTTP libraries don't provide an easy way to send custom headers (like `X-ProxyMesh-Country`) to the proxy server during the CONNECT handshake.
2. **Receiving headers from the proxy** - The proxy's response headers from the CONNECT request are typically discarded, making it impossible to read custom headers (like `X-ProxyMesh-IP`) that the proxy sends back.
This library solves both problems for popular Python HTTP libraries.
## Supported Libraries
| Library | Module | Use Case |
|---------|--------|----------|
| [urllib3](https://python-proxy-headers.readthedocs.io/en/latest/urllib3.html) | `urllib3_proxy_manager` | Low-level HTTP client |
| [requests](https://python-proxy-headers.readthedocs.io/en/latest/requests.html) | `requests_adapter` | Simple HTTP requests |
| [aiohttp](https://python-proxy-headers.readthedocs.io/en/latest/aiohttp.html) | `aiohttp_proxy` | Async HTTP client |
| [httpx](https://python-proxy-headers.readthedocs.io/en/latest/httpx.html) | `httpx_proxy` | Modern HTTP client |
| [pycurl](https://python-proxy-headers.readthedocs.io/en/latest/pycurl.html) | `pycurl_proxy` | libcurl bindings |
| [cloudscraper](https://python-proxy-headers.readthedocs.io/en/latest/cloudscraper.html) | `cloudscraper_proxy` | Cloudflare bypass |
| [autoscraper](https://python-proxy-headers.readthedocs.io/en/latest/autoscraper.html) | `autoscraper_proxy` | Automatic web scraping |
## Installation
Examples for how to use these extension modules are described below. You must first do the following:
```bash
pip install python-proxy-headers
```
1. `pip install python-proxy-headers`
2. Install the appropriate package based on the python module you want to use.
Then install the HTTP library you want to use (e.g., `pip install requests`).
This package does not have any dependencies because we don't know which module you want to use.
> **Note:** This package has no dependencies by default - install only what you need.
You can also find more example code in our [proxy-examples for python](https://github.com/proxymesh/proxy-examples/tree/main/python).
## Quick Start
## urllib3
### requests
If you just want to send custom proxy headers, but don't need to receive proxy response headers, then you can use [urllib3.ProxyManager](https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html#urllib3.ProxyManager), like so:
```python
from python_proxy_headers.requests_adapter import ProxySession
``` python
import urllib3
proxy = urllib3.ProxyManager('http://PROXYHOST:PORT', proxy_headers={'X-ProxyMesh-Country': 'US'})
r = proxy.request('GET', 'https://api.ipify.org?format=json')
with ProxySession(proxy_headers={'X-ProxyMesh-Country': 'US'}) as session:
session.proxies = {'https': 'http://user:pass@proxy.example.com:8080'}
response = session.get('https://httpbin.org/ip')
# Proxy headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
Note that when using this method, if you keep reusing the same `ProxyManager` instance, you may be re-using the proxy connection, which may have different behavior than if you create a new proxy connection for each request. For example, with ProxyMesh you may keep getting the same IP address if you reuse the proxy connection.
### httpx
To get proxy response headers, use our extension module like this:
```python
from python_proxy_headers.httpx_proxy import get
``` python
from python_proxy_headers import urllib3_proxy_manager
proxy = urllib3_proxy_manager.ProxyHeaderManager('http://PROXYHOST:PORT')
r = proxy.request('GET', 'https://api.ipify.org?format=json')
r.headers['X-ProxyMesh-IP']
response = get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
)
# Proxy CONNECT response headers are merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
```
You can also pass `proxy_headers` into our `ProxyHeaderManager`. For example, you can pass back the same `X-ProxyMesh-IP` header to ensure you get the same IP address on subsequent requests.
### aiohttp
## requests
```python
import asyncio
from python_proxy_headers.aiohttp_proxy import ProxyClientSession
The requests adapter builds on our `urllib3_proxy_manager` module to make it easy to pass in proxy headers and receive proxy response headers.
async def main():
async with ProxyClientSession() as session:
async with session.get(
'https://httpbin.org/ip',
proxy='http://user:pass@proxy.example.com:8080'
) as response:
# Proxy headers merged into response.headers
print(response.headers.get('X-ProxyMesh-IP'))
``` python
from python_proxy_headers import requests_adapter
r = requests_adapter.get('https://api.ipify.org?format=json', proxies={'http': 'http://PROXYHOST:PORT', 'https': 'http://PROXYHOST:PORT'}, proxy_headers={'X-ProxyMesh-Country': 'US'})
r.headers['X-ProxyMesh-IP']
asyncio.run(main())
```
The `requests_adapter` module supports all the standard requests methods: `get`, `post`, `put`, `delete`, etc.
### pycurl (low-level)
## aiohttp
```python
import pycurl
from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture
While it's not documented, aiohttp does support passing in custom proxy headers by default.
c = pycurl.Curl()
c.setopt(pycurl.URL, 'https://httpbin.org/ip')
c.setopt(pycurl.PROXY, 'http://proxy.example.com:8080')
``` python
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
# Add these two lines to any existing pycurl code
set_proxy_headers(c, {'X-ProxyMesh-Country': 'US'})
capture = HeaderCapture(c)
c.perform()
print(capture.proxy_headers) # Headers from proxy CONNECT response
c.close()
```
However, if you want to get proxy response headers, you should use our extension module:
### cloudscraper
``` python
from python_proxy_headers import aiohttp_proxy
async with aiohttp_proxy.ProxyClientSession() as session:
async with session.get('https://api.ipify.org?format=json', proxy="http://PROXYHOST:PORT", proxy_headers={'X-ProxyMesh-Country': 'US'}) as r:
await r.text()
```python
from python_proxy_headers.cloudscraper_proxy import create_scraper
r.headers['X-ProxyMesh-IP']
# Drop-in replacement for cloudscraper.create_scraper()
scraper = create_scraper(proxy_headers={'X-ProxyMesh-Country': 'US'})
scraper.proxies = {'https': 'http://proxy.example.com:8080'}
response = scraper.get('https://example.com')
# All CloudScraper features (Cloudflare bypass) preserved
```
## httpx
## Testing
httpx also supports proxy headers by default, though it's not documented:
A test harness is included to verify proxy header functionality:
``` python
import httpx
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
```
```bash
# Set your proxy
export PROXY_URL='http://user:pass@proxy.example.com:8080'
But to get the response headers, you need to use our extension module:
# Test all modules
python test_proxy_headers.py
``` python
import httpx
from python_proxy_headers.httpx_proxy import HTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = HTTPProxyTransport(proxy=proxy)
with httpx.Client(mounts={'http://': transport, 'https://': transport}) as client:
r = client.get('https://api.ipify.org?format=json')
# Test specific modules
python test_proxy_headers.py requests httpx
r.headers['X-ProxyMesh-IP']
# Verbose output (show header values)
python test_proxy_headers.py -v
```
This module also provides helper methods similar to requests:
## Documentation
``` python
import httpx
from python_proxy_headers import httpx_proxy
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
r = httpx_proxy.get('https://api.ipify.org?format=json', proxy=proxy)
r.headers['X-ProxyMesh-IP']
```
For detailed documentation, API reference, and more examples:
And finally, httpx supports async requests, so we provide an async extension too:
- **Full Documentation:** [python-proxy-headers.readthedocs.io](https://python-proxy-headers.readthedocs.io/en/latest/)
- **Example Code:** [proxy-examples for Python](https://github.com/proxymesh/proxy-examples/tree/main/python)
``` python
import httpx
from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport
proxy = httpx.Proxy('http://PROXYHOST:PORT', headers={'X-ProxyMesh-Country': 'US'})
transport = AsyncHTTPProxyTransport(proxy=proxy)
async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client:
r = await client.get('https://api.ipify.org?format=json')
## Related Projects
r.headers['X-ProxyMesh-IP']
```
- **[scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers)** - Proxy header support for Scrapy
Our httpx helper module internally provides extension classes for [httpcore](https://www.encode.io/httpcore/), for handling proxy headers over tunnel connections.
You can use those classes if you're building on top of httpcore.
## About
Created by [ProxyMesh](https://proxymesh.com) to help our customers use custom headers to control proxy behavior. Works with any proxy that supports custom headers.
## License
MIT License