Fix CDP proxy auth: add Request-stage pattern for Fetch.authRequired (#4945)

This commit is contained in:
LawyZheng 2026-03-03 00:49:41 +08:00 committed by GitHub
parent b517e3d7fb
commit 48571437aa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 110 additions and 32 deletions

View file

@ -7,10 +7,13 @@ downloadPath does not work (e.g., Playwright bug #38805 — remote Windows Chrom
ignoring Linux paths).
Flow:
1. Enable Fetch interception at Response stage for each page
1. Enable Fetch interception for each page:
- Response stage: detect and intercept downloads
- Request stage (when proxy auth configured): enable Fetch.authRequired for proxy 407 challenges
2. On each paused request:
- Non-download Fetch.continueResponse (pass through)
- Download extract body via stream save to disk Fetch.fulfillRequest (empty body blocks browser save)
- Request stage Fetch.continueRequest (pass through to server)
- Response non-download Fetch.continueResponse (pass through)
- Response download extract body via stream save to disk Fetch.fulfillRequest
"""
from __future__ import annotations
@ -194,13 +197,15 @@ def extract_filename(headers: dict[str, str], url: str, index: int) -> str:
class CDPDownloadInterceptor:
"""
Intercepts download responses via the CDP Fetch domain.
Intercepts download responses via the CDP Fetch domain and optionally handles
proxy authentication via Fetch.authRequired.
Flow:
1. Enable Fetch interception at Response stage
1. Enable Fetch interception (Response stage for downloads; Request stage + handleAuthRequests for proxy auth)
2. On each paused request:
- Non-download Fetch.continueResponse (pass through)
- Download extract body save to disk Fetch.fulfillRequest (block browser save)
- Request stage Fetch.continueRequest (pass through)
- Response non-download Fetch.continueResponse (pass through)
- Response download extract body save to disk Fetch.fulfillRequest
"""
def __init__(
@ -226,25 +231,36 @@ class CDPDownloadInterceptor:
LOG.info("CDP download interceptor download dir set", download_dir=download_dir)
async def enable_for_page(self, page: Page) -> None:
"""Create a CDP session for the given page and enable Fetch interception."""
"""Create a CDP session for the given page and enable Fetch interception.
When proxy credentials are configured, also enables Fetch.authRequired handling
at the page level matching Playwright's internal approach (CRNetworkManager).
Playwright uses Request-stage interception with handleAuthRequests to receive
proxy 407 challenges via Fetch.authRequired.
"""
cdp_session = await page.context.new_cdp_session(page)
# Capture cdp_session in the closure so the handler uses the correct session
# (requestId is scoped to the session that fired Fetch.requestPaused).
cdp_session.on("Fetch.requestPaused", lambda event: self._on_request_paused(event, cdp_session))
# When proxy credentials are provided, also handle Fetch.authRequired events.
# This solves the problem where Playwright's new_context(proxy=...) cannot handle
# proxy authentication (407) on remote CDP browsers.
has_proxy_auth = bool(self._proxy_username and self._proxy_password)
if has_proxy_auth:
cdp_session.on("Fetch.authRequired", lambda event: self._on_auth_required(event, cdp_session))
# Note: Fetch.authRequired events are controlled solely by handleAuthRequests,
# independent of the patterns array (which only affects Fetch.requestPaused).
# Always intercept Response stage for download detection.
# When proxy auth is needed, also intercept Request stage (like Playwright's
# CRNetworkManager) — Chrome requires Request-stage patterns for
# Fetch.authRequired to fire on proxy 407 challenges.
patterns: list[dict[str, str]] = [{"requestStage": "Response"}]
if has_proxy_auth:
# urlPattern "*" intercepts all requests at Request stage, which adds overhead.
# This is required: Chrome only fires Fetch.authRequired for proxy 407 challenges
# when a Request-stage pattern is registered.
patterns.append({"urlPattern": "*", "requestStage": "Request"})
await cdp_session.send(
"Fetch.enable",
{
"patterns": [{"requestStage": "Response"}],
"patterns": patterns,
"handleAuthRequests": has_proxy_auth,
},
)
@ -347,15 +363,30 @@ class CDPDownloadInterceptor:
)
async def _handle_request_paused(self, event: dict[str, Any], cdp_session: CDPSession) -> None:
"""Async handler for paused requests."""
"""Async handler for paused requests.
Handles both Request-stage and Response-stage events:
- Request stage (no responseStatusCode): continue the request immediately.
We intercept at Request stage only to make Fetch.authRequired fire for proxy auth.
- Response stage: check for downloads and intercept if needed.
"""
request_id = event["requestId"]
response_status = event.get("responseStatusCode", 0)
raw_response_headers = event.get("responseHeaders", [])
response_headers = _parse_headers(raw_response_headers)
response_status = event.get("responseStatusCode")
url = event.get("request", {}).get("url", "<unknown>")
resource_type = event.get("resourceType", "")
try:
# Request stage: no response yet (responseStatusCode absent). Continue the request
# so it proceeds to the server. We only intercept Request stage to enable
# Fetch.authRequired for proxy 407 challenges.
if response_status is None:
await cdp_session.send("Fetch.continueRequest", {"requestId": request_id})
return
# Response stage: check for downloads
raw_response_headers = event.get("responseHeaders", [])
response_headers = _parse_headers(raw_response_headers)
resource_type = event.get("resourceType", "")
if is_download_response(response_headers, response_status, resource_type):
LOG.info(
"CDP download response detected",
@ -378,10 +409,16 @@ class CDPDownloadInterceptor:
exc_info=True,
error=str(e),
)
try:
await self._continue_response(cdp_session, request_id)
except Exception:
pass
# For Response-stage errors (e.g. download handling failed), try to let the
# response through so the request doesn't hang indefinitely.
# Request-stage errors don't need recovery here — either continueRequest already
# succeeded (and retrying would fail on an already-continued request), or it
# failed (and retrying the same call won't help).
if response_status is not None:
try:
await self._continue_response(cdp_session, request_id)
except Exception:
pass
async def _continue_response(self, cdp_session: CDPSession, request_id: str) -> None:
"""Let a non-download response pass through to the browser."""

View file

@ -371,7 +371,7 @@ class TestCDPDownloadInterceptorProxyAuth:
@pytest.mark.asyncio
async def test_enable_for_page_with_proxy_auth(self) -> None:
"""enable_for_page should set handleAuthRequests=True and register authRequired handler."""
"""enable_for_page with credentials should add Request-stage pattern and authRequired handler."""
interceptor = self._make_interceptor(proxy_username="user", proxy_password="pass")
mock_cdp_session = self._make_cdp_session()
@ -381,23 +381,26 @@ class TestCDPDownloadInterceptorProxyAuth:
await interceptor.enable_for_page(mock_page)
# Verify Fetch.enable was called with handleAuthRequests=True
# Verify Fetch.enable with both Response (downloads) and Request (auth) patterns
mock_cdp_session.send.assert_called_once_with(
"Fetch.enable",
{
"patterns": [{"requestStage": "Response"}],
"patterns": [
{"requestStage": "Response"},
{"urlPattern": "*", "requestStage": "Request"},
],
"handleAuthRequests": True,
},
)
# Verify both event handlers were registered
# Verify both handlers registered
event_names = [call.args[0] for call in mock_cdp_session.on.call_args_list]
assert "Fetch.requestPaused" in event_names
assert "Fetch.authRequired" in event_names
@pytest.mark.asyncio
async def test_enable_for_page_without_proxy_auth(self) -> None:
"""enable_for_page without credentials should set handleAuthRequests=False."""
"""enable_for_page without credentials should only intercept Response stage."""
interceptor = self._make_interceptor()
mock_cdp_session = self._make_cdp_session()
@ -407,7 +410,7 @@ class TestCDPDownloadInterceptorProxyAuth:
await interceptor.enable_for_page(mock_page)
# Verify Fetch.enable was called with handleAuthRequests=False
# Verify Fetch.enable with Response-only pattern, no auth
mock_cdp_session.send.assert_called_once_with(
"Fetch.enable",
{
@ -416,11 +419,49 @@ class TestCDPDownloadInterceptorProxyAuth:
},
)
# Verify only requestPaused handler was registered (not authRequired)
# Verify only requestPaused handler (no authRequired)
event_names = [call.args[0] for call in mock_cdp_session.on.call_args_list]
assert "Fetch.requestPaused" in event_names
assert "Fetch.authRequired" not in event_names
@pytest.mark.asyncio
async def test_request_stage_continues_request(self) -> None:
"""Request-stage events (no responseStatusCode) should be continued with Fetch.continueRequest."""
interceptor = self._make_interceptor(proxy_username="user", proxy_password="pass")
cdp_session = self._make_cdp_session()
event = {
"requestId": "req-1",
"request": {"url": "https://example.com/page"},
"resourceType": "Document",
# No responseStatusCode — this is a Request-stage event
}
await interceptor._handle_request_paused(event, cdp_session)
cdp_session.send.assert_called_once_with("Fetch.continueRequest", {"requestId": "req-1"})
@pytest.mark.asyncio
async def test_request_stage_error_does_not_retry(self) -> None:
"""Request-stage errors should not attempt recovery (no duplicate continueRequest)."""
interceptor = self._make_interceptor(proxy_username="user", proxy_password="pass")
cdp_session = self._make_cdp_session()
cdp_session.send.side_effect = Exception("continueRequest failed")
event = {
"requestId": "req-err",
"request": {"url": "https://example.com/page"},
"resourceType": "Document",
# No responseStatusCode — Request-stage event
}
await interceptor._handle_request_paused(event, cdp_session)
# Only one call: the original continueRequest that failed. No recovery attempt.
assert cdp_session.send.call_count == 1
assert cdp_session.send.call_args.args[0] == "Fetch.continueRequest"
@pytest.mark.asyncio
async def test_malformed_event_missing_request_id(self) -> None:
"""Malformed event without requestId should be caught, not raise."""