mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-04-26 10:41:29 +00:00
fix: preserve safe remote fetch compatibility for public sites
Restore remote document fetch compatibility for public sites after the CVE-2026-4308 SSRF hardening. The initial security fix correctly blocked non-public destinations, but it also changed the outbound request fingerprint for `document_query` remote fetches. Some public sites — including https://nvd.nist.gov/vuln/detail/CVE-2026-4308, which was used for testing — responded with HTTP 403 to the default `requests` user agent even though they remained safe and publicly routable. This change keeps the centralized SSRF protections in place while restoring the previous request compatibility behavior: the value of the `USER_AGENT` environment variable is sent as the `User-Agent` header, falling back to the prior `@mixedbread-ai/unstructured` value when none is configured. What is fixed: public URLs such as `https://nvd.nist.gov/vuln/detail/CVE-2026-4308` no longer fail with a site-specific HTTP 403 caused by the request fingerprint changes introduced by the SSRF mitigation.
This commit is contained in:
parent
6397acc092
commit
91f43e28b4
1 changed file with 12 additions and 0 deletions
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||
|
||||
from dataclasses import dataclass
|
||||
import ipaddress
|
||||
import os
|
||||
import socket
|
||||
import struct
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
|
@ -11,6 +12,7 @@ import requests
|
|||
|
||||
# URL schemes treated as safe for HTTP fetching: plain "http" and "https" only.
# NOTE(review): presumably compared against the parsed URL scheme elsewhere in
# this module - confirm against the caller.
SAFE_HTTP_SCHEMES = frozenset({"http", "https"})
|
||||
# Default timeout pair for remote fetches.
# NOTE(review): presumably the (connect, read) tuple passed to `requests` as
# `timeout` - confirm against fetch_public_http_resource.
DEFAULT_FETCH_TIMEOUT = (3.05, 10.0)
|
||||
# Fallback User-Agent value used by _build_request_headers when neither the
# USER_AGENT nor the user_agent environment variable supplies a usable value.
DEFAULT_HTTP_USER_AGENT = "@mixedbread-ai/unstructured"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
|
@ -25,6 +27,15 @@ class UnsafeUrlError(ValueError):
|
|||
"""Raised when a remote URL resolves to a non-public destination."""
|
||||
|
||||
|
||||
def _build_request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with outbound remote-fetch requests.

    The User-Agent is taken from the first of the ``USER_AGENT`` and
    ``user_agent`` environment variables that holds a non-blank value. If
    neither does, ``DEFAULT_HTTP_USER_AGENT`` is used instead.

    Returns:
        A single-entry mapping of the form ``{"User-Agent": <value>}``.
    """
    # Strip every candidate on its own: a whitespace-only USER_AGENT must not
    # mask a usable lowercase ``user_agent`` value and force the default.
    for env_name in ("USER_AGENT", "user_agent"):
        candidate = (os.getenv(env_name) or "").strip()
        if candidate:
            return {"User-Agent": candidate}
    return {"User-Agent": DEFAULT_HTTP_USER_AGENT}
|
||||
|
||||
|
||||
def _normalize_content_type(content_type: str | None) -> str | None:
|
||||
if not content_type:
|
||||
return None
|
||||
|
|
@ -104,6 +115,7 @@ def fetch_public_http_resource(
|
|||
current_url,
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
headers=_build_request_headers(),
|
||||
timeout=timeout,
|
||||
) as response:
|
||||
if 300 <= response.status_code < 400:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue