mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-17 04:01:13 +00:00
Move URL normalization into Agent Zero-owned Browser helper code and expose the content helper's required API contract from the shared asset. Normalize host-browser open/navigate payloads before they cross into the connector, including nested multi actions, and add regression coverage for helper payload delivery and URL edge cases.
55 lines
1.8 KiB
Python
55 lines
1.8 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from urllib.parse import urlsplit, urlunsplit
|
|
|
|
from helpers.errors import RepairableException
|
|
|
|
|
|
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)
|
|
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)
|
|
_LOCAL_HOST_RE = re.compile(
|
|
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$",
|
|
re.I,
|
|
)
|
|
_TYPED_HOST_RE = re.compile(
|
|
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|"
|
|
r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$",
|
|
re.I,
|
|
)
|
|
|
|
|
|
def normalize_url(value: str) -> str:
|
|
raw = str(value or "").strip()
|
|
if not raw:
|
|
raise ValueError("Browser navigation requires a non-empty URL.")
|
|
if raw.startswith(("/", "?", "#", ".")):
|
|
raise RepairableException(
|
|
f"Browser navigation target {raw!r} is relative; provide a full URL with a scheme."
|
|
)
|
|
|
|
def with_trailing_path(url: str) -> str:
|
|
parts = urlsplit(url)
|
|
if parts.scheme in {"http", "https"} and not parts.path:
|
|
return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment))
|
|
return urlunsplit(parts)
|
|
|
|
try:
|
|
host = re.split(r"[/?#]", raw, maxsplit=1)[0] or ""
|
|
if (
|
|
not _URL_SCHEME_RE.match(raw)
|
|
and not _SPECIAL_SCHEME_RE.match(raw)
|
|
and not raw.startswith(("/", "?", "#", "."))
|
|
and not re.search(r"\s", raw)
|
|
and _TYPED_HOST_RE.match(host)
|
|
):
|
|
protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://"
|
|
return with_trailing_path(protocol + raw)
|
|
|
|
parts = urlsplit(raw)
|
|
if parts.scheme:
|
|
return with_trailing_path(raw)
|
|
except Exception:
|
|
pass
|
|
|
|
return with_trailing_path("https://" + raw)
|