agent-zero/plugins/_browser/helpers/url.py
Alessandro aa7944b95a Centralize Browser helper contracts
Move URL normalization into Agent Zero-owned Browser helper code and expose the content helper's required API contract from the shared asset. Normalize host-browser open/navigate payloads before they cross into the connector, including nested multi actions, and add regression coverage for helper payload delivery and URL edge cases.
2026-05-08 16:39:04 +02:00

55 lines
1.8 KiB
Python

from __future__ import annotations
import re
from urllib.parse import urlsplit, urlunsplit
from helpers.errors import RepairableException
# Schemes that form a complete URL without a "//" authority part.
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)

# Any explicit "scheme://" prefix.
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)

# "localhost", a bracketed IPv6 literal, or a dotted-quad IPv4 address, with an
# optional ":port". Bare hosts matching this get "http://" instead of "https://".
_LOCAL_HOST_RE = re.compile(
    r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$",
    re.I,
)

# Everything _LOCAL_HOST_RE accepts plus dotted domain names, with an optional
# ":port". Used to recognise scheme-less input that starts with a host.
_TYPED_HOST_RE = re.compile(
    r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|"
    r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$",
    re.I,
)


def _with_trailing_path(url: str) -> str:
    """Re-serialize *url*, giving path-less http(s) URLs the path "/".

    Raises:
        ValueError: Propagated from ``urlsplit`` when *url* cannot be parsed.
    """
    parts = urlsplit(url)
    if parts.scheme in {"http", "https"} and not parts.path:
        return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment))
    return urlunsplit(parts)


def normalize_url(value: str) -> str:
    """Turn a typed navigation target into a full URL string.

    Resolution order:

    1. Scheme-less input whose leading component (up to the first ``/``,
       ``?`` or ``#``) looks like a host gets ``http://`` when that host
       matches ``_LOCAL_HOST_RE`` and ``https://`` otherwise.
    2. Input that already parses with a scheme is kept (re-serialized).
    3. Anything else is prefixed with ``https://``.

    In every case an http(s) URL without a path gets ``/`` as its path.

    Args:
        value: The raw target; it is stringified and stripped of surrounding
            whitespace first. Falsy values are treated as empty.

    Returns:
        The normalized URL.

    Raises:
        ValueError: If the target is empty, or if even the ``https://``
            fallback cannot be parsed as a URL.
        RepairableException: If the target starts with ``/``, ``?``, ``#``
            or ``.`` and is therefore relative.
    """
    raw = str(value or "").strip()
    if not raw:
        raise ValueError("Browser navigation requires a non-empty URL.")
    if raw.startswith(("/", "?", "#", ".")):
        raise RepairableException(
            f"Browser navigation target {raw!r} is relative; provide a full URL with a scheme."
        )

    try:
        host = re.split(r"[/?#]", raw, maxsplit=1)[0]
        looks_like_bare_host = (
            not _URL_SCHEME_RE.match(raw)
            and not _SPECIAL_SCHEME_RE.match(raw)
            and not re.search(r"\s", raw)
            and _TYPED_HOST_RE.match(host)
        )
        if looks_like_bare_host:
            protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://"
            return _with_trailing_path(protocol + raw)
        if urlsplit(raw).scheme:
            return _with_trailing_path(raw)
    except ValueError:
        # urlsplit rejects malformed authorities (e.g. unbalanced IPv6
        # brackets); fall through to the best-effort "https://" prefix below.
        pass

    try:
        return _with_trailing_path("https://" + raw)
    except ValueError as err:
        # Same exception type as before, but name the offending target
        # instead of leaking urlsplit's bare parser message.
        raise ValueError(
            f"Browser navigation target {raw!r} is not a valid URL."
        ) from err