Centralize Browser helper contracts

Move URL normalization into Agent Zero-owned Browser helper code and expose the content helper's required API contract from the shared asset. Normalize host-browser open/navigate payloads before they cross into the connector, including nested multi actions, and add regression coverage for helper payload delivery and URL edge cases.
This commit is contained in:
Alessandro 2026-05-08 16:39:04 +02:00
parent c020f1af28
commit aa7944b95a
6 changed files with 184 additions and 68 deletions

View file

@ -2,6 +2,37 @@
const GLOBAL_KEY = "__spaceBrowserPageContent__";
const DOM_HELPER_KEY = "__spaceBrowserDomHelper__";
const VERSION = "11";
// Method names the page-content helper must expose as functions.
// ready() checks each of these on globalThis[GLOBAL_KEY], and a copy of the
// list is published on the API object as `requiredApis`.
// NOTE(review): the connector runtime's _content_helper_required_apis() reads
// this list out of the file text with a regex that expects the exact
// `const REQUIRED_API_NAMES = Object.freeze([ ... ]);` form and double-quoted
// entries — keep that shape when editing.
const REQUIRED_API_NAMES = Object.freeze([
"annotate",
"boundingBoxFor",
"capture",
"click",
"detail",
"fileInputElementFor",
"fileInputFor",
"pointFor",
"scroll",
"select",
"setChecked",
"submit",
"type",
"typeSubmit"
]);
// Replace Element.prototype.attachShadow with a wrapper that always creates
// the shadow root with mode "open", whatever mode the caller asked for.
function patchOpenShadowDom() {
const original = Element.prototype.attachShadow;
// Nothing to do when attachShadow is unavailable or is already our wrapper.
if (!original || original.__a0BrowserOpenShadowPatch) {
return;
}
const patched = function attachShadow(options) {
// The spread keeps the caller's other options; the trailing `mode` wins over theirs.
return original.call(this, { ...(options || {}), mode: "open" });
};
// Marker read by the guard above so repeated calls do not stack wrappers.
patched.__a0BrowserOpenShadowPatch = true;
Element.prototype.attachShadow = patched;
}
patchOpenShadowDom();
const BLOCK_TAGS = new Set([
"ADDRESS",
"ARTICLE",
@ -3958,6 +3989,11 @@
setChecked(referenceId, checked) {
return setCheckedReference(referenceId, checked);
},
ready() {
const api = globalThis[GLOBAL_KEY];
return Boolean(api && REQUIRED_API_NAMES.every((name) => typeof api[name] === "function"));
},
requiredApis: REQUIRED_API_NAMES.slice(),
version: VERSION
};
})();

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio
import base64
import hashlib
import re
import uuid
from functools import lru_cache
from pathlib import Path
@ -37,6 +38,7 @@ from plugins._browser.helpers.config import (
HOST_BROWSER_PRIVACY_POLICY_KEY,
get_browser_config,
)
from plugins._browser.helpers.url import normalize_url
BROWSER_OP_EVENT = "connector_browser_op"
@ -48,6 +50,10 @@ BASE64_DECODE_CHARS_PER_CHUNK = 64 * 1024
_LOCAL_PROVIDERS = {"ollama", "lm_studio"}
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
_SENSITIVE_ACTIONS = {"content", "detail", "evaluate", "screenshot", "screenshot_file"}
# Finds the `const REQUIRED_API_NAMES = Object.freeze([...]);` declaration in the
# content helper's JavaScript source. The named group `body` is the text between
# the square brackets; re.S lets `.` span the newlines of a multi-line array.
_REQUIRED_API_NAMES_RE = re.compile(
r"const\s+REQUIRED_API_NAMES\s*=\s*Object\.freeze\(\[(?P<body>.*?)\]\);",
re.S,
)
class ConnectorBrowserRuntime:
@ -76,12 +82,12 @@ class ConnectorBrowserRuntime:
}
if action == "open":
payload["url"] = args[0] if args else ""
payload["url"] = self._normalize_open_url(args[0] if args else "")
elif action in {"state", "set_active", "back", "forward", "reload"}:
payload["browser_id"] = args[0] if args else None
elif action == "navigate":
payload["browser_id"] = args[0] if args else None
payload["url"] = args[1] if len(args) > 1 else ""
payload["url"] = normalize_url(args[1] if len(args) > 1 else "")
elif action == "screenshot_file":
payload["action"] = "screenshot"
payload["browser_id"] = args[0] if args else None
@ -145,7 +151,7 @@ class ConnectorBrowserRuntime:
payload["ref"] = args[1] if len(args) > 1 else None
payload.update(kwargs)
elif action == "multi":
payload["calls"] = args[0] if args else []
payload["calls"] = self._normalize_multi_calls(args[0] if args else [])
elif action == "close_browser":
payload["action"] = "close"
payload["browser_id"] = args[0] if args else None
@ -156,6 +162,31 @@ class ConnectorBrowserRuntime:
return payload
@staticmethod
def _normalize_open_url(value: Any) -> str:
# Normalize the URL for an "open" call. A missing or blank value yields ""
# rather than being passed to normalize_url; any other value is stripped and
# normalized.
raw = str(value or "").strip()
return normalize_url(raw) if raw else ""
@classmethod
def _normalize_multi_calls(cls, calls: Any) -> Any:
# Build a new call list for a "multi" payload with open/navigate URLs normalized.
# A non-list `calls` value is returned as-is, and non-dict entries are kept unchanged.
if not isinstance(calls, list):
return calls
normalized_calls: list[Any] = []
for call in calls:
if not isinstance(call, dict):
normalized_calls.append(call)
continue
# Shallow copy so the caller's dict is not modified.
normalized = dict(call)
# Action names are matched case-insensitively, with "-" treated as "_".
action = str(normalized.get("action") or "").strip().lower().replace("-", "_")
if action == "open":
# Always sets "url" on the copy; it is "" when the entry had no usable URL.
normalized["url"] = cls._normalize_open_url(normalized.get("url"))
elif action == "navigate":
normalized["url"] = normalize_url(normalized.get("url", ""))
elif action == "multi" or isinstance(normalized.get("calls"), list):
# Recurse into nested batches, including entries that carry a "calls"
# list under some other action name.
normalized["calls"] = cls._normalize_multi_calls(normalized.get("calls", []))
normalized_calls.append(normalized)
return normalized_calls
async def _dispatch(self, payload: dict[str, Any]) -> Any:
self._enforce_privacy(payload)
sid = self._select_sid()
@ -350,7 +381,7 @@ class ConnectorBrowserRuntime:
@lru_cache(maxsize=1)
def _content_helper_payload() -> dict[str, str]:
def _content_helper_payload() -> dict[str, Any]:
try:
source = CONTENT_HELPER_PATH.read_text(encoding="utf-8")
except OSError as exc:
@ -358,13 +389,28 @@ def _content_helper_payload() -> dict[str, str]:
f"Host-browser content helper could not be read from {CONTENT_HELPER_PATH}: {exc}"
) from exc
return {
"required_apis": _content_helper_required_apis(source),
"source": source,
"sha256": hashlib.sha256(source.encode("utf-8")).hexdigest(),
}
def _content_helper_sha256() -> str:
return _content_helper_payload()["sha256"]
return str(_content_helper_payload()["sha256"])
def _content_helper_required_apis(source: str) -> list[str]:
# Extract the API names listed in the helper source's REQUIRED_API_NAMES array.
# Raises RuntimeError when the declaration is not found or lists no names.
match = _REQUIRED_API_NAMES_RE.search(source)
if not match:
raise RuntimeError(
f"Host-browser content helper from {CONTENT_HELPER_PATH} does not declare REQUIRED_API_NAMES."
)
# Only double-quoted entries in the array body are recognized.
names = re.findall(r'"([^"]+)"', match.group("body"))
if not names:
raise RuntimeError(
f"Host-browser content helper from {CONTENT_HELPER_PATH} declares no required API names."
)
return names
def _agent_uses_local_chat_model(agent: Any) -> bool:

View file

@ -14,7 +14,6 @@ import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import urlsplit, urlunsplit
from helpers import files
from helpers.defer import DeferredTask
@ -26,6 +25,7 @@ from plugins._browser.helpers.config import (
get_browser_config,
)
from plugins._browser.helpers.playwright import configure_playwright_env, ensure_playwright_binary
from plugins._browser.helpers.url import normalize_url
PLUGIN_DIR = Path(__file__).resolve().parents[1]
@ -275,17 +275,6 @@ CLIPBOARD_BRIDGE_SCRIPT = r"""
}
"""
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)
_LOCAL_HOST_RE = re.compile(
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$",
re.I,
)
_TYPED_HOST_RE = re.compile(
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|"
r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$",
re.I,
)
_SAFE_CONTEXT_RE = re.compile(r"[^a-zA-Z0-9_.-]+")
@ -301,38 +290,6 @@ def _nudged_viewport(viewport: dict[str, int]) -> dict[str, int]:
return {"width": width, "height": height - 1}
def normalize_url(value: str) -> str:
raw = str(value or "").strip()
if not raw:
raise ValueError("Browser navigation requires a non-empty URL.")
def with_trailing_path(url: str) -> str:
parts = urlsplit(url)
if parts.scheme in {"http", "https"} and not parts.path:
return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment))
return urlunsplit(parts)
try:
host = re.split(r"[/?#]", raw, maxsplit=1)[0] or ""
if (
not _URL_SCHEME_RE.match(raw)
and not _SPECIAL_SCHEME_RE.match(raw)
and not raw.startswith(("/", "?", "#", "."))
and not re.search(r"\s", raw)
and _TYPED_HOST_RE.match(host)
):
protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://"
return with_trailing_path(protocol + raw)
parts = urlsplit(raw)
if parts.scheme:
return with_trailing_path(raw)
except Exception:
pass
return with_trailing_path("https://" + raw)
def _safe_context_id(context_id: str) -> str:
return _SAFE_CONTEXT_RE.sub("_", str(context_id or "default")).strip("._") or "default"
@ -816,7 +773,6 @@ class _BrowserRuntimeCore:
self.context.set_default_navigation_timeout(30000)
self.context.on("close", self._on_context_closed)
self.context.on("page", self._on_new_page_sync)
await self.context.add_init_script(self._shadow_dom_script())
await self.context.add_init_script(path=str(CONTENT_HELPER_PATH))
for page in list(self.context.pages):
@ -2258,7 +2214,7 @@ class _BrowserRuntimeCore:
async def _ensure_content_helper(self, page: Any) -> None:
has_helper = await page.evaluate(
"() => Boolean(globalThis.__spaceBrowserPageContent__?.capture && globalThis.__spaceBrowserPageContent__?.annotate && globalThis.__spaceBrowserPageContent__?.boundingBoxFor && globalThis.__spaceBrowserPageContent__?.pointFor && globalThis.__spaceBrowserPageContent__?.select && globalThis.__spaceBrowserPageContent__?.setChecked && globalThis.__spaceBrowserPageContent__?.fileInputFor)"
"() => Boolean(globalThis.__spaceBrowserPageContent__?.ready?.())"
)
if has_helper:
return
@ -2266,22 +2222,6 @@ class _BrowserRuntimeCore:
self._content_helper_source = CONTENT_HELPER_PATH.read_text(encoding="utf-8")
await page.evaluate(self._content_helper_source)
@staticmethod
def _shadow_dom_script() -> str:
return """
(() => {
const original = Element.prototype.attachShadow;
if (original && !original.__a0BrowserOpenShadowPatch) {
const patched = function attachShadow(options) {
return original.call(this, { ...(options || {}), mode: "open" });
};
patched.__a0BrowserOpenShadowPatch = true;
Element.prototype.attachShadow = patched;
}
})();
"""
_runtimes: dict[str, BrowserRuntime] = {}
_runtime_lock = threading.RLock()

View file

@ -0,0 +1,55 @@
from __future__ import annotations
import re
from urllib.parse import urlsplit, urlunsplit
from helpers.errors import RepairableException
# Schemes that are complete URLs without a "//" authority part.
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)
# Any "scheme://" prefix.
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)
# Hosts that default to plain http: localhost, bracketed IPv6, dotted IPv4 (optional port).
_LOCAL_HOST_RE = re.compile(
    r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$",
    re.I,
)
# Anything that looks like a host typed into an address bar: the local forms
# above or a dotted domain name, each with an optional port.
_TYPED_HOST_RE = re.compile(
    r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|"
    r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$",
    re.I,
)


def normalize_url(value: str) -> str:
    """Turn address-bar style input into a URL with a scheme.

    Input that starts with a bare host gets ``http://`` when the host is
    localhost or an IP literal and ``https://`` otherwise. Input that already
    has a scheme is kept. Anything else is prefixed with ``https://``.
    ``http``/``https`` URLs with an empty path get a ``/`` path.

    Args:
        value: The raw navigation target; it is converted with ``str`` and
            stripped of surrounding whitespace.

    Returns:
        The normalized URL string.

    Raises:
        ValueError: If the input is empty after stripping.
        RepairableException: If the input starts with ``/``, ``?``, ``#`` or
            ``.`` and is therefore treated as relative.
    """
    raw = str(value or "").strip()
    if not raw:
        raise ValueError("Browser navigation requires a non-empty URL.")
    if raw.startswith(("/", "?", "#", ".")):
        raise RepairableException(
            f"Browser navigation target {raw!r} is relative; provide a full URL with a scheme."
        )

    def with_trailing_path(url: str) -> str:
        parts = urlsplit(url)
        if parts.scheme in {"http", "https"} and not parts.path:
            return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment))
        return urlunsplit(parts)

    try:
        # Everything before the first "/", "?" or "#" is the candidate host[:port].
        host = re.split(r"[/?#]", raw, maxsplit=1)[0]
        # Relative prefixes were rejected above, so they need no re-check here.
        if (
            not _URL_SCHEME_RE.match(raw)
            and not _SPECIAL_SCHEME_RE.match(raw)
            and not re.search(r"\s", raw)
            and _TYPED_HOST_RE.match(host)
        ):
            protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://"
            return with_trailing_path(protocol + raw)
        parts = urlsplit(raw)
        if parts.scheme:
            return with_trailing_path(raw)
    except ValueError:
        # urlsplit raises ValueError for input it cannot parse; fall back to
        # the https-prefixed form below.
        pass
    # NOTE(review): this fallback sits outside the try block, so a ValueError
    # from urlsplit on the prefixed string propagates to the caller.
    return with_trailing_path("https://" + raw)

View file

@ -79,6 +79,7 @@ sys.modules.setdefault("plugins._model_config.helpers.model_config", _model_conf
def anyio_backend():
return "asyncio"
from helpers.errors import RepairableException
from plugins._browser.helpers.config import (
build_browser_launch_config,
get_browser_main_model_summary,
@ -134,6 +135,8 @@ def test_browser_url_normalization_matches_address_bar_hosts():
assert normalize_url("novinky.cz") == "https://novinky.cz/"
assert normalize_url("https://example.com") == "https://example.com/"
assert normalize_url("about:blank") == "about:blank"
with pytest.raises(RepairableException, match="relative"):
normalize_url("/docs")
def test_browser_config_normalizes_extension_paths(tmp_path):
@ -1201,6 +1204,13 @@ def test_browser_content_helper_keeps_label_wrapped_controls_referenceable():
).read_text(encoding="utf-8")
assert 'const VERSION = "11"' in helper
assert "function patchOpenShadowDom" in helper
assert "Element.prototype.attachShadow = patched" in helper
assert "const REQUIRED_API_NAMES = Object.freeze([" in helper
assert "requiredApis: REQUIRED_API_NAMES.slice()" in helper
assert "ready()" in helper
for api_name in ("click", "scroll", "submit", "type", "typeSubmit"):
assert f'"{api_name}"' in helper
assert "function renderControlLabelReferences" in helper
assert "getLabelElementText(labelElement, element)" in helper
assert "return renderControlLabelReferences(node, context);" in helper
@ -1212,7 +1222,7 @@ def test_browser_runtime_requires_current_content_helper_for_modifier_clicks():
PROJECT_ROOT / "plugins" / "_browser" / "helpers" / "runtime.py"
).read_text(encoding="utf-8")
assert "__spaceBrowserPageContent__?.boundingBoxFor" in runtime
assert "__spaceBrowserPageContent__?.ready?.()" in runtime
@pytest.mark.anyio

View file

@ -196,6 +196,34 @@ def test_host_browser_privacy_blocks_cloud_content(monkeypatch):
runtime._enforce_privacy({"action": "content"})
def test_connector_runtime_normalizes_host_navigation_payloads():
# Checks that _payload_for_call normalizes URLs for "open", "navigate" and for
# open/navigate entries inside (nested) "multi" calls, while leaving a blank
# "open" URL and unrelated entries untouched.
runtime = ConnectorBrowserRuntime("ctx-host", _agent("ctx-host"))
open_payload = runtime._payload_for_call("open", "localhost:3000")
empty_open_payload = runtime._payload_for_call("open", "")
navigate_payload = runtime._payload_for_call("navigate", 7, "novinky.cz")
multi_payload = runtime._payload_for_call(
"multi",
[
{"action": "open", "url": "example.com"},
{"action": "navigate", "browser_id": 1, "url": "127.0.0.1:8000/path"},
{
"action": "multi",
"calls": [{"action": "open", "url": "nested.example"}],
},
{"action": "content", "browser_id": 1},
],
)
# Local hosts get http://, other bare hosts get https://.
assert open_payload["url"] == "http://localhost:3000/"
assert empty_open_payload["url"] == ""
assert navigate_payload["url"] == "https://novinky.cz/"
assert multi_payload["calls"][0]["url"] == "https://example.com/"
assert multi_payload["calls"][1]["url"] == "http://127.0.0.1:8000/path"
assert multi_payload["calls"][2]["calls"][0]["url"] == "https://nested.example/"
# Entries that are neither open/navigate nor nested batches pass through unchanged.
assert multi_payload["calls"][3] == {"action": "content", "browser_id": 1}
def test_host_browser_artifacts_materialize_inside_multi_results(monkeypatch, tmp_path):
import plugins._browser.helpers.connector_runtime as connector_runtime_module
@ -326,6 +354,7 @@ def test_connector_runtime_ensures_preparable_host_browser_before_action(monkeyp
assert result == {"id": 1, "state": {"runtime": "host"}}
assert [payload["action"] for payload in emitted] == ["ensure", "open"]
assert "__spaceBrowserPageContent__" in emitted[0]["content_helper"]["source"]
assert "capture" in emitted[0]["content_helper"]["required_apis"]
assert emitted[0]["content_helper"]["sha256"]
finally:
ws_runtime.unregister_sid(sid)