diff --git a/plugins/_browser/assets/browser-page-content.js b/plugins/_browser/assets/browser-page-content.js index 5fd628bce..c1330d154 100644 --- a/plugins/_browser/assets/browser-page-content.js +++ b/plugins/_browser/assets/browser-page-content.js @@ -2,6 +2,37 @@ const GLOBAL_KEY = "__spaceBrowserPageContent__"; const DOM_HELPER_KEY = "__spaceBrowserDomHelper__"; const VERSION = "11"; + const REQUIRED_API_NAMES = Object.freeze([ + "annotate", + "boundingBoxFor", + "capture", + "click", + "detail", + "fileInputElementFor", + "fileInputFor", + "pointFor", + "scroll", + "select", + "setChecked", + "submit", + "type", + "typeSubmit" + ]); + + function patchOpenShadowDom() { + const original = Element.prototype.attachShadow; + if (!original || original.__a0BrowserOpenShadowPatch) { + return; + } + const patched = function attachShadow(options) { + return original.call(this, { ...(options || {}), mode: "open" }); + }; + patched.__a0BrowserOpenShadowPatch = true; + Element.prototype.attachShadow = patched; + } + + patchOpenShadowDom(); + const BLOCK_TAGS = new Set([ "ADDRESS", "ARTICLE", @@ -3958,6 +3989,11 @@ setChecked(referenceId, checked) { return setCheckedReference(referenceId, checked); }, + ready() { + const api = globalThis[GLOBAL_KEY]; + return Boolean(api && REQUIRED_API_NAMES.every((name) => typeof api[name] === "function")); + }, + requiredApis: REQUIRED_API_NAMES.slice(), version: VERSION }; })(); diff --git a/plugins/_browser/helpers/connector_runtime.py b/plugins/_browser/helpers/connector_runtime.py index 97a691c69..0639b5039 100644 --- a/plugins/_browser/helpers/connector_runtime.py +++ b/plugins/_browser/helpers/connector_runtime.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio import base64 import hashlib +import re import uuid from functools import lru_cache from pathlib import Path @@ -37,6 +38,7 @@ from plugins._browser.helpers.config import ( HOST_BROWSER_PRIVACY_POLICY_KEY, get_browser_config, ) +from 
plugins._browser.helpers.url import normalize_url BROWSER_OP_EVENT = "connector_browser_op" @@ -48,6 +50,10 @@ BASE64_DECODE_CHARS_PER_CHUNK = 64 * 1024 _LOCAL_PROVIDERS = {"ollama", "lm_studio"} _LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1", "host.docker.internal"} _SENSITIVE_ACTIONS = {"content", "detail", "evaluate", "screenshot", "screenshot_file"} +_REQUIRED_API_NAMES_RE = re.compile( + r"const\s+REQUIRED_API_NAMES\s*=\s*Object\.freeze\(\[(?P<body>.*?)\]\);", + re.S, +) class ConnectorBrowserRuntime: @@ -76,12 +82,12 @@ class ConnectorBrowserRuntime: } if action == "open": - payload["url"] = args[0] if args else "" + payload["url"] = self._normalize_open_url(args[0] if args else "") elif action in {"state", "set_active", "back", "forward", "reload"}: payload["browser_id"] = args[0] if args else None elif action == "navigate": payload["browser_id"] = args[0] if args else None - payload["url"] = args[1] if len(args) > 1 else "" + payload["url"] = normalize_url(args[1] if len(args) > 1 else "") elif action == "screenshot_file": payload["action"] = "screenshot" payload["browser_id"] = args[0] if args else None @@ -145,7 +151,7 @@ class ConnectorBrowserRuntime: payload["ref"] = args[1] if len(args) > 1 else None payload.update(kwargs) elif action == "multi": - payload["calls"] = args[0] if args else [] + payload["calls"] = self._normalize_multi_calls(args[0] if args else []) elif action == "close_browser": payload["action"] = "close" payload["browser_id"] = args[0] if args else None @@ -156,6 +162,31 @@ class ConnectorBrowserRuntime: return payload + @staticmethod + def _normalize_open_url(value: Any) -> str: + raw = str(value or "").strip() + return normalize_url(raw) if raw else "" + + @classmethod + def _normalize_multi_calls(cls, calls: Any) -> Any: + if not isinstance(calls, list): + return calls + normalized_calls: list[Any] = [] + for call in calls: + if not isinstance(call, dict): + normalized_calls.append(call) + continue + normalized = dict(call) + action =
str(normalized.get("action") or "").strip().lower().replace("-", "_") + if action == "open": + normalized["url"] = cls._normalize_open_url(normalized.get("url")) + elif action == "navigate": + normalized["url"] = normalize_url(normalized.get("url", "")) + elif action == "multi" or isinstance(normalized.get("calls"), list): + normalized["calls"] = cls._normalize_multi_calls(normalized.get("calls", [])) + normalized_calls.append(normalized) + return normalized_calls + async def _dispatch(self, payload: dict[str, Any]) -> Any: self._enforce_privacy(payload) sid = self._select_sid() @@ -350,7 +381,7 @@ class ConnectorBrowserRuntime: @lru_cache(maxsize=1) -def _content_helper_payload() -> dict[str, str]: +def _content_helper_payload() -> dict[str, Any]: try: source = CONTENT_HELPER_PATH.read_text(encoding="utf-8") except OSError as exc: @@ -358,13 +389,28 @@ def _content_helper_payload() -> dict[str, str]: f"Host-browser content helper could not be read from {CONTENT_HELPER_PATH}: {exc}" ) from exc return { + "required_apis": _content_helper_required_apis(source), "source": source, "sha256": hashlib.sha256(source.encode("utf-8")).hexdigest(), } def _content_helper_sha256() -> str: - return _content_helper_payload()["sha256"] + return str(_content_helper_payload()["sha256"]) + + +def _content_helper_required_apis(source: str) -> list[str]: + match = _REQUIRED_API_NAMES_RE.search(source) + if not match: + raise RuntimeError( + f"Host-browser content helper from {CONTENT_HELPER_PATH} does not declare REQUIRED_API_NAMES." + ) + names = re.findall(r'"([^"]+)"', match.group("body")) + if not names: + raise RuntimeError( + f"Host-browser content helper from {CONTENT_HELPER_PATH} declares no required API names." 
+ ) + return names def _agent_uses_local_chat_model(agent: Any) -> bool: diff --git a/plugins/_browser/helpers/runtime.py b/plugins/_browser/helpers/runtime.py index 6ad90c301..ca135f67b 100644 --- a/plugins/_browser/helpers/runtime.py +++ b/plugins/_browser/helpers/runtime.py @@ -14,7 +14,6 @@ import uuid from dataclasses import dataclass from pathlib import Path from typing import Any -from urllib.parse import urlsplit, urlunsplit from helpers import files from helpers.defer import DeferredTask @@ -26,6 +25,7 @@ from plugins._browser.helpers.config import ( get_browser_config, ) from plugins._browser.helpers.playwright import configure_playwright_env, ensure_playwright_binary +from plugins._browser.helpers.url import normalize_url PLUGIN_DIR = Path(__file__).resolve().parents[1] @@ -275,17 +275,6 @@ CLIPBOARD_BRIDGE_SCRIPT = r""" } """ -_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I) -_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I) -_LOCAL_HOST_RE = re.compile( - r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$", - re.I, -) -_TYPED_HOST_RE = re.compile( - r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|" - r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$", - re.I, -) _SAFE_CONTEXT_RE = re.compile(r"[^a-zA-Z0-9_.-]+") @@ -301,38 +290,6 @@ def _nudged_viewport(viewport: dict[str, int]) -> dict[str, int]: return {"width": width, "height": height - 1} -def normalize_url(value: str) -> str: - raw = str(value or "").strip() - if not raw: - raise ValueError("Browser navigation requires a non-empty URL.") - - def with_trailing_path(url: str) -> str: - parts = urlsplit(url) - if parts.scheme in {"http", "https"} and not parts.path: - return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment)) - return urlunsplit(parts) - - try: - host = re.split(r"[/?#]", raw, maxsplit=1)[0] or "" - if ( - not _URL_SCHEME_RE.match(raw) - and not _SPECIAL_SCHEME_RE.match(raw) - 
and not raw.startswith(("/", "?", "#", ".")) - and not re.search(r"\s", raw) - and _TYPED_HOST_RE.match(host) - ): - protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://" - return with_trailing_path(protocol + raw) - - parts = urlsplit(raw) - if parts.scheme: - return with_trailing_path(raw) - except Exception: - pass - - return with_trailing_path("https://" + raw) - - def _safe_context_id(context_id: str) -> str: return _SAFE_CONTEXT_RE.sub("_", str(context_id or "default")).strip("._") or "default" @@ -816,7 +773,6 @@ class _BrowserRuntimeCore: self.context.set_default_navigation_timeout(30000) self.context.on("close", self._on_context_closed) self.context.on("page", self._on_new_page_sync) - await self.context.add_init_script(self._shadow_dom_script()) await self.context.add_init_script(path=str(CONTENT_HELPER_PATH)) for page in list(self.context.pages): @@ -2258,7 +2214,7 @@ class _BrowserRuntimeCore: async def _ensure_content_helper(self, page: Any) -> None: has_helper = await page.evaluate( - "() => Boolean(globalThis.__spaceBrowserPageContent__?.capture && globalThis.__spaceBrowserPageContent__?.annotate && globalThis.__spaceBrowserPageContent__?.boundingBoxFor && globalThis.__spaceBrowserPageContent__?.pointFor && globalThis.__spaceBrowserPageContent__?.select && globalThis.__spaceBrowserPageContent__?.setChecked && globalThis.__spaceBrowserPageContent__?.fileInputFor)" + "() => Boolean(globalThis.__spaceBrowserPageContent__?.ready?.())" ) if has_helper: return @@ -2266,22 +2222,6 @@ class _BrowserRuntimeCore: self._content_helper_source = CONTENT_HELPER_PATH.read_text(encoding="utf-8") await page.evaluate(self._content_helper_source) - @staticmethod - def _shadow_dom_script() -> str: - return """ -(() => { - const original = Element.prototype.attachShadow; - if (original && !original.__a0BrowserOpenShadowPatch) { - const patched = function attachShadow(options) { - return original.call(this, { ...(options || {}), mode: "open" }); - }; - 
patched.__a0BrowserOpenShadowPatch = true; - Element.prototype.attachShadow = patched; - } -})(); -""" - - _runtimes: dict[str, BrowserRuntime] = {} _runtime_lock = threading.RLock() diff --git a/plugins/_browser/helpers/url.py b/plugins/_browser/helpers/url.py new file mode 100644 index 000000000..4d23b34db --- /dev/null +++ b/plugins/_browser/helpers/url.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import re +from urllib.parse import urlsplit, urlunsplit + +from helpers.errors import RepairableException + + +_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I) +_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I) +_LOCAL_HOST_RE = re.compile( + r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$", + re.I, +) +_TYPED_HOST_RE = re.compile( + r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|" + r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$", + re.I, +) + + +def normalize_url(value: str) -> str: + raw = str(value or "").strip() + if not raw: + raise ValueError("Browser navigation requires a non-empty URL.") + if raw.startswith(("/", "?", "#", ".")): + raise RepairableException( + f"Browser navigation target {raw!r} is relative; provide a full URL with a scheme." 
+ ) + + def with_trailing_path(url: str) -> str: + parts = urlsplit(url) + if parts.scheme in {"http", "https"} and not parts.path: + return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment)) + return urlunsplit(parts) + + try: + host = re.split(r"[/?#]", raw, maxsplit=1)[0] or "" + if ( + not _URL_SCHEME_RE.match(raw) + and not _SPECIAL_SCHEME_RE.match(raw) + and not raw.startswith(("/", "?", "#", ".")) + and not re.search(r"\s", raw) + and _TYPED_HOST_RE.match(host) + ): + protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://" + return with_trailing_path(protocol + raw) + + parts = urlsplit(raw) + if parts.scheme: + return with_trailing_path(raw) + except Exception: + pass + + return with_trailing_path("https://" + raw) diff --git a/tests/test_browser_agent_regressions.py b/tests/test_browser_agent_regressions.py index d302382fe..aa38469a1 100644 --- a/tests/test_browser_agent_regressions.py +++ b/tests/test_browser_agent_regressions.py @@ -79,6 +79,7 @@ sys.modules.setdefault("plugins._model_config.helpers.model_config", _model_conf def anyio_backend(): return "asyncio" +from helpers.errors import RepairableException from plugins._browser.helpers.config import ( build_browser_launch_config, get_browser_main_model_summary, @@ -134,6 +135,8 @@ def test_browser_url_normalization_matches_address_bar_hosts(): assert normalize_url("novinky.cz") == "https://novinky.cz/" assert normalize_url("https://example.com") == "https://example.com/" assert normalize_url("about:blank") == "about:blank" + with pytest.raises(RepairableException, match="relative"): + normalize_url("/docs") def test_browser_config_normalizes_extension_paths(tmp_path): @@ -1201,6 +1204,13 @@ def test_browser_content_helper_keeps_label_wrapped_controls_referenceable(): ).read_text(encoding="utf-8") assert 'const VERSION = "11"' in helper + assert "function patchOpenShadowDom" in helper + assert "Element.prototype.attachShadow = patched" in helper + assert "const 
REQUIRED_API_NAMES = Object.freeze([" in helper + assert "requiredApis: REQUIRED_API_NAMES.slice()" in helper + assert "ready()" in helper + for api_name in ("click", "scroll", "submit", "type", "typeSubmit"): + assert f'"{api_name}"' in helper assert "function renderControlLabelReferences" in helper assert "getLabelElementText(labelElement, element)" in helper assert "return renderControlLabelReferences(node, context);" in helper @@ -1212,7 +1222,7 @@ def test_browser_runtime_requires_current_content_helper_for_modifier_clicks(): PROJECT_ROOT / "plugins" / "_browser" / "helpers" / "runtime.py" ).read_text(encoding="utf-8") - assert "__spaceBrowserPageContent__?.boundingBoxFor" in runtime + assert "__spaceBrowserPageContent__?.ready?.()" in runtime @pytest.mark.anyio diff --git a/tests/test_host_browser_connector.py b/tests/test_host_browser_connector.py index cad22f3b8..fe5af96fb 100644 --- a/tests/test_host_browser_connector.py +++ b/tests/test_host_browser_connector.py @@ -196,6 +196,34 @@ def test_host_browser_privacy_blocks_cloud_content(monkeypatch): runtime._enforce_privacy({"action": "content"}) +def test_connector_runtime_normalizes_host_navigation_payloads(): + runtime = ConnectorBrowserRuntime("ctx-host", _agent("ctx-host")) + + open_payload = runtime._payload_for_call("open", "localhost:3000") + empty_open_payload = runtime._payload_for_call("open", "") + navigate_payload = runtime._payload_for_call("navigate", 7, "novinky.cz") + multi_payload = runtime._payload_for_call( + "multi", + [ + {"action": "open", "url": "example.com"}, + {"action": "navigate", "browser_id": 1, "url": "127.0.0.1:8000/path"}, + { + "action": "multi", + "calls": [{"action": "open", "url": "nested.example"}], + }, + {"action": "content", "browser_id": 1}, + ], + ) + + assert open_payload["url"] == "http://localhost:3000/" + assert empty_open_payload["url"] == "" + assert navigate_payload["url"] == "https://novinky.cz/" + assert multi_payload["calls"][0]["url"] == 
"https://example.com/" + assert multi_payload["calls"][1]["url"] == "http://127.0.0.1:8000/path" + assert multi_payload["calls"][2]["calls"][0]["url"] == "https://nested.example/" + assert multi_payload["calls"][3] == {"action": "content", "browser_id": 1} + + def test_host_browser_artifacts_materialize_inside_multi_results(monkeypatch, tmp_path): import plugins._browser.helpers.connector_runtime as connector_runtime_module @@ -326,6 +354,7 @@ def test_connector_runtime_ensures_preparable_host_browser_before_action(monkeyp assert result == {"id": 1, "state": {"runtime": "host"}} assert [payload["action"] for payload in emitted] == ["ensure", "open"] assert "__spaceBrowserPageContent__" in emitted[0]["content_helper"]["source"] + assert "capture" in emitted[0]["content_helper"]["required_apis"] assert emitted[0]["content_helper"]["sha256"] finally: ws_runtime.unregister_sid(sid)