mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-18 23:45:49 +00:00
Store and surface host-browser preparation and CDP endpoint metadata from A0 CLI. Let Browser runtime prepare candidate CLIs before the first action, and keep host-required errors more actionable. Simplify Host Browser settings language and document the Chrome remote-debugging consent flow.
393 lines
16 KiB
Python
393 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import base64
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from urllib.parse import urlparse
|
|
|
|
from helpers import files
|
|
|
|
try:
|
|
from helpers.ws import NAMESPACE
|
|
except Exception:
|
|
NAMESPACE = "/ws"
|
|
|
|
try:
    from helpers.ws_manager import ConnectionNotFoundError, get_shared_ws_manager
except Exception:
    # Fallback definitions used when ``helpers.ws_manager`` cannot be imported,
    # so the rest of this module can still reference both names.

    class ConnectionNotFoundError(RuntimeError):
        """Stand-in error type defined when ``helpers.ws_manager`` fails to import."""

    def get_shared_ws_manager():
        """Always raise ``ConnectionNotFoundError``: no WebSocket manager is available."""
        raise ConnectionNotFoundError("WebSocket manager is unavailable")
|
|
|
|
from plugins._a0_connector.helpers.ws_runtime import (
|
|
clear_pending_browser_op,
|
|
host_browser_metadata_for_context,
|
|
host_browser_metadata_for_sid,
|
|
select_host_browser_candidate_sid,
|
|
select_host_browser_target_sid,
|
|
store_pending_browser_op,
|
|
)
|
|
from plugins._browser.helpers.config import (
|
|
HOST_BROWSER_PRIVACY_POLICY_KEY,
|
|
get_browser_config,
|
|
)
|
|
|
|
|
|
# Event name passed to the WebSocket manager's ``emit_to`` for every browser op.
BROWSER_OP_EVENT = "connector_browser_op"
# Timeout handed to ``asyncio.wait_for`` while awaiting the op's response future.
BROWSER_OP_TIMEOUT = 120.0
# Path components (joined via ``files.get_abs_path`` together with the context id)
# of the directory where decoded artifacts are written.
HOST_BROWSER_SCREENSHOT_DIR = ("tmp", "browser", "host-screenshots")
# Upper bound on the estimated decoded size of a base64 artifact (25 MiB).
MAX_ARTIFACT_SIZE_BYTES = 25 * 1024 * 1024
# Number of input characters sliced per iteration when decoding base64 to disk.
BASE64_DECODE_CHARS_PER_CHUNK = 64 * 1024
# Chat-model provider names that are treated as local without inspecting the endpoint.
_LOCAL_PROVIDERS = {"ollama", "lm_studio"}
# Hostnames of a chat-model endpoint that are treated as local.
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
# Actions the privacy policy checks treat as returning host-browser content.
_SENSITIVE_ACTIONS = {"content", "detail", "evaluate", "screenshot", "screenshot_file"}
|
|
|
|
|
|
class ConnectorBrowserRuntime:
    """Relay browser actions for one context to a host browser behind an A0 CLI session.

    Each call is turned into a payload dict, checked against the Browser
    privacy policy, emitted to a selected WebSocket session id, and the
    response is awaited. Base64 artifacts in the response are written to disk
    and replaced by file paths.
    """

    def __init__(self, context_id: str, agent: Any):
        """Remember the agent and the whitespace-stripped context id (``""`` if falsy)."""
        self.agent = agent
        self.context_id = str(context_id or "").strip()

    async def call(self, method: str, *args: Any, **kwargs: Any) -> Any:
        """Run one browser action and return its (artifact-materialized) result.

        When the privacy policy yields a warning, it is attached under
        ``"privacy_warning"``: added to a dict result if the key is absent,
        otherwise the result is wrapped as ``{"result": ..., "privacy_warning": ...}``.
        """
        request = self._payload_for_call(method, *args, **kwargs)
        notice = self._privacy_warning(request)
        outcome = self._materialize_artifact(await self._dispatch(request))
        if not notice:
            return outcome
        if isinstance(outcome, dict):
            outcome.setdefault("privacy_warning", notice)
            return outcome
        return {"result": outcome, "privacy_warning": notice}

    def _payload_for_call(self, method: str, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Translate a method name plus call arguments into the op payload dict.

        The method name is normalized (stripped, lower-cased, ``-`` -> ``_``)
        into ``action``. Every payload starts with a fresh ``op_id``, the
        ``context_id`` and the ``action``; the remaining keys depend on the
        action. Unrecognized actions simply merge ``kwargs`` into the payload.
        """

        def positional(index: int, fallback: Any = None) -> Any:
            # Positional argument at ``index``, or ``fallback`` when it was not supplied.
            return args[index] if len(args) > index else fallback

        action = str(method or "").strip().lower().replace("-", "_")
        payload: dict[str, Any] = dict(
            op_id=str(uuid.uuid4()),
            context_id=self.context_id,
            action=action,
        )

        if action == "open":
            payload["url"] = positional(0, "")
        elif action in ("state", "set_active", "back", "forward", "reload"):
            payload["browser_id"] = positional(0)
        elif action == "navigate":
            payload.update(browser_id=positional(0), url=positional(1, ""))
        elif action == "screenshot_file":
            # Sent to the host as a plain "screenshot" action.
            payload.update(
                action="screenshot",
                browser_id=positional(0),
                quality=kwargs.get("quality", 80),
                full_page=kwargs.get("full_page", False),
                path=kwargs.get("path", ""),
            )
        elif action == "list":
            payload["include_content"] = kwargs.get("include_content", False)
        elif action == "content":
            options = positional(1)
            payload.update(
                browser_id=positional(0),
                payload=options if isinstance(options, dict) else None,
            )
        elif action == "detail":
            payload.update(browser_id=positional(0), ref=positional(1))
        elif action == "evaluate":
            payload.update(browser_id=positional(0), script=positional(1, ""))
        elif action == "click":
            payload.update(
                browser_id=positional(0),
                ref=positional(1),
                modifiers=kwargs.get("modifiers"),
                focus_popup=kwargs.get("focus_popup"),
            )
        elif action in ("type", "submit", "type_submit", "scroll"):
            payload.update(browser_id=positional(0), ref=positional(1))
            if action in ("type", "type_submit"):
                payload["text"] = positional(2, "")
        elif action in ("hover", "double_click", "right_click", "drag"):
            payload["browser_id"] = positional(0)
            payload.update(kwargs)
        elif action == "wheel":
            payload.update(
                browser_id=positional(0),
                x=positional(1, 0),
                y=positional(2, 0),
                delta_x=positional(3, 0),
                delta_y=positional(4, 0),
            )
        elif action == "mouse":
            payload.update(
                browser_id=positional(0),
                event_type=positional(1, "click"),
                x=positional(2, 0),
                y=positional(3, 0),
                # A ``button`` keyword wins over the fifth positional argument.
                button=kwargs.get("button", positional(4, "left")),
                modifiers=kwargs.get("modifiers"),
            )
        elif action == "keyboard":
            payload.update(
                browser_id=positional(0),
                key=kwargs.get("key", ""),
                text=kwargs.get("text", ""),
            )
        elif action == "key_chord":
            payload.update(browser_id=positional(0), keys=positional(1, []))
        elif action == "clipboard":
            # The caller's ``action`` keyword is carried as ``clipboard_action``
            # because ``action`` already names the op itself.
            payload.update(
                browser_id=positional(0),
                clipboard_action=kwargs.get("action", ""),
                text=kwargs.get("text", ""),
            )
        elif action == "set_viewport":
            payload.update(
                browser_id=positional(0),
                width=positional(1, 0),
                height=positional(2, 0),
            )
        elif action in ("select_option", "set_checked", "upload_file"):
            payload.update(browser_id=positional(0), ref=positional(1))
            payload.update(kwargs)
        elif action == "multi":
            payload["calls"] = positional(0, [])
        elif action == "close_browser":
            payload.update(action="close", browser_id=positional(0))
        elif action == "close_all_browsers":
            payload["action"] = "close_all"
        else:
            payload.update(kwargs)

        return payload

    async def _dispatch(self, payload: dict[str, Any]) -> Any:
        """Enforce privacy, pick a session, prepare it if needed, then send the op.

        Raises:
            RuntimeError: if the privacy policy blocks the payload, or no
                session id can be selected for this context.
        """
        self._enforce_privacy(payload)
        sid = self._select_sid()
        if not sid:
            detail = self._format_statuses(host_browser_metadata_for_context(self.context_id))
            suffix = f": {detail}" if detail else "."
            raise RuntimeError(
                "Host browser is required but no subscribed A0 CLI advertises host-browser support"
                + suffix
            )

        if self._needs_prepare(sid, payload):
            ensure_request = {
                "op_id": str(uuid.uuid4()),
                "context_id": self.context_id,
                "action": "ensure",
            }
            await self._send_browser_op(sid, ensure_request)
            # Re-select after the "ensure" op; keep the previous sid if nothing is selected.
            sid = self._select_sid() or sid

        return await self._send_browser_op(sid, payload)

    async def _send_browser_op(self, sid: str, payload: dict[str, Any]) -> Any:
        """Emit ``payload`` to ``sid`` and wait for the matching response.

        A future is registered under the payload's ``op_id`` before emitting
        and is always unregistered afterwards.

        Raises:
            RuntimeError: on a missing connection, a timeout, a non-dict
                response, or a response whose ``"ok"`` value is falsy.
        """
        op_id = str(payload["op_id"])
        loop = asyncio.get_running_loop()
        pending: asyncio.Future[dict[str, Any]] = loop.create_future()
        store_pending_browser_op(
            op_id,
            sid=sid,
            future=pending,
            loop=loop,
            context_id=self.context_id,
        )
        handler_id = f"{self.__class__.__module__}.{self.__class__.__name__}"
        try:
            manager = get_shared_ws_manager()
            await manager.emit_to(
                NAMESPACE,
                sid,
                BROWSER_OP_EVENT,
                payload,
                handler_id=handler_id,
            )
            response = await asyncio.wait_for(pending, timeout=BROWSER_OP_TIMEOUT)
        except ConnectionNotFoundError as exc:
            raise RuntimeError(
                "The selected A0 CLI disconnected before the host browser request could be delivered."
            ) from exc
        except asyncio.TimeoutError as exc:
            raise RuntimeError(
                f"Timed out waiting for A0 CLI host browser action={payload.get('action')!r}."
            ) from exc
        finally:
            clear_pending_browser_op(op_id)

        if not isinstance(response, dict):
            raise RuntimeError(f"Unexpected host browser response: {response!r}")
        if response.get("ok"):
            return response.get("result")
        raise RuntimeError(str(response.get("error") or "Host browser operation failed"))

    def _select_sid(self) -> str | None:
        """Return the target sid for this context, else the candidate sid."""
        target = select_host_browser_target_sid(self.context_id)
        if target:
            return target
        return select_host_browser_candidate_sid(self.context_id)

    def _needs_prepare(self, sid: str, payload: dict[str, Any]) -> bool:
        """Tell whether an "ensure" op must precede ``payload`` on ``sid``.

        ``status`` and ``ensure`` actions never need preparation. Otherwise
        preparation is needed unless the sid's metadata is enabled and its
        status is ``ready`` or ``active``.
        """
        action = str(payload.get("action") or "").strip().lower().replace("-", "_")
        if action in ("status", "ensure"):
            return False
        metadata = host_browser_metadata_for_sid(sid) or {}
        if not metadata.get("enabled"):
            return True
        status = str(metadata.get("status") or "").strip()
        return status not in ("ready", "active")

    def _privacy_policy(self) -> str:
        """Read the configured privacy policy, defaulting to ``enforce_local``."""
        configured = get_browser_config(agent=self.agent).get(HOST_BROWSER_PRIVACY_POLICY_KEY)
        return str(configured or "enforce_local").strip()

    def _enforce_privacy(self, payload: dict[str, Any]) -> None:
        """Raise ``RuntimeError`` for a sensitive payload under ``enforce_local``
        when the agent's chat model is not detected as local."""
        policy = self._privacy_policy()
        must_check = self._payload_is_sensitive(payload) and policy == "enforce_local"
        if not must_check:
            return
        if _agent_uses_local_chat_model(self.agent):
            return
        raise RuntimeError(
            "Host-browser content is blocked by Browser privacy policy. "
            "Switch this project to a local chat model, or change Browser settings from "
            "enforce_local to warn/allow."
        )

    def _privacy_warning(self, payload: dict[str, Any]) -> str:
        """Return the warning text for a sensitive payload under the ``warn``
        policy with a non-local chat model; otherwise return ``""``."""
        policy = self._privacy_policy()
        if (
            policy == "warn"
            and self._payload_is_sensitive(payload)
            and not _agent_uses_local_chat_model(self.agent)
        ):
            return (
                "Browser privacy policy is warn: host-browser content was returned while "
                "the active chat model does not appear local."
            )
        return ""

    def _payload_is_sensitive(self, payload: dict[str, Any]) -> bool:
        """Tell whether ``payload`` counts as sensitive for the privacy policy.

        True for any action in ``_SENSITIVE_ACTIONS``, for ``list`` with a
        truthy ``include_content``, and for ``multi`` when any dict entry of
        its ``calls`` list is itself sensitive.
        """
        action = str(payload.get("action") or "").strip().lower().replace("-", "_")
        if action in _SENSITIVE_ACTIONS:
            return True
        if action == "list":
            return bool(payload.get("include_content"))
        if action != "multi":
            return False
        calls = payload.get("calls")
        if not isinstance(calls, list):
            return False
        for call in calls:
            if isinstance(call, dict) and self._payload_is_sensitive(call):
                return True
        return False

    def _materialize_artifact(self, result: Any) -> Any:
        """Write a base64 ``artifact`` in ``result`` to disk and return path metadata.

        Lists are processed entry by entry (only dict entries whose
        ``"result"`` is a dict are touched). A dict without a base64 artifact
        carrying data is returned unchanged. Otherwise a copy of the dict is
        returned with ``artifact`` removed and ``path``, ``a0_path`` and
        ``vision_load`` set.

        Raises:
            RuntimeError: if the estimated decoded size exceeds
                ``MAX_ARTIFACT_SIZE_BYTES`` or the data cannot be decoded.
        """
        if isinstance(result, list):
            converted = []
            for entry in result:
                if isinstance(entry, dict) and isinstance(entry.get("result"), dict):
                    entry = {**entry, "result": self._materialize_artifact(entry["result"])}
                converted.append(entry)
            return converted

        if not isinstance(result, dict):
            return result
        artifact = result.get("artifact")
        if not isinstance(artifact, dict) or str(artifact.get("encoding", "")).lower() != "base64":
            return result
        encoded = str(artifact.get("data") or "")
        if not encoded:
            return result

        decoded_size = _estimated_base64_decoded_size(encoded)
        if decoded_size > MAX_ARTIFACT_SIZE_BYTES:
            raise RuntimeError(
                "Host browser artifact is too large to materialize safely "
                f"({decoded_size} bytes, limit {MAX_ARTIFACT_SIZE_BYTES} bytes)."
            )

        filename = _safe_filename(str(artifact.get("filename") or "host-browser.jpg"))
        target_dir = Path(files.get_abs_path(*HOST_BROWSER_SCREENSHOT_DIR, self.context_id))
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / filename
        try:
            _write_base64_to_path(encoded, target_path)
        except Exception as exc:
            # Do not leave a partially written file behind.
            target_path.unlink(missing_ok=True)
            raise RuntimeError("Host browser artifact could not be decoded.") from exc

        local_path = str(target_path)
        materialized = {key: value for key, value in result.items() if key != "artifact"}
        materialized.update(
            path=local_path,
            a0_path=files.normalize_a0_path(local_path),
            vision_load={
                "tool_name": "vision_load",
                "tool_args": {"paths": [local_path]},
            },
        )
        return materialized

    @staticmethod
    def _format_statuses(statuses: list[dict[str, Any]]) -> str:
        """Render status dicts as ``"; "``-joined ``key=value`` summaries for error text."""
        return "; ".join(
            f"sid={entry.get('sid')} status={entry.get('status')} "
            f"supported={entry.get('supported')} can_prepare={entry.get('can_prepare')} "
            f"enabled={entry.get('enabled')} "
            f"reason={entry.get('support_reason') or 'none'}"
            for entry in statuses
        )
|
|
|
|
|
|
def _agent_uses_local_chat_model(agent: Any) -> bool:
    """Tell whether the agent's chat model configuration looks local.

    The configuration is fetched from ``model_config.get_chat_model_config``;
    any failure while importing or fetching is treated as an empty config.
    A non-dict config is never local. Otherwise the model is local when its
    provider is in ``_LOCAL_PROVIDERS`` or when its endpoint (``api_base`` /
    ``base_url``, looked up on the config first and then in its ``kwargs``
    dict) passes ``_api_base_is_local``.
    """

    def endpoint_from(mapping: dict) -> str:
        # First non-empty of "api_base" / "base_url", stripped; "" when neither is set.
        return str(mapping.get("api_base", "") or mapping.get("base_url", "") or "").strip()

    try:
        from plugins._model_config.helpers import model_config

        cfg = model_config.get_chat_model_config(agent)
    except Exception:
        cfg = {}

    if not isinstance(cfg, dict):
        return False
    if str(cfg.get("provider", "") or "").strip().lower() in _LOCAL_PROVIDERS:
        return True

    api_base = endpoint_from(cfg)
    # Only consult the nested kwargs when the top level gave no endpoint.
    nested = cfg.get("kwargs") if not api_base else None
    if isinstance(nested, dict):
        api_base = endpoint_from(nested)
    return _api_base_is_local(api_base)
|
|
|
|
|
|
def _api_base_is_local(api_base: str) -> bool:
    """Tell whether ``api_base`` points at a host listed in ``_LOCAL_HOSTS``.

    Args:
        api_base: Endpoint URL or bare ``host[:port]``; a value without
            ``://`` is parsed as if prefixed with ``http://``.

    Returns:
        ``True`` when the parsed hostname (lower-cased, stripped) is in
        ``_LOCAL_HOSTS``. ``False`` for an empty value, a value without a
        hostname, or a value that cannot be parsed as a URL.
    """
    if not api_base:
        return False
    candidate = api_base if "://" in api_base else f"http://{api_base}"
    try:
        hostname = urlparse(candidate).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced IPv6 bracket).
        # An endpoint that cannot be parsed is not assumed to be local.
        return False
    return (hostname or "").strip().lower() in _LOCAL_HOSTS
|
|
|
|
|
|
def _safe_filename(value: str) -> str:
    """Reduce ``value`` to a filename made of alphanumerics, ``-``, ``_`` and ``.``.

    Every other character becomes ``_``; leading and trailing ``.`` / ``_``
    are stripped. An empty outcome is replaced by a generated
    ``host-browser-<uuid hex>.jpg`` name, and a name without any ``.`` gets
    a ``.jpg`` suffix.
    """
    keep_as_is = "-_."
    sanitized = "".join(
        ch if (ch.isalnum() or ch in keep_as_is) else "_" for ch in value
    ).strip("._")
    if not sanitized:
        sanitized = f"host-browser-{uuid.uuid4().hex}.jpg"
    return sanitized if "." in sanitized else f"{sanitized}.jpg"
|
|
|
|
|
|
def _estimated_base64_decoded_size(data: str) -> int:
    """Estimate the decoded byte count of base64 text without decoding it.

    Whitespace characters are ignored; the remaining character count is
    scaled by 3/4 (integer division). Padding characters are counted like
    any other, so padded input yields a slightly high figure.
    """
    whitespace_count = sum(map(str.isspace, data))
    payload_chars = len(data) - whitespace_count
    return (payload_chars * 3) // 4
|
|
|
|
|
|
def _write_base64_to_path(data: str, target_path: Path) -> None:
    """Decode base64 text to ``target_path`` in slices instead of all at once.

    The input is consumed ``BASE64_DECODE_CHARS_PER_CHUNK`` characters at a
    time with whitespace removed. Only whole 4-character groups are decoded
    per slice; the remainder is carried into the next slice, and whatever is
    left at the end is decoded last. Decoding uses ``validate=True``, so
    invalid input raises from ``base64.b64decode``.
    """
    carry = ""
    with target_path.open("wb") as sink:
        for start in range(0, len(data), BASE64_DECODE_CHARS_PER_CHUNK):
            window = data[start : start + BASE64_DECODE_CHARS_PER_CHUNK]
            buffered = carry + "".join(filter(lambda ch: not ch.isspace(), window))
            # Largest prefix made of complete 4-character base64 groups.
            split_at = len(buffered) - (len(buffered) % 4)
            if split_at:
                sink.write(base64.b64decode(buffered[:split_at], validate=True))
            carry = buffered[split_at:]
        if carry:
            sink.write(base64.b64decode(carry, validate=True))
|