agent-zero/plugins/_browser/helpers/runtime.py
Alessandro 370ac9b878 Make Browser dockable and stabilize canvas interaction
Extend Browser into a reusable panel that can run in either the Universal Canvas or the floating modal. Add canvas registration, dock/undock behavior, and keep the existing modal path working as a fallback.

Stabilize tab switching with viewer tokens and stale-frame rejection, prevent command snapshots from crossing active tabs, and keep tab changes responsive.

Improve canvas navigation and scrolling by making screencast polling non-blocking and removing page-settle waits from wheel input, so the visible frame updates promptly without stretch/catch-up artifacts.

Polish Browser busy feedback with a spinner-only status affordance to avoid misleading “updating browser” copy.
2026-04-26 17:09:21 +02:00

991 lines
35 KiB
Python

from __future__ import annotations
import atexit
import asyncio
import base64
import contextlib
import os
import re
import shutil
import signal
import threading
import time
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import urlsplit, urlunsplit
from helpers import files
from helpers.defer import DeferredTask
from helpers.print_style import PrintStyle
from plugins._browser.helpers.config import build_browser_launch_config, get_browser_config
from plugins._browser.helpers.playwright import configure_playwright_env, ensure_playwright_binary
# Plugin root: the parent of the directory that contains this file.
PLUGIN_DIR = Path(__file__).resolve().parents[1]
# Page-content helper script. It is registered as a context init script at
# startup and re-injected on demand when a page does not expose it.
CONTENT_HELPER_PATH = PLUGIN_DIR / "assets" / "browser-page-content.js"
# Not referenced by the code visible in this module; presumably the key under
# which callers store the runtime -- TODO confirm against callers.
RUNTIME_DATA_KEY = "_browser_runtime"
# Viewport used when launching the context and as the fallback whenever a
# page reports no viewport size or a caller passes a falsy dimension.
DEFAULT_VIEWPORT = {"width": 1024, "height": 768}
# Files inside a profile directory that are removed before launching Chromium
# on that profile (see _release_orphaned_profile_singleton).
CHROME_SINGLETON_FILES = ("SingletonLock", "SingletonCookie", "SingletonSocket")
# Upper bounds passed as maxWidth / maxHeight to CDP Page.startScreencast.
SCREENCAST_MAX_WIDTH = 4096
SCREENCAST_MAX_HEIGHT = 4096
# Input that starts with one of these schemes (no "//" required); such input
# never takes the typed-host shortcut in normalize_url.
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)
# Input that already starts with an explicit "scheme://" prefix.
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)
# Hosts that get "http://" instead of "https://": localhost, a bracketed
# IPv6-style literal, or a dotted-quad IPv4 address, each with optional port.
_LOCAL_HOST_RE = re.compile(
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?$",
re.I,
)
# Anything that looks like a host typed without a scheme: the local forms
# above, or a dotted domain name, each with an optional port.
_TYPED_HOST_RE = re.compile(
r"^(?:localhost|\[[0-9a-f:.]+\]|(?:\d{1,3}\.){3}\d{1,3}|"
r"(?:[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?\.)+[a-z\d-]{2,63})(?::\d+)?$",
re.I,
)
# Runs of characters other than ASCII letters, digits, "_", "." and "-";
# _safe_context_id replaces each run with a single underscore.
_SAFE_CONTEXT_RE = re.compile(r"[^a-zA-Z0-9_.-]+")
def normalize_url(value: str) -> str:
    """Turn address-bar style input into a URL that can be navigated to.

    Rules, in order:

    * Input that looks like a bare host (optionally followed by a path,
      query or fragment) gets a scheme prepended: ``http://`` for
      localhost and IP literals, ``https://`` for everything else.
    * Input that already carries a scheme is kept as is.
    * Anything else is treated as a host/path and prefixed with
      ``https://``.

    ``http``/``https`` URLs with an empty path get ``/`` as their path.

    Args:
        value: Raw user or agent input; surrounding whitespace is ignored.

    Returns:
        The normalised URL string.

    Raises:
        ValueError: If the input is empty, or if the final ``https://``
            fallback still cannot be parsed by ``urlsplit``.
    """
    raw = str(value or "").strip()
    if not raw:
        raise ValueError("Browser navigation requires a non-empty URL.")

    def with_trailing_path(url: str) -> str:
        # Give bare-origin http(s) URLs an explicit "/" path.
        parts = urlsplit(url)
        if parts.scheme in {"http", "https"} and not parts.path:
            return urlunsplit((parts.scheme, parts.netloc, "/", parts.query, parts.fragment))
        return urlunsplit(parts)

    try:
        # maxsplit is passed by keyword: the positional form is deprecated
        # since Python 3.13.
        host = re.split(r"[/?#]", raw, maxsplit=1)[0] or ""
        if (
            not _URL_SCHEME_RE.match(raw)
            and not _SPECIAL_SCHEME_RE.match(raw)
            and not raw.startswith(("/", "?", "#", "."))
            and not re.search(r"\s", raw)
            and _TYPED_HOST_RE.match(host)
        ):
            protocol = "http://" if _LOCAL_HOST_RE.match(host) else "https://"
            return with_trailing_path(protocol + raw)
        parts = urlsplit(raw)
        if parts.scheme:
            return with_trailing_path(raw)
    except ValueError:
        # urlsplit/urlunsplit reject malformed input (e.g. a broken IPv6
        # literal) with ValueError; fall through to the https:// fallback.
        pass
    return with_trailing_path("https://" + raw)
def _safe_context_id(context_id: str) -> str:
    """Return a filesystem-friendly form of ``context_id``.

    Every run of characters outside ``[a-zA-Z0-9_.-]`` becomes one
    underscore, then leading/trailing dots and underscores are stripped.
    An empty input or an empty result yields ``"default"``.
    """
    text = str(context_id or "default")
    cleaned = _SAFE_CONTEXT_RE.sub("_", text).strip("._")
    if cleaned:
        return cleaned
    return "default"
@dataclass
class BrowserPage:
    """Pairs a page object with the integer id the runtime exposes for it."""

    # Id handed out by _BrowserRuntimeCore._register_page.
    id: int
    # The underlying page object (a Playwright page in this module).
    page: Any
class _BrowserScreencast:
def __init__(
self,
*,
stream_id: str,
browser_id: int,
session: Any,
mime: str,
):
self.id = stream_id
self.browser_id = browser_id
self.session = session
self.mime = mime
self.queue = asyncio.Queue(maxsize=1)
self.stopped = False
self._ack_tasks: set[asyncio.Task] = set()
self._expected_width = 0
self._expected_height = 0
self._dimension_mismatches = 0
async def start(
self,
*,
quality: int,
every_nth_frame: int,
viewport: dict[str, int],
) -> None:
self.session.on("Page.screencastFrame", self._on_frame)
width = max(320, min(4096, int(viewport.get("width") or DEFAULT_VIEWPORT["width"])))
height = max(200, min(4096, int(viewport.get("height") or DEFAULT_VIEWPORT["height"])))
self._expected_width = width
self._expected_height = height
self._dimension_mismatches = 0
with contextlib.suppress(Exception):
await self.session.send("Page.enable")
await self.session.send(
"Emulation.setDeviceMetricsOverride",
{
"width": width,
"height": height,
"deviceScaleFactor": 1,
"mobile": False,
"dontSetVisibleSize": True,
},
)
with contextlib.suppress(Exception):
await self.session.send(
"Emulation.setVisibleSize",
{
"width": width,
"height": height,
},
)
await self.session.send(
"Page.startScreencast",
{
"format": "jpeg",
"quality": max(20, min(95, int(quality))),
"maxWidth": SCREENCAST_MAX_WIDTH,
"maxHeight": SCREENCAST_MAX_HEIGHT,
"everyNthFrame": max(1, int(every_nth_frame)),
},
)
async def next_frame(self, timeout: float = 1.0) -> dict[str, Any]:
frame = await asyncio.wait_for(self.queue.get(), timeout=max(0.1, float(timeout)))
if frame is None:
raise RuntimeError("Browser screencast stopped.")
return frame
async def pop_frame(self) -> dict[str, Any] | None:
try:
frame = self.queue.get_nowait()
except asyncio.QueueEmpty:
return None
if frame is None:
raise RuntimeError("Browser screencast stopped.")
return frame
async def stop(self) -> None:
if self.stopped:
return
self.stopped = True
self._drop_queued_frames()
with contextlib.suppress(asyncio.QueueFull):
self.queue.put_nowait(None)
with contextlib.suppress(Exception):
await self.session.send("Page.stopScreencast")
for task in list(self._ack_tasks):
task.cancel()
if self._ack_tasks:
await asyncio.gather(*self._ack_tasks, return_exceptions=True)
self._ack_tasks.clear()
with contextlib.suppress(Exception):
await self.session.detach()
def _on_frame(self, params: dict[str, Any]) -> None:
if self.stopped:
return
task = asyncio.create_task(self._handle_frame(params or {}))
self._ack_tasks.add(task)
task.add_done_callback(self._ack_tasks.discard)
async def _handle_frame(self, params: dict[str, Any]) -> None:
try:
data = params.get("data") or ""
if data and self._frame_matches_viewport(data):
self._queue_latest(
{
"browser_id": self.browser_id,
"mime": self.mime,
"image": data,
"metadata": params.get("metadata") or {},
}
)
finally:
session_id = params.get("sessionId")
if session_id is not None and not self.stopped:
with contextlib.suppress(Exception):
await self.session.send(
"Page.screencastFrameAck",
{"sessionId": int(session_id)},
)
def _queue_latest(self, frame: dict[str, Any]) -> None:
self._drop_queued_frames()
with contextlib.suppress(asyncio.QueueFull):
self.queue.put_nowait(frame)
def _frame_matches_viewport(self, data: str) -> bool:
if not self._expected_width or not self._expected_height:
return True
size = self._jpeg_size(data)
if not size:
return True
width, height = size
if abs(width - self._expected_width) <= 2 and abs(height - self._expected_height) <= 2:
return True
self._dimension_mismatches += 1
return self._dimension_mismatches > 10
@staticmethod
def _jpeg_size(data: str) -> tuple[int, int] | None:
try:
raw = base64.b64decode(data, validate=False)
except Exception:
return None
if len(raw) < 10 or raw[:2] != b"\xff\xd8":
return None
index = 2
standalone_markers = {0x01, *range(0xD0, 0xD8)}
size_markers = {
0xC0,
0xC1,
0xC2,
0xC3,
0xC5,
0xC6,
0xC7,
0xC9,
0xCA,
0xCB,
0xCD,
0xCE,
0xCF,
}
while index < len(raw) - 9:
if raw[index] != 0xFF:
index += 1
continue
while index < len(raw) and raw[index] == 0xFF:
index += 1
if index >= len(raw):
return None
marker = raw[index]
index += 1
if marker in standalone_markers:
continue
if index + 2 > len(raw):
return None
segment_length = int.from_bytes(raw[index : index + 2], "big")
if segment_length < 2 or index + segment_length > len(raw):
return None
if marker in size_markers and segment_length >= 7:
height = int.from_bytes(raw[index + 3 : index + 5], "big")
width = int.from_bytes(raw[index + 5 : index + 7], "big")
return width, height
index += segment_length
return None
def _drop_queued_frames(self) -> None:
while True:
try:
self.queue.get_nowait()
except asyncio.QueueEmpty:
return
class BrowserRuntime:
    """Public handle for one context's browser.

    Every operation is forwarded by name to a ``_BrowserRuntimeCore`` and
    executed through a ``DeferredTask`` worker created for this context.
    """

    def __init__(self, context_id: str):
        self.context_id = str(context_id)
        self._core = _BrowserRuntimeCore(self.context_id)
        self._worker = DeferredTask(thread_name=f"BrowserRuntime-{self.context_id}")
        self._closed = False

    async def call(self, method: str, *args: Any, **kwargs: Any) -> Any:
        """Run the core coroutine method named ``method`` on the worker.

        Raises:
            RuntimeError: If the runtime is closed and ``method`` is not
                ``"close"``.
        """
        if method != "close" and self._closed:
            raise RuntimeError("Browser runtime is closed.")

        async def invoke():
            # The attribute is looked up when the worker runs the coroutine.
            target = getattr(self._core, method)
            return await target(*args, **kwargs)

        return await self._worker.execute_inside(invoke)

    async def close(self, delete_profile: bool = False) -> None:
        """Close the core and kill the worker; later calls are no-ops.

        The runtime is marked closed and the worker is killed even when
        closing the core raises.
        """
        if self._closed:
            return
        try:
            await self.call("close", delete_profile=delete_profile)
        finally:
            self._closed = True
            self._worker.kill(terminate_thread=True)
class _BrowserRuntimeCore:
def __init__(self, context_id: str):
self.context_id = context_id
self.safe_context_id = _safe_context_id(context_id)
self.playwright = None
self.context = None
self.pages: dict[int, BrowserPage] = {}
self.screencasts: dict[str, _BrowserScreencast] = {}
self.next_browser_id = 1
self.last_interacted_browser_id: int | None = None
self._content_helper_source: str | None = None
self._start_lock: asyncio.Lock | None = None
@property
def profile_dir(self) -> Path:
return Path(files.get_abs_path("tmp/browser/sessions", self.safe_context_id))
@property
def downloads_dir(self) -> Path:
return Path(files.get_abs_path("usr/downloads/browser"))
async def ensure_started(self) -> None:
if self.context:
return
if self._start_lock is None:
self._start_lock = asyncio.Lock()
async with self._start_lock:
if self.context:
return
await self._start()
async def _start(self) -> None:
from playwright.async_api import async_playwright
self.profile_dir.mkdir(parents=True, exist_ok=True)
self.downloads_dir.mkdir(parents=True, exist_ok=True)
self._release_orphaned_profile_singleton()
browser_config = get_browser_config()
launch_config = build_browser_launch_config(browser_config)
configure_playwright_env()
browser_binary = ensure_playwright_binary(
full_browser=launch_config["requires_full_browser"]
)
self.playwright = await async_playwright().start()
launch_kwargs: dict[str, Any] = {
"user_data_dir": str(self.profile_dir),
"headless": True,
"accept_downloads": True,
"downloads_path": str(self.downloads_dir),
"viewport": DEFAULT_VIEWPORT,
"screen": DEFAULT_VIEWPORT,
"no_viewport": False,
"args": launch_config["args"],
}
if launch_config["channel"]:
launch_kwargs["channel"] = launch_config["channel"]
else:
launch_kwargs["executable_path"] = str(browser_binary)
try:
self.context = await self.playwright.chromium.launch_persistent_context(
**launch_kwargs
)
except Exception:
if self.playwright:
try:
await self.playwright.stop()
except Exception:
pass
self.playwright = None
raise
self.context.set_default_timeout(30000)
self.context.set_default_navigation_timeout(30000)
await self.context.add_init_script(self._shadow_dom_script())
await self.context.add_init_script(path=str(CONTENT_HELPER_PATH))
for page in list(self.context.pages):
if page.url == "about:blank":
try:
await page.close()
except Exception:
pass
continue
self._register_page(page)
def _release_orphaned_profile_singleton(self) -> None:
lock_path = self.profile_dir / "SingletonLock"
owner_pid = self._profile_singleton_owner_pid(lock_path)
if owner_pid and self._process_owns_profile(owner_pid):
PrintStyle.warning(
f"Stopping orphaned Chromium process {owner_pid} for Browser profile {self.safe_context_id}."
)
self._terminate_process(owner_pid)
for name in CHROME_SINGLETON_FILES:
singleton_path = self.profile_dir / name
try:
if singleton_path.exists() or singleton_path.is_symlink():
singleton_path.unlink()
except OSError as exc:
PrintStyle.warning(f"Could not remove stale Browser profile lock {singleton_path}: {exc}")
@staticmethod
def _profile_singleton_owner_pid(lock_path: Path) -> int | None:
try:
target = os.readlink(lock_path)
except OSError:
return None
raw_pid = target.rsplit("-", 1)[-1]
if not raw_pid.isdigit():
return None
return int(raw_pid)
def _process_owns_profile(self, pid: int) -> bool:
cmdline_path = Path("/proc") / str(pid) / "cmdline"
try:
raw = cmdline_path.read_bytes()
except OSError:
return False
cmdline = raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore")
return "chrome" in cmdline.lower() and str(self.profile_dir) in cmdline
@staticmethod
def _terminate_process(pid: int) -> None:
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
return
except OSError as exc:
PrintStyle.warning(f"Could not stop orphaned Chromium process {pid}: {exc}")
return
deadline = time.monotonic() + 3
while time.monotonic() < deadline:
if not Path("/proc", str(pid)).exists():
return
time.sleep(0.1)
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
except OSError as exc:
PrintStyle.warning(f"Could not force-stop orphaned Chromium process {pid}: {exc}")
async def open(self, url: str = "about:blank") -> dict[str, Any]:
await self.ensure_started()
page = await self.context.new_page()
browser_page = self._register_page(page)
self.last_interacted_browser_id = browser_page.id
if url and url != "about:blank":
await self._goto(page, normalize_url(url))
else:
await self._settle(page)
return {"id": browser_page.id, "state": await self._state(browser_page.id)}
async def list(self) -> dict[str, Any]:
await self.ensure_started()
return {
"browsers": [await self._state(browser_id) for browser_id in sorted(self.pages)],
"last_interacted_browser_id": self.last_interacted_browser_id,
}
async def state(self, browser_id: int | str | None = None) -> dict[str, Any]:
await self.ensure_started()
return await self._state(self._resolve_browser_id(browser_id))
async def navigate(self, browser_id: int | str | None, url: str) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await self._goto(page, normalize_url(url))
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def back(self, browser_id: int | str | None = None) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await page.go_back(wait_until="domcontentloaded", timeout=10000)
await self._settle(page)
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def forward(self, browser_id: int | str | None = None) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await page.go_forward(wait_until="domcontentloaded", timeout=10000)
await self._settle(page)
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def reload(self, browser_id: int | str | None = None) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await page.reload(wait_until="domcontentloaded", timeout=15000)
await self._settle(page)
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def content(
self,
browser_id: int | str | None = None,
payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await self._ensure_content_helper(page)
result = await page.evaluate(
"(payload) => globalThis.__spaceBrowserPageContent__.capture(payload || null)",
payload or None,
)
self.last_interacted_browser_id = resolved_id
return result or {}
async def detail(self, browser_id: int | str | None, reference_id: int | str) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await self._ensure_content_helper(page)
result = await page.evaluate(
"(ref) => globalThis.__spaceBrowserPageContent__.detail(ref)",
reference_id,
)
self.last_interacted_browser_id = resolved_id
return result or {}
async def evaluate(self, browser_id: int | str | None, script: str) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
result = await page.evaluate(str(script or "undefined"))
self.last_interacted_browser_id = resolved_id
return {"result": result, "state": await self._state(resolved_id)}
async def click(self, browser_id: int | str | None, reference_id: int | str) -> dict[str, Any]:
return await self._reference_action("click", browser_id, reference_id)
async def submit(self, browser_id: int | str | None, reference_id: int | str) -> dict[str, Any]:
return await self._reference_action("submit", browser_id, reference_id)
async def scroll(self, browser_id: int | str | None, reference_id: int | str) -> dict[str, Any]:
return await self._reference_action("scroll", browser_id, reference_id)
async def type(
self,
browser_id: int | str | None,
reference_id: int | str,
text: str,
) -> dict[str, Any]:
return await self._reference_action("type", browser_id, reference_id, text)
async def type_submit(
self,
browser_id: int | str | None,
reference_id: int | str,
text: str,
) -> dict[str, Any]:
return await self._reference_action("typeSubmit", browser_id, reference_id, text)
async def close_browser(self, browser_id: int | str | None = None) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
await self._stop_screencasts_for_browser(resolved_id)
page = self._page(resolved_id)
await page.close()
self.pages.pop(resolved_id, None)
if self.last_interacted_browser_id == resolved_id:
self.last_interacted_browser_id = next(iter(sorted(self.pages)), None)
return await self.list()
async def close_all_browsers(self) -> dict[str, Any]:
await self.ensure_started()
await self._stop_all_screencasts()
for browser_id in list(self.pages):
try:
await self.pages[browser_id].page.close()
except Exception:
pass
self.pages.clear()
self.last_interacted_browser_id = None
return {"browsers": [], "last_interacted_browser_id": None}
async def screenshot(
self,
browser_id: int | str | None = None,
*,
quality: int = 70,
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
image = await page.screenshot(type="jpeg", quality=max(20, min(95, int(quality))))
return {
"browser_id": resolved_id,
"mime": "image/jpeg",
"image": base64.b64encode(image).decode("ascii"),
"state": await self._state(resolved_id),
}
async def start_screencast(
self,
browser_id: int | str | None = None,
*,
quality: int = 78,
every_nth_frame: int = 1,
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
stream_id = uuid.uuid4().hex
session = await self.context.new_cdp_session(page)
screencast = _BrowserScreencast(
stream_id=stream_id,
browser_id=resolved_id,
session=session,
mime="image/jpeg",
)
self.screencasts[stream_id] = screencast
try:
await screencast.start(
quality=quality,
every_nth_frame=every_nth_frame,
viewport=page.viewport_size or DEFAULT_VIEWPORT,
)
except Exception:
self.screencasts.pop(stream_id, None)
await screencast.stop()
raise
self.last_interacted_browser_id = resolved_id
return {
"stream_id": stream_id,
"browser_id": resolved_id,
"state": await self._state(resolved_id),
}
async def read_screencast_frame(
self,
stream_id: str,
*,
timeout: float = 1.0,
) -> dict[str, Any]:
screencast = self.screencasts.get(str(stream_id or ""))
if not screencast:
raise KeyError("Browser screencast is not active.")
return await screencast.next_frame(timeout=timeout)
async def pop_screencast_frame(self, stream_id: str) -> dict[str, Any] | None:
screencast = self.screencasts.get(str(stream_id or ""))
if not screencast:
raise KeyError("Browser screencast is not active.")
return await screencast.pop_frame()
async def stop_screencast(self, stream_id: str) -> None:
screencast = self.screencasts.pop(str(stream_id or ""), None)
if screencast:
await screencast.stop()
async def set_viewport(
self,
browser_id: int | str | None,
width: int,
height: int,
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
viewport = {
"width": max(320, min(4096, int(width or DEFAULT_VIEWPORT["width"]))),
"height": max(200, min(4096, int(height or DEFAULT_VIEWPORT["height"]))),
}
current_viewport = page.viewport_size or {}
changed = (
int(current_viewport.get("width") or 0) != viewport["width"]
or int(current_viewport.get("height") or 0) != viewport["height"]
)
if changed:
await page.set_viewport_size(viewport)
await self._stop_screencasts_for_browser(resolved_id)
await self._settle(page, short=True)
self.last_interacted_browser_id = resolved_id
return {"state": await self._state(resolved_id), "viewport": viewport}
async def mouse(
self,
browser_id: int | str | None,
event_type: str,
x: float,
y: float,
button: str = "left",
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
event_type = str(event_type or "click").lower()
if event_type == "move":
await page.mouse.move(float(x), float(y))
elif event_type == "down":
await page.mouse.down(button=button)
elif event_type == "up":
await page.mouse.up(button=button)
else:
await page.mouse.click(float(x), float(y), button=button)
await self._settle(page, short=True)
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def wheel(
self,
browser_id: int | str | None,
x: float,
y: float,
delta_x: float = 0,
delta_y: float = 0,
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await page.mouse.move(float(x), float(y))
await page.mouse.wheel(float(delta_x), float(delta_y))
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def keyboard(
self,
browser_id: int | str | None,
*,
key: str = "",
text: str = "",
) -> dict[str, Any]:
await self.ensure_started()
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
if text:
await page.keyboard.type(str(text))
elif key:
await page.keyboard.press(str(key))
await self._settle(page, short=True)
self.last_interacted_browser_id = resolved_id
return await self._state(resolved_id)
async def close(self, delete_profile: bool = False) -> None:
await self._stop_all_screencasts()
for browser_id in list(self.pages):
try:
await self.pages[browser_id].page.close()
except Exception:
pass
self.pages.clear()
if self.context:
try:
await self.context.close()
except Exception as exc:
PrintStyle.warning(f"Browser context close failed: {exc}")
self.context = None
if self.playwright:
try:
await self.playwright.stop()
except Exception as exc:
PrintStyle.warning(f"Playwright stop failed: {exc}")
self.playwright = None
self.last_interacted_browser_id = None
if delete_profile:
shutil.rmtree(self.profile_dir, ignore_errors=True)
async def _reference_action(
self,
helper_method: str,
browser_id: int | str | None,
reference_id: int | str,
text: str | None = None,
) -> dict[str, Any]:
resolved_id = self._resolve_browser_id(browser_id)
page = self._page(resolved_id)
await self._ensure_content_helper(page)
if text is None:
action = await page.evaluate(
"(args) => globalThis.__spaceBrowserPageContent__[args.method](args.ref)",
{"method": helper_method, "ref": reference_id},
)
else:
action = await page.evaluate(
"(args) => globalThis.__spaceBrowserPageContent__[args.method](args.ref, args.text)",
{"method": helper_method, "ref": reference_id, "text": text},
)
await self._settle(page, short=False)
self.last_interacted_browser_id = resolved_id
return {"action": action or {}, "state": await self._state(resolved_id)}
async def _goto(self, page: Any, url: str) -> None:
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
try:
await page.goto(url, wait_until="domcontentloaded", timeout=30000)
except PlaywrightTimeoutError:
PrintStyle.warning(f"Browser navigation timed out after DOM handoff: {url}")
await self._settle(page)
async def _settle(self, page: Any, short: bool = False) -> None:
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
try:
await page.wait_for_load_state(
"domcontentloaded",
timeout=1000 if short else 5000,
)
except PlaywrightTimeoutError:
pass
await asyncio.sleep(0.1 if short else 0.35)
async def _state(self, browser_id: int) -> dict[str, Any]:
browser_page = self.pages.get(int(browser_id))
if not browser_page:
raise KeyError(f"Browser {browser_id} is not open.")
page = browser_page.page
try:
title = await page.title()
except Exception:
title = ""
try:
history_length = await page.evaluate("() => globalThis.history?.length || 0")
except Exception:
history_length = 0
return {
"id": browser_page.id,
"currentUrl": page.url,
"title": title,
"canGoBack": bool(history_length and int(history_length) > 1),
"canGoForward": False,
"loading": False,
}
def _register_page(self, page: Any) -> BrowserPage:
existing = self._browser_id_for_page(page)
if existing is not None:
return self.pages[existing]
browser_id = self.next_browser_id
self.next_browser_id += 1
browser_page = BrowserPage(id=browser_id, page=page)
self.pages[browser_id] = browser_page
def on_close() -> None:
self.pages.pop(browser_id, None)
page.on("close", on_close)
return browser_page
def _browser_id_for_page(self, page: Any) -> int | None:
for browser_id, browser_page in self.pages.items():
if browser_page.page == page:
return browser_id
return None
def _resolve_browser_id(self, browser_id: int | str | None = None) -> int:
if browser_id is None or str(browser_id).strip() == "":
if self.last_interacted_browser_id in self.pages:
return int(self.last_interacted_browser_id)
if self.pages:
return sorted(self.pages)[0]
raise KeyError("No browser is open. Use action=open first.")
value = str(browser_id).strip()
if value.startswith("browser-"):
value = value.split("-", 1)[1]
resolved = int(value)
if resolved not in self.pages:
raise KeyError(f"Browser {resolved} is not open.")
return resolved
def _page(self, browser_id: int) -> Any:
return self.pages[int(browser_id)].page
async def _stop_screencasts_for_browser(self, browser_id: int) -> None:
stream_ids = [
stream_id
for stream_id, screencast in self.screencasts.items()
if screencast.browser_id == int(browser_id)
]
for stream_id in stream_ids:
await self.stop_screencast(stream_id)
async def _stop_all_screencasts(self) -> None:
for stream_id in list(self.screencasts):
await self.stop_screencast(stream_id)
async def _ensure_content_helper(self, page: Any) -> None:
has_helper = await page.evaluate(
"() => Boolean(globalThis.__spaceBrowserPageContent__?.capture)"
)
if has_helper:
return
if self._content_helper_source is None:
self._content_helper_source = CONTENT_HELPER_PATH.read_text(encoding="utf-8")
await page.evaluate(self._content_helper_source)
@staticmethod
def _shadow_dom_script() -> str:
return """
(() => {
const original = Element.prototype.attachShadow;
if (original && !original.__a0BrowserOpenShadowPatch) {
const patched = function attachShadow(options) {
return original.call(this, { ...(options || {}), mode: "open" });
};
patched.__a0BrowserOpenShadowPatch = true;
Element.prototype.attachShadow = patched;
}
})();
"""
# Registry of live runtimes, keyed by the stripped context id.
_runtimes: dict[str, BrowserRuntime] = {}
# Guards every read and write of _runtimes.
_runtime_lock = threading.RLock()
async def get_runtime(context_id: str, *, create: bool = True) -> BrowserRuntime | None:
    """Look up the runtime registered for ``context_id``.

    Args:
        context_id: Context identifier; surrounding whitespace is ignored.
        create: When true, a missing runtime is created and registered.

    Returns:
        The runtime, or ``None`` when it does not exist and ``create`` is
        false.

    Raises:
        ValueError: If ``context_id`` is empty.
    """
    key = str(context_id or "").strip()
    if not key:
        raise ValueError("context_id is required")
    with _runtime_lock:
        found = _runtimes.get(key)
        if found is not None or not create:
            return found
        fresh = BrowserRuntime(key)
        _runtimes[key] = fresh
        return fresh
async def close_runtime(context_id: str, *, delete_profile: bool = True) -> None:
    """Unregister and close the runtime of ``context_id``, if there is one.

    An empty ``context_id`` or an unknown context is a no-op. The runtime
    is removed from the registry under the lock and closed outside of it.
    """
    key = str(context_id or "").strip()
    if key:
        with _runtime_lock:
            runtime = _runtimes.pop(key, None)
        if runtime:
            await runtime.close(delete_profile=delete_profile)
def close_runtime_sync(context_id: str, *, delete_profile: bool = True) -> None:
    """Synchronous wrapper around ``close_runtime``.

    Runs the coroutine on a temporary ``DeferredTask`` named
    "BrowserCleanup", waits up to 30 seconds for it, and always kills the
    task afterwards.
    """
    cleanup = DeferredTask(thread_name="BrowserCleanup")
    cleanup.start_task(close_runtime, context_id, delete_profile=delete_profile)
    try:
        cleanup.result_sync(timeout=30)
    finally:
        cleanup.kill(terminate_thread=True)
async def close_all_runtimes(*, delete_profiles: bool = False) -> None:
    """Empty the registry and close every runtime that was in it.

    A failure to close one runtime is logged as a warning and does not
    stop the remaining ones from being closed.
    """
    with _runtime_lock:
        pending = [*_runtimes.values()]
        _runtimes.clear()
    for runtime in pending:
        try:
            await runtime.close(delete_profile=delete_profiles)
        except Exception as exc:
            PrintStyle.warning(f"Browser runtime cleanup failed: {exc}")
def close_all_runtimes_sync() -> None:
    """Synchronously close every registered runtime, keeping their profiles.

    This function is registered as an ``atexit`` hook. When no runtime is
    registered it returns immediately, so no cleanup worker is created
    during interpreter shutdown just to do nothing. Otherwise it runs
    ``close_all_runtimes`` on a temporary ``DeferredTask`` named
    "BrowserCleanupAll", waits up to 30 seconds, and always kills the task
    afterwards.
    """
    with _runtime_lock:
        if not _runtimes:
            return
    task = DeferredTask(thread_name="BrowserCleanupAll")
    task.start_task(close_all_runtimes, delete_profiles=False)
    try:
        task.result_sync(timeout=30)
    finally:
        task.kill(terminate_thread=True)
def known_context_ids() -> list[str]:
    """Return the ids of all registered runtimes in sorted order."""
    with _runtime_lock:
        ids = list(_runtimes)
    ids.sort()
    return ids
# Close any runtime that is still registered when the interpreter exits.
atexit.register(close_all_runtimes_sync)