From dccf017d2cf74bbec67151d03ba3e4282ea08c6b Mon Sep 17 00:00:00 2001 From: Alessandro <155005371+3clyp50@users.noreply.github.com> Date: Sun, 26 Apr 2026 02:28:59 +0200 Subject: [PATCH] Redesign Browser viewer screencast transport and viewport fit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the Browser viewer’s screenshot polling with CDP screencast streaming for much smoother navigation. The runtime now starts/stops CDP screencasts cleanly, acknowledges frames, drops stale frames, and keeps the WebSocket payload compatible with the existing viewer. Also fixes modal viewport sizing by sending the initial stage dimensions on subscribe, applying CDP emulation sizing before the first frame, avoiding image stretching, and increasing screencast JPEG quality to 92. Regression coverage was added for the screencast path, frame ack/drop behavior, viewport sizing, and UI rendering assumptions. -- Still needs thorough performance audit and optimization -- --- plugins/_browser/api/ws_browser.py | 187 ++++++++++++---- plugins/_browser/helpers/runtime.py | 271 +++++++++++++++++++++++- plugins/_browser/webui/browser-store.js | 115 ++++++---- plugins/_browser/webui/main.html | 38 ++-- tests/test_browser_agent_regressions.py | 117 +++++++++- 5 files changed, 636 insertions(+), 92 deletions(-) diff --git a/plugins/_browser/api/ws_browser.py b/plugins/_browser/api/ws_browser.py index 2dc7002a5..6ac7039e5 100644 --- a/plugins/_browser/api/ws_browser.py +++ b/plugins/_browser/api/ws_browser.py @@ -1,6 +1,8 @@ from __future__ import annotations import asyncio +import contextlib +import time from typing import Any, ClassVar from agent import AgentContext @@ -9,6 +11,12 @@ from helpers.ws_manager import WsResult from plugins._browser.helpers.runtime import get_runtime +FRAME_IDLE_TIMEOUT_SECONDS = 0.35 +FRAME_RETRY_DELAY_SECONDS = 0.5 +FRAME_STATE_REFRESH_SECONDS = 0.75 +SCREENCAST_QUALITY = 92 + + class WsBrowser(WsHandler): _streams: ClassVar[dict[tuple[str, str], asyncio.Task[None]]] = {} @@ -57,9 +65,17 @@ class WsBrowser(WsHandler): browsers = listing.get("browsers") or [] if opened.get("id"): listing["last_interacted_browser_id"] = opened.get("id") - active_id = data.get("browser_id") or listing.get("last_interacted_browser_id") - if not active_id and browsers: - active_id = browsers[0].get("id") + active_id = self._active_browser_id(listing, data.get("browser_id")) + initial_viewport = self._viewport_from_data(data) + if active_id and initial_viewport: + await runtime.call( + "set_viewport", + active_id, + initial_viewport["width"], + initial_viewport["height"], + ) + listing = await runtime.call("list") + browsers = listing.get("browsers") or [] stream_key = (sid, context_id) existing = self._streams.pop(stream_key, None) @@ -187,46 +203,145 @@ class WsBrowser(WsHandler): context_id: str, browser_id: int | str | None, ) -> None: + runtime = None + stream_id = None while True: try: runtime = await get_runtime(context_id, create=False) - if runtime: - listing = await runtime.call("list") - browsers = listing.get("browsers") or [] - browser_ids = {str(browser.get("id")) for browser in browsers} - requested_id = str(browser_id or "") if browser_id else "" - active_id = ( - browser_id - if requested_id and requested_id in browser_ids - else listing.get("last_interacted_browser_id") - ) - if active_id and str(active_id) not in browser_ids: - active_id = None - if not active_id and browsers: - active_id = browsers[0].get("id") - if active_id: - frame = await runtime.call("screenshot", active_id) - frame["context_id"] = context_id - frame["browsers"] = browsers - await self.emit_to(sid, "browser_viewer_frame", frame) - else: - await self.emit_to( - sid, - "browser_viewer_frame", - { - "context_id": context_id, - "browser_id": None, - "browsers": browsers, - "image": "", - "mime": "", - "state": None, - }, + if not runtime: + await self._emit_empty_frame(sid, context_id) + await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS) + continue + + listing = await runtime.call("list") + browsers = listing.get("browsers") or [] + active_id = self._active_browser_id(listing, browser_id) + if not active_id: + await self._emit_empty_frame(sid, context_id, browsers=browsers) + await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS) + continue + + screencast = await runtime.call( + "start_screencast", + active_id, + quality=SCREENCAST_QUALITY, + every_nth_frame=1, + ) + stream_id = screencast["stream_id"] + active_id = screencast["browser_id"] + state = screencast.get("state") + await self.emit_to( + sid, + "browser_viewer_frame", + { + "context_id": context_id, + "browser_id": active_id, + "browsers": browsers, + "image": "", + "mime": "", + "state": state, + }, + ) + + last_state_refresh = 0.0 + while True: + now = time.monotonic() + if now - last_state_refresh >= FRAME_STATE_REFRESH_SECONDS: + listing = await runtime.call("list") + browsers = listing.get("browsers") or [] + browser_ids = {str(browser.get("id")) for browser in browsers} + if str(active_id) not in browser_ids: + break + state = self._state_for_browser(browsers, active_id, state) + last_state_refresh = now + + try: + frame = await runtime.call( + "read_screencast_frame", + stream_id, + timeout=FRAME_IDLE_TIMEOUT_SECONDS, ) - await asyncio.sleep(0.75) + except TimeoutError: + continue + + frame["context_id"] = context_id + frame["browser_id"] = active_id + frame["browsers"] = browsers + frame["state"] = state + await self.emit_to(sid, "browser_viewer_frame", frame) except asyncio.CancelledError: raise except Exception: - await asyncio.sleep(1.5) + await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS) + finally: + if runtime and stream_id: + with contextlib.suppress(Exception): + await runtime.call("stop_screencast", stream_id) + stream_id = None + + @staticmethod + def _active_browser_id( + listing: dict[str, Any], + requested_browser_id: int | str | None, + ) -> int | str | None: + browsers = listing.get("browsers") or [] + browser_ids = {str(browser.get("id")) for browser in browsers} + requested_id = str(requested_browser_id or "") if requested_browser_id else "" + active_id = ( + requested_browser_id + if requested_id and requested_id in browser_ids + else listing.get("last_interacted_browser_id") + ) + if active_id and str(active_id) not in browser_ids: + active_id = None + if not active_id and browsers: + active_id = browsers[0].get("id") + return active_id + + @staticmethod + def _state_for_browser( + browsers: list[dict[str, Any]], + browser_id: int | str, + current_state: dict[str, Any] | None, + ) -> dict[str, Any] | None: + for browser in browsers: + if str(browser.get("id")) == str(browser_id): + return browser + return current_state + + async def _emit_empty_frame( + self, + sid: str, + context_id: str, + *, + browsers: list[dict[str, Any]] | None = None, + ) -> None: + await self.emit_to( + sid, + "browser_viewer_frame", + { + "context_id": context_id, + "browser_id": None, + "browsers": browsers or [], + "image": "", + "mime": "", + "state": None, + }, + ) + + @staticmethod + def _viewport_from_data(data: dict[str, Any]) -> dict[str, int] | None: + try: + width = int(data.get("viewport_width") or data.get("width") or 0) + height = int(data.get("viewport_height") or data.get("height") or 0) + except (TypeError, ValueError): + return None + if width < 80 or height < 80: + return None + return { + "width": max(320, min(4096, width)), + "height": max(200, min(4096, height)), + } @staticmethod def _context_id(data: dict[str, Any]) -> str: diff --git a/plugins/_browser/helpers/runtime.py b/plugins/_browser/helpers/runtime.py index a54f13908..e72b48e87 100644 --- a/plugins/_browser/helpers/runtime.py +++ b/plugins/_browser/helpers/runtime.py @@ -3,12 +3,14 @@ from __future__ import annotations import atexit import asyncio import base64 +import contextlib import os import re import shutil import signal import threading import time +import uuid from dataclasses import dataclass from pathlib import Path from typing import Any @@ -27,6 +29,8 @@ CONTENT_HELPER_PATH = PLUGIN_DIR / "assets" / "browser-page-content.js" RUNTIME_DATA_KEY = "_browser_runtime" DEFAULT_VIEWPORT = {"width": 1024, "height": 768} CHROME_SINGLETON_FILES = ("SingletonLock", "SingletonCookie", "SingletonSocket") +SCREENCAST_MAX_WIDTH = 4096 +SCREENCAST_MAX_HEIGHT = 4096 _SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I) _URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I) @@ -84,6 +88,195 @@ class BrowserPage: page: Any +class _BrowserScreencast: + def __init__( + self, + *, + stream_id: str, + browser_id: int, + session: Any, + mime: str, + ): + self.id = stream_id + self.browser_id = browser_id + self.session = session + self.mime = mime + self.queue = asyncio.Queue(maxsize=1) + self.stopped = False + self._ack_tasks: set[asyncio.Task] = set() + self._expected_width = 0 + self._expected_height = 0 + self._dimension_mismatches = 0 + + async def start( + self, + *, + quality: int, + every_nth_frame: int, + viewport: dict[str, int], + ) -> None: + self.session.on("Page.screencastFrame", self._on_frame) + width = max(320, min(4096, int(viewport.get("width") or DEFAULT_VIEWPORT["width"]))) + height = max(200, min(4096, int(viewport.get("height") or DEFAULT_VIEWPORT["height"]))) + self._expected_width = width + self._expected_height = height + self._dimension_mismatches = 0 + with contextlib.suppress(Exception): + await self.session.send("Page.enable") + await self.session.send( + "Emulation.setDeviceMetricsOverride", + { + "width": width, + "height": height, + "deviceScaleFactor": 1, + "mobile": False, + "dontSetVisibleSize": True, + }, + ) + with contextlib.suppress(Exception): + await self.session.send( + "Emulation.setVisibleSize", + { + "width": width, + "height": height, + }, + ) + await self.session.send( + "Page.startScreencast", + { + "format": "jpeg", + "quality": max(20, min(95, int(quality))), + "maxWidth": SCREENCAST_MAX_WIDTH, + "maxHeight": SCREENCAST_MAX_HEIGHT, + "everyNthFrame": max(1, int(every_nth_frame)), + }, + ) + + async def next_frame(self, timeout: float = 1.0) -> dict[str, Any]: + frame = await asyncio.wait_for(self.queue.get(), timeout=max(0.1, float(timeout))) + if frame is None: + raise RuntimeError("Browser screencast stopped.") + return frame + + async def stop(self) -> None: + if self.stopped: + return + self.stopped = True + self._drop_queued_frames() + with contextlib.suppress(asyncio.QueueFull): + self.queue.put_nowait(None) + with contextlib.suppress(Exception): + await self.session.send("Page.stopScreencast") + for task in list(self._ack_tasks): + task.cancel() + if self._ack_tasks: + await asyncio.gather(*self._ack_tasks, return_exceptions=True) + self._ack_tasks.clear() + with contextlib.suppress(Exception): + await self.session.detach() + + def _on_frame(self, params: dict[str, Any]) -> None: + if self.stopped: + return + task = asyncio.create_task(self._handle_frame(params or {})) + self._ack_tasks.add(task) + task.add_done_callback(self._ack_tasks.discard) + + async def _handle_frame(self, params: dict[str, Any]) -> None: + try: + data = params.get("data") or "" + if data and self._frame_matches_viewport(data): + self._queue_latest( + { + "browser_id": self.browser_id, + "mime": self.mime, + "image": data, + "metadata": params.get("metadata") or {}, + } + ) + finally: + session_id = params.get("sessionId") + if session_id is not None and not self.stopped: + with contextlib.suppress(Exception): + await self.session.send( + "Page.screencastFrameAck", + {"sessionId": int(session_id)}, + ) + + def _queue_latest(self, frame: dict[str, Any]) -> None: + self._drop_queued_frames() + with contextlib.suppress(asyncio.QueueFull): + self.queue.put_nowait(frame) + + def _frame_matches_viewport(self, data: str) -> bool: + if not self._expected_width or not self._expected_height: + return True + size = self._jpeg_size(data) + if not size: + return True + width, height = size + if abs(width - self._expected_width) <= 2 and abs(height - self._expected_height) <= 2: + return True + self._dimension_mismatches += 1 + return self._dimension_mismatches > 10 + + @staticmethod + def _jpeg_size(data: str) -> tuple[int, int] | None: + try: + raw = base64.b64decode(data, validate=False) + except Exception: + return None + if len(raw) < 10 or raw[:2] != b"\xff\xd8": + return None + index = 2 + standalone_markers = {0x01, *range(0xD0, 0xD8)} + size_markers = { + 0xC0, + 0xC1, + 0xC2, + 0xC3, + 0xC5, + 0xC6, + 0xC7, + 0xC9, + 0xCA, + 0xCB, + 0xCD, + 0xCE, + 0xCF, + } + while index < len(raw) - 9: + if raw[index] != 0xFF: + index += 1 + continue + while index < len(raw) and raw[index] == 0xFF: + index += 1 + if index >= len(raw): + return None + marker = raw[index] + index += 1 + if marker in standalone_markers: + continue + if index + 2 > len(raw): + return None + segment_length = int.from_bytes(raw[index : index + 2], "big") + if segment_length < 2 or index + segment_length > len(raw): + return None + if marker in size_markers and segment_length >= 7: + height = int.from_bytes(raw[index + 3 : index + 5], "big") + width = int.from_bytes(raw[index + 5 : index + 7], "big") + return width, height + index += segment_length + return None + + def _drop_queued_frames(self) -> None: + while True: + try: + self.queue.get_nowait() + except asyncio.QueueEmpty: + return + + class BrowserRuntime: def __init__(self, context_id: str): self.context_id = str(context_id) @@ -118,6 +311,7 @@ class _BrowserRuntimeCore: self.playwright = None self.context = None self.pages: dict[int, BrowserPage] = {} + self.screencasts: dict[str, _BrowserScreencast] = {} self.next_browser_id = 1 self.last_interacted_browser_id: int | None = None self._content_helper_source: str | None = None @@ -378,6 +572,7 @@ class _BrowserRuntimeCore: async def close_browser(self, browser_id: int | str | None = None) -> dict[str, Any]: await self.ensure_started() resolved_id = self._resolve_browser_id(browser_id) + await self._stop_screencasts_for_browser(resolved_id) page = self._page(resolved_id) await page.close() self.pages.pop(resolved_id, None) @@ -387,6 +582,7 @@ class _BrowserRuntimeCore: async def close_all_browsers(self) -> dict[str, Any]: await self.ensure_started() + await self._stop_all_screencasts() for browser_id in list(self.pages): try: await self.pages[browser_id].page.close() @@ -413,6 +609,58 @@ class _BrowserRuntimeCore: "state": await self._state(resolved_id), } + async def start_screencast( + self, + browser_id: int | str | None = None, + *, + quality: int = 78, + every_nth_frame: int = 1, + ) -> dict[str, Any]: + await self.ensure_started() + resolved_id = self._resolve_browser_id(browser_id) + page = self._page(resolved_id) + stream_id = uuid.uuid4().hex + session = await self.context.new_cdp_session(page) + screencast = _BrowserScreencast( + stream_id=stream_id, + browser_id=resolved_id, + session=session, + mime="image/jpeg", + ) + self.screencasts[stream_id] = screencast + try: + await screencast.start( + quality=quality, + every_nth_frame=every_nth_frame, + viewport=page.viewport_size or DEFAULT_VIEWPORT, + ) + except Exception: + self.screencasts.pop(stream_id, None) + await screencast.stop() + raise + self.last_interacted_browser_id = resolved_id + return { + "stream_id": stream_id, + "browser_id": resolved_id, + "state": await self._state(resolved_id), + } + + async def read_screencast_frame( + self, + stream_id: str, + *, + timeout: float = 1.0, + ) -> dict[str, Any]: + screencast = self.screencasts.get(str(stream_id or "")) + if not screencast: + raise KeyError("Browser screencast is not active.") + return await screencast.next_frame(timeout=timeout) + + async def stop_screencast(self, stream_id: str) -> None: + screencast = self.screencasts.pop(str(stream_id or ""), None) + if screencast: + await screencast.stop() + async def set_viewport( self, browser_id: int | str | None, @@ -426,7 +674,14 @@ class _BrowserRuntimeCore: "width": max(320, min(4096, int(width or DEFAULT_VIEWPORT["width"]))), "height": max(200, min(4096, int(height or DEFAULT_VIEWPORT["height"]))), } - await page.set_viewport_size(viewport) + current_viewport = page.viewport_size or {} + changed = ( + int(current_viewport.get("width") or 0) != viewport["width"] + or int(current_viewport.get("height") or 0) != viewport["height"] + ) + if changed: + await page.set_viewport_size(viewport) + await self._stop_screencasts_for_browser(resolved_id) self.last_interacted_browser_id = resolved_id return {"state": await self._state(resolved_id), "viewport": viewport} @@ -490,6 +745,7 @@ class _BrowserRuntimeCore: return await self._state(resolved_id) async def close(self, delete_profile: bool = False) -> None: + await self._stop_all_screencasts() for browser_id in list(self.pages): try: await self.pages[browser_id].page.close() @@ -618,6 +874,19 @@ class _BrowserRuntimeCore: def _page(self, browser_id: int) -> Any: return self.pages[int(browser_id)].page + async def _stop_screencasts_for_browser(self, browser_id: int) -> None: + stream_ids = [ + stream_id + for stream_id, screencast in self.screencasts.items() + if screencast.browser_id == int(browser_id) + ] + for stream_id in stream_ids: + await self.stop_screencast(stream_id) + + async def _stop_all_screencasts(self) -> None: + for stream_id in list(self.screencasts): + await self.stop_screencast(stream_id) + async def _ensure_content_helper(self, page: Any) -> None: has_helper = await page.evaluate( "() => Boolean(globalThis.__spaceBrowserPageContent__?.capture)" diff --git a/plugins/_browser/webui/browser-store.js b/plugins/_browser/webui/browser-store.js index 44f585cbf..c5ec6477a 100644 --- a/plugins/_browser/webui/browser-store.js +++ b/plugins/_browser/webui/browser-store.js @@ -40,6 +40,9 @@ const model = { _frameOff: null, _stateOff: null, _lastFrameAt: 0, + _pendingFrameSrc: "", + _frameRenderHandle: null, + _frameRenderCancel: null, _floatingCleanup: null, _stageElement: null, _stageResizeObserver: null, @@ -298,21 +301,24 @@ const model = { } }, - async connectViewer() { - if (!this.contextId) { - this.connected = false; - this.error = "No active chat context is selected."; - return; - } - this.error = ""; - await this._bindSocketEvents(); - const response = await websocket.request( - "browser_viewer_subscribe", - { - context_id: this.contextId, - browser_id: this.activeBrowserId, - }, - { + async connectViewer() { + if (!this.contextId) { + this.connected = false; + this.error = "No active chat context is selected."; + return; + } + this.error = ""; + await this._bindSocketEvents(); + const initialViewport = this.currentViewportSize(); + const response = await websocket.request( + "browser_viewer_subscribe", + { + context_id: this.contextId, + browser_id: this.activeBrowserId, + viewport_width: initialViewport?.width, + viewport_height: initialViewport?.height, + }, + { timeoutMs: this.browserInstallExpected ? BROWSER_FIRST_INSTALL_TIMEOUT_MS : BROWSER_SUBSCRIBE_TIMEOUT_MS, @@ -329,18 +335,25 @@ const model = { async _bindSocketEvents() { if (!this._frameOff) { const frameHandler = ({ data }) => { - if (data?.context_id !== this.contextId) return; - this.browsers = data.browsers || this.browsers; - this.setActiveBrowserId(data.browser_id || data.state?.id || this.activeBrowserId); - this.frameState = data.state || null; - if (!this.addressFocused && data.state?.currentUrl) { - this.address = data.state.currentUrl; - } - this.frameSrc = data.image ? `data:${data.mime || "image/jpeg"};base64,${data.image}` : ""; - if (!data.image && !data.state) { - this.setActiveBrowserId(null); - this.frameState = null; - this.frameSrc = ""; + if (data?.context_id !== this.contextId) return; + this.browsers = data.browsers || this.browsers; + this.setActiveBrowserId(data.browser_id || data.state?.id || this.activeBrowserId); + if (data.state) { + this.frameState = data.state; + } + if (!this.addressFocused && data.state?.currentUrl) { + this.address = data.state.currentUrl; + } + if (data.image) { + this.queueFrameRender(`data:${data.mime || "image/jpeg"};base64,${data.image}`); + } else { + this.cancelFrameRender(); + this.frameSrc = ""; + } + if (!data.image && !data.state) { + this.setActiveBrowserId(null); + this.frameState = null; + this.frameSrc = ""; } this._lastFrameAt = Date.now(); }; @@ -356,12 +369,41 @@ const model = { }; await websocket.on("browser_viewer_state", stateHandler); this._stateOff = () => websocket.off("browser_viewer_state", stateHandler); - } - }, + } + }, - async command(command, extra = {}) { - this.error = ""; - const previousActiveBrowserId = this.activeBrowserId; + queueFrameRender(frameSrc) { + this._pendingFrameSrc = frameSrc; + if (this._frameRenderHandle) return; + const schedule = globalThis.requestAnimationFrame?.bind(globalThis); + if (schedule) { + this._frameRenderCancel = globalThis.cancelAnimationFrame?.bind(globalThis) || null; + this._frameRenderHandle = schedule(() => this.flushFrameRender()); + return; + } + this._frameRenderCancel = globalThis.clearTimeout?.bind(globalThis) || null; + this._frameRenderHandle = globalThis.setTimeout(() => this.flushFrameRender(), 16); + }, + + flushFrameRender() { + this._frameRenderHandle = null; + this._frameRenderCancel = null; + this.frameSrc = this._pendingFrameSrc || ""; + this._pendingFrameSrc = ""; + }, + + cancelFrameRender() { + if (this._frameRenderHandle && this._frameRenderCancel) { + this._frameRenderCancel(this._frameRenderHandle); + } + this._frameRenderHandle = null; + this._frameRenderCancel = null; + this._pendingFrameSrc = ""; + }, + + async command(command, extra = {}) { + this.error = ""; + const previousActiveBrowserId = this.activeBrowserId; try { const response = await websocket.request( "browser_viewer_command", @@ -577,10 +619,11 @@ const model = { } catch {} } this._frameOff?.(); - this._stateOff?.(); - this._frameOff = null; - this._stateOff = null; - this._floatingCleanup?.(); + this._stateOff?.(); + this._frameOff = null; + this._stateOff = null; + this.cancelFrameRender(); + this._floatingCleanup?.(); this._floatingCleanup = null; this._stageResizeObserver?.disconnect?.(); this._stageResizeObserver = null; diff --git a/plugins/_browser/webui/main.html b/plugins/_browser/webui/main.html index 614cedd71..7883c202f 100644 --- a/plugins/_browser/webui/main.html +++ b/plugins/_browser/webui/main.html @@ -782,24 +782,26 @@ color: #9f1239; } - .browser-stage { - flex: 1 1 auto; - display: flex; - flex-direction: column; - min-height: 0; - overflow: auto; - background: #fff; - outline: none; - } + .browser-stage { + flex: 1 1 auto; + display: flex; + flex-direction: column; + min-height: 0; + overflow: hidden; + background: #fff; + outline: none; + } - .browser-frame { - flex: 0 0 auto; - display: block; - width: 100%; - height: auto; - user-select: none; - background: #fff; - } + .browser-frame { + flex: 0 0 auto; + display: block; + width: 100%; + height: auto; + min-width: 0; + image-rendering: auto; + user-select: none; + background: #fff; + } .browser-status, .browser-error, @@ -884,4 +886,4 @@ - \ No newline at end of file + diff --git a/tests/test_browser_agent_regressions.py b/tests/test_browser_agent_regressions.py index 69a74c9b4..f3c65b557 100644 --- a/tests/test_browser_agent_regressions.py +++ b/tests/test_browser_agent_regressions.py @@ -1,3 +1,4 @@ +import asyncio import sys import threading from pathlib import Path @@ -26,7 +27,11 @@ from plugins._browser.helpers.extension_manager import ( parse_chrome_web_store_extension_id, ) import plugins._browser.helpers.extension_manager as browser_extension_manager_module -from plugins._browser.helpers.runtime import _BrowserRuntimeCore, normalize_url +from plugins._browser.helpers.runtime import ( + _BrowserRuntimeCore, + _BrowserScreencast, + normalize_url, +) import plugins._browser.helpers.runtime as browser_runtime_module from plugins._browser.helpers.playwright import ( get_playwright_binary, @@ -320,6 +325,116 @@ def test_browser_viewer_uses_tabs_for_session_switching(): assert "Using ${this.mainModelSummary}" in browser_store +def test_browser_viewer_uses_cdp_screencast_transport(): + ws_browser = (PROJECT_ROOT / "plugins" / "_browser" / "api" / "ws_browser.py").read_text( + encoding="utf-8" + ) + main_html = (PROJECT_ROOT / "plugins" / "_browser" / "webui" / "main.html").read_text( + encoding="utf-8" + ) + runtime = ( + PROJECT_ROOT / "plugins" / "_browser" / "helpers" / "runtime.py" + ).read_text(encoding="utf-8") + browser_store = ( + PROJECT_ROOT / "plugins" / "_browser" / "webui" / "browser-store.js" + ).read_text(encoding="utf-8") + + assert 'runtime.call("screenshot"' not in ws_browser + assert "SCREENCAST_QUALITY = 92" in ws_browser + assert "initial_viewport = self._viewport_from_data(data)" in ws_browser + assert '"set_viewport"' in ws_browser + assert "start_screencast" in ws_browser + assert "read_screencast_frame" in ws_browser + assert "stop_screencast" in ws_browser + assert '"Page.startScreencast"' in runtime + assert '"Page.screencastFrame"' in runtime + assert '"Page.screencastFrameAck"' in runtime + assert '"Page.stopScreencast"' in runtime + assert '"Emulation.setDeviceMetricsOverride"' in runtime + assert '"Emulation.setVisibleSize"' in runtime + assert "asyncio.Queue(maxsize=1)" in runtime + assert "await self._stop_screencasts_for_browser(resolved_id)" in runtime + assert "queueFrameRender" in browser_store + assert "requestAnimationFrame" in browser_store + assert "viewport_width: initialViewport?.width" in browser_store + assert "viewport_height: initialViewport?.height" in browser_store + assert "this.frameState = data.state || null" not in browser_store + assert "overflow: hidden;" in main_html + assert "object-fit: fill;" not in main_html + assert "height: auto;" in main_html + assert "image-rendering: auto;" in main_html + + +@pytest.mark.asyncio +async def test_browser_screencast_acknowledges_and_drops_stale_frames(): + first_image = ( + "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsL" + "DBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/" + "2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIy" + "MjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAKAAoDASIAAhEBAxEB/8QAFQAB" + "AAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAMAwEAAhADE" + "AAAAKf/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/9oACAEBAAEFAqf/xAAUEQEAAAAAAAA" + "AAAAAAAAAAAAA/9oACAEDAQE/ASP/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oACAECA" + "QE/ASP/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/9oACAEBAAY/Aqf/xAAUEAEAAAAAAA" + "AAAAAAAAAAAAAA/9oACAEBAAE/ISf/2gAMAwEAAgADAAAAEP/EABQRAQAAAAAAAAA" + "AAAAAAAAAAP/aAAgBAwEBPxAk/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAgBAgEB" + "PxAk/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/aAAgBAQABPxAn/9k=" + ) + + class FakeSession: + def __init__(self): + self.handlers = {} + self.sent = [] + self.detached = False + + def on(self, event, handler): + self.handlers[event] = handler + + async def send(self, method, params=None): + self.sent.append((method, params or {})) + + async def detach(self): + self.detached = True + + session = FakeSession() + screencast = _BrowserScreencast( + stream_id="stream", + browser_id=7, + session=session, + mime="image/jpeg", + ) + + await screencast.start(quality=92, every_nth_frame=1, viewport={"width": 1118, "height": 662}) + session.handlers["Page.screencastFrame"]( + {"data": first_image, "metadata": {"deviceWidth": 10}, "sessionId": 1} + ) + session.handlers["Page.screencastFrame"]( + {"data": "second", "metadata": {"deviceWidth": 200}, "sessionId": 2} + ) + await asyncio.sleep(0) + + frame = await screencast.next_frame(timeout=0.1) + + assert frame["browser_id"] == 7 + assert frame["image"] == "second" + assert frame["metadata"]["deviceWidth"] == 200 + assert ("Emulation.setDeviceMetricsOverride", { + "width": 1118, + "height": 662, + "deviceScaleFactor": 1, + "mobile": False, + "dontSetVisibleSize": True, + }) in session.sent + assert ("Emulation.setVisibleSize", {"width": 1118, "height": 662}) in session.sent + assert ("Page.screencastFrameAck", {"sessionId": 1}) in session.sent + assert ("Page.screencastFrameAck", {"sessionId": 2}) in session.sent + + await screencast.stop() + + assert ("Page.stopScreencast", {}) in session.sent + assert session.detached is True + + def test_browser_docker_installs_full_chromium_to_persistent_cache(): script = ( PROJECT_ROOT / "docker" / "run" / "fs" / "ins" / "install_playwright.sh"