mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-17 04:01:13 +00:00
Redesign Browser viewer screencast transport and viewport fit
Replace the Browser viewer’s screenshot polling with CDP screencast streaming for much smoother navigation. The runtime now starts/stops CDP screencasts cleanly, acknowledges frames, drops stale frames, and keeps the WebSocket payload compatible with the existing viewer. This change also fixes modal viewport sizing by sending the initial stage dimensions on subscribe, applying CDP emulation sizing before the first frame, and avoiding image stretching; in addition, the screencast JPEG quality is raised to 92. Regression coverage was added for the screencast path, frame ack/drop behavior, viewport sizing, and UI rendering assumptions. Note: this still needs a thorough performance audit and optimization.
This commit is contained in:
parent
cf67047ad3
commit
dccf017d2c
5 changed files with 636 additions and 92 deletions
|
|
@ -1,6 +1,8 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import time
|
||||
from typing import Any, ClassVar
|
||||
|
||||
from agent import AgentContext
|
||||
|
|
@ -9,6 +11,12 @@ from helpers.ws_manager import WsResult
|
|||
from plugins._browser.helpers.runtime import get_runtime
|
||||
|
||||
|
||||
FRAME_IDLE_TIMEOUT_SECONDS = 0.35
|
||||
FRAME_RETRY_DELAY_SECONDS = 0.5
|
||||
FRAME_STATE_REFRESH_SECONDS = 0.75
|
||||
SCREENCAST_QUALITY = 92
|
||||
|
||||
|
||||
class WsBrowser(WsHandler):
|
||||
_streams: ClassVar[dict[tuple[str, str], asyncio.Task[None]]] = {}
|
||||
|
||||
|
|
@ -57,9 +65,17 @@ class WsBrowser(WsHandler):
|
|||
browsers = listing.get("browsers") or []
|
||||
if opened.get("id"):
|
||||
listing["last_interacted_browser_id"] = opened.get("id")
|
||||
active_id = data.get("browser_id") or listing.get("last_interacted_browser_id")
|
||||
if not active_id and browsers:
|
||||
active_id = browsers[0].get("id")
|
||||
active_id = self._active_browser_id(listing, data.get("browser_id"))
|
||||
initial_viewport = self._viewport_from_data(data)
|
||||
if active_id and initial_viewport:
|
||||
await runtime.call(
|
||||
"set_viewport",
|
||||
active_id,
|
||||
initial_viewport["width"],
|
||||
initial_viewport["height"],
|
||||
)
|
||||
listing = await runtime.call("list")
|
||||
browsers = listing.get("browsers") or []
|
||||
|
||||
stream_key = (sid, context_id)
|
||||
existing = self._streams.pop(stream_key, None)
|
||||
|
|
@ -187,46 +203,145 @@ class WsBrowser(WsHandler):
|
|||
context_id: str,
|
||||
browser_id: int | str | None,
|
||||
) -> None:
|
||||
runtime = None
|
||||
stream_id = None
|
||||
while True:
|
||||
try:
|
||||
runtime = await get_runtime(context_id, create=False)
|
||||
if runtime:
|
||||
listing = await runtime.call("list")
|
||||
browsers = listing.get("browsers") or []
|
||||
browser_ids = {str(browser.get("id")) for browser in browsers}
|
||||
requested_id = str(browser_id or "") if browser_id else ""
|
||||
active_id = (
|
||||
browser_id
|
||||
if requested_id and requested_id in browser_ids
|
||||
else listing.get("last_interacted_browser_id")
|
||||
)
|
||||
if active_id and str(active_id) not in browser_ids:
|
||||
active_id = None
|
||||
if not active_id and browsers:
|
||||
active_id = browsers[0].get("id")
|
||||
if active_id:
|
||||
frame = await runtime.call("screenshot", active_id)
|
||||
frame["context_id"] = context_id
|
||||
frame["browsers"] = browsers
|
||||
await self.emit_to(sid, "browser_viewer_frame", frame)
|
||||
else:
|
||||
await self.emit_to(
|
||||
sid,
|
||||
"browser_viewer_frame",
|
||||
{
|
||||
"context_id": context_id,
|
||||
"browser_id": None,
|
||||
"browsers": browsers,
|
||||
"image": "",
|
||||
"mime": "",
|
||||
"state": None,
|
||||
},
|
||||
if not runtime:
|
||||
await self._emit_empty_frame(sid, context_id)
|
||||
await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS)
|
||||
continue
|
||||
|
||||
listing = await runtime.call("list")
|
||||
browsers = listing.get("browsers") or []
|
||||
active_id = self._active_browser_id(listing, browser_id)
|
||||
if not active_id:
|
||||
await self._emit_empty_frame(sid, context_id, browsers=browsers)
|
||||
await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS)
|
||||
continue
|
||||
|
||||
screencast = await runtime.call(
|
||||
"start_screencast",
|
||||
active_id,
|
||||
quality=SCREENCAST_QUALITY,
|
||||
every_nth_frame=1,
|
||||
)
|
||||
stream_id = screencast["stream_id"]
|
||||
active_id = screencast["browser_id"]
|
||||
state = screencast.get("state")
|
||||
await self.emit_to(
|
||||
sid,
|
||||
"browser_viewer_frame",
|
||||
{
|
||||
"context_id": context_id,
|
||||
"browser_id": active_id,
|
||||
"browsers": browsers,
|
||||
"image": "",
|
||||
"mime": "",
|
||||
"state": state,
|
||||
},
|
||||
)
|
||||
|
||||
last_state_refresh = 0.0
|
||||
while True:
|
||||
now = time.monotonic()
|
||||
if now - last_state_refresh >= FRAME_STATE_REFRESH_SECONDS:
|
||||
listing = await runtime.call("list")
|
||||
browsers = listing.get("browsers") or []
|
||||
browser_ids = {str(browser.get("id")) for browser in browsers}
|
||||
if str(active_id) not in browser_ids:
|
||||
break
|
||||
state = self._state_for_browser(browsers, active_id, state)
|
||||
last_state_refresh = now
|
||||
|
||||
try:
|
||||
frame = await runtime.call(
|
||||
"read_screencast_frame",
|
||||
stream_id,
|
||||
timeout=FRAME_IDLE_TIMEOUT_SECONDS,
|
||||
)
|
||||
await asyncio.sleep(0.75)
|
||||
except TimeoutError:
|
||||
continue
|
||||
|
||||
frame["context_id"] = context_id
|
||||
frame["browser_id"] = active_id
|
||||
frame["browsers"] = browsers
|
||||
frame["state"] = state
|
||||
await self.emit_to(sid, "browser_viewer_frame", frame)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception:
|
||||
await asyncio.sleep(1.5)
|
||||
await asyncio.sleep(FRAME_RETRY_DELAY_SECONDS)
|
||||
finally:
|
||||
if runtime and stream_id:
|
||||
with contextlib.suppress(Exception):
|
||||
await runtime.call("stop_screencast", stream_id)
|
||||
stream_id = None
|
||||
|
||||
@staticmethod
|
||||
def _active_browser_id(
|
||||
listing: dict[str, Any],
|
||||
requested_browser_id: int | str | None,
|
||||
) -> int | str | None:
|
||||
browsers = listing.get("browsers") or []
|
||||
browser_ids = {str(browser.get("id")) for browser in browsers}
|
||||
requested_id = str(requested_browser_id or "") if requested_browser_id else ""
|
||||
active_id = (
|
||||
requested_browser_id
|
||||
if requested_id and requested_id in browser_ids
|
||||
else listing.get("last_interacted_browser_id")
|
||||
)
|
||||
if active_id and str(active_id) not in browser_ids:
|
||||
active_id = None
|
||||
if not active_id and browsers:
|
||||
active_id = browsers[0].get("id")
|
||||
return active_id
|
||||
|
||||
@staticmethod
|
||||
def _state_for_browser(
|
||||
browsers: list[dict[str, Any]],
|
||||
browser_id: int | str,
|
||||
current_state: dict[str, Any] | None,
|
||||
) -> dict[str, Any] | None:
|
||||
for browser in browsers:
|
||||
if str(browser.get("id")) == str(browser_id):
|
||||
return browser
|
||||
return current_state
|
||||
|
||||
async def _emit_empty_frame(
    self,
    sid: str,
    context_id: str,
    *,
    browsers: list[dict[str, Any]] | None = None,
) -> None:
    """Send a ``browser_viewer_frame`` event that carries no image or state.

    The payload has the same keys as a regular frame, with ``browser_id`` and
    ``state`` set to ``None`` and empty ``image``/``mime`` strings.
    ``browsers`` defaults to an empty list.
    """
    placeholder: dict[str, Any] = {
        "context_id": context_id,
        "browser_id": None,
        "browsers": browsers or [],
        "image": "",
        "mime": "",
        "state": None,
    }
    await self.emit_to(sid, "browser_viewer_frame", placeholder)
|
||||
|
||||
@staticmethod
|
||||
def _viewport_from_data(data: dict[str, Any]) -> dict[str, int] | None:
|
||||
try:
|
||||
width = int(data.get("viewport_width") or data.get("width") or 0)
|
||||
height = int(data.get("viewport_height") or data.get("height") or 0)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if width < 80 or height < 80:
|
||||
return None
|
||||
return {
|
||||
"width": max(320, min(4096, width)),
|
||||
"height": max(200, min(4096, height)),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _context_id(data: dict[str, Any]) -> str:
|
||||
|
|
|
|||
|
|
@ -3,12 +3,14 @@ from __future__ import annotations
|
|||
import atexit
|
||||
import asyncio
|
||||
import base64
|
||||
import contextlib
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
|
@ -27,6 +29,8 @@ CONTENT_HELPER_PATH = PLUGIN_DIR / "assets" / "browser-page-content.js"
|
|||
RUNTIME_DATA_KEY = "_browser_runtime"
|
||||
DEFAULT_VIEWPORT = {"width": 1024, "height": 768}
|
||||
CHROME_SINGLETON_FILES = ("SingletonLock", "SingletonCookie", "SingletonSocket")
|
||||
SCREENCAST_MAX_WIDTH = 4096
|
||||
SCREENCAST_MAX_HEIGHT = 4096
|
||||
|
||||
_SPECIAL_SCHEME_RE = re.compile(r"^(?:about|blob|data|file|mailto|tel):", re.I)
|
||||
_URL_SCHEME_RE = re.compile(r"^[a-z][a-z\d+\-.]*://", re.I)
|
||||
|
|
@ -84,6 +88,195 @@ class BrowserPage:
|
|||
page: Any
|
||||
|
||||
|
||||
class _BrowserScreencast:
    """One CDP ``Page.startScreencast`` stream attached to a browser page.

    Frames arriving on the CDP session are acknowledged and placed in a
    one-slot queue; a newer frame replaces an unread older one, so readers
    always get the latest frame. ``None`` in the queue marks end of stream.
    """

    # JPEG markers that are not followed by a length field (TEM, RST0-RST7).
    _STANDALONE_MARKERS = frozenset({0x01, *range(0xD0, 0xD8)})
    # Start-of-frame markers: 0xC0-0xCF except DHT (0xC4), JPG (0xC8), DAC (0xCC).
    _SIZE_MARKERS = frozenset(set(range(0xC0, 0xD0)) - {0xC4, 0xC8, 0xCC})
    # EOI / SOS: the frame header precedes the first scan, so nothing after
    # these markers can be a valid header.
    _TERMINAL_MARKERS = frozenset({0xD9, 0xDA})

    def __init__(
        self,
        *,
        stream_id: str,
        browser_id: int,
        session: Any,
        mime: str,
    ):
        self.id = stream_id
        self.browser_id = browser_id
        self.session = session
        self.mime = mime
        # Single slot: only the most recent unread frame is kept.
        self.queue = asyncio.Queue(maxsize=1)
        self.stopped = False
        # In-flight frame handlers, tracked so stop() can cancel them.
        self._ack_tasks: set[asyncio.Task] = set()
        self._expected_width = 0
        self._expected_height = 0
        # Total number of frames seen with unexpected dimensions.
        self._dimension_mismatches = 0

    async def start(
        self,
        *,
        quality: int,
        every_nth_frame: int,
        viewport: dict[str, int],
    ) -> None:
        """Register the frame handler, size the page and start the screencast.

        The viewport is clamped to 320..4096 x 200..4096 and remembered as the
        expected frame size. The emulation calls are best-effort (errors are
        suppressed); a failure of ``Page.startScreencast`` propagates.
        """
        self.session.on("Page.screencastFrame", self._on_frame)
        width = max(320, min(4096, int(viewport.get("width") or DEFAULT_VIEWPORT["width"])))
        height = max(200, min(4096, int(viewport.get("height") or DEFAULT_VIEWPORT["height"])))
        self._expected_width = width
        self._expected_height = height
        self._dimension_mismatches = 0
        with contextlib.suppress(Exception):
            await self.session.send("Page.enable")
            await self.session.send(
                "Emulation.setDeviceMetricsOverride",
                {
                    "width": width,
                    "height": height,
                    "deviceScaleFactor": 1,
                    "mobile": False,
                    "dontSetVisibleSize": True,
                },
            )
        with contextlib.suppress(Exception):
            await self.session.send(
                "Emulation.setVisibleSize",
                {
                    "width": width,
                    "height": height,
                },
            )
        await self.session.send(
            "Page.startScreencast",
            {
                "format": "jpeg",
                "quality": max(20, min(95, int(quality))),
                "maxWidth": SCREENCAST_MAX_WIDTH,
                "maxHeight": SCREENCAST_MAX_HEIGHT,
                "everyNthFrame": max(1, int(every_nth_frame)),
            },
        )

    async def next_frame(self, timeout: float = 1.0) -> dict[str, Any]:
        """Wait for the next frame.

        The wait is at least 0.1 seconds. The timeout error raised by
        ``asyncio.wait_for`` propagates when no frame arrives in time.

        Raises:
            RuntimeError: the stream was stopped (sentinel received).
        """
        frame = await asyncio.wait_for(self.queue.get(), timeout=max(0.1, float(timeout)))
        if frame is None:
            raise RuntimeError("Browser screencast stopped.")
        return frame

    async def stop(self) -> None:
        """Stop the stream and release the CDP session; safe to call twice."""
        if self.stopped:
            return
        self.stopped = True
        # Replace any unread frame with the end-of-stream sentinel so a
        # waiting reader wakes up.
        self._drop_queued_frames()
        with contextlib.suppress(asyncio.QueueFull):
            self.queue.put_nowait(None)
        with contextlib.suppress(Exception):
            await self.session.send("Page.stopScreencast")
        for task in list(self._ack_tasks):
            task.cancel()
        if self._ack_tasks:
            await asyncio.gather(*self._ack_tasks, return_exceptions=True)
        self._ack_tasks.clear()
        with contextlib.suppress(Exception):
            await self.session.detach()

    def _on_frame(self, params: dict[str, Any]) -> None:
        """Session event callback: handle the frame in a tracked task."""
        if self.stopped:
            return
        task = asyncio.create_task(self._handle_frame(params or {}))
        self._ack_tasks.add(task)
        task.add_done_callback(self._ack_tasks.discard)

    async def _handle_frame(self, params: dict[str, Any]) -> None:
        """Queue the frame if it is usable, then acknowledge it.

        The acknowledgement is sent even when the frame is dropped, unless the
        stream has been stopped or the event carries no ``sessionId``.
        """
        try:
            data = params.get("data") or ""
            if data and self._frame_matches_viewport(data):
                self._queue_latest(
                    {
                        "browser_id": self.browser_id,
                        "mime": self.mime,
                        "image": data,
                        "metadata": params.get("metadata") or {},
                    }
                )
        finally:
            session_id = params.get("sessionId")
            if session_id is not None and not self.stopped:
                with contextlib.suppress(Exception):
                    await self.session.send(
                        "Page.screencastFrameAck",
                        {"sessionId": int(session_id)},
                    )

    def _queue_latest(self, frame: dict[str, Any]) -> None:
        """Discard any unread frame and store *frame* in its place."""
        self._drop_queued_frames()
        with contextlib.suppress(asyncio.QueueFull):
            self.queue.put_nowait(frame)

    def _frame_matches_viewport(self, data: str) -> bool:
        """Decide whether a frame should be forwarded.

        Frames are accepted when no expected size is set, when the JPEG size
        cannot be determined, or when both dimensions are within 2 pixels of
        the expected size. Other frames are rejected until more than 10
        mismatches have been counted in total; after that every frame is
        accepted.
        """
        if not self._expected_width or not self._expected_height:
            return True
        size = self._jpeg_size(data)
        if not size:
            return True
        width, height = size
        if abs(width - self._expected_width) <= 2 and abs(height - self._expected_height) <= 2:
            return True
        self._dimension_mismatches += 1
        return self._dimension_mismatches > 10

    @staticmethod
    def _jpeg_size(data: str) -> tuple[int, int] | None:
        """Read ``(width, height)`` from the frame header of a base64 JPEG.

        Returns ``None`` when *data* is not decodable, is not a JPEG, is
        truncated, or has no start-of-frame segment before the first scan.
        """
        try:
            raw = base64.b64decode(data, validate=False)
        except Exception:
            return None
        if len(raw) < 10 or raw[:2] != b"\xff\xd8":
            return None
        index = 2
        standalone_markers = _BrowserScreencast._STANDALONE_MARKERS
        size_markers = _BrowserScreencast._SIZE_MARKERS
        terminal_markers = _BrowserScreencast._TERMINAL_MARKERS
        while index < len(raw) - 9:
            if raw[index] != 0xFF:
                index += 1
                continue
            # Skip fill bytes: a marker may be preceded by any number of 0xFF.
            while index < len(raw) and raw[index] == 0xFF:
                index += 1
            if index >= len(raw):
                return None
            marker = raw[index]
            index += 1
            if marker in standalone_markers:
                continue
            if marker in terminal_markers:
                # Past this point lies entropy-coded data (or nothing);
                # parsing it as segments could yield bogus dimensions.
                return None
            if index + 2 > len(raw):
                return None
            segment_length = int.from_bytes(raw[index : index + 2], "big")
            if segment_length < 2 or index + segment_length > len(raw):
                return None
            if marker in size_markers and segment_length >= 7:
                # Segment layout: length(2) precision(1) height(2) width(2).
                height = int.from_bytes(raw[index + 3 : index + 5], "big")
                width = int.from_bytes(raw[index + 5 : index + 7], "big")
                return width, height
            index += segment_length
        return None

    def _drop_queued_frames(self) -> None:
        """Empty the queue without blocking."""
        while True:
            try:
                self.queue.get_nowait()
            except asyncio.QueueEmpty:
                return
|
||||
|
||||
|
||||
class BrowserRuntime:
|
||||
def __init__(self, context_id: str):
|
||||
self.context_id = str(context_id)
|
||||
|
|
@ -118,6 +311,7 @@ class _BrowserRuntimeCore:
|
|||
self.playwright = None
|
||||
self.context = None
|
||||
self.pages: dict[int, BrowserPage] = {}
|
||||
self.screencasts: dict[str, _BrowserScreencast] = {}
|
||||
self.next_browser_id = 1
|
||||
self.last_interacted_browser_id: int | None = None
|
||||
self._content_helper_source: str | None = None
|
||||
|
|
@ -378,6 +572,7 @@ class _BrowserRuntimeCore:
|
|||
async def close_browser(self, browser_id: int | str | None = None) -> dict[str, Any]:
|
||||
await self.ensure_started()
|
||||
resolved_id = self._resolve_browser_id(browser_id)
|
||||
await self._stop_screencasts_for_browser(resolved_id)
|
||||
page = self._page(resolved_id)
|
||||
await page.close()
|
||||
self.pages.pop(resolved_id, None)
|
||||
|
|
@ -387,6 +582,7 @@ class _BrowserRuntimeCore:
|
|||
|
||||
async def close_all_browsers(self) -> dict[str, Any]:
|
||||
await self.ensure_started()
|
||||
await self._stop_all_screencasts()
|
||||
for browser_id in list(self.pages):
|
||||
try:
|
||||
await self.pages[browser_id].page.close()
|
||||
|
|
@ -413,6 +609,58 @@ class _BrowserRuntimeCore:
|
|||
"state": await self._state(resolved_id),
|
||||
}
|
||||
|
||||
async def start_screencast(
    self,
    browser_id: int | str | None = None,
    *,
    quality: int = 78,
    every_nth_frame: int = 1,
) -> dict[str, Any]:
    """Start a CDP screencast for a browser page and register it.

    Opens a new CDP session for the resolved page, starts a
    ``_BrowserScreencast`` on it using the page's current viewport (or
    ``DEFAULT_VIEWPORT``) and records it in ``self.screencasts`` under a
    fresh stream id.

    Returns:
        ``{"stream_id": ..., "browser_id": ..., "state": ...}``.

    Raises:
        Any error from starting the stream or reading the page state; in
        that case the screencast is unregistered and stopped first.
    """
    await self.ensure_started()
    resolved_id = self._resolve_browser_id(browser_id)
    page = self._page(resolved_id)
    stream_id = uuid.uuid4().hex
    session = await self.context.new_cdp_session(page)
    screencast = _BrowserScreencast(
        stream_id=stream_id,
        browser_id=resolved_id,
        session=session,
        mime="image/jpeg",
    )
    self.screencasts[stream_id] = screencast
    try:
        await screencast.start(
            quality=quality,
            every_nth_frame=every_nth_frame,
            viewport=page.viewport_size or DEFAULT_VIEWPORT,
        )
        self.last_interacted_browser_id = resolved_id
        # Read the state inside the guarded region: if it fails, the caller
        # never learns the stream id and could not stop the stream itself.
        state = await self._state(resolved_id)
    except Exception:
        self.screencasts.pop(stream_id, None)
        await screencast.stop()
        raise
    return {
        "stream_id": stream_id,
        "browser_id": resolved_id,
        "state": state,
    }
|
||||
|
||||
async def read_screencast_frame(
    self,
    stream_id: str,
    *,
    timeout: float = 1.0,
) -> dict[str, Any]:
    """Return the next frame of a registered screencast.

    Raises:
        KeyError: no screencast is registered under *stream_id*.
    """
    active = self.screencasts.get(str(stream_id or ""))
    if active:
        return await active.next_frame(timeout=timeout)
    raise KeyError("Browser screencast is not active.")
|
||||
|
||||
async def stop_screencast(self, stream_id: str) -> None:
    """Unregister the screencast for *stream_id* and stop it, if present."""
    removed = self.screencasts.pop(str(stream_id or ""), None)
    if not removed:
        return
    await removed.stop()
|
||||
|
||||
async def set_viewport(
|
||||
self,
|
||||
browser_id: int | str | None,
|
||||
|
|
@ -426,7 +674,14 @@ class _BrowserRuntimeCore:
|
|||
"width": max(320, min(4096, int(width or DEFAULT_VIEWPORT["width"]))),
|
||||
"height": max(200, min(4096, int(height or DEFAULT_VIEWPORT["height"]))),
|
||||
}
|
||||
await page.set_viewport_size(viewport)
|
||||
current_viewport = page.viewport_size or {}
|
||||
changed = (
|
||||
int(current_viewport.get("width") or 0) != viewport["width"]
|
||||
or int(current_viewport.get("height") or 0) != viewport["height"]
|
||||
)
|
||||
if changed:
|
||||
await page.set_viewport_size(viewport)
|
||||
await self._stop_screencasts_for_browser(resolved_id)
|
||||
self.last_interacted_browser_id = resolved_id
|
||||
return {"state": await self._state(resolved_id), "viewport": viewport}
|
||||
|
||||
|
|
@ -490,6 +745,7 @@ class _BrowserRuntimeCore:
|
|||
return await self._state(resolved_id)
|
||||
|
||||
async def close(self, delete_profile: bool = False) -> None:
|
||||
await self._stop_all_screencasts()
|
||||
for browser_id in list(self.pages):
|
||||
try:
|
||||
await self.pages[browser_id].page.close()
|
||||
|
|
@ -618,6 +874,19 @@ class _BrowserRuntimeCore:
|
|||
def _page(self, browser_id: int) -> Any:
|
||||
return self.pages[int(browser_id)].page
|
||||
|
||||
async def _stop_screencasts_for_browser(self, browser_id: int) -> None:
    """Stop every registered screencast that belongs to *browser_id*."""
    # Snapshot the matching ids first: stopping a stream mutates the registry.
    doomed = tuple(
        key
        for key, cast in self.screencasts.items()
        if cast.browser_id == int(browser_id)
    )
    for key in doomed:
        await self.stop_screencast(key)
|
||||
|
||||
async def _stop_all_screencasts(self) -> None:
    """Stop every registered screencast."""
    # Iterate over a snapshot of the keys: stopping removes registry entries.
    for key in tuple(self.screencasts.keys()):
        await self.stop_screencast(key)
|
||||
|
||||
async def _ensure_content_helper(self, page: Any) -> None:
|
||||
has_helper = await page.evaluate(
|
||||
"() => Boolean(globalThis.__spaceBrowserPageContent__?.capture)"
|
||||
|
|
|
|||
|
|
@ -40,6 +40,9 @@ const model = {
|
|||
_frameOff: null,
|
||||
_stateOff: null,
|
||||
_lastFrameAt: 0,
|
||||
_pendingFrameSrc: "",
|
||||
_frameRenderHandle: null,
|
||||
_frameRenderCancel: null,
|
||||
_floatingCleanup: null,
|
||||
_stageElement: null,
|
||||
_stageResizeObserver: null,
|
||||
|
|
@ -298,21 +301,24 @@ const model = {
|
|||
}
|
||||
},
|
||||
|
||||
async connectViewer() {
|
||||
if (!this.contextId) {
|
||||
this.connected = false;
|
||||
this.error = "No active chat context is selected.";
|
||||
return;
|
||||
}
|
||||
this.error = "";
|
||||
await this._bindSocketEvents();
|
||||
const response = await websocket.request(
|
||||
"browser_viewer_subscribe",
|
||||
{
|
||||
context_id: this.contextId,
|
||||
browser_id: this.activeBrowserId,
|
||||
},
|
||||
{
|
||||
async connectViewer() {
|
||||
if (!this.contextId) {
|
||||
this.connected = false;
|
||||
this.error = "No active chat context is selected.";
|
||||
return;
|
||||
}
|
||||
this.error = "";
|
||||
await this._bindSocketEvents();
|
||||
const initialViewport = this.currentViewportSize();
|
||||
const response = await websocket.request(
|
||||
"browser_viewer_subscribe",
|
||||
{
|
||||
context_id: this.contextId,
|
||||
browser_id: this.activeBrowserId,
|
||||
viewport_width: initialViewport?.width,
|
||||
viewport_height: initialViewport?.height,
|
||||
},
|
||||
{
|
||||
timeoutMs: this.browserInstallExpected
|
||||
? BROWSER_FIRST_INSTALL_TIMEOUT_MS
|
||||
: BROWSER_SUBSCRIBE_TIMEOUT_MS,
|
||||
|
|
@ -329,18 +335,25 @@ const model = {
|
|||
async _bindSocketEvents() {
|
||||
if (!this._frameOff) {
|
||||
const frameHandler = ({ data }) => {
|
||||
if (data?.context_id !== this.contextId) return;
|
||||
this.browsers = data.browsers || this.browsers;
|
||||
this.setActiveBrowserId(data.browser_id || data.state?.id || this.activeBrowserId);
|
||||
this.frameState = data.state || null;
|
||||
if (!this.addressFocused && data.state?.currentUrl) {
|
||||
this.address = data.state.currentUrl;
|
||||
}
|
||||
this.frameSrc = data.image ? `data:${data.mime || "image/jpeg"};base64,${data.image}` : "";
|
||||
if (!data.image && !data.state) {
|
||||
this.setActiveBrowserId(null);
|
||||
this.frameState = null;
|
||||
this.frameSrc = "";
|
||||
if (data?.context_id !== this.contextId) return;
|
||||
this.browsers = data.browsers || this.browsers;
|
||||
this.setActiveBrowserId(data.browser_id || data.state?.id || this.activeBrowserId);
|
||||
if (data.state) {
|
||||
this.frameState = data.state;
|
||||
}
|
||||
if (!this.addressFocused && data.state?.currentUrl) {
|
||||
this.address = data.state.currentUrl;
|
||||
}
|
||||
if (data.image) {
|
||||
this.queueFrameRender(`data:${data.mime || "image/jpeg"};base64,${data.image}`);
|
||||
} else {
|
||||
this.cancelFrameRender();
|
||||
this.frameSrc = "";
|
||||
}
|
||||
if (!data.image && !data.state) {
|
||||
this.setActiveBrowserId(null);
|
||||
this.frameState = null;
|
||||
this.frameSrc = "";
|
||||
}
|
||||
this._lastFrameAt = Date.now();
|
||||
};
|
||||
|
|
@ -356,12 +369,41 @@ const model = {
|
|||
};
|
||||
await websocket.on("browser_viewer_state", stateHandler);
|
||||
this._stateOff = () => websocket.off("browser_viewer_state", stateHandler);
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
async command(command, extra = {}) {
|
||||
this.error = "";
|
||||
const previousActiveBrowserId = this.activeBrowserId;
|
||||
queueFrameRender(frameSrc) {
|
||||
this._pendingFrameSrc = frameSrc;
|
||||
if (this._frameRenderHandle) return;
|
||||
const schedule = globalThis.requestAnimationFrame?.bind(globalThis);
|
||||
if (schedule) {
|
||||
this._frameRenderCancel = globalThis.cancelAnimationFrame?.bind(globalThis) || null;
|
||||
this._frameRenderHandle = schedule(() => this.flushFrameRender());
|
||||
return;
|
||||
}
|
||||
this._frameRenderCancel = globalThis.clearTimeout?.bind(globalThis) || null;
|
||||
this._frameRenderHandle = globalThis.setTimeout(() => this.flushFrameRender(), 16);
|
||||
},
|
||||
|
||||
flushFrameRender() {
|
||||
this._frameRenderHandle = null;
|
||||
this._frameRenderCancel = null;
|
||||
this.frameSrc = this._pendingFrameSrc || "";
|
||||
this._pendingFrameSrc = "";
|
||||
},
|
||||
|
||||
cancelFrameRender() {
|
||||
if (this._frameRenderHandle && this._frameRenderCancel) {
|
||||
this._frameRenderCancel(this._frameRenderHandle);
|
||||
}
|
||||
this._frameRenderHandle = null;
|
||||
this._frameRenderCancel = null;
|
||||
this._pendingFrameSrc = "";
|
||||
},
|
||||
|
||||
async command(command, extra = {}) {
|
||||
this.error = "";
|
||||
const previousActiveBrowserId = this.activeBrowserId;
|
||||
try {
|
||||
const response = await websocket.request(
|
||||
"browser_viewer_command",
|
||||
|
|
@ -577,10 +619,11 @@ const model = {
|
|||
} catch {}
|
||||
}
|
||||
this._frameOff?.();
|
||||
this._stateOff?.();
|
||||
this._frameOff = null;
|
||||
this._stateOff = null;
|
||||
this._floatingCleanup?.();
|
||||
this._stateOff?.();
|
||||
this._frameOff = null;
|
||||
this._stateOff = null;
|
||||
this.cancelFrameRender();
|
||||
this._floatingCleanup?.();
|
||||
this._floatingCleanup = null;
|
||||
this._stageResizeObserver?.disconnect?.();
|
||||
this._stageResizeObserver = null;
|
||||
|
|
|
|||
|
|
@ -782,24 +782,26 @@
|
|||
color: #9f1239;
|
||||
}
|
||||
|
||||
.browser-stage {
|
||||
flex: 1 1 auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-height: 0;
|
||||
overflow: auto;
|
||||
background: #fff;
|
||||
outline: none;
|
||||
}
|
||||
.browser-stage {
|
||||
flex: 1 1 auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-height: 0;
|
||||
overflow: hidden;
|
||||
background: #fff;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.browser-frame {
|
||||
flex: 0 0 auto;
|
||||
display: block;
|
||||
width: 100%;
|
||||
height: auto;
|
||||
user-select: none;
|
||||
background: #fff;
|
||||
}
|
||||
.browser-frame {
|
||||
flex: 0 0 auto;
|
||||
display: block;
|
||||
width: 100%;
|
||||
height: auto;
|
||||
min-width: 0;
|
||||
image-rendering: auto;
|
||||
user-select: none;
|
||||
background: #fff;
|
||||
}
|
||||
|
||||
.browser-status,
|
||||
.browser-error,
|
||||
|
|
@ -884,4 +886,4 @@
|
|||
</style>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
|
@ -26,7 +27,11 @@ from plugins._browser.helpers.extension_manager import (
|
|||
parse_chrome_web_store_extension_id,
|
||||
)
|
||||
import plugins._browser.helpers.extension_manager as browser_extension_manager_module
|
||||
from plugins._browser.helpers.runtime import _BrowserRuntimeCore, normalize_url
|
||||
from plugins._browser.helpers.runtime import (
|
||||
_BrowserRuntimeCore,
|
||||
_BrowserScreencast,
|
||||
normalize_url,
|
||||
)
|
||||
import plugins._browser.helpers.runtime as browser_runtime_module
|
||||
from plugins._browser.helpers.playwright import (
|
||||
get_playwright_binary,
|
||||
|
|
@ -320,6 +325,116 @@ def test_browser_viewer_uses_tabs_for_session_switching():
|
|||
assert "Using ${this.mainModelSummary}" in browser_store
|
||||
|
||||
|
||||
def test_browser_viewer_uses_cdp_screencast_transport():
    """Source-level guard that the viewer uses the CDP screencast path.

    Each entry names a source file, a substring, and whether that substring
    must be present (True) or absent (False).
    """
    plugin_root = PROJECT_ROOT / "plugins" / "_browser"
    sources = {
        "ws_browser": (plugin_root / "api" / "ws_browser.py").read_text(encoding="utf-8"),
        "main_html": (plugin_root / "webui" / "main.html").read_text(encoding="utf-8"),
        "runtime": (plugin_root / "helpers" / "runtime.py").read_text(encoding="utf-8"),
        "browser_store": (plugin_root / "webui" / "browser-store.js").read_text(encoding="utf-8"),
    }

    expectations = [
        ("ws_browser", 'runtime.call("screenshot"', False),
        ("ws_browser", "SCREENCAST_QUALITY = 92", True),
        ("ws_browser", "initial_viewport = self._viewport_from_data(data)", True),
        ("ws_browser", '"set_viewport"', True),
        ("ws_browser", "start_screencast", True),
        ("ws_browser", "read_screencast_frame", True),
        ("ws_browser", "stop_screencast", True),
        ("runtime", '"Page.startScreencast"', True),
        ("runtime", '"Page.screencastFrame"', True),
        ("runtime", '"Page.screencastFrameAck"', True),
        ("runtime", '"Page.stopScreencast"', True),
        ("runtime", '"Emulation.setDeviceMetricsOverride"', True),
        ("runtime", '"Emulation.setVisibleSize"', True),
        ("runtime", "asyncio.Queue(maxsize=1)", True),
        ("runtime", "await self._stop_screencasts_for_browser(resolved_id)", True),
        ("browser_store", "queueFrameRender", True),
        ("browser_store", "requestAnimationFrame", True),
        ("browser_store", "viewport_width: initialViewport?.width", True),
        ("browser_store", "viewport_height: initialViewport?.height", True),
        ("browser_store", "this.frameState = data.state || null", False),
        ("main_html", "overflow: hidden;", True),
        ("main_html", "object-fit: fill;", False),
        ("main_html", "height: auto;", True),
        ("main_html", "image-rendering: auto;", True),
    ]
    for source_name, needle, should_contain in expectations:
        assert (needle in sources[source_name]) is should_contain
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_browser_screencast_acknowledges_and_drops_stale_frames():
    """Both frames are acknowledged, but only the second one reaches the reader."""
    # A small JPEG whose dimensions differ from the viewport requested below.
    first_image = "".join(
        [
            "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsL",
            "DBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/",
            "2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIy",
            "MjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAKAAoDASIAAhEBAxEB/8QAFQAB",
            "AAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAMAwEAAhADE",
            "AAAAKf/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/9oACAEBAAEFAqf/xAAUEQEAAAAAAAA",
            "AAAAAAAAAAAAA/9oACAEDAQE/ASP/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oACAECA",
            "QE/ASP/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/9oACAEBAAY/Aqf/xAAUEAEAAAAAAA",
            "AAAAAAAAAAAAAA/9oACAEBAAE/ISf/2gAMAwEAAgADAAAAEP/EABQRAQAAAAAAAAA",
            "AAAAAAAAAAP/aAAgBAwEBPxAk/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAgBAgEB",
            "PxAk/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/aAAgBAQABPxAn/9k=",
        ]
    )

    class FakeSession:
        """Records registered handlers and every CDP call that is sent."""

        def __init__(self):
            self.handlers = {}
            self.sent = []
            self.detached = False

        def on(self, event, handler):
            self.handlers[event] = handler

        async def send(self, method, params=None):
            self.sent.append((method, params or {}))

        async def detach(self):
            self.detached = True

    session = FakeSession()
    screencast = _BrowserScreencast(
        stream_id="stream",
        browser_id=7,
        session=session,
        mime="image/jpeg",
    )
    size = {"width": 1118, "height": 662}

    await screencast.start(quality=92, every_nth_frame=1, viewport=dict(size))
    deliver = session.handlers["Page.screencastFrame"]
    deliver({"data": first_image, "metadata": {"deviceWidth": 10}, "sessionId": 1})
    deliver({"data": "second", "metadata": {"deviceWidth": 200}, "sessionId": 2})
    # Yield once so the frame-handling tasks get to run.
    await asyncio.sleep(0)

    frame = await screencast.next_frame(timeout=0.1)

    assert frame["browser_id"] == 7
    assert frame["image"] == "second"
    assert frame["metadata"]["deviceWidth"] == 200
    expected_override = {
        "width": 1118,
        "height": 662,
        "deviceScaleFactor": 1,
        "mobile": False,
        "dontSetVisibleSize": True,
    }
    assert ("Emulation.setDeviceMetricsOverride", expected_override) in session.sent
    assert ("Emulation.setVisibleSize", size) in session.sent
    for acked_id in (1, 2):
        assert ("Page.screencastFrameAck", {"sessionId": acked_id}) in session.sent

    await screencast.stop()

    assert ("Page.stopScreencast", {}) in session.sent
    assert session.detached is True
|
||||
|
||||
|
||||
def test_browser_docker_installs_full_chromium_to_persistent_cache():
|
||||
script = (
|
||||
PROJECT_ROOT / "docker" / "run" / "fs" / "ins" / "install_playwright.sh"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue