Improve Linux Desktop state controls

Add a desktop_state helper, expanded desktopctl observe-act-verify commands, backend desktop_state support, Extra prompt state, and Xpra bridge diagnostics for the built-in Linux Desktop.

Update the Linux Desktop skill so agents prefer structured/app-native/keyboard workflows, treat coordinate clicks as last resort, and verify terminal or CLI-agent work with fresh final screenshots. Cover the behavior with focused Office desktop state, canvas setup, and office_session tests.
This commit is contained in:
Alessandro 2026-05-05 11:20:50 +02:00
parent 2398bd1601
commit 78570e5689
11 changed files with 1398 additions and 13 deletions

View file

@ -72,6 +72,14 @@ def test_document_canvas_uses_markdown_editor_and_official_libreoffice_desktop_f
assert "primeXpraDesktopFrame" in store
assert "normalizeXpraDesktopWindow" in store
assert "installXpraDesktopWheelBridge" in store
assert "installXpraDesktopAgentBridge" in store
assert "agentZeroDesktop" in store
assert 'callOffice("desktop_state"' in store
assert "desktopToClient" in store
assert "clientToDesktop" in store
assert "requestRefresh" in store
assert "_desktopBridgeReady" in store
assert "_desktopKeyboardCaptureState" in store
assert "installXpraDesktopKeyboardBridge" in store
assert "focusDesktopFrame" in store
assert "_desktopKeyboardActive" in store
@ -227,6 +235,10 @@ def test_official_libreoffice_desktop_route_and_packages_are_declared():
linux_desktopctl = (
PROJECT_ROOT / "plugins" / "_office" / "skills" / "linux-desktop" / "scripts" / "desktopctl.sh"
).read_text(encoding="utf-8")
desktop_state_helper = (
PROJECT_ROOT / "plugins" / "_office" / "helpers" / "desktop_state.py"
).read_text(encoding="utf-8")
hooks_py = (PROJECT_ROOT / "plugins" / "_office" / "hooks.py").read_text(encoding="utf-8")
linux_calc_helper = (
PROJECT_ROOT / "plugins" / "_office" / "skills" / "linux-desktop" / "scripts" / "calc_set_cell.py"
).read_text(encoding="utf-8")
@ -320,10 +332,42 @@ def test_official_libreoffice_desktop_route_and_packages_are_declared():
assert "/a0/usr/projects" in linux_desktop_skill
assert "desktopctl.sh" in linux_desktop_skill
assert "calc-set-cell" in linux_desktop_skill
assert "Clicks are explicitly last resort" in linux_desktop_skill or "clicks are explicitly last resort" in linux_desktop_skill
assert "fresh Desktop observation" in linux_desktop_skill
assert "observe --json --screenshot" in linux_desktop_skill
assert "Terminal And CLI Agent Verification" in linux_desktop_skill
assert "Do not report from an earlier screenshot path" in linux_desktop_skill
assert "screenshot path returned by that final observation" in linux_desktop_skill
assert "xdotool" in linux_desktopctl
assert "agent-zero-desktop" in linux_desktopctl
assert "launch_app" in linux_desktopctl
assert "paste_key_for_active_window" in linux_desktopctl
assert "active_window_is_terminal" in linux_desktopctl
assert "WM_CLASS" in linux_desktopctl
for command in (
"state)",
"observe)",
"screenshot)",
"active-window)",
"geometry)",
"wait-window)",
"scroll)",
"drag)",
"right-click)",
"paste-text)",
"sequence)",
):
assert command in linux_desktopctl
assert "calc_set_cell.py" in linux_desktopctl
assert "collect_state" in desktop_state_helper
assert "compact_prompt_context" in desktop_state_helper
assert "fresh final" in desktop_state_helper
assert "xwd" in desktop_state_helper
assert "PIL" in desktop_state_helper
assert '"x11-utils"' in hooks_py
assert '"x11-apps"' in hooks_py
assert '"xclip"' in hooks_py
assert '"python3-pil"' in hooks_py
assert "wait_for_document" in linux_calc_helper
assert "document.store()" in linux_calc_helper
assert "read_xlsx_cell" in linux_calc_helper
@ -419,6 +463,8 @@ def test_office_skills_preserve_markdown_first_and_opt_in_desktop_policy():
assert "Download and Open in canvas actions" in office_skill
assert "method: \"create\"" in office_skill
assert "The Desktop is opt-in" in desktop_skill
assert "coordinate clicks only as a last resort" in desktop_skill
assert "After any GUI action, verify" in desktop_skill
assert "custom Markdown editor" in desktop_skill
assert "Never open the Desktop/canvas automatically" in desktop_skill
assert "persistent Desktop runtime during initial startup" in desktop_skill
@ -432,3 +478,19 @@ def test_office_skills_preserve_markdown_first_and_opt_in_desktop_policy():
assert "must not open the canvas automatically" in excel_skill
assert '"format": "odp"' in presentation_skill
assert "must not open the canvas automatically" in presentation_skill
def test_office_extra_prompt_includes_existing_desktop_state_without_opening_canvas():
    """Check the Office extra-prompt sources by static text inspection.

    Reads ``canvas_context.py`` and the ``agent.extras.office_canvas.md``
    prompt from the ``_office`` plugin and asserts on substrings only; no
    plugin code is imported or executed.
    """
    office_root = PROJECT_ROOT / "plugins" / "_office"
    canvas_context = (office_root / "helpers" / "canvas_context.py").read_text(encoding="utf-8")
    prompt = (office_root / "prompts" / "agent.extras.office_canvas.md").read_text(encoding="utf-8")

    # Names the helper is expected to mention when building Desktop context.
    required_markers = (
        "build_desktop_context",
        "session_manifest_exists",
        "collect_state(include_screenshot=False)",
        "compact_prompt_context",
    )
    for marker in required_markers:
        assert marker in canvas_context

    # The helper must not reference ensure_system_desktop at all
    # (presumably the call that would start the Desktop — confirm).
    assert "ensure_system_desktop" not in canvas_context
    assert "[DOCUMENT CANVAS]" in prompt

View file

@ -0,0 +1,201 @@
from __future__ import annotations
import subprocess
import struct
import sys
import types
from pathlib import Path
# The directory one level above this file's folder; it is put at the front of
# sys.path (once) so the ``plugins`` package import below can resolve.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
from plugins._office.helpers import desktop_state
def _completed(command, returncode=0, stdout="", stderr=""):
return subprocess.CompletedProcess(command, returncode, stdout, stderr)
def test_desktop_state_collects_x11_state_from_mocked_tools(tmp_path, monkeypatch):
    """Collect Desktop state with every X11 tool replaced by a canned fake.

    A session manifest is written under ``tmp_path``, the module's directory
    constants are redirected there, ``shutil.which`` is faked so the listed
    tools appear installed, and ``subprocess.run`` is replaced by ``fake_run``
    which answers each expected command with fixed output. The test then
    asserts on the dictionary returned by ``desktop_state.collect_state()``.
    """
    # Imported locally so the manifest is produced by a real JSON encoder.
    import json

    session_dir = tmp_path / "sessions"
    profile_dir = tmp_path / "profiles" / desktop_state.SESSION_ID
    session_dir.mkdir(parents=True)
    profile_dir.mkdir(parents=True)
    # json.dumps escapes backslashes/quotes in the path; interpolating the
    # path into a JSON template by hand would emit invalid JSON for such paths.
    (session_dir / f"{desktop_state.SESSION_ID}.json").write_text(
        json.dumps({"display": 120, "profile_dir": str(profile_dir)}),
        encoding="utf-8",
    )

    monkeypatch.setattr(desktop_state, "SESSION_DIR", session_dir)
    monkeypatch.setattr(desktop_state, "PROFILE_DIR", tmp_path / "profiles")
    monkeypatch.setattr(desktop_state, "SCREENSHOT_DIR", tmp_path / "screenshots")
    # Report the listed tools as installed under /usr/bin and anything else
    # as missing (empty string).
    monkeypatch.setattr(
        desktop_state.shutil,
        "which",
        lambda name: f"/usr/bin/{name}"
        if name in {"xdotool", "xrandr", "xwininfo", "xprop", "xwd", "xclip"}
        else "",
    )

    def fake_run(command, **kwargs):
        """Answer one faked tool invocation, keyed on the executable name."""
        name = Path(command[0]).name
        if name == "xrandr":
            return _completed(command, stdout="Screen 0: current 1440 x 900, maximum 1920 x 1080\n")
        if name == "xdotool" and command[1:3] == ["getmouselocation", "--shell"]:
            return _completed(command, stdout="X=12\nY=34\nSCREEN=0\nWINDOW=111\n")
        if name == "xdotool" and command[1] == "getactivewindow":
            return _completed(command, stdout="111\n")
        if name == "xdotool" and command[1] == "search":
            return _completed(command, stdout="111\n222\n")
        if name == "xdotool" and command[1] == "getwindowname":
            return _completed(command, stdout={"111": "LibreOffice Calc", "222": "Terminal"}[command[2]] + "\n")
        if name == "xwininfo":
            # The window id is read from command[2]; an unknown id raises
            # KeyError and fails the test.
            geometry = {
                "111": (5, 7, 800, 600),
                "222": (20, 30, 640, 480),
            }[command[2]]
            return _completed(
                command,
                stdout=(
                    f" Absolute upper-left X: {geometry[0]}\n"
                    f" Absolute upper-left Y: {geometry[1]}\n"
                    f" Width: {geometry[2]}\n"
                    f" Height: {geometry[3]}\n"
                ),
            )
        if name == "xprop":
            window_id = command[2]
            if window_id == "111":
                return _completed(
                    command,
                    stdout='WM_CLASS(STRING) = "libreoffice", "libreoffice-calc"\n_NET_WM_PID(CARDINAL) = 4242\n',
                )
            return _completed(
                command,
                stdout='WM_CLASS(STRING) = "xfce4-terminal", "Xfce4-terminal"\n_NET_WM_PID(CARDINAL) = 4343\n',
            )
        # Any command not listed above is a test failure, not a silent no-op.
        raise AssertionError(f"unexpected command: {command}")

    monkeypatch.setattr(desktop_state.subprocess, "run", fake_run)

    state = desktop_state.collect_state()

    assert state["ok"] is True
    assert state["display"] == ":120"
    assert state["profile_dir"] == str(profile_dir)
    assert state["size"] == {"width": 1440, "height": 900}
    assert state["pointer"]["x"] == 12
    assert state["active_window"]["title"] == "LibreOffice Calc"
    assert state["active_window"]["class"] == "libreoffice-calc"
    assert state["active_window"]["geometry"]["width"] == 800
    assert [window["title"] for window in state["windows"]] == ["LibreOffice Calc", "Terminal"]
def test_desktop_state_screenshot_capture_uses_xwd_and_pillow_when_available(tmp_path, monkeypatch):
    """Capture a screenshot with ``xwd`` and PIL both replaced by fakes.

    ``desktop_state.run`` is swapped for a fake that writes a placeholder dump
    to the ``-out`` path, and stub ``PIL`` / ``PIL.Image`` modules are placed
    in ``sys.modules``. The test asserts on the result dictionary, on the PNG
    bytes written by the fake image, and that no ``shot.xwd`` file remains.
    """
    monkeypatch.setattr(desktop_state, "SCREENSHOT_DIR", tmp_path)
    png_path = tmp_path / "shot.png"

    def fake_run(command, *, env, timeout):
        # Stand in for xwd: write a placeholder where ``-out`` points.
        out_position = command.index("-out") + 1
        Path(command[out_position]).write_bytes(b"xwd")
        return _completed(command)

    class FakeImage:
        """Minimal context-manager image exposing a size and ``save``."""

        width = 320
        height = 240

        def __enter__(self):
            return self

        def __exit__(self, *_args):
            return False

        def save(self, target):
            Path(target).write_bytes(b"png")

    # Stub package and submodule so PIL resolves to the fake above.
    image_module = types.ModuleType("PIL.Image")
    image_module.open = lambda _path: FakeImage()
    pil_module = types.ModuleType("PIL")
    pil_module.Image = image_module

    monkeypatch.setattr(desktop_state, "run", fake_run)
    monkeypatch.setitem(sys.modules, "PIL", pil_module)
    monkeypatch.setitem(sys.modules, "PIL.Image", image_module)

    screenshot = desktop_state.capture_screenshot(
        {"DISPLAY": ":120"},
        {"xwd": "/usr/bin/xwd"},
        path=png_path,
        errors=[],
    )

    assert screenshot["ok"] is True
    assert screenshot["path"] == str(png_path)
    assert screenshot["format"] == "png"
    assert png_path.read_bytes() == b"png"
    assert not (tmp_path / "shot.xwd").exists()
def test_xwd_fallback_parser_handles_truecolor_pixels(tmp_path, monkeypatch):
    """Convert a hand-built 2x1 XWD dump and check the decoded pixels.

    The dump is 25 big-endian 32-bit header words followed by two 4-byte
    pixels. ``PIL.Image.new`` is stubbed so the mode, size and pixel data
    handed to it by ``desktop_state.convert_xwd_to_image`` can be recorded
    and asserted.
    """
    raw_path = tmp_path / "shot.xwd"
    target = tmp_path / "shot.png"

    # (field name, value) in on-disk order; the names are for readers only.
    header_fields = (
        ("header_size", 100),
        ("file_version", 7),
        ("pixmap_format", 2),
        ("pixmap_depth", 24),
        ("pixmap_width", 2),
        ("pixmap_height", 1),
        ("xoffset", 0),
        ("byte_order", 1),  # MSBFirst for pixel bytes
        ("bitmap_unit", 32),
        ("bitmap_bit_order", 1),
        ("bitmap_pad", 32),
        ("bits_per_pixel", 32),
        ("bytes_per_line", 8),
        ("visual_class", 4),  # TrueColor
        ("red_mask", 0x00FF0000),
        ("green_mask", 0x0000FF00),
        ("blue_mask", 0x000000FF),
        ("bits_per_rgb", 8),
        ("colormap_entries", 256),
        ("ncolors", 0),
        ("window_width", 2),
        ("window_height", 1),
        ("window_x", 0),
        ("window_y", 0),
        ("window_bdrwidth", 0),
    )
    header = struct.pack(">25I", *(value for _name, value in header_fields))
    red_pixel = bytes.fromhex("00ff0000")
    green_pixel = bytes.fromhex("0000ff00")
    raw_path.write_bytes(header + red_pixel + green_pixel)

    captured: dict[str, object] = {}

    class FakeOutputImage:
        """Records the pixel data it is given and writes a marker file."""

        def putdata(self, pixels):
            captured["pixels"] = list(pixels)

        def save(self, path):
            Path(path).write_bytes(b"fallback-png")

    def fake_new(mode, size):
        captured["mode"] = mode
        captured["size"] = size
        return FakeOutputImage()

    image_module = types.ModuleType("PIL.Image")
    image_module.new = fake_new
    pil_module = types.ModuleType("PIL")
    pil_module.Image = image_module
    monkeypatch.setitem(sys.modules, "PIL", pil_module)
    monkeypatch.setitem(sys.modules, "PIL.Image", image_module)

    converted = desktop_state.convert_xwd_to_image(raw_path, target)

    assert converted == {"width": 2, "height": 1}
    assert captured["mode"] == "RGB"
    assert captured["size"] == (2, 1)
    assert captured["pixels"] == [(255, 0, 0), (0, 255, 0)]
    assert target.read_bytes() == b"fallback-png"

View file

@ -503,6 +503,57 @@ def test_official_libreoffice_desktop_status_and_url_contract(tmp_path, monkeypa
assert "printing=true" in url
def test_office_session_desktop_state_action_defaults_without_screenshot(monkeypatch):
    """Check how the ``desktop_state`` action forwards ``include_screenshot``.

    A stub ``helpers.api`` module is installed so ``office_session`` can be
    imported against a minimal ``ApiHandler``. The manager returned by
    ``libreoffice_desktop.get_manager`` is replaced by a fake that records the
    ``include_screenshot`` flag of every ``state`` call. The handler is run
    once without the flag and once with it set to ``True``; the recorded
    flags must be ``[False, True]``.
    """
    module_name = "plugins._office.api.office_session"

    api_module = types.ModuleType("helpers.api")

    class ApiHandler:
        def __init__(self, app=None, thread_lock=None):
            self.app = app
            self.thread_lock = thread_lock

    api_module.ApiHandler = ApiHandler
    api_module.Request = object
    monkeypatch.setitem(sys.modules, "helpers.api", api_module)

    # Remember any already-imported module, then force a fresh import that
    # binds to the stub ApiHandler installed above.
    original_module = sys.modules.get(module_name)
    monkeypatch.delitem(sys.modules, module_name, raising=False)

    try:
        from plugins._office.api import office_session

        calls = []

        class FakeManager:
            def state(self, *, include_screenshot=False):
                calls.append(include_screenshot)
                return {
                    "ok": True,
                    "display": ":120",
                    "profile_dir": "/a0/tmp/_office/desktop/profiles/agent-zero-desktop",
                    "size": {"width": 1440, "height": 900},
                    "pointer": {"x": 0, "y": 0, "screen": 0, "window": 0},
                    "active_window": None,
                    "windows": [],
                    "screenshot": {"ok": False, "path": ""},
                    "capabilities": {},
                    "errors": [],
                }

        monkeypatch.setattr(office_session.libreoffice_desktop, "get_manager", lambda: FakeManager())
        handler = office_session.OfficeSession(app=None, thread_lock=None)
        request = types.SimpleNamespace(headers={}, host_url="http://localhost:32080")

        default_result = asyncio.run(handler.process({"action": "desktop_state"}, request))
        screenshot_result = asyncio.run(
            handler.process({"action": "desktop_state", "include_screenshot": True}, request),
        )

        assert default_result["ok"] is True
        assert screenshot_result["ok"] is True
        assert calls == [False, True]
    finally:
        # Clean up directly rather than through monkeypatch: monkeypatch
        # records each deletion and undoes it at teardown, which would put the
        # stub-based module back. Running in ``finally`` also covers failures.
        # If a module was imported before this test, the earlier
        # monkeypatch.delitem restores it in sys.modules at teardown.
        sys.modules.pop(module_name, None)
        api_package = sys.modules.get("plugins._office.api")
        if api_package is not None:
            if original_module is not None:
                api_package.office_session = original_module
            elif hasattr(api_package, "office_session"):
                del api_package.office_session
def test_official_libreoffice_desktop_manager_opens_binary_session(office_state, tmp_path, monkeypatch):
class FakeProcess:
pid = 4242