mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-21 18:51:38 +00:00
Improve Linux Desktop state controls
Add a desktop_state helper, expanded desktopctl observe-act-verify commands, backend desktop_state support, Extra prompt state, and Xpra bridge diagnostics for the built-in Linux Desktop. Update the Linux Desktop skill so agents prefer structured/app-native/keyboard workflows, treat coordinate clicks as last resort, and verify terminal or CLI-agent work with fresh final screenshots. Cover the behavior with focused Office desktop state, canvas setup, and office_session tests.
This commit is contained in:
parent
2398bd1601
commit
78570e5689
11 changed files with 1398 additions and 13 deletions
|
|
@ -72,6 +72,14 @@ def test_document_canvas_uses_markdown_editor_and_official_libreoffice_desktop_f
|
|||
assert "primeXpraDesktopFrame" in store
|
||||
assert "normalizeXpraDesktopWindow" in store
|
||||
assert "installXpraDesktopWheelBridge" in store
|
||||
assert "installXpraDesktopAgentBridge" in store
|
||||
assert "agentZeroDesktop" in store
|
||||
assert 'callOffice("desktop_state"' in store
|
||||
assert "desktopToClient" in store
|
||||
assert "clientToDesktop" in store
|
||||
assert "requestRefresh" in store
|
||||
assert "_desktopBridgeReady" in store
|
||||
assert "_desktopKeyboardCaptureState" in store
|
||||
assert "installXpraDesktopKeyboardBridge" in store
|
||||
assert "focusDesktopFrame" in store
|
||||
assert "_desktopKeyboardActive" in store
|
||||
|
|
@ -227,6 +235,10 @@ def test_official_libreoffice_desktop_route_and_packages_are_declared():
|
|||
linux_desktopctl = (
|
||||
PROJECT_ROOT / "plugins" / "_office" / "skills" / "linux-desktop" / "scripts" / "desktopctl.sh"
|
||||
).read_text(encoding="utf-8")
|
||||
desktop_state_helper = (
|
||||
PROJECT_ROOT / "plugins" / "_office" / "helpers" / "desktop_state.py"
|
||||
).read_text(encoding="utf-8")
|
||||
hooks_py = (PROJECT_ROOT / "plugins" / "_office" / "hooks.py").read_text(encoding="utf-8")
|
||||
linux_calc_helper = (
|
||||
PROJECT_ROOT / "plugins" / "_office" / "skills" / "linux-desktop" / "scripts" / "calc_set_cell.py"
|
||||
).read_text(encoding="utf-8")
|
||||
|
|
@ -320,10 +332,42 @@ def test_official_libreoffice_desktop_route_and_packages_are_declared():
|
|||
assert "/a0/usr/projects" in linux_desktop_skill
|
||||
assert "desktopctl.sh" in linux_desktop_skill
|
||||
assert "calc-set-cell" in linux_desktop_skill
|
||||
assert "Clicks are explicitly last resort" in linux_desktop_skill or "clicks are explicitly last resort" in linux_desktop_skill
|
||||
assert "fresh Desktop observation" in linux_desktop_skill
|
||||
assert "observe --json --screenshot" in linux_desktop_skill
|
||||
assert "Terminal And CLI Agent Verification" in linux_desktop_skill
|
||||
assert "Do not report from an earlier screenshot path" in linux_desktop_skill
|
||||
assert "screenshot path returned by that final observation" in linux_desktop_skill
|
||||
assert "xdotool" in linux_desktopctl
|
||||
assert "agent-zero-desktop" in linux_desktopctl
|
||||
assert "launch_app" in linux_desktopctl
|
||||
assert "paste_key_for_active_window" in linux_desktopctl
|
||||
assert "active_window_is_terminal" in linux_desktopctl
|
||||
assert "WM_CLASS" in linux_desktopctl
|
||||
for command in (
|
||||
"state)",
|
||||
"observe)",
|
||||
"screenshot)",
|
||||
"active-window)",
|
||||
"geometry)",
|
||||
"wait-window)",
|
||||
"scroll)",
|
||||
"drag)",
|
||||
"right-click)",
|
||||
"paste-text)",
|
||||
"sequence)",
|
||||
):
|
||||
assert command in linux_desktopctl
|
||||
assert "calc_set_cell.py" in linux_desktopctl
|
||||
assert "collect_state" in desktop_state_helper
|
||||
assert "compact_prompt_context" in desktop_state_helper
|
||||
assert "fresh final" in desktop_state_helper
|
||||
assert "xwd" in desktop_state_helper
|
||||
assert "PIL" in desktop_state_helper
|
||||
assert '"x11-utils"' in hooks_py
|
||||
assert '"x11-apps"' in hooks_py
|
||||
assert '"xclip"' in hooks_py
|
||||
assert '"python3-pil"' in hooks_py
|
||||
assert "wait_for_document" in linux_calc_helper
|
||||
assert "document.store()" in linux_calc_helper
|
||||
assert "read_xlsx_cell" in linux_calc_helper
|
||||
|
|
@ -419,6 +463,8 @@ def test_office_skills_preserve_markdown_first_and_opt_in_desktop_policy():
|
|||
assert "Download and Open in canvas actions" in office_skill
|
||||
assert "method: \"create\"" in office_skill
|
||||
assert "The Desktop is opt-in" in desktop_skill
|
||||
assert "coordinate clicks only as a last resort" in desktop_skill
|
||||
assert "After any GUI action, verify" in desktop_skill
|
||||
assert "custom Markdown editor" in desktop_skill
|
||||
assert "Never open the Desktop/canvas automatically" in desktop_skill
|
||||
assert "persistent Desktop runtime during initial startup" in desktop_skill
|
||||
|
|
@ -432,3 +478,19 @@ def test_office_skills_preserve_markdown_first_and_opt_in_desktop_policy():
|
|||
assert "must not open the canvas automatically" in excel_skill
|
||||
assert '"format": "odp"' in presentation_skill
|
||||
assert "must not open the canvas automatically" in presentation_skill
|
||||
|
||||
|
||||
def test_office_extra_prompt_includes_existing_desktop_state_without_opening_canvas():
    """Check the canvas-context helper and the Office extras prompt by source text.

    Both files are read as text and checked for expected substrings; nothing
    is imported or executed. The helper must mention the desktop-context
    builder, the session manifest check, a screenshot-free ``collect_state``
    call and the compact prompt formatter, and must not mention
    ``ensure_system_desktop``.
    """
    canvas_context = (
        PROJECT_ROOT / "plugins" / "_office" / "helpers" / "canvas_context.py"
    ).read_text(encoding="utf-8")
    prompt = (
        PROJECT_ROOT / "plugins" / "_office" / "prompts" / "agent.extras.office_canvas.md"
    ).read_text(encoding="utf-8")

    assert "build_desktop_context" in canvas_context
    assert "session_manifest_exists" in canvas_context
    assert "collect_state(include_screenshot=False)" in canvas_context
    assert "compact_prompt_context" in canvas_context
    assert "ensure_system_desktop" not in canvas_context
    assert "[DOCUMENT CANVAS]" in prompt
|
||||
|
|
|
|||
201
tests/test_office_desktop_state.py
Normal file
201
tests/test_office_desktop_state.py
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import struct
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from plugins._office.helpers import desktop_state
|
||||
|
||||
|
||||
def _completed(command, returncode=0, stdout="", stderr=""):
    """Return a ``subprocess.CompletedProcess`` for use as a canned tool result.

    Args:
        command: The argument list the fake process was "run" with.
        returncode: Exit status to report (defaults to success).
        stdout: Captured standard output to report.
        stderr: Captured standard error to report.
    """
    return subprocess.CompletedProcess(command, returncode, stdout, stderr)
|
||||
|
||||
|
||||
def test_desktop_state_collects_x11_state_from_mocked_tools(tmp_path, monkeypatch):
    """Check that ``collect_state`` assembles desktop state from stubbed X11 tools.

    A session manifest is written under ``tmp_path``, ``shutil.which`` is
    patched to report the X11 utilities as installed, and ``subprocess.run``
    is replaced by ``fake_run`` so that no real display is required. The
    returned state is then compared with the canned tool output.
    """
    import json  # local import keeps this test self-contained

    session_dir = tmp_path / "sessions"
    profile_dir = tmp_path / "profiles" / desktop_state.SESSION_ID
    session_dir.mkdir(parents=True)
    profile_dir.mkdir(parents=True)
    # Serialise with json.dumps rather than string interpolation so that a
    # temp path containing backslashes or quotes still yields valid JSON.
    (session_dir / f"{desktop_state.SESSION_ID}.json").write_text(
        json.dumps({"display": 120, "profile_dir": str(profile_dir)}),
        encoding="utf-8",
    )

    monkeypatch.setattr(desktop_state, "SESSION_DIR", session_dir)
    monkeypatch.setattr(desktop_state, "PROFILE_DIR", tmp_path / "profiles")
    monkeypatch.setattr(desktop_state, "SCREENSHOT_DIR", tmp_path / "screenshots")
    # Report only the listed X11 utilities as present on PATH.
    monkeypatch.setattr(
        desktop_state.shutil,
        "which",
        lambda name: f"/usr/bin/{name}"
        if name in {"xdotool", "xrandr", "xwininfo", "xprop", "xwd", "xclip"}
        else "",
    )

    def fake_run(command, **kwargs):
        """Return canned output keyed on the tool name and its sub-command."""
        name = Path(command[0]).name
        if name == "xrandr":
            return _completed(command, stdout="Screen 0: current 1440 x 900, maximum 1920 x 1080\n")
        if name == "xdotool" and command[1:3] == ["getmouselocation", "--shell"]:
            return _completed(command, stdout="X=12\nY=34\nSCREEN=0\nWINDOW=111\n")
        if name == "xdotool" and command[1] == "getactivewindow":
            return _completed(command, stdout="111\n")
        if name == "xdotool" and command[1] == "search":
            return _completed(command, stdout="111\n222\n")
        if name == "xdotool" and command[1] == "getwindowname":
            return _completed(command, stdout={"111": "LibreOffice Calc", "222": "Terminal"}[command[2]] + "\n")
        if name == "xwininfo":
            # (x, y, width, height) per window id; the id is read from command[2].
            geometry = {
                "111": (5, 7, 800, 600),
                "222": (20, 30, 640, 480),
            }[command[2]]
            return _completed(
                command,
                stdout=(
                    f"  Absolute upper-left X:  {geometry[0]}\n"
                    f"  Absolute upper-left Y:  {geometry[1]}\n"
                    f"  Width: {geometry[2]}\n"
                    f"  Height: {geometry[3]}\n"
                ),
            )
        if name == "xprop":
            window_id = command[2]
            if window_id == "111":
                return _completed(
                    command,
                    stdout='WM_CLASS(STRING) = "libreoffice", "libreoffice-calc"\n_NET_WM_PID(CARDINAL) = 4242\n',
                )
            return _completed(
                command,
                stdout='WM_CLASS(STRING) = "xfce4-terminal", "Xfce4-terminal"\n_NET_WM_PID(CARDINAL) = 4343\n',
            )
        # Any tool invocation not anticipated above is a test failure.
        raise AssertionError(f"unexpected command: {command}")

    monkeypatch.setattr(desktop_state.subprocess, "run", fake_run)

    state = desktop_state.collect_state()

    assert state["ok"] is True
    assert state["display"] == ":120"
    assert state["profile_dir"] == str(profile_dir)
    assert state["size"] == {"width": 1440, "height": 900}
    assert state["pointer"]["x"] == 12
    assert state["active_window"]["title"] == "LibreOffice Calc"
    assert state["active_window"]["class"] == "libreoffice-calc"
    assert state["active_window"]["geometry"]["width"] == 800
    assert [window["title"] for window in state["windows"]] == ["LibreOffice Calc", "Terminal"]
|
||||
|
||||
|
||||
def test_desktop_state_screenshot_capture_uses_xwd_and_pillow_when_available(tmp_path, monkeypatch):
    """Check ``capture_screenshot`` when ``xwd`` and a PIL-like module are available.

    ``desktop_state.run`` is replaced by a stub that writes a placeholder raw
    dump to the ``-out`` path, and stand-in ``PIL`` / ``PIL.Image`` modules are
    placed in ``sys.modules``. The test expects a PNG result at the requested
    path and no leftover ``.xwd`` file.
    """
    monkeypatch.setattr(desktop_state, "SCREENSHOT_DIR", tmp_path)
    capabilities = {"xwd": "/usr/bin/xwd"}
    env = {"DISPLAY": ":120"}

    def fake_run(command, *, env, timeout):
        """Create the raw dump at the path following ``-out`` and report success."""
        raw_path = Path(command[command.index("-out") + 1])
        raw_path.write_bytes(b"xwd")
        return _completed(command)

    image_module = types.ModuleType("PIL.Image")

    class FakeImage:
        """Minimal image stand-in usable as a context manager."""

        width = 320
        height = 240

        def __enter__(self):
            return self

        def __exit__(self, *_args):
            # Returning False lets any exception raised in the with-body propagate.
            return False

        def save(self, target):
            Path(target).write_bytes(b"png")

    image_module.open = lambda _path: FakeImage()
    pil_module = types.ModuleType("PIL")
    pil_module.Image = image_module

    monkeypatch.setattr(desktop_state, "run", fake_run)
    monkeypatch.setitem(sys.modules, "PIL", pil_module)
    monkeypatch.setitem(sys.modules, "PIL.Image", image_module)

    screenshot = desktop_state.capture_screenshot(env, capabilities, path=tmp_path / "shot.png", errors=[])

    assert screenshot["ok"] is True
    assert screenshot["path"] == str(tmp_path / "shot.png")
    assert screenshot["format"] == "png"
    assert (tmp_path / "shot.png").read_bytes() == b"png"
    assert not (tmp_path / "shot.xwd").exists()
|
||||
|
||||
|
||||
def test_xwd_fallback_parser_handles_truecolor_pixels(tmp_path, monkeypatch):
    """Check ``convert_xwd_to_image`` on a hand-built 2x1 TrueColor XWD dump.

    The file is a 100-byte header (25 big-endian 32-bit fields) followed by
    two 32-bit pixels. A stand-in ``PIL.Image`` module records the mode, size
    and pixel data passed to it; the test expects an RGB 2x1 image whose
    pixels are pure red followed by pure green.
    """
    raw_path = tmp_path / "shot.xwd"
    target = tmp_path / "shot.png"
    header_values = [
        100,  # header_size
        7,  # file_version
        2,  # pixmap_format
        24,  # pixmap_depth
        2,  # pixmap_width
        1,  # pixmap_height
        0,  # xoffset
        1,  # byte_order: MSBFirst for pixel bytes
        32,  # bitmap_unit
        1,  # bitmap_bit_order
        32,  # bitmap_pad
        32,  # bits_per_pixel
        8,  # bytes_per_line
        4,  # visual_class: TrueColor
        0x00FF0000,  # red_mask
        0x0000FF00,  # green_mask
        0x000000FF,  # blue_mask
        8,  # bits_per_rgb
        256,  # colormap_entries
        0,  # ncolors
        2,  # window_width
        1,  # window_height
        0,  # window_x
        0,  # window_y
        0,  # window_bdrwidth
    ]
    # 25 unsigned 32-bit fields == 100 bytes, matching header_size above.
    raw_path.write_bytes(
        struct.pack(">25I", *header_values)
        + bytes.fromhex("00ff0000")
        + bytes.fromhex("0000ff00")
    )

    captured: dict[str, object] = {}
    image_module = types.ModuleType("PIL.Image")

    class FakeOutputImage:
        """Records the pixel data and writes a marker file on save."""

        def putdata(self, pixels):
            captured["pixels"] = list(pixels)

        def save(self, path):
            Path(path).write_bytes(b"fallback-png")

    def fake_new(mode, size):
        """Stand-in for ``Image.new`` that records the requested mode and size."""
        captured["mode"] = mode
        captured["size"] = size
        return FakeOutputImage()

    image_module.new = fake_new
    pil_module = types.ModuleType("PIL")
    pil_module.Image = image_module

    monkeypatch.setitem(sys.modules, "PIL", pil_module)
    monkeypatch.setitem(sys.modules, "PIL.Image", image_module)

    converted = desktop_state.convert_xwd_to_image(raw_path, target)

    assert converted == {"width": 2, "height": 1}
    assert captured["mode"] == "RGB"
    assert captured["size"] == (2, 1)
    assert captured["pixels"] == [(255, 0, 0), (0, 255, 0)]
    assert target.read_bytes() == b"fallback-png"
|
||||
|
|
@ -503,6 +503,57 @@ def test_official_libreoffice_desktop_status_and_url_contract(tmp_path, monkeypa
|
|||
assert "printing=true" in url
|
||||
|
||||
|
||||
def test_office_session_desktop_state_action_defaults_without_screenshot(monkeypatch):
    """Check that the ``desktop_state`` action forwards ``include_screenshot``.

    ``helpers.api`` is replaced by a stub module so ``office_session`` can be
    imported without the real handler base class, and the desktop manager is
    replaced by ``FakeManager``, which records the ``include_screenshot`` flag
    of each ``state`` call. The action is invoked once without the flag and
    once with it set; the recorded flags must be ``[False, True]``.
    """
    module_name = "plugins._office.api.office_session"
    api_module = types.ModuleType("helpers.api")

    class ApiHandler:
        """Bare stand-in for the handler base class."""

        def __init__(self, app=None, thread_lock=None):
            self.app = app
            self.thread_lock = thread_lock

    api_module.ApiHandler = ApiHandler
    api_module.Request = object
    monkeypatch.setitem(sys.modules, "helpers.api", api_module)
    # Drop any cached copy so the import below runs against the stub; if a
    # copy was cached, monkeypatch puts it back at teardown.
    monkeypatch.delitem(sys.modules, module_name, raising=False)

    try:
        from plugins._office.api import office_session

        calls = []

        class FakeManager:
            """Desktop manager stub that records how ``state`` was called."""

            def state(self, *, include_screenshot=False):
                calls.append(include_screenshot)
                return {
                    "ok": True,
                    "display": ":120",
                    "profile_dir": "/a0/tmp/_office/desktop/profiles/agent-zero-desktop",
                    "size": {"width": 1440, "height": 900},
                    "pointer": {"x": 0, "y": 0, "screen": 0, "window": 0},
                    "active_window": None,
                    "windows": [],
                    "screenshot": {"ok": False, "path": ""},
                    "capabilities": {},
                    "errors": [],
                }

        monkeypatch.setattr(office_session.libreoffice_desktop, "get_manager", lambda: FakeManager())
        handler = office_session.OfficeSession(app=None, thread_lock=None)
        request = types.SimpleNamespace(headers={}, host_url="http://localhost:32080")

        default_result = asyncio.run(handler.process({"action": "desktop_state"}, request))
        screenshot_result = asyncio.run(
            handler.process({"action": "desktop_state", "include_screenshot": True}, request),
        )

        assert default_result["ok"] is True
        assert screenshot_result["ok"] is True
        assert calls == [False, True]
    finally:
        # Remove the copy imported against the stub directly rather than via
        # monkeypatch: monkeypatch undoes its own deletions at teardown, which
        # would put the stub-based module back for later tests. Running this
        # in ``finally`` also covers the case where an assertion above fails.
        sys.modules.pop(module_name, None)
        api_package = sys.modules.get("plugins._office.api")
        if api_package is not None and hasattr(api_package, "office_session"):
            delattr(api_package, "office_session")
|
||||
|
||||
|
||||
def test_official_libreoffice_desktop_manager_opens_binary_session(office_state, tmp_path, monkeypatch):
|
||||
class FakeProcess:
|
||||
pid = 4242
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue