connector: gate remote tool guidance on active permissions

Move the heavy remote-tool operating guidance out of the always-on tool prompts
and inject it only when the current context can actually use those tools.

- add extras prompts for computer_use_remote, code_execution_remote, and text_editor_remote
- trim the base tool prompts down to the stable contract and minimal notes
- inject detailed guidance from message-loop extensions instead of always paying the token cost
- store remote_files and remote_exec hello metadata alongside computer_use metadata
- make code_execution_remote follow the real F4 exec-enabled state
- make text_editor_remote follow the real F3 read-only vs read-write state
- surface read-only mode in the injected text-editor guidance and suppress write guidance there
- keep legacy fallback behavior for older CLIs that do not yet advertise the new hello metadata
This commit is contained in:
Alessandro 2026-04-19 22:06:13 +02:00
parent bdf9cad447
commit a5d733c85f
13 changed files with 463 additions and 146 deletions

View file

@ -13,6 +13,8 @@ from plugins._a0_connector.helpers.event_bridge import get_context_log_entries
from plugins._a0_connector.helpers.ws_runtime import (
clear_remote_tree_snapshot,
clear_sid_computer_use_metadata,
clear_sid_remote_exec_metadata,
clear_sid_remote_file_metadata,
fail_pending_computer_use_ops_for_sid,
fail_pending_file_ops_for_sid,
fail_pending_exec_ops_for_sid,
@ -22,6 +24,8 @@ from plugins._a0_connector.helpers.ws_runtime import (
resolve_pending_file_op,
store_remote_tree_snapshot,
store_sid_computer_use_metadata,
store_sid_remote_exec_metadata,
store_sid_remote_file_metadata,
subscribe_sid_to_context,
subscribed_contexts_for_sid,
subscribed_sids_for_context,
@ -81,6 +85,8 @@ class WsConnector(WsHandler):
error="CLI disconnected before completing the requested computer-use operation",
)
clear_sid_computer_use_metadata(sid)
clear_sid_remote_file_metadata(sid)
clear_sid_remote_exec_metadata(sid)
PrintStyle.debug(f"[a0-connector] /ws disconnected: {sid}")
async def process(
@ -91,10 +97,20 @@ class WsConnector(WsHandler):
) -> dict[str, Any] | WsResult | None:
if event == "connector_hello":
computer_use = data.get("computer_use")
remote_files = data.get("remote_files")
remote_exec = data.get("remote_exec")
if isinstance(computer_use, dict):
store_sid_computer_use_metadata(sid, computer_use)
else:
clear_sid_computer_use_metadata(sid)
if isinstance(remote_files, dict):
store_sid_remote_file_metadata(sid, remote_files)
else:
clear_sid_remote_file_metadata(sid)
if isinstance(remote_exec, dict):
store_sid_remote_exec_metadata(sid, remote_exec)
else:
clear_sid_remote_exec_metadata(sid)
return {
"protocol": PROTOCOL_VERSION,
"features": WS_FEATURES,

View file

@ -0,0 +1,50 @@
from __future__ import annotations
from agent import LoopData
from helpers.extension import Extension
from plugins._a0_connector.helpers.ws_runtime import (
computer_use_metadata_for_sid,
select_computer_use_target_sid,
)
class IncludeComputerUseRemote(Extension):
    """Message-loop extension that adds computer_use_remote guidance on demand.

    The rendered prompt is stored in ``loop_data.extras_temporary`` only when
    the CLI selected for the agent's context reports computer use as both
    supported and enabled. In every other case the extension returns without
    touching ``loop_data``.
    """

    async def execute(self, loop_data: LoopData = LoopData(), **kwargs):
        """Render ``agent.extras.computer_use_remote.md`` into the loop extras."""
        if not self.agent:
            return

        context_id = getattr(self.agent.context, "id", "")
        target_sid = select_computer_use_target_sid(context_id) if context_id else None
        if not target_sid:
            return

        metadata = computer_use_metadata_for_sid(target_sid)
        if not (metadata and metadata.get("supported") and metadata.get("enabled")):
            return

        def text_of(key: str) -> str:
            # Missing and falsy values collapse to "" before trimming.
            return str(metadata.get(key) or "").strip()

        # The backend is shown as "id" or "id/family" depending on what was advertised.
        backend = text_of("backend_id") or "unknown"
        family = text_of("backend_family")
        if family:
            backend = f"{backend}/{family}"

        # Only list/tuple feature sets are honoured; blank entries are dropped.
        advertised = metadata.get("features")
        labels = []
        if isinstance(advertised, (list, tuple)):
            labels = [text for text in (str(item).strip() for item in advertised) if text]
        features = ", ".join(labels) or "none advertised"

        loop_data.extras_temporary["computer_use_remote"] = self.agent.read_prompt(
            "agent.extras.computer_use_remote.md",
            backend=backend,
            trust_mode=text_of("trust_mode") or "unknown",
            features=features,
            support_reason=text_of("support_reason") or "No support details available.",
        )

View file

@ -0,0 +1,48 @@
from __future__ import annotations
from agent import LoopData
from helpers.extension import Extension
from plugins._a0_connector.helpers.exec_config import build_exec_config
from plugins._a0_connector.helpers.ws_runtime import select_remote_exec_target_sid
def _format_timeouts(payload: dict[str, int]) -> str:
return ", ".join(f"{key}={value}" for key, value in payload.items()) or "none"
def _format_patterns(value: object) -> str:
if isinstance(value, (list, tuple)):
items = [str(item).strip() for item in value if str(item).strip()]
else:
items = []
return ", ".join(items) or "none"
class IncludeCodeExecutionRemote(Extension):
    """Message-loop extension that adds code_execution_remote guidance on demand.

    The prompt is injected only when ``select_remote_exec_target_sid`` returns
    a CLI for the agent's context; otherwise ``loop_data`` is left untouched.
    """

    async def execute(self, loop_data: LoopData = LoopData(), **kwargs):
        """Render ``agent.extras.code_execution_remote.md`` into the loop extras."""
        if not self.agent:
            return

        context_id = getattr(self.agent.context, "id", "")
        if not context_id:
            return
        if not select_remote_exec_target_sid(context_id):
            return

        config = build_exec_config(agent=self.agent)

        def timeouts_text(key: str) -> str:
            # Non-dict sections are formatted as an empty mapping ("none").
            section = config.get(key)
            return _format_timeouts(section if isinstance(section, dict) else {})

        loop_data.extras_temporary["code_execution_remote"] = self.agent.read_prompt(
            "agent.extras.code_execution_remote.md",
            code_exec_timeouts=timeouts_text("code_exec_timeouts"),
            output_timeouts=timeouts_text("output_timeouts"),
            prompt_patterns=_format_patterns(config.get("prompt_patterns")),
            dialog_patterns=_format_patterns(config.get("dialog_patterns")),
        )

View file

@ -0,0 +1,98 @@
from __future__ import annotations
from agent import LoopData
from helpers.extension import Extension
from plugins._a0_connector.helpers.ws_runtime import (
remote_file_metadata_for_sid,
select_remote_file_target_sid,
)
class IncludeTextEditorRemote(Extension):
    """Message-loop extension that adds text_editor_remote guidance on demand.

    The injected prompt reports one of three access modes:

    * ``Read&Write (legacy/unknown)`` - the selected CLI stored no remote-file
      metadata, so writes are assumed to work.
    * ``Read&Write`` - the selected CLI advertises ``write_enabled``.
    * ``Read only`` - the selected CLI advertises file access without writes;
      write guidance is replaced by a notice and the write examples are omitted.

    Nothing is injected when no CLI can be selected for the context.
    """

    async def execute(self, loop_data: LoopData = LoopData(), **kwargs):
        """Render ``agent.extras.text_editor_remote.md`` into the loop extras."""
        if not self.agent:
            return

        context_id = getattr(self.agent.context, "id", "")
        if not context_id:
            return

        # Describe the CLI that a `write`/`patch` would be routed to. Selecting
        # without `require_writes` returns the first enabled subscriber even if
        # it is read-only, which would make the guidance claim "Read only"
        # while another subscribed CLI in the same context accepts writes.
        sid = select_remote_file_target_sid(context_id, require_writes=True)
        if not sid:
            # No write-capable (or legacy) CLI: fall back to a read-only one.
            sid = select_remote_file_target_sid(context_id)
        if not sid:
            return

        metadata = remote_file_metadata_for_sid(sid)

        # Shared by both writable branches; cleared again for read-only mode.
        write_examples = """```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "write",
"path": "/path/on/remote/machine/file.py",
"content": "import os\\nprint('hello')\\n"
}
}
```
```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "patch",
"path": "/path/on/remote/machine/file.py",
"edits": [
{"from": 5, "to": 5, "content": " if x == 2:\\n"}
]
}
}
```"""

        if metadata is None:
            # Legacy CLI: no metadata was stored for this sid.
            access_mode = "Read&Write (legacy/unknown)"
            write_guidance = (
                "- Writes and patches are expected to be available, but this CLI did not "
                "advertise an explicit F3 access mode."
            )
        elif metadata.get("write_enabled"):
            access_mode = "Read&Write"
            write_guidance = (
                "- Use `write` only when replacing or creating the full file is the right operation.\n"
                "- Use `patch` for surgical line-range edits. Keep the edit set tight and based on the latest remote read.\n"
                "- Freshness-aware patching may reject stale edits. If a patch requires a reread, read the file again and then retry with updated ranges."
            )
        else:
            access_mode = "Read only"
            write_guidance = (
                "- Writes and patches are disabled in this CLI session. Press F3 to switch the host machine to Read&Write before attempting `write` or `patch`."
            )
            write_examples = ""

        prompt = self.agent.read_prompt(
            "agent.extras.text_editor_remote.md",
            access_mode=access_mode,
            write_guidance=write_guidance,
            write_examples=write_examples,
        )
        loop_data.extras_temporary["text_editor_remote"] = prompt

View file

@ -51,6 +51,20 @@ class ComputerUseMetadata:
updated_at: float
@dataclass(frozen=True)
class RemoteFileMetadata:
    """Immutable record of the remote-file capabilities stored for one CLI sid."""

    # When False, select_remote_file_target_sid skips this sid.
    enabled: bool
    # When False, the sid is skipped for selections made with require_writes=True.
    write_enabled: bool
    # "read_only" or "read_write" as normalised by store_sid_remote_file_metadata.
    mode: str
    # time.time() value captured when the record was stored.
    updated_at: float
@dataclass(frozen=True)
class RemoteExecMetadata:
    """Immutable record of the remote-execution state stored for one CLI sid."""

    # When False, select_remote_exec_target_sid does not return this sid.
    enabled: bool
    # time.time() value captured when the record was stored.
    updated_at: float
_context_subscriptions: dict[str, set[str]] = {}
_sid_contexts: dict[str, set[str]] = {}
_pending_file_ops: dict[str, PendingFileOperation] = {}
@ -58,6 +72,8 @@ _pending_exec_ops: dict[str, PendingExecOperation] = {}
_pending_computer_use_ops: dict[str, PendingComputerUseOperation] = {}
_remote_tree_snapshots: dict[str, RemoteTreeSnapshot] = {}
_sid_computer_use_metadata: dict[str, ComputerUseMetadata] = {}
_sid_remote_file_metadata: dict[str, RemoteFileMetadata] = {}
_sid_remote_exec_metadata: dict[str, RemoteExecMetadata] = {}
_state_lock = threading.RLock()
@ -71,6 +87,8 @@ def unregister_sid(sid: str) -> set[str]:
contexts = _sid_contexts.pop(sid, set())
_remote_tree_snapshots.pop(sid, None)
_sid_computer_use_metadata.pop(sid, None)
_sid_remote_file_metadata.pop(sid, None)
_sid_remote_exec_metadata.pop(sid, None)
for context_id in contexts:
subscribers = _context_subscriptions.get(context_id)
if not subscribers:
@ -167,6 +185,99 @@ def select_target_sid(context_id: str) -> str | None:
return sorted(subscribers)[0]
def store_sid_remote_file_metadata(sid: str, payload: dict[str, Any]) -> RemoteFileMetadata:
    """Normalise a remote-file payload and remember it for ``sid``.

    ``enabled`` defaults to True when absent, ``write_enabled`` defaults to
    False. ``mode`` is lower-cased and trimmed; when it is not ``read_only`` or
    ``read_write`` it is derived from ``write_enabled``. Any earlier record for
    the same sid is replaced.

    Returns the stored ``RemoteFileMetadata``.
    """
    can_write = bool(payload.get("write_enabled"))

    advertised_mode = str(payload.get("mode", "") or "").strip().lower()
    if advertised_mode in {"read_only", "read_write"}:
        # NOTE(review): a recognised mode is kept as advertised and is not
        # reconciled with `write_enabled`, so the two can disagree - confirm
        # the CLI always sends a consistent pair.
        mode = advertised_mode
    elif can_write:
        mode = "read_write"
    else:
        mode = "read_only"

    record = RemoteFileMetadata(
        enabled=bool(payload.get("enabled", True)),
        write_enabled=can_write,
        mode=mode,
        updated_at=time.time(),
    )
    with _state_lock:
        _sid_remote_file_metadata[sid] = record
    return record
def clear_sid_remote_file_metadata(sid: str) -> None:
    """Forget any remote-file metadata stored for ``sid``; a no-op if none exists."""
    with _state_lock:
        _sid_remote_file_metadata.pop(sid, None)
def remote_file_metadata_for_sid(sid: str) -> dict[str, Any] | None:
    """Return the remote-file metadata stored for ``sid`` as a plain dict.

    The dict has the keys ``enabled``, ``write_enabled``, ``mode`` and
    ``updated_at``. Returns ``None`` when nothing is stored for the sid.
    """
    with _state_lock:
        record = _sid_remote_file_metadata.get(sid)
    if record is None:
        return None
    # The record is frozen, so it can be read after the lock is released.
    return {
        field: getattr(record, field)
        for field in ("enabled", "write_enabled", "mode", "updated_at")
    }
def select_remote_file_target_sid(context_id: str, *, require_writes: bool = False) -> str | None:
    """Pick the subscribed sid that should serve a remote file operation.

    Subscribers of ``context_id`` are examined in sorted order. The first one
    whose stored metadata is enabled (and write-enabled when ``require_writes``
    is set) is returned. If none qualifies, the first subscriber without any
    stored metadata is returned as a legacy fallback; otherwise ``None``.
    """
    with _state_lock:
        legacy_sid: str | None = None
        for candidate in sorted(_context_subscriptions.get(context_id, set())):
            info = _sid_remote_file_metadata.get(candidate)
            if info is None:
                # Remember only the first subscriber that stored no metadata.
                if legacy_sid is None:
                    legacy_sid = candidate
            elif info.enabled and (info.write_enabled or not require_writes):
                return candidate
        return legacy_sid
def store_sid_remote_exec_metadata(sid: str, payload: dict[str, Any]) -> RemoteExecMetadata:
    """Remember the remote-execution state from ``payload`` for ``sid``.

    ``enabled`` is the truthiness of ``payload["enabled"]`` and is False when
    the key is absent. Any earlier record for the same sid is replaced.

    Returns the stored ``RemoteExecMetadata``.
    """
    exec_enabled = bool(payload.get("enabled"))
    record = RemoteExecMetadata(enabled=exec_enabled, updated_at=time.time())
    with _state_lock:
        _sid_remote_exec_metadata[sid] = record
    return record
def clear_sid_remote_exec_metadata(sid: str) -> None:
    """Forget any remote-exec metadata stored for ``sid``; a no-op if none exists."""
    with _state_lock:
        _sid_remote_exec_metadata.pop(sid, None)
def remote_exec_metadata_for_sid(sid: str) -> dict[str, Any] | None:
    """Return the remote-exec metadata stored for ``sid`` as a plain dict.

    The dict has the keys ``enabled`` and ``updated_at``. Returns ``None``
    when nothing is stored for the sid.
    """
    with _state_lock:
        record = _sid_remote_exec_metadata.get(sid)
    if record is None:
        return None
    # The record is frozen, so it can be read after the lock is released.
    return {field: getattr(record, field) for field in ("enabled", "updated_at")}
def select_remote_exec_target_sid(context_id: str) -> str | None:
    """Pick the subscribed sid that should serve a remote execution request.

    Subscribers of ``context_id`` are examined in sorted order and the first
    one whose stored metadata has ``enabled`` set is returned. If none
    qualifies, the first subscriber without any stored metadata is returned
    as a legacy fallback; otherwise ``None``.
    """
    with _state_lock:
        legacy_sid: str | None = None
        for candidate in sorted(_context_subscriptions.get(context_id, set())):
            info = _sid_remote_exec_metadata.get(candidate)
            if info is None:
                # Remember only the first subscriber that stored no metadata.
                if legacy_sid is None:
                    legacy_sid = candidate
            elif info.enabled:
                return candidate
        return legacy_sid
def store_sid_computer_use_metadata(sid: str, payload: dict[str, Any]) -> ComputerUseMetadata:
features_value = payload.get("features")
if isinstance(features_value, (list, tuple)):

View file

@ -0,0 +1,52 @@
## code_execution_remote guidance
Remote code execution is currently available in this context through the connected CLI.
Execution config:
- code execution timeouts: `{{code_exec_timeouts}}`
- output polling timeouts: `{{output_timeouts}}`
- prompt patterns: `{{prompt_patterns}}`
- dialog patterns: `{{dialog_patterns}}`
- Use this tool for shell-backed execution on the remote CLI machine, not on the Agent Zero server.
- Session ids are frontend-local and persistent across calls. Reuse the same `session` when continuing a workflow.
- Use `runtime=terminal` for shell commands, `runtime=python` for Python snippets, and `runtime=nodejs` for Node.js snippets.
- Use `runtime=output` to poll a running session after a prior call returned before the shell settled.
- Use `runtime=reset` when a session is stuck or you need a clean shell.
- `runtime=input` is only a deprecated compatibility alias for sending one line of keyboard input into a running shell session.
- Frontend execution may still be locally disabled in the CLI session. If so, expect a structured `{ok: false}` error instead of a fallback runtime.
- Prefer concise, self-checking commands. For multi-step work, inspect output and continue in the same session instead of restarting from scratch.
Examples:
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "terminal",
"session": 0,
"code": "pwd && ls -la"
}
}
```
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "python",
"session": 0,
"code": "import os\nprint(os.getcwd())"
}
}
```
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "output",
"session": 0
}
}
```

View file

@ -0,0 +1,21 @@
## computer_use_remote guidance
Computer use is currently available in this context.
Backend: `{{backend}}`
Trust mode: `{{trust_mode}}`
Features: `{{features}}`
Support note: `{{support_reason}}`
- Use this for local desktop and native UI tasks on the connected machine.
- If the task is browser-only and the user is flexible, prefer `browser_agent` because it is usually more reliable and token-efficient than screenshot-driven desktop control.
- Use `start_session` before interactive desktop actions. `status` is for inspection; `stop_session` ends the session.
- Base every decision on the latest screenshot or a definitive tool result, not memory.
- Successful `start_session`, `move`, `click`, `scroll`, `key`, and `type` calls already attach a fresh screenshot.
- Use `capture` only when you need a screen refresh without taking another action.
- Prefer keyboard actions over pointer actions when there is a reliable keyboard path.
- Treat menus and popups as transient UI. If a click dismisses one without visible progress, treat that attempt as failed and switch approach.
- If the same approach has already failed twice without visible progress, stop repeating it and try a different strategy.
- For browser work done through this tool, only claim success when the page content area visibly shows the expected destination or result.
- Use `type(..., submit=true)` only for navigation-style entry such as an address bar or command box. For ordinary text fields, type first and send `enter` separately only if needed.
- In `free_run`, do not expect a fresh approval prompt. If silent restore is no longer valid, expect `COMPUTER_USE_REARM_REQUIRED`.
- Treat user interventions as high-priority control signals. If the user says `stop`, `pause`, `abort`, `hold`, `don't continue`, or equivalent, stop using computer-use tools until the user explicitly resumes.

View file

@ -0,0 +1,25 @@
## text_editor_remote guidance
Remote file editing is currently available in this context through the connected CLI.
Current access mode: `{{access_mode}}`
- Use `text_editor_remote` when the user asks you to edit files on their local machine while connected via the CLI.
- Paths are evaluated on the remote CLI machine's filesystem, not on the Agent Zero server.
- Prefer `read` before `patch` so you have current line numbers and freshness metadata.
- `read` is always the safest first step for inspecting a file on the remote CLI machine.
{{write_guidance}}
Examples:
```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "read",
"path": "/path/on/remote/machine/file.py",
"line_from": 1,
"line_to": 50
}
}
```
{{write_examples}}

View file

@ -1,9 +1,8 @@
# code_execution_remote tool
This tool runs shell-backed execution on the **remote machine where the CLI is running**.
It converges onto Agent Zero Core's persistent local-shell model, so the frontend session
can execute terminal commands and shell-launched `python` / `nodejs` snippets while keeping
session ids stable across calls.
Detailed usage guidance is injected separately only when the current context has a
subscribed CLI with remote execution enabled, so the base system prompt stays small when remote execution is not in play.
## Requirements
- A CLI client must be connected to this context via the shared `/ws` namespace.
@ -22,79 +21,6 @@ Runtime-specific fields:
- `input`: requires `keyboard` (or `code` as fallback)
- `reset`: optional `reason`
## Usage
### Execute a terminal command
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "terminal",
"session": 0,
"code": "pwd && ls -la"
}
}
```
### Execute Python through the shell-backed runtime
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "python",
"session": 0,
"code": "import os\nprint(os.getcwd())"
}
}
```
### Execute Node.js through the shell-backed runtime
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "nodejs",
"session": 0,
"code": "console.log(process.cwd())"
}
}
```
### Poll output from a running session
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "output",
"session": 0
}
}
```
### Send keyboard input to a running session
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "input",
"session": 0,
"keyboard": "yes"
}
}
```
### Reset a session
```json
{
"tool_name": "code_execution_remote",
"tool_args": {
"runtime": "reset",
"session": 0,
"reason": "stuck process"
}
}
```
## Notes
- Session state is frontend-local and shell-backed.
- `output` is for long-running operations where a prior call returned control before the

View file

@ -2,25 +2,12 @@
Use the connected CLI host machine as a local desktop target.
## Preferred Scope
- Use this for local desktop and native UI tasks on the connected machine.
- For ordinary website browsing, search, form filling, and web downloads, prefer `browser_agent`.
- If the user is flexible and the task is browser-only, briefly guide them toward browser tools because they are usually more reliable and token-efficient than screenshot-driven computer use.
- Before doing real computer-use work, load the `computer-use-remote` skill and follow it.
This tool is only usable when the current context has a subscribed CLI with enabled local computer use.
Detailed operating guidance is injected separately only when that condition is true, so the base system prompt stays small when computer use is not in play.
## Requirements
- A CLI client must be connected to this context via the shared `/ws` namespace.
- The CLI must advertise `computer_use_remote` support and local computer use must be enabled there.
- In `free_run`, do not expect a fresh approval prompt. If restore is no longer valid, the tool will surface `COMPUTER_USE_REARM_REQUIRED`.
## Minimal Rules
- Treat user interventions as high-priority control signals.
- If the user says `stop`, `pause`, `abort`, `hold`, `don't continue`, or equivalent, halt immediately and do not use computer-use tools again until the user explicitly resumes.
- Call `start_session` first. It automatically attaches the current screen.
- Decide from the latest screenshot, not from memory.
- Interactive actions (`move`, `click`, `scroll`, `key`, `type`) automatically attach a fresh screenshot after they run.
- Use `capture` only when you need another screen refresh without taking an action.
- Prefer keyboard actions over pointer actions whenever a reliable keyboard path exists.
## Arguments
- `action`: one of `start_session`, `status`, `capture`, `move`, `click`, `scroll`, `key`, `type`, `stop_session`
@ -32,3 +19,7 @@ Action-specific fields:
- `scroll`: `dx`, `dy`
- `key`: `key` or `keys`
- `type`: `text`, optional `submit` boolean
## Runtime Notes
- Successful `start_session`, `move`, `click`, `scroll`, `key`, and `type` calls automatically attach a fresh screenshot.
- `status` reports the current computer-use state without starting a session.

View file

@ -2,59 +2,18 @@
This tool allows you to read, write, and patch files on the **remote machine where the CLI is running**.
This is different from `text_editor` which operates on the Agent Zero server's filesystem.
Use `text_editor_remote` when the user asks you to edit files on their local machine while connected via the CLI.
Detailed usage guidance is injected separately only when the current context has a
subscribed CLI, so the base system prompt stays small when remote editing is not in play.
## Requirements
- A CLI client must be connected to this context via the shared `/ws` namespace.
- The CLI client must have enabled remote file editing support.
## Operations
### Read a file
```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "read",
"path": "/path/on/remote/machine/file.py",
"line_from": 1,
"line_to": 50
}
}
```
Returns file content with line numbers. `line_from` and `line_to` are optional.
### Write a file
```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "write",
"path": "/path/on/remote/machine/file.py",
"content": "import os\nprint('hello')\n"
}
}
```
Creates or overwrites the file on the remote machine.
### Patch a file
```json
{
"tool_name": "text_editor_remote",
"tool_args": {
"op": "patch",
"path": "/path/on/remote/machine/file.py",
"edits": [
{"from": 5, "to": 5, "content": " if x == 2:\n"}
]
}
}
```
Applies line-range patches to the file. Use the same format as the standard `text_editor:patch` tool.
- `read`: optional `line_from`, `line_to`
- `write`: requires `content`
- `patch`: requires `edits`
## Notes
- Always read the file first before patching to get current line numbers.
- Paths are evaluated on the **remote machine's filesystem**, not the Agent Zero server.
- If no CLI is connected, the tool will return an error message.
- The transport uses `connector_file_op` and `connector_file_op_result` with a shared `op_id`.

View file

@ -11,8 +11,9 @@ from helpers.ws_manager import ConnectionNotFoundError, get_shared_ws_manager
from plugins._a0_connector.helpers.ws_runtime import (
clear_pending_exec_op,
select_target_sid,
select_remote_exec_target_sid,
store_pending_exec_op,
subscribed_sids_for_context,
)
@ -60,11 +61,15 @@ class CodeExecutionRemote(Tool):
)
context_id = self.agent.context.id
sid = select_target_sid(context_id)
subscribers = subscribed_sids_for_context(context_id)
sid = select_remote_exec_target_sid(context_id)
if not sid:
return Response(
message=(
"code_execution_remote: no CLI client connected to this context. "
"code_execution_remote: no subscribed CLI in this context currently has "
"remote execution enabled. Connect the CLI and press F4 to switch exec on."
if subscribers
else "code_execution_remote: no CLI client connected to this context. "
"Make sure the CLI is connected and subscribed."
),
break_loop=False,

View file

@ -18,8 +18,9 @@ from plugins._a0_connector.helpers.text_editor_freshness import (
)
from plugins._a0_connector.helpers.ws_runtime import (
clear_pending_file_op,
select_target_sid,
select_remote_file_target_sid,
store_pending_file_op,
subscribed_sids_for_context,
)
@ -125,14 +126,28 @@ class TextEditorRemote(Tool):
**payload_extra: Any,
) -> dict[str, Any]:
context_id = self.agent.context.id
sid = select_target_sid(context_id)
require_writes = op in {"write", "patch"}
subscribers = subscribed_sids_for_context(context_id)
sid = select_remote_file_target_sid(context_id, require_writes=require_writes)
if not sid:
return {
"ok": False,
"error": (
if not subscribers:
error = (
"text_editor_remote: no CLI client connected to this context. "
"Make sure the CLI is connected and subscribed."
),
)
elif require_writes:
error = (
"text_editor_remote: no subscribed CLI in this context currently allows "
"remote file writes. Press F3 to switch the CLI to Read&Write."
)
else:
error = (
"text_editor_remote: no subscribed CLI in this context currently advertises "
"remote file access."
)
return {
"ok": False,
"error": error,
}
op_id = str(uuid.uuid4())