From daf95ec3abe6a08d4bf2a927bd54e45f58091ed0 Mon Sep 17 00:00:00 2001 From: Alessandro <155005371+3clyp50@users.noreply.github.com> Date: Sat, 9 May 2026 21:54:43 +0200 Subject: [PATCH] Normalize tool contracts and slim prompt surface Standardize multi-action tools around tool_args.action while keeping parser compatibility for older tool/args, tool_name:action, and method-shaped requests. This keeps new prompts clean without breaking agents that learned the previous dialect. Move A0 connector remote execution/file tools into stable standard prompts, make remote targeting independent of the active chat context, and skill-gate beta computer-use remote so it no longer weighs down the always-on tool list. Align text editor, scheduler, skills, office artifact, memory, notify, and browser prompts/tools around the canonical action contract. Add scheduler update/timezone handling, skills_tool read_file, text editor patch coverage, and fixes for memory_forget, behaviour_adjustment, and code execution progress warnings. Reduce default prompt pressure by compacting browser and scheduler prompts into skill-backed manifests, shortening skill catalog descriptions, and pruning noisy framework knowledge. Remove obsolete connector prompt stubs and root tool-call knowledge examples. 
Tests: conda run -n a0 pytest tests/test_a0_connector_prompt_gating.py tests/test_tool_action_contracts.py tests/test_task_scheduler_timezone.py tests/test_text_editor_context_patch.py tests/test_tool_request_normalization.py tests/test_office_document_store.py::test_odf_is_advertised_and_docx_remains_explicit_compatibility tests/test_office_document_store.py::test_document_artifact_accepts_method_alias_for_ods_create tests/test_skills_runtime.py tests/test_default_prompt_budget.py::test_a0_small_profile_removed_and_prompt_text_generic -q --- agent.py | 4 - .../python/system_prompt/_13_skills_prompt.py | 6 +- helpers/extract_tools.py | 9 + helpers/skills.py | 2 +- helpers/task_scheduler.py | 33 +- knowledge/main/about/architecture.md | 75 +-- knowledge/main/about/capabilities.md | 95 +--- knowledge/main/about/configuration.md | 113 +---- knowledge/main/about/identity.md | 38 +- knowledge/main/about/setup-and-deployment.md | 104 +--- .../main/tool_call_reference_examples.md | 79 --- .../end/_70_include_remote_tool_stubs.py | 166 +----- plugins/_a0_connector/helpers/ws_runtime.py | 88 ++-- ...nt.connector_tool.code_execution_remote.md | 27 - ...gent.connector_tool.computer_use_remote.md | 27 - ...agent.connector_tool.text_editor_remote.md | 21 - ...agent.system.tool.code_execution_remote.md | 46 ++ .../agent.system.tool.text_editor_remote.md | 41 ++ .../skills/code-execution-remote/SKILL.md | 11 +- .../skills/text-editor-remote/SKILL.md | 7 +- .../tools/code_execution_remote.py | 29 +- .../tools/computer_use_remote.py | 4 +- .../_a0_connector/tools/text_editor_remote.py | 40 +- .../prompts/agent.system.tool.browser.md | 187 +------ plugins/_browser/skills/browser-tool/SKILL.md | 76 +++ .../tools/code_execution_tool.py | 4 +- plugins/_memory/tools/behaviour_adjustment.py | 50 +- plugins/_memory/tools/memory_forget.py | 2 +- plugins/_office/helpers/canvas_context.py | 4 +- .../agent.system.tool.document_artifact.md | 5 +- .../_office/skills/calc-spreadsheets/SKILL.md | 
6 +- .../skills/document-artifacts/SKILL.md | 23 +- .../skills/impress-presentations/SKILL.md | 6 +- .../skills/markdown-documents/SKILL.md | 7 +- .../_office/skills/writer-documents/SKILL.md | 7 +- .../prompts/agent.system.tool.text_editor.md | 28 +- plugins/_text_editor/tools/text_editor.py | 25 +- prompts/agent.system.main.communication.md | 2 + prompts/agent.system.skills.md | 10 +- prompts/agent.system.skills.relevant.md | 3 +- prompts/agent.system.tool.notify_user.md | 1 + prompts/agent.system.tool.scheduler.md | 38 +- prompts/agent.system.tool.skills.md | 12 +- prompts/agent.system.tools.md | 1 + skills/computer-use-remote/SKILL.md | 29 +- skills/scheduler-tasks/SKILL.md | 53 ++ tests/test_a0_connector_prompt_gating.py | 222 +++++--- tests/test_default_prompt_budget.py | 12 +- tests/test_office_document_store.py | 2 +- tests/test_task_scheduler_timezone.py | 65 +++ tests/test_text_editor_context_patch.py | 30 ++ tests/test_tool_action_contracts.py | 472 ++++++++++++++++++ tests/test_tool_request_normalization.py | 31 ++ tools/scheduler.py | 227 +++++++-- tools/skills_tool.py | 105 +++- 55 files changed, 1656 insertions(+), 1154 deletions(-) delete mode 100644 knowledge/main/tool_call_reference_examples.md delete mode 100644 plugins/_a0_connector/prompts/agent.connector_tool.code_execution_remote.md delete mode 100644 plugins/_a0_connector/prompts/agent.connector_tool.computer_use_remote.md delete mode 100644 plugins/_a0_connector/prompts/agent.connector_tool.text_editor_remote.md create mode 100644 plugins/_a0_connector/prompts/agent.system.tool.code_execution_remote.md create mode 100644 plugins/_a0_connector/prompts/agent.system.tool.text_editor_remote.md create mode 100644 plugins/_browser/skills/browser-tool/SKILL.md create mode 100644 skills/scheduler-tasks/SKILL.md create mode 100644 tests/test_task_scheduler_timezone.py create mode 100644 tests/test_tool_action_contracts.py diff --git a/agent.py b/agent.py index 987829bd9..267a90807 100644 --- 
a/agent.py +++ b/agent.py @@ -885,10 +885,6 @@ class Agent: tool_name = raw_tool_name # Initialize tool_name with raw_tool_name tool_method = None # Initialize tool_method - # Split raw_tool_name into tool_name and tool_method if applicable - if ":" in raw_tool_name: - tool_name, tool_method = raw_tool_name.split(":", 1) - tool = None # Initialize tool to None # Try getting tool from MCP first diff --git a/extensions/python/system_prompt/_13_skills_prompt.py b/extensions/python/system_prompt/_13_skills_prompt.py index 6e7cd43b5..f52b08dec 100644 --- a/extensions/python/system_prompt/_13_skills_prompt.py +++ b/extensions/python/system_prompt/_13_skills_prompt.py @@ -26,8 +26,10 @@ async def build_prompt(agent: Agent) -> str: result: list[str] = [] for skill in available: name = skill.name.strip().replace("\n", " ")[:100] - descr = skill.description.replace("\n", " ")[:500] - result.append(f"**{name}** {descr}") + descr = skill.description.replace("\n", " ").strip() + if len(descr) > 100: + descr = descr[:100].rstrip() + "..." 
+ result.append(f"- {name}: {descr}" if descr else f"- {name}") if not result: return "" diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py index 4f5a35efe..39e837855 100644 --- a/helpers/extract_tools.py +++ b/helpers/extract_tools.py @@ -33,6 +33,15 @@ def normalize_tool_request(tool_request: Any) -> tuple[str, dict]: tool_args = tool_request.get("args") if not isinstance(tool_args, dict): raise ValueError("Tool request must have a tool_args (type dictionary) field") + tool_args = dict(tool_args) + if ":" in tool_name: + tool_name, action = tool_name.split(":", 1) + if not tool_name or not action: + raise ValueError("tool_name method suffix must include tool and action") + tool_args.setdefault("action", action) + method = tool_args.get("method") + if "action" not in tool_args and isinstance(method, str) and method: + tool_args["action"] = method return tool_name, tool_args diff --git a/helpers/skills.py b/helpers/skills.py index 39b5ff7bd..1112d2973 100644 --- a/helpers/skills.py +++ b/helpers/skills.py @@ -434,7 +434,7 @@ def load_skill_for_agent( files_tree = _get_skill_files(skill.path) lines.append("") if files_tree: - lines.append("Files (use skills_tool method=read_file to open):") + lines.append("Files (use skills_tool action=read_file to open):") lines.append(files_tree) else: lines.append("No additional files found.") diff --git a/helpers/task_scheduler.py b/helpers/task_scheduler.py index 71f55e1c6..f77b457d1 100644 --- a/helpers/task_scheduler.py +++ b/helpers/task_scheduler.py @@ -27,6 +27,19 @@ import pytz from typing import Annotated SCHEDULER_FOLDER = "usr/scheduler" +LOCAL_TIMEZONE_ALIASES = {"local", "user", "default", "current", "current_timezone"} + + +def normalize_schedule_timezone(timezone_name: str | None) -> str: + name = str(timezone_name or "").strip() + if not name or name.lower() in LOCAL_TIMEZONE_ALIASES: + return Localization.get().get_timezone() + try: + pytz.timezone(name) + except 
pytz.exceptions.UnknownTimeZoneError: + PrintStyle.error(f"Unknown task schedule timezone: {name}, using current user timezone") + return Localization.get().get_timezone() + return name # ---------------------- # Task Models @@ -304,9 +317,9 @@ class ScheduledTask(BaseTask): ): # Set timezone in schedule if provided if timezone is not None: - schedule.timezone = timezone + schedule.timezone = normalize_schedule_timezone(timezone) else: - schedule.timezone = Localization.get().get_timezone() + schedule.timezone = normalize_schedule_timezone(schedule.timezone) return cls(name=name, system_prompt=system_prompt, @@ -344,7 +357,8 @@ class ScheduledTask(BaseTask): crontab = CronTab(crontab=self.schedule.to_crontab()) # type: ignore # Get the timezone from the schedule or use UTC as fallback - task_timezone = pytz.timezone(self.schedule.timezone or Localization.get().get_timezone()) + self.schedule.timezone = normalize_schedule_timezone(self.schedule.timezone) + task_timezone = pytz.timezone(self.schedule.timezone) # Get reference time in task's timezone (by default now - frequency_seconds) reference_time = datetime.now(timezone.utc) - timedelta(seconds=frequency_seconds) @@ -364,7 +378,15 @@ class ScheduledTask(BaseTask): def get_next_run(self) -> datetime | None: with self._lock: crontab = CronTab(crontab=self.schedule.to_crontab()) # type: ignore - return crontab.next(now=datetime.now(timezone.utc), return_datetime=True) # type: ignore + self.schedule.timezone = normalize_schedule_timezone(self.schedule.timezone) + task_timezone = pytz.timezone(self.schedule.timezone) + now_in_task_timezone = datetime.now(timezone.utc).astimezone(task_timezone) + next_run = crontab.next(now=now_in_task_timezone, return_datetime=True) # type: ignore + if next_run is None: + return None + if next_run.tzinfo is None: + next_run = task_timezone.localize(next_run) + return next_run.astimezone(timezone.utc) class PlannedTask(BaseTask): @@ -1021,6 +1043,7 @@ def parse_datetime(dt_str: 
Optional[str]) -> Optional[datetime]: def serialize_task_schedule(schedule: TaskSchedule) -> Dict[str, str]: """Convert TaskSchedule to a standardized dictionary format.""" + schedule.timezone = normalize_schedule_timezone(schedule.timezone) return { 'minute': schedule.minute, 'hour': schedule.hour, @@ -1040,7 +1063,7 @@ def parse_task_schedule(schedule_data: Dict[str, str]) -> TaskSchedule: day=schedule_data.get('day', '*'), month=schedule_data.get('month', '*'), weekday=schedule_data.get('weekday', '*'), - timezone=schedule_data.get('timezone', Localization.get().get_timezone()) + timezone=normalize_schedule_timezone(schedule_data.get('timezone')) ) except Exception as e: raise ValueError(f"Invalid schedule format: {e}") from e diff --git a/knowledge/main/about/architecture.md b/knowledge/main/about/architecture.md index f763fc6af..2428f2281 100644 --- a/knowledge/main/about/architecture.md +++ b/knowledge/main/about/architecture.md @@ -1,67 +1,18 @@ -# Agent Zero - Internal Architecture +# Agent Zero Architecture -## The Agent Loop (Monologue Cycle) +The agent loop builds a system prompt, appends conversation history, asks the model for one JSON tool request, executes that tool, records the result, and repeats until `response` ends the task. -Each agent runs a continuous monologue loop. On each cycle the agent receives its current context (system prompt + message history), produces a JSON response (thoughts, headline, tool name, tool args), and the framework executes the named tool. The tool result is appended to history and the loop continues until the agent calls `response` to deliver a final answer to its superior. 
+Key runtime files: +- `agent.py`: `Agent`, `AgentContext`, loop state, tool dispatch +- `initialize.py`: framework initialization +- `run_ui.py`: Web UI entry point +- `helpers/`: shared framework helpers +- `tools/`: core tools +- `plugins/`: framework plugins +- `usr/`: user data, custom plugins, settings, workdir -The loop handles: message history management, context window limits (via summarization), memory recall injection, intervention from superiors, and error recovery (misformat retries, tool-not-found handling). +Prompt assembly is file-based. Main prompts come from `prompts/`, profile overrides from `agents//prompts/`, and plugin prompts from `plugins//prompts/`. -## Context and State +Plugins can add tools, prompts, API handlers, Web UI components, extensions, and hooks. User plugins live in `usr/plugins/` and should survive updates. -`AgentContext` (defined in `agent.py`) is the central state container for a conversation. It holds: -- Agent number and identifier -- Message history -- The active agent profile and prompt configuration -- Reference to memory, knowledge, and tool systems -- Project context if a project is active -- `extras` dict - additional content injected into the system prompt each turn (memories, solutions, agent info, workdir structure) - -Each WebSocket session connects to one `AgentContext`. Multiple concurrent chats run in separate contexts. The framework is initialized in `initialize.py` and the server entry point is `run_ui.py`. - -## Prompt Assembly - -System prompts are assembled from fragment files on each loop iteration. The main system prompt is `prompts/agent.system.main.md`, which includes sub-prompts via `{{ include "filename.md" }}` directives. Agent profiles (in `agents//prompts/`) can override individual fragments. This means a subordinate with the `developer` profile gets a different role and communication section while sharing the same tool list and solving workflow as the base agent. 
- -Prompt fragments are in `prompts/`. Plugin system prompts are in `plugins//prompts/`. The assembled system prompt is dynamic - it changes based on profile, active project, loaded tools, recalled memories, and injected extras. - -## Multi-Agent Hierarchy - -The hierarchy is a tree with the human user at the root. Each node is an agent instance running in its own context. A superior calls `call_subordinate` with a message and optional profile name; this creates a new `AgentContext` and runs the subordinate agent's loop until it returns a response. - -Agent 0 is always the top-level agent whose superior is the user. When Agent 0 delegates a task to a subordinate, that subordinate can itself delegate further. There is no enforced depth limit. Agents share the same tool system but each has its own isolated context and history. - -Subordinates can be given specific prompt profiles (`developer`, `researcher`, or any custom profile in `agents/`). Profiles change the role, communication style, and available instructions without changing the underlying framework. - -## Memory and Knowledge Pipeline - -### Knowledge (vector DB, read-only) -Knowledge files (in `knowledge/` and `usr/knowledge/`) are loaded when a memory DB is initialized (normally at the start of the first monologue in a chat), embedded, and stored in a FAISS vector index per memory subdir. Files are tracked by checksum; only changed files are re-indexed. Supported formats: `.md`, `.txt`, `.pdf`, `.csv`, `.html`, `.json`. - -The memory areas are: -- `main` - general knowledge and facts (files in knowledge root or `main/` subdir) -- `fragments` - partial or supplementary knowledge -- `solutions` - known solutions to problems - -### Recall (automatic, per conversation turn) -The `RecallMemories` extension runs every N loop iterations (configurable). It queries the vector store using either the raw conversation or a utility-LLM-generated search query. 
Results from `main` and `fragments` areas plus `solutions` are injected into `loop_data.extras_persistent`, which gets rendered into the system prompt via `agent.context.extras.md` template. - -The agent sees recalled memories as a section in its system prompt labeled "Memories on the topic". The agent is instructed not to over-rely on them. - -### Agent memory (read-write, via memorize tool) -The agent can explicitly save facts, solutions, and code snippets using the `memorize` tool. These are stored in the same FAISS index under the `main` or `solutions` area and recalled in future conversations. Memory can also be consolidated (summarized) and managed through the Memory Dashboard in the web UI. - -## Tool System - -Tools are Python classes in `python/tools/` that inherit from `Tool`. Each tool implements an `execute()` async method. Tools are discovered at startup and registered in the agent's tool list (rendered into the system prompt as `{{tools}}`). The agent names a tool in its JSON response; the framework finds and calls it. - -Plugin tools can be added in `plugins//tools/` or `usr/plugins//tools/` without modifying core files. - -## Extension and Plugin System - -The plugin system (`python/helpers/plugins.py`) discovers plugins from `plugins/` and `usr/plugins/`. Each plugin has a `plugin.yaml` manifest declaring name, version, and settings. Plugins can contribute: API handlers, tools, Web UI components, extensions, and hooks. User plugins in `usr/plugins/` are never overwritten by framework updates. The agent has skills to create, manage, debug, review and contribute plugins to the Plugin Index repository (https://github.com/agent0ai/a0-plugins) - -## Frontend Architecture - -The web UI is built with Alpine.js and ES module components. The main shell is `webui/index.html`. Components are in `webui/components/`. Frontend state is managed via Alpine stores defined with `createStore` from `/js/AlpineStore.js`. 
- -Real-time communication uses Socket.io WebSockets via a unified `/ws` namespace. WebSocket handlers (WsHandler subclasses) are in `api/ws_*.py`. The connection manager is in `helpers/ws_manager.py`. API handlers are in `api/`, each deriving from `ApiHandler` in `helpers/api.py`. +Memory and knowledge use the memory plugin and vector search. Knowledge files are indexed for recall; they should be concise because irrelevant recall can steer behavior badly. diff --git a/knowledge/main/about/capabilities.md b/knowledge/main/about/capabilities.md index 0396eb8d1..cd3067981 100644 --- a/knowledge/main/about/capabilities.md +++ b/knowledge/main/about/capabilities.md @@ -1,82 +1,19 @@ -# Agent Zero - Capabilities Reference +# Agent Zero Capabilities -## Code Execution +Agent Zero can: +- run terminal and code execution tools inside the Docker/server runtime +- use A0 CLI connector tools for host/local machine execution when connected and enabled +- read, write, and patch files with text editor tools +- browse the web with the browser/search tools +- create and query document artifacts +- save, load, and forget memories +- schedule tasks +- call subordinate agents +- use MCP and A2A integrations when configured -The agent can write and execute code in any language available in the Docker container. The execution environment is a Kali Linux container with two Python runtimes: -- `/opt/venv-a0` (Python 3.12) - the Agent Zero framework runtime -- `/opt/venv` (Python 3.13) - the agent's execution runtime (default for agent-run code) +Important boundary: +- Docker/server tools operate inside the Agent Zero container, usually `/a0/usr/workdir`. +- A0 CLI remote tools operate on the connected host machine, usually the CLI working directory. +- Do not confuse host-local paths with container paths. -The agent installs packages into the execution runtime (`/opt/venv`) via `pip install`. Packages needed by the framework itself must target `/opt/venv-a0`. 
- -Supported runtimes for code execution: Python, Node.js, Bash/shell. Other languages (Go, Rust, PHP, etc.) can be used if the compiler/runtime is installed in the container. - -Code runs in the terminal with real-time output streaming. Long-running processes, background jobs, and interactive sessions are supported. The agent can pause and resume code execution and interact with running processes. - -## Terminal and System Operations - -The agent has full root access to the Kali Linux Docker container. It can: -- Install packages via `apt`, `pip`, `npm`, and other package managers -- Create, read, write, move, and delete files anywhere in the container -- Run any system command, manage processes, set up services -- Access the network (HTTP requests, SSH, port scanning, etc.) -- Use Kali Linux security tools pre-installed in the container - -## Skills (SKILL.md Standard) - -Skills are structured markdown files that provide contextual expertise for specific tasks. When a skill is relevant to the current task, it is loaded into the agent's context and followed as a set of instructions. Skills are discovered from: -- `usr/skills/` (user-added skills) -- Project-scoped skills in `.a0proj/skills/` -- Skills imported via the web UI - -Skills follow the open SKILL.md standard, making them portable across tools that support it. The agent executes skill instructions using `code_execution_tool` or `skills_tool`. - -## Projects - -Projects provide isolated workspaces with their own: -- Working directory (`usr/projects//`) -- Memory and knowledge scope -- Custom agent instructions (`.a0proj/agent.instructions.md`) -- Secrets and credentials (stored encrypted, not visible in agent context) -- MCP server configurations -- Git repository (can be cloned directly with authentication) - -When a project is active, the agent's file operations, memory, and knowledge are scoped to that project. Projects prevent context bleed between separate work streams. 
- -## Knowledge Base Access - -The agent has automatic access to its knowledge base via similarity search. Knowledge is indexed from `knowledge/` (framework-level) and `usr/knowledge//` (user-level). The agent does not need to explicitly query knowledge - relevant content is surfaced automatically with memory recall. The `knowledge_tool` can also be called explicitly for targeted lookups. - -## Multi-Agent Delegation - -The agent can spawn subordinate agents with the `call_subordinate` tool. Subordinates can be given: -- Specific prompt profiles (`developer`, `researcher`, custom profiles) -- A defined role and task scope -- Access to the same tool set - -Delegation is used to: parallelize work, maintain clean context per task, apply specialized profiles, and isolate long subtasks from the main context. - -## Document Query - -The `document_query_tool` can load and query arbitrary documents (local files or URLs) using a separate RAG pipeline. Unlike the knowledge base (which is pre-indexed), this tool indexes documents on demand with a configurable chunk size. Useful for analyzing large documents, codebases, or external content without polluting the persistent knowledge store. - -## Scheduler - -The agent can schedule tasks to run at specified times or intervals using the scheduler tool. Scheduled tasks run in the background with their own agent instances. Tasks are managed via the Scheduler UI in the web interface. - -## External API and MCP - -Agent Zero can act as both an MCP server and an MCP client: -- As an **MCP server**: exposes agent capabilities to other MCP-compatible clients -- As an **MCP client**: uses tools from external MCP servers (configured per project or globally) - -An external REST API is available for programmatic task submission. Agent-to-Agent (A2A) protocol is supported for inter-system agent communication. - -## Limitations - -- **No persistent state between chats** unless explicitly memorized or saved to files. 
-- **Context window**: long conversations are summarized automatically, which can lose detail. -- **Memory recall is approximate**: similarity search may miss relevant memories or surface irrelevant ones. -- **No GUI interaction** outside built-in Browser tooling, A0 CLI host-browser mode, or configured computer-use integrations. -- **Container boundary**: the agent cannot affect systems outside the Docker container unless one of these bridges is available: network access, volume mounts, A0 CLI access, or A0 CLI host-browser mode. -- **Model capability ceiling**: tool usage quality and reasoning depth are bounded by the underlying LLM. Small models may struggle with complex multi-step tool use. -- **No real-time data** beyond web search. The agent's own knowledge cutoff is the underlying model's training cutoff. +Capabilities depend on enabled plugins, settings, model quality, permissions, and active project context. diff --git a/knowledge/main/about/configuration.md b/knowledge/main/about/configuration.md index 006d5be06..2f56bc5c5 100644 --- a/knowledge/main/about/configuration.md +++ b/knowledge/main/about/configuration.md @@ -1,109 +1,16 @@ -# Agent Zero - Configuration Reference +# Agent Zero Configuration -## LLM Roles +Main configuration lives in `usr/settings.json` and the Settings Web UI. -Agent Zero uses three configurable LLM roles: +LLM roles: +- `chat_llm`: primary reasoning and tool use +- `utility_llm`: summaries, memory queries, compression, filtering +- `embedding_llm`: vector embeddings for memory and knowledge -| Role | Purpose | -|------|---------| -| `chat_llm` | Primary model for all agent reasoning and tool use | -| `utility_llm` | Secondary model for internal framework tasks: memory summarization, query generation, history compression, memory recall filtering | -| `embedding_llm` | Produces vector embeddings for memory and knowledge indexing | +Profiles live in `agents//`; user profiles live in `usr/agents//`. 
Profiles override prompt fragments without changing the framework. -The utility model handles high-volume, lower-stakes operations and can be cheaper or faster than the chat model. +Plugins live in `plugins/` and `usr/plugins/`. Each plugin has a `plugin.yaml`; activation can be global or scoped to projects/profiles. -Browser automation is exposed through the direct `browser` tool. The main agent decides when to call it. +Projects isolate workdir, memory/knowledge scope, custom instructions, secrets, MCP config, and repositories. -The Browser defaults to Docker Playwright Chromium. It can optionally use A0 CLI Bring Your Own Browser mode for a host Chrome-family browser. - -Browser-owned helper operations can use a Browser LLM preset when configured. - -Changing the embedding model invalidates the existing vector index. The knowledge base is re-indexed automatically. - -## Model Providers - -Providers are defined in `conf/model_providers.yaml`. All chat and embedding providers go through LiteLLM, which normalizes the API interface. Supported chat providers (as of v0.9.8): - -- Agent Zero API (a0_venice) - hosted service with no API key required for basic use -- Anthropic, OpenAI, OpenRouter, Google (Gemini), Groq, Mistral AI -- DeepSeek, xAI, Moonshot AI, Sambanova, CometAPI, Z.AI, Inception AI -- Venice.ai, AWS Bedrock, Azure OpenAI -- GitHub Copilot, HuggingFace -- Ollama, LM Studio (local models) -- Other OpenAI-compatible endpoints (custom `api_base`) - -Embedding providers: OpenAI, Azure, Ollama, LM Studio, HuggingFace, Google, Mistral, OpenRouter (via OpenAI-compat), AWS Bedrock. 
- -### Model Naming Convention - -| Provider | Format | -|----------|--------| -| OpenAI | model name only (`gpt-4.1`, `o4-mini`) | -| Anthropic | model name only (`claude-sonnet-4-5`) | -| OpenRouter | `provider/model` (`anthropic/claude-sonnet-4-5`) | -| Ollama | model name only (`llama3.2`, `qwen2.5`) | -| Google | model name only (`gemini-2.0-flash`) | - -## Agent Profiles - -Profiles are in `agents//`. Each profile can override any prompt fragment from the base `prompts/` directory. Built-in profiles: - -| Profile | Description | -|---------|-------------| -| `default` | Base template for creating new profiles | -| `agent0` | Top-level general assistant; human as superior; delegates to specialized subordinates | -| `developer` | "Master Developer" - software architecture and full-stack implementation focus | -| `researcher` | "Deep Research" - research, analysis, and synthesis across academic and corporate domains | -| `hacker` | Red/blue team; penetration testing; Kali tools focus | -| `_example` | Minimal example for building custom profiles | - -Custom profiles go in `usr/agents//` to survive framework updates. - -## Plugin System - -Plugins are discovered from `plugins/` (framework plugins) and `usr/plugins/` (user plugins). Each plugin requires a `plugin.yaml` with at minimum: `name`, `description`, `version`. 
- -### Activation - -- **Global activation**: enabled/disabled for all contexts via the Plugins settings panel -- **Scoped activation**: enabled/disabled per project or per agent profile via the plugin Switch modal -- Activation state stored as `.toggle-1` (ON) and `.toggle-0` (OFF) files in the plugin's config dir - -### Built-in Framework Plugins - -| Plugin | Purpose | -|--------|---------| -| `_memory` | Memory and knowledge pipeline, recall, consolidation | -| `_code_execution` | Terminal and code execution tool | -| `_text_editor` | Structured file read/write/patch tool | - -## Environment Variable Configuration - -Any setting can be set via environment variable using the `A0_SET_` prefix. This is the primary mechanism for automated deployment and container configuration. - -Format: `A0_SET_=` - -Examples: -``` -A0_SET_chat_model_provider=anthropic -A0_SET_chat_model_name=claude-sonnet-4-5 -A0_SET_utility_model_provider=openai -A0_SET_utility_model_name=gpt-4o-mini -A0_SET_embedding_model_provider=openai -A0_SET_embedding_model_name=text-embedding-3-small -``` - -These can be set in the `.env` file at the project root or passed as Docker `-e` flags during container creation. - -## Key Behavioral Settings - -| Setting | Effect | -|---------|--------| -| `agent_knowledge_subdir` | Which knowledge subdir to load (default: `custom`, resolved to `usr/knowledge/`) | -| `memory_recall_interval` | How many loop iterations between automatic memory recalls | -| `memory_results` | Number of memory chunks returned per recall query | -| `memory_threshold` | Similarity threshold for memory recall (0-1); lower = more results, potentially less relevant | -| `auth_login` / `auth_password` | Web UI authentication credentials | -| `agent_temperature` | LLM temperature for the chat model | - -Settings are stored in `usr/settings.json` and managed through the Settings page in the web UI. 
The settings page also provides: API key management (multiple keys per provider with round-robin), backup/restore, external services (tunnels, MCP, A2A), and memory management. +Environment settings can use `A0_SET_=`. diff --git a/knowledge/main/about/identity.md b/knowledge/main/about/identity.md index 4f925c392..6aace1d42 100644 --- a/knowledge/main/about/identity.md +++ b/knowledge/main/about/identity.md @@ -1,34 +1,10 @@ -# Agent Zero - Identity and Design Philosophy +# Agent Zero Identity -## What Agent Zero Is +Agent Zero is an open-source, general-purpose agentic framework by Jan Tomasek and the Agent Zero community. It runs locally or on user-controlled infrastructure and uses tools, memory, plugins, projects, and subordinate agents to solve tasks. -Agent Zero is an open-source, general-purpose agentic framework. It is not pre-programmed for specific tasks and has no fixed capability set beyond the basics. Its defining characteristic is that it grows and adapts as it is used - accumulating knowledge, solutions, and behaviors through persistent memory and user customization. +Core idea: prompts and plugins define behavior; tools do the work; memory and knowledge provide recall when relevant. Keep user intent above framework lore. -The framework has been created by Jan Tomášek and is maintained by the Agent Zero dev team and the community. Source code lives at github.com/agent0ai/agent-zero. - -## Core Design Principles - -**No hard-coding.** Almost nothing in the framework is fixed in source code. Agent behavior, tool definitions, message templates, and response patterns are all controlled by files in the `prompts/` directory. Changing the prompts changes the agent - fundamentally if needed. - -**Transparency.** Every prompt, every message template, every tool implementation is readable and editable. There are no hidden instructions or black-box behaviors. The agent can be fully audited. 
- -**Computer as a tool.** Agent Zero does not have a library of pre-built skill functions. Instead, it uses the operating system directly - writing code, running terminal commands, and creating tools on demand. The terminal is the primary interface to everything. - -**Organic growth.** The agent accumulates knowledge through experience. Facts, solutions, discovered patterns, and useful code are stored in memory and recalled in future conversations. The agent becomes more effective at tasks it has done before. - -**Prompt-driven behavior.** The `prompts/` directory is the control plane. System prompts, tool instructions, framework messages, and utility AI prompts are all there. The agent's behavior is as good as its prompts. - -## Project Context - -- **Repository**: github.com/agent0ai/agent-zero -- **License**: Open source -- **Primary author**: Jan Tomášek -- **Community**: Discord (discord.gg/B8KZKNsPpj), Skool community, YouTube channel -- **Documentation**: docs/ folder in the repository; deepwiki.com/agent0ai/agent-zero for AI-generated docs -- **Current version**: v0.9.8 - -## Relationship With the User - -Agent Zero treats the human user as its top-level superior in the agent hierarchy. The user is functionally indistinguishable from a superior agent - they give tasks, receive reports, and can intervene at any time. The agent is not a chatbot that answers questions; it is an executor that solves tasks using whatever means are available to it. - -The framework is a personal tool, not a service. It runs locally (or on user-controlled infrastructure) and has access to the user's files, credentials, and systems as configured. This makes it powerful and requires the user to understand what they are delegating. 
+Project reference: +- Repository: `github.com/agent0ai/agent-zero` +- Documentation: `docs/` in the repo and DeepWiki for broad architecture lookup +- User data lives under `usr/` diff --git a/knowledge/main/about/setup-and-deployment.md b/knowledge/main/about/setup-and-deployment.md index c00f62021..90fd33f86 100644 --- a/knowledge/main/about/setup-and-deployment.md +++ b/knowledge/main/about/setup-and-deployment.md @@ -1,111 +1,23 @@ -# Agent Zero - Setup and Deployment +# Agent Zero Setup And Deployment -## Docker Deployment (Standard) - -Agent Zero is distributed as a Docker image: `agent0ai/agent-zero`. +Docker image: ```bash docker pull agent0ai/agent-zero docker run -p 50001:80 agent0ai/agent-zero ``` -The web UI is then accessible at `http://localhost:50001`. The container exposes port 80 internally; map any host port to it. +Persist user data by mounting `/a0/usr`: -## Persistence - -All user data lives in `/a0/usr/` inside the container. Without a volume mount, data is lost when the container is removed. - -Map `/a0/usr` to a host directory for persistence: ```bash docker run -p 50001:80 -v /path/on/host:/a0/usr agent0ai/agent-zero ``` -Contents of `/a0/usr/`: -- `settings.json` - all configuration including API keys and model settings -- `memory/` - FAISS vector indexes and knowledge import state -- `knowledge/` - user-added knowledge files -- `agents/` - custom agent profiles -- `plugins/` - user plugins -- `projects/` - project workspaces -- `work/` - default working directory for agent file output +After first start, configure API keys, chat model, utility model, and embedding model in Settings. Embeddings are required for memory and knowledge recall. -## Configuration After Start - -On first run, open Settings (gear icon) and configure: -1. **API Keys** - add at least one provider API key under the relevant provider section -2. **Chat Model** - select provider and model name for the primary LLM -3. 
**Utility Model** - select a cheaper/faster model for internal tasks -4. **Embedding Model** - select embedding provider and model (required for memory and knowledge) - -Settings are saved to `usr/settings.json` immediately on change. - -## Updating Agent Zero - -The recommended update process is to use Self Update: -1. Open **Settings UI → Update** tab -2. Open **Self Update** -3. Wait for the update checker to see if you have the latest version or if there's an available update - -You'll also be prompted through the UI when a new A0 version is released. Note that backups are automatically managed internally during the self-update process. - -### Updating from Pre-v0.9.8 - -If upgrading from v0.9.8 or earlier, the architecture has significantly changed. You must use the new install scripts and manually migrate your data: -1. Backup your existing `usr/` directory. -2. Run the Quick Install script (`curl -fsSL https://bash.agent-zero.ai | bash` for macOS/Linux or `irm https://ps.agent-zero.ai | iex` for Windows). -3. Copy your backed-up `usr/` contents into the new installation's `a0/usr/` directory to preserve your settings, memory, and plugins. - -## Remote Access - -### Flare Tunnel (recommended for external access) -Settings → External Services → Flare Tunnel → Create Tunnel - -This generates a public HTTPS URL without requiring firewall changes or a static IP. Set a username and password before creating the tunnel to enable authentication. - -### Local Network -Access from other devices on the same network using the host machine's IP: -`http://<host-ip>:<port>` - -### Microsoft Dev Tunnels -Supported as an alternative to Flare for users in Microsoft environments. Configure under External Services in Settings. - -## Mobile Access - -Agent Zero is a Progressive Web App (PWA). On mobile, open the web UI URL in a browser, then add to home screen for an app-like experience. Works with both local network and tunnel URLs.
- -## Common Troubleshooting - -**Agent responds but no memory/knowledge recall:** -- Check that an embedding model is configured (provider + model name) -- Verify the embedding provider API key is set -- Embedding model changes require re-indexing; this happens automatically but takes time on first run - -**"Model not found" or API errors:** -- Verify the model name matches the provider's naming convention exactly -- Check that the API key has access to the requested model -- For OpenRouter, model names must include the provider prefix (`anthropic/claude-sonnet-4-5`) - -**Container starts but web UI unreachable:** -- Confirm the host port mapping in `docker ps` -- Check that no firewall rule blocks the mapped port -- The container needs a few seconds to initialize on first start - -**Knowledge files not being recalled:** -- Supported formats: `.md`, `.txt`, `.pdf`, `.csv`, `.html`, `.json` -- Files must be in `knowledge/` (framework level) or `usr/knowledge/<subdir>/` -- The configured `agent_knowledge_subdir` must match the subdir where files are placed -- Re-indexing is triggered automatically when file checksums change - -**Ollama / local model setup:** -- Ollama must be running and accessible from inside the Docker container -- Use `http://host.docker.internal:<port>` as the API URL for Ollama (not `localhost`) -- Pull the model first: `ollama pull <model-name>` - -## Development Setup (non-Docker) +For local development: ```bash -git clone https://github.com/agent0ai/agent-zero -cd agent-zero python -m venv .venv source .venv/bin/activate pip install -r requirements.txt @@ -113,4 +25,8 @@ pip install -r requirements2.txt python run_ui.py ``` -The dev server runs on `http://localhost:5000` by default. User data is written to `usr/` in the project root. +Typical troubleshooting: +- Web UI unreachable: check `docker ps`, port mapping, and startup logs. +- Model errors: verify provider, model name, and API key.
+- Memory/knowledge not recalling: verify embedding config and reindex if needed. +- Host-local access: use A0 CLI connector tools, not Docker tools. diff --git a/knowledge/main/tool_call_reference_examples.md b/knowledge/main/tool_call_reference_examples.md deleted file mode 100644 index e44d21270..000000000 --- a/knowledge/main/tool_call_reference_examples.md +++ /dev/null @@ -1,79 +0,0 @@ -# Tool Call Reference Examples - -These examples are intentionally short and high signal so tool-call shape guidance -can live in knowledge without bloating the default prompt stack. - -## 1) Namespaced tool (`text_editor`) vs non-namespaced tool (`code_execution_tool`) - -- `text_editor` requires method in `tool_name`: - - `text_editor:read` - - `text_editor:write` - - `text_editor:patch` -- `code_execution_tool` uses a plain tool name plus behavior in `tool_args.runtime`. - -### Example A: read file lines with namespaced tool - -```json -{ - "tool_name": "text_editor:read", - "tool_args": { - "path": "/workspace/agent-zero/README.md", - "line_from": 1, - "line_to": 60 - } -} -``` - -### Example B: run shell command with `code_execution_tool` - -```json -{ - "tool_name": "code_execution_tool", - "tool_args": { - "runtime": "terminal", - "session": 0, - "reset": false, - "code": "pwd" - } -} -``` - -### Example C: poll ongoing terminal output - -```json -{ - "tool_name": "code_execution_tool", - "tool_args": { - "runtime": "output", - "session": 0 - } -} -``` - -## 2) Memory tools use plain names and structured args - -```json -{ - "tool_name": "memory_load", - "tool_args": { - "query": "tool argument format", - "limit": 3, - "threshold": 0.7 - } -} -``` - -## 3) Subordinate tool booleans are JSON booleans - -```json -{ - "tool_name": "call_subordinate", - "tool_args": { - "profile": "", - "message": "Review this patch for edge cases.", - "reset": true - } -} -``` - -Use these examples as structure references only. Adapt arguments to the current task. 
diff --git a/plugins/_a0_connector/extensions/python/_functions/extensions/python/system_prompt/_11_tools_prompt/build_prompt/end/_70_include_remote_tool_stubs.py b/plugins/_a0_connector/extensions/python/_functions/extensions/python/system_prompt/_11_tools_prompt/build_prompt/end/_70_include_remote_tool_stubs.py index 4f437c5c4..52b8137e9 100644 --- a/plugins/_a0_connector/extensions/python/_functions/extensions/python/system_prompt/_11_tools_prompt/build_prompt/end/_70_include_remote_tool_stubs.py +++ b/plugins/_a0_connector/extensions/python/_functions/extensions/python/system_prompt/_11_tools_prompt/build_prompt/end/_70_include_remote_tool_stubs.py @@ -1,174 +1,10 @@ from __future__ import annotations -from dataclasses import dataclass from typing import Any from helpers.extension import Extension -from plugins._a0_connector.helpers.ws_runtime import ( - computer_use_metadata_for_sid, - remote_exec_metadata_for_sid, - remote_file_metadata_for_sid, - subscribed_sids_for_context, -) - - -@dataclass(frozen=True) -class RemoteFileCapability: - available: bool - write_enabled: bool = False - access_mode: str = "Unknown" - advertised: bool = False - class IncludeRemoteToolStubs(Extension): def execute(self, data: dict[str, Any] = {}, **kwargs: Any) -> None: - if not self.agent: - return - - result = data.get("result") - if not isinstance(result, str): - return - - context_id = str(getattr(self.agent.context, "id", "") or "").strip() - if not context_id: - return - - stubs: list[str] = [] - file_capability = _remote_file_capability(context_id) - - if file_capability.available: - stubs.append( - self.agent.read_prompt( - "agent.connector_tool.text_editor_remote.md", - access_mode=file_capability.access_mode, - write_guidance=_file_write_guidance(file_capability), - ) - ) - - if _remote_exec_available(context_id): - stubs.append( - self.agent.read_prompt( - "agent.connector_tool.code_execution_remote.md", - access_mode=file_capability.access_mode, - 
write_runtime_note=_exec_write_runtime_note(file_capability), - ) - ) - - computer_use = _computer_use_capability(context_id) - if computer_use: - stubs.append( - self.agent.read_prompt( - "agent.connector_tool.computer_use_remote.md", - backend=computer_use["backend"], - trust_mode=computer_use["trust_mode"], - features=computer_use["features"], - ) - ) - - if not stubs: - return - - data["result"] = ( - result.rstrip() - + "\n\n" - + "\n\n".join(stub.strip() for stub in stubs if stub.strip()) - ) - - -def _subscribed_sids(context_id: str) -> list[str]: - return sorted(subscribed_sids_for_context(context_id)) - - -def _remote_file_capability(context_id: str) -> RemoteFileCapability: - saw_advertised = False - saw_enabled = False - saw_write_enabled = False - - for sid in _subscribed_sids(context_id): - metadata = remote_file_metadata_for_sid(sid) - if not metadata: - continue - saw_advertised = True - if not metadata.get("enabled", True): - continue - saw_enabled = True - if metadata.get("write_enabled"): - saw_write_enabled = True - - if not saw_enabled: - return RemoteFileCapability( - available=False, - access_mode="Disabled" if saw_advertised else "Unknown", - advertised=saw_advertised, - ) - - return RemoteFileCapability( - available=True, - write_enabled=saw_write_enabled, - access_mode="Read&Write" if saw_write_enabled else "Read only", - advertised=True, - ) - - -def _remote_exec_available(context_id: str) -> bool: - for sid in _subscribed_sids(context_id): - metadata = remote_exec_metadata_for_sid(sid) - if metadata and metadata.get("enabled"): - return True - return False - - -def _computer_use_capability(context_id: str) -> dict[str, str] | None: - for sid in _subscribed_sids(context_id): - metadata = computer_use_metadata_for_sid(sid) - if not metadata or not metadata.get("supported") or not metadata.get("enabled"): - continue - - backend_id = str(metadata.get("backend_id") or "").strip() or "unknown" - backend_family = 
str(metadata.get("backend_family") or "").strip() - backend = backend_id if not backend_family else f"{backend_id}/{backend_family}" - trust_mode = str(metadata.get("trust_mode") or "").strip() or "unknown" - features_value = metadata.get("features") - if isinstance(features_value, (list, tuple)): - features = ", ".join( - str(item).strip() for item in features_value if str(item).strip() - ) - else: - features = "" - - return { - "backend": backend, - "trust_mode": trust_mode, - "features": features or "none advertised", - } - - return None - - -def _file_write_guidance(capability: RemoteFileCapability) -> str: - if capability.write_enabled: - return "Writes and patches are currently available." - return ( - "Writes and patches are disabled until the user switches the CLI to " - "Read&Write with F3." - ) - - -def _exec_write_runtime_note(capability: RemoteFileCapability) -> str: - if capability.write_enabled: - return "Mutating runtimes are currently available because local access is Read&Write." - if capability.available: - return ( - "Mutating runtimes are disabled until the user switches the CLI to " - "Read&Write with F3; use output/reset only for existing sessions." - ) - if capability.advertised: - return ( - "The CLI advertises remote file access as disabled; mutating runtimes " - "are unavailable until local file access is enabled." - ) - return ( - "The CLI did not advertise a file access mode; prefer non-mutating " - "inspection until access is clear." 
- ) + return diff --git a/plugins/_a0_connector/helpers/ws_runtime.py b/plugins/_a0_connector/helpers/ws_runtime.py index e56b71aab..406c3df5d 100644 --- a/plugins/_a0_connector/helpers/ws_runtime.py +++ b/plugins/_a0_connector/helpers/ws_runtime.py @@ -157,6 +157,24 @@ def subscribed_sids_for_context(context_id: str) -> set[str]: return set(_context_subscriptions.get(context_id, set())) +def connected_sids() -> set[str]: + with _state_lock: + return set(_sid_contexts.keys()) + + +def _candidate_sids_for_context_locked(context_id: str) -> list[str]: + context_sids = sorted(_context_subscriptions.get(context_id, set())) + context_set = set(context_sids) + global_sids = sorted(sid for sid in _sid_contexts if sid not in context_set) + return context_sids + global_sids + + +def remote_tool_sids_for_context(context_id: str) -> list[str]: + """Return connected CLI candidates, preferring clients subscribed to context_id.""" + with _state_lock: + return _candidate_sids_for_context_locked(context_id) + + def store_remote_tree_snapshot( sid: str, payload: dict[str, Any], @@ -183,24 +201,31 @@ def latest_remote_tree_for_context( ) -> dict[str, Any] | None: now = time.time() with _state_lock: - subscribers = _context_subscriptions.get(context_id, set()) - snapshots = [ - _remote_tree_snapshots[sid] - for sid in subscribers - if sid in _remote_tree_snapshots + context_sids = sorted(_context_subscriptions.get(context_id, set())) + context_set = set(context_sids) + global_sids = sorted(sid for sid in _sid_contexts if sid not in context_set) + snapshot_groups = [ + [ + _remote_tree_snapshots[sid] + for sid in context_sids + if sid in _remote_tree_snapshots + ], + [ + _remote_tree_snapshots[sid] + for sid in global_sids + if sid in _remote_tree_snapshots + ], ] - if not snapshots: - return None - - snapshots.sort(key=lambda item: item.updated_at, reverse=True) - for snapshot in snapshots: - if max_age_seconds > 0 and now - snapshot.updated_at > max_age_seconds: - continue - payload 
= dict(snapshot.payload) - payload["sid"] = snapshot.sid - payload["updated_at"] = snapshot.updated_at - return payload + for snapshots in snapshot_groups: + snapshots.sort(key=lambda item: item.updated_at, reverse=True) + for snapshot in snapshots: + if max_age_seconds > 0 and now - snapshot.updated_at > max_age_seconds: + continue + payload = dict(snapshot.payload) + payload["sid"] = snapshot.sid + payload["updated_at"] = snapshot.updated_at + return payload return None @@ -248,20 +273,16 @@ def remote_file_metadata_for_sid(sid: str) -> dict[str, Any] | None: def select_remote_file_target_sid(context_id: str, *, require_writes: bool = False) -> str | None: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) - fallback_sid: str | None = None - for sid in subscribers: + for sid in _candidate_sids_for_context_locked(context_id): metadata = _sid_remote_file_metadata.get(sid) if metadata is None: - if fallback_sid is None: - fallback_sid = sid continue if not metadata.enabled: continue if require_writes and not metadata.write_enabled: continue return sid - return fallback_sid + return None def store_sid_remote_exec_metadata(sid: str, payload: dict[str, Any]) -> RemoteExecMetadata: @@ -292,23 +313,19 @@ def remote_exec_metadata_for_sid(sid: str) -> dict[str, Any] | None: def select_remote_exec_target_sid(context_id: str, *, require_writes: bool = False) -> str | None: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) - fallback_sid: str | None = None - for sid in subscribers: + for sid in _candidate_sids_for_context_locked(context_id): metadata = _sid_remote_exec_metadata.get(sid) if metadata is None: - if fallback_sid is None: - fallback_sid = sid continue if metadata.enabled: if require_writes: file_metadata = _sid_remote_file_metadata.get(sid) - if file_metadata is not None and ( + if file_metadata is None or ( not file_metadata.enabled or not file_metadata.write_enabled ): continue return sid 
- return fallback_sid + return None def store_sid_computer_use_metadata(sid: str, payload: dict[str, Any]) -> ComputerUseMetadata: @@ -429,9 +446,8 @@ def host_browser_metadata_for_sid(sid: str) -> dict[str, Any] | None: def select_host_browser_target_sid(context_id: str) -> str | None: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) fallback: str | None = None - for sid in subscribers: + for sid in _candidate_sids_for_context_locked(context_id): metadata = _sid_host_browser_metadata.get(sid) if not metadata: continue @@ -446,9 +462,8 @@ def select_host_browser_target_sid(context_id: str) -> str | None: def select_host_browser_candidate_sid(context_id: str) -> str | None: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) fallback: str | None = None - for sid in subscribers: + for sid in _candidate_sids_for_context_locked(context_id): metadata = _sid_host_browser_metadata.get(sid) if not metadata or not (metadata.supported or metadata.can_prepare): continue @@ -463,9 +478,9 @@ def select_host_browser_candidate_sid(context_id: str) -> str | None: def host_browser_metadata_for_context(context_id: str) -> list[dict[str, Any]]: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) + candidates = _candidate_sids_for_context_locked(context_id) rows: list[dict[str, Any]] = [] - for sid in subscribers: + for sid in candidates: metadata = host_browser_metadata_for_sid(sid) if metadata is not None: metadata["sid"] = sid @@ -498,8 +513,7 @@ def all_host_browser_metadata() -> list[dict[str, Any]]: def select_computer_use_target_sid(context_id: str) -> str | None: with _state_lock: - subscribers = sorted(_context_subscriptions.get(context_id, set())) - for sid in subscribers: + for sid in _candidate_sids_for_context_locked(context_id): metadata = _sid_computer_use_metadata.get(sid) if metadata and metadata.supported and metadata.enabled: return sid diff --git 
a/plugins/_a0_connector/prompts/agent.connector_tool.code_execution_remote.md b/plugins/_a0_connector/prompts/agent.connector_tool.code_execution_remote.md deleted file mode 100644 index a6f67717d..000000000 --- a/plugins/_a0_connector/prompts/agent.connector_tool.code_execution_remote.md +++ /dev/null @@ -1,27 +0,0 @@ -# code_execution_remote tool - -Runs shell-backed execution on the machine where the subscribed A0 CLI is running. -Load `code-execution-remote` before using this tool for nontrivial local project work. - -Current local access mode: `{{access_mode}}` - -## Requirements -- A CLI client is subscribed to this chat and advertises remote execution. -- Paths and shell syntax are evaluated on the CLI host, not inside Agent Zero. -- {{write_runtime_note}} - -## Arguments -- `runtime`: one of `terminal`, `python`, `nodejs`, `output`, `reset` -- `runtime=input` is a temporary deprecated compatibility alias for sending one line of - keyboard input into a running shell session -- `session`: integer session id (default `0`) - -Runtime-specific fields: -- `terminal`, `python`, `nodejs`: require `code` -- `input`: requires `keyboard` (or `code` as fallback) -- `reset`: optional `reason` - -## Notes -- Reuse `session` when continuing a workflow. -- Use `output` to poll a running session and `reset` for a stuck session. -- If the CLI returns a disabled/no-client error, ask the user to enable or reconnect the CLI instead of falling back to server-side execution. diff --git a/plugins/_a0_connector/prompts/agent.connector_tool.computer_use_remote.md b/plugins/_a0_connector/prompts/agent.connector_tool.computer_use_remote.md deleted file mode 100644 index 70e65d249..000000000 --- a/plugins/_a0_connector/prompts/agent.connector_tool.computer_use_remote.md +++ /dev/null @@ -1,27 +0,0 @@ -# computer_use_remote tool - -Controls the subscribed A0 CLI host machine as a local desktop target. -Load `computer-use-remote` before using this tool. 
- -## Requirements -- A CLI client is subscribed to this chat and advertises enabled local computer use. -- Backend: `{{backend}}` -- Trust mode: `{{trust_mode}}` -- Features: `{{features}}` - -## Arguments -- `action`: one of `start_session`, `status`, `capture`, `move`, `click`, `scroll`, `key`, `type`, `stop_session` -- `session_id`: optional for actions after `start_session` - -Action-specific fields: -- `move`: `x`, `y` normalized to `[0,1]` -- `click`: optional `x`, `y`, plus optional `button` (`left`, `right`, `middle`) and `count` -- `scroll`: `dx`, `dy` -- `key`: `key` or `keys` -- `type`: `text`, optional `submit` boolean - -## Runtime Notes -- Use `start_session` before interactive actions. `status` only inspects state. -- Successful interactive actions attach a fresh screenshot; base decisions on the latest capture. -- Prefer keyboard/accessibility routes before pointer actions. -- Coordinates are normalized global screen coordinates. diff --git a/plugins/_a0_connector/prompts/agent.connector_tool.text_editor_remote.md b/plugins/_a0_connector/prompts/agent.connector_tool.text_editor_remote.md deleted file mode 100644 index 35c3f1f85..000000000 --- a/plugins/_a0_connector/prompts/agent.connector_tool.text_editor_remote.md +++ /dev/null @@ -1,21 +0,0 @@ -# text_editor_remote tool - -Reads, writes, and patches files on the machine where the subscribed A0 CLI is running. -This is different from server-side file tools. Load `text-editor-remote` before using it for edits. - -Current access mode: `{{access_mode}}` - -## Requirements -- A CLI client is subscribed to this chat and advertises remote file access. -- Paths are evaluated on the CLI host filesystem, not the Agent Zero server. -- {{write_guidance}} - -## Operations -- `read`: optional `line_from`, `line_to` -- `write`: requires `content` -- `patch`: requires either `patch_text` or `edits` - -## Notes -- Prefer `read` before line-number edits. 
-- Prefer `patch_text` for context-anchored changes and `edits` only for fresh, surgical line ranges. -- If freshness checks reject a line patch, reread the file and retry with updated ranges. diff --git a/plugins/_a0_connector/prompts/agent.system.tool.code_execution_remote.md b/plugins/_a0_connector/prompts/agent.system.tool.code_execution_remote.md new file mode 100644 index 000000000..3b797d23e --- /dev/null +++ b/plugins/_a0_connector/prompts/agent.system.tool.code_execution_remote.md @@ -0,0 +1,46 @@ +# code_execution_remote tool + +Runs shell-backed execution on the machine where a connected A0 CLI is running. +Use this tool, not `code_execution_tool`, when the user asks for the connected +local terminal, the A0 CLI host, their local machine, or explicitly says not to +use Docker/server/container execution. +For complex local project work, optionally load skill `code-execution-remote`. + +Availability and permissions are checked when the tool runs. If no CLI is +connected, remote execution is disabled, or local access is not Read&Write for a +mutating command, report that to the user instead of falling back to server-side +execution. + +## Arguments +- `runtime`: one of `terminal`, `python`, `nodejs`, `output`, `reset` +- `session`: integer session id (default `0`) + +Runtime-specific fields: +- `terminal`, `python`, `nodejs`: require `code` +- `reset`: optional `reason` + +## Notes +- Reuse `session` when continuing a workflow. +- Use `output` to poll a running session and `reset` for a stuck session. +- Paths and shell syntax are evaluated on the CLI host, not inside Agent Zero. +- When the user gives a relative path like `tmp/file.txt`, keep it relative to + the CLI host terminal. Do not prepend or `cd` to `/a0/usr/workdir`; that is the + Agent Zero server/Docker workdir, not the connected local terminal folder. +- If the current terminal folder matters, run `pwd` first or include `pwd` in + the same command without changing directories. 
+ +## Usage +~~~json +{ + "thoughts": [ + "The user asked for the connected local terminal rather than Docker, so I should execute on the A0 CLI host." + ], + "headline": "Running command on connected local terminal", + "tool_name": "code_execution_remote", + "tool_args": { + "runtime": "terminal", + "session": 0, + "code": "pwd" + } +} +~~~ diff --git a/plugins/_a0_connector/prompts/agent.system.tool.text_editor_remote.md b/plugins/_a0_connector/prompts/agent.system.tool.text_editor_remote.md new file mode 100644 index 000000000..94fac7e8c --- /dev/null +++ b/plugins/_a0_connector/prompts/agent.system.tool.text_editor_remote.md @@ -0,0 +1,41 @@ +# text_editor_remote tool + +Reads, writes, and patches files on the machine where a connected A0 CLI is +running. Use this tool, not server-side file tools, when the user asks for files +on the connected local machine, A0 CLI host, or explicitly says not to use +Docker/server files. For complex remote edits, optionally load skill `text-editor-remote`. + +Availability and permissions are checked when the tool runs. If no CLI is +connected, remote file access is disabled, or a write/patch needs Read&Write, +report that to the user instead of falling back to server-side file tools. + +## Arguments +- `action`: `read`, `write`, or `patch` +- `path`: file path on the CLI host filesystem +- `read`: optional `line_from`, `line_to` +- `write`: requires `content` +- `patch`: requires either `patch_text` or `edits` + +## Notes +- Prefer `read` before line-number edits. +- Prefer `patch_text` for context-anchored changes and `edits` only for fresh, surgical line ranges. +- If freshness checks reject a line patch, reread the file and retry with updated ranges. +- Relative paths are relative to the CLI host filesystem. Do not rewrite them to + `/a0/usr/workdir`; that path belongs to the Agent Zero server/Docker side. 
+ +## Usage +~~~json +{ + "thoughts": [ + "The user asked for a file on the connected local machine, so I should read it through the A0 CLI host." + ], + "headline": "Reading file on connected local machine", + "tool_name": "text_editor_remote", + "tool_args": { + "action": "read", + "path": "README.md", + "line_from": 1, + "line_to": 80 + } +} +~~~ diff --git a/plugins/_a0_connector/skills/code-execution-remote/SKILL.md b/plugins/_a0_connector/skills/code-execution-remote/SKILL.md index 331587026..6fba66b7b 100644 --- a/plugins/_a0_connector/skills/code-execution-remote/SKILL.md +++ b/plugins/_a0_connector/skills/code-execution-remote/SKILL.md @@ -10,6 +10,14 @@ trigger_patterns: - "run commands on the cli host" - "run python on the cli host" - "run node on the cli host" + - "connected local terminal" + - "connected terminal" + - "local terminal" + - "my terminal" + - "a0 cli" + - "cli host" + - "not docker" + - "not the docker terminal" allowed_tools: - code_execution_remote --- @@ -34,12 +42,11 @@ If the task belongs inside Agent Zero's own runtime, use the normal server-side - Reuse the same integer `session` while continuing a workflow; session state is local to the CLI frontend. - Use `runtime=output` when a previous command is still running or returned before the shell reached a prompt. - Use `runtime=reset` when a session is stuck or a clean shell is safer. -- Treat `runtime=input` as deprecated compatibility for sending one line to a running shell. - Match the remote host shell syntax. A Windows CLI may need PowerShell syntax even when Agent Zero runs on Linux. ## Failure Handling -- If no CLI is connected or subscribed, ask the user to connect A0 CLI to this chat. +- If no CLI is connected, ask the user to connect A0 CLI to this Agent Zero instance. - If execution is disabled, tell the user to enable remote execution in the CLI. - If mutating runtimes are blocked, tell the user to switch local file access to Read&Write with F3. 
- If a request times out or the CLI disconnects, poll once if a session may still be running; otherwise summarize the failure and wait for reconnection. diff --git a/plugins/_a0_connector/skills/text-editor-remote/SKILL.md b/plugins/_a0_connector/skills/text-editor-remote/SKILL.md index 457469e55..e8f2170b6 100644 --- a/plugins/_a0_connector/skills/text-editor-remote/SKILL.md +++ b/plugins/_a0_connector/skills/text-editor-remote/SKILL.md @@ -10,6 +10,11 @@ trigger_patterns: - "edit my local files through a0 cli" - "read files on the cli host" - "patch files on the cli host" + - "connected local files" + - "connected local machine files" + - "local files not docker" + - "a0 cli files" + - "cli host files" allowed_tools: - text_editor_remote --- @@ -46,6 +51,6 @@ If the task belongs inside Agent Zero's own runtime, use the normal server-side ## Failure Handling -- If no CLI is connected or subscribed, ask the user to connect A0 CLI to this chat. +- If no CLI is connected, ask the user to connect A0 CLI to this Agent Zero instance. - If writes are blocked, tell the user to switch local file access to Read&Write with F3. - If a request times out or the CLI disconnects, summarize the failure and wait for reconnection. 
diff --git a/plugins/_a0_connector/tools/code_execution_remote.py b/plugins/_a0_connector/tools/code_execution_remote.py index f69757283..248b62e5c 100644 --- a/plugins/_a0_connector/tools/code_execution_remote.py +++ b/plugins/_a0_connector/tools/code_execution_remote.py @@ -13,9 +13,9 @@ from plugins._a0_connector.helpers.ws_runtime import ( clear_pending_exec_op, remote_exec_metadata_for_sid, remote_file_metadata_for_sid, + remote_tool_sids_for_context, select_remote_exec_target_sid, store_pending_exec_op, - subscribed_sids_for_context, ) @@ -67,24 +67,21 @@ class CodeExecutionRemote(Tool): ) context_id = self.agent.context.id - subscribers = subscribed_sids_for_context(context_id) + candidates = remote_tool_sids_for_context(context_id) require_writes = self._runtime_requires_write_access(runtime) sid = select_remote_exec_target_sid(context_id, require_writes=require_writes) if not sid: exec_enabled = False write_blocked = False - for subscriber_sid in subscribers: - exec_metadata = remote_exec_metadata_for_sid(subscriber_sid) - if exec_metadata is None: - exec_enabled = True - continue - if not exec_metadata.get("enabled"): + for candidate_sid in candidates: + exec_metadata = remote_exec_metadata_for_sid(candidate_sid) + if exec_metadata is None or not exec_metadata.get("enabled"): continue exec_enabled = True if not require_writes: break - file_metadata = remote_file_metadata_for_sid(subscriber_sid) - if file_metadata is not None and ( + file_metadata = remote_file_metadata_for_sid(candidate_sid) + if file_metadata is None or ( not file_metadata.get("enabled", True) or not file_metadata.get("write_enabled") ): @@ -92,16 +89,16 @@ class CodeExecutionRemote(Tool): return Response( message=( - "code_execution_remote: no subscribed CLI in this context currently allows " + "code_execution_remote: no connected CLI currently allows " "shell-backed execution that may modify local files. Press F3 to switch " "the CLI to Read&Write. 
`runtime=output` and `runtime=reset` remain " "available for existing sessions." - if subscribers and require_writes and exec_enabled and write_blocked - else "code_execution_remote: no subscribed CLI in this context currently has " + if candidates and require_writes and exec_enabled and write_blocked + else "code_execution_remote: no connected CLI currently has " "remote execution enabled. Connect the CLI and press F4 to switch exec on." - if subscribers - else "code_execution_remote: no CLI client connected to this context. " - "Make sure the CLI is connected and subscribed." + if candidates + else "code_execution_remote: no CLI client connected to Agent Zero. " + "Make sure the CLI is connected to this instance." ), break_loop=False, ) diff --git a/plugins/_a0_connector/tools/computer_use_remote.py b/plugins/_a0_connector/tools/computer_use_remote.py index 470011048..c43cf7e92 100644 --- a/plugins/_a0_connector/tools/computer_use_remote.py +++ b/plugins/_a0_connector/tools/computer_use_remote.py @@ -69,8 +69,8 @@ class ComputerUseRemote(Tool): if not sid: return Response( message=( - "computer_use_remote: no subscribed CLI in this context currently advertises " - "enabled local computer use. Enable it in the CLI with F2 and choose a trust mode first." + "computer_use_remote: no connected CLI currently advertises enabled local " + "computer use. Enable it in the CLI and choose a trust mode first." 
), break_loop=False, ) diff --git a/plugins/_a0_connector/tools/text_editor_remote.py b/plugins/_a0_connector/tools/text_editor_remote.py index d8c9ff956..f4140ca3d 100644 --- a/plugins/_a0_connector/tools/text_editor_remote.py +++ b/plugins/_a0_connector/tools/text_editor_remote.py @@ -18,9 +18,10 @@ from plugins._a0_connector.helpers.text_editor_freshness import ( ) from plugins._a0_connector.helpers.ws_runtime import ( clear_pending_file_op, + remote_file_metadata_for_sid, + remote_tool_sids_for_context, select_remote_file_target_sid, store_pending_file_op, - subscribed_sids_for_context, ) from plugins._text_editor.helpers.patch_request import parse_patch_request @@ -37,15 +38,23 @@ class TextEditorRemote(Tool): """Send file-editing operations to the connected CLI machine.""" async def execute(self, **kwargs: Any) -> Response: - op = str(self.args.get("op") or self.args.get("operation", "")).strip().lower() + op = ( + str( + self.args.get("action") + or "" + ) + .strip() + .lower() + .replace("-", "_") + ) if not op: return Response( - message="op is required (read, write, or patch)", + message="action is required (read, write, or patch)", break_loop=False, ) if op not in {"read", "write", "patch"}: return Response( - message=f"Unknown operation: {op!r}. Use read, write, or patch.", + message=f"Unknown action: {op!r}. Use read, write, or patch.", break_loop=False, ) @@ -150,23 +159,30 @@ class TextEditorRemote(Tool): ) -> dict[str, Any]: context_id = self.agent.context.id require_writes = op in {"write", "patch"} - subscribers = subscribed_sids_for_context(context_id) + candidates = remote_tool_sids_for_context(context_id) sid = select_remote_file_target_sid(context_id, require_writes=require_writes) if not sid: - if not subscribers: + if not candidates: error = ( - "text_editor_remote: no CLI client connected to this context. " - "Make sure the CLI is connected and subscribed." + "text_editor_remote: no CLI client connected to Agent Zero. 
" + "Make sure the CLI is connected to this instance." ) elif require_writes: + write_blocked = any( + (metadata := remote_file_metadata_for_sid(candidate_sid)) + and metadata.get("enabled", True) + and not metadata.get("write_enabled") + for candidate_sid in candidates + ) error = ( - "text_editor_remote: no subscribed CLI in this context currently allows " - "remote file writes. Press F3 to switch the CLI to Read&Write." + "text_editor_remote: no connected CLI currently allows remote file writes. " + "Press F3 to switch the CLI to Read&Write." + if write_blocked + else "text_editor_remote: no connected CLI currently advertises remote file access." ) else: error = ( - "text_editor_remote: no subscribed CLI in this context currently advertises " - "remote file access." + "text_editor_remote: no connected CLI currently advertises remote file access." ) return { "ok": False, diff --git a/plugins/_browser/prompts/agent.system.tool.browser.md b/plugins/_browser/prompts/agent.system.tool.browser.md index ced78e382..851eaa77a 100644 --- a/plugins/_browser/prompts/agent.system.tool.browser.md +++ b/plugins/_browser/prompts/agent.system.tool.browser.md @@ -1,180 +1,33 @@ ### browser -direct Playwright browser control with optional visible WebUI viewer -use for web browsing, page inspection, forms, downloads, and browser-only tasks -state stays open per chat context -refs come from content as typed markers: [link 3], [button 6], [image 1], [input text 8] -Depending on project settings, the same browser tool may run in container mode or host mode. Container mode runs inside the project's container browser; host mode runs through A0 CLI against a Chrome-family browser on the user's host machine. Check or change the mode in the Browser project settings or plugin config. In host mode, page content and screenshots may be blocked to protect local browser data when remote models are active; local models are allowed by the host-content policy. 
+Rendered browser automation for pages that need interaction, JavaScript, forms, downloads, screenshots, or visual inspection. -Browser tool actions must not open a Browser surface automatically. Use the tool headlessly unless the user opens the Browser surface or explicitly asks for a visible browser view; if the Browser surface is already open, it may reflect the active page. +Prefer `search_engine` or `document_query` for plain text research. Use the browser headlessly unless the user opens the Browser surface or asks for a visible browser. -Browser does not automatically load screenshots or surface images into model context. Screenshots are explicit only. +The browser may run in Docker container mode or A0 CLI host-browser mode depending on settings. Container-mode paths resolve inside Agent Zero; host-mode paths resolve on the connected A0 CLI host. -resource hygiene: -- reuse an existing tab with navigate for serial research instead of opening a new tab for every result -- keep only a small working set of tabs open; close pages with close or close_all after extracting what you need -- avoid list with include_content:true when many tabs are open; call content on the specific tab instead -- avoid large multi fan-outs unless the user explicitly needs parallel browsing -- prefer search_engine/document_query for text research and use browser for pages that need interaction, rendering, login, forms, or visual inspection +For complex browser workflows, load skill `browser-tool`. For fragile forms, load skill `browser-forms`. 
-actions: open list state set_active navigate back forward reload content detail screenshot click hover double_click right_click drag type submit type_submit scroll evaluate key_chord mouse wheel keyboard clipboard set_viewport select_option set_checked upload_file multi close close_all -common args: action browser_id url ref target_ref text selector selectors script modifiers keys key include_content focus_popup event_type x y to_x to_y offset_x offset_y target_offset_x target_offset_y delta_x delta_y button quality full_page path paths value values checked width height calls +Actions: `open`, `list`, `state`, `set_active`, `navigate`, `back`, `forward`, `reload`, `content`, `detail`, `screenshot`, `click`, `hover`, `double_click`, `right_click`, `drag`, `type`, `submit`, `type_submit`, `scroll`, `evaluate`, `key_chord`, `mouse`, `wheel`, `keyboard`, `clipboard`, `set_viewport`, `select_option`, `set_checked`, `upload_file`, `multi`, `close`, `close_all`. -workflow: -- open creates a new browser and returns id/state -- navigate reuses an existing browser_id and should be preferred during serial browsing -- content returns readable page markdown with typed refs -- detail inspects one ref, including link/image/input/button metadata -- click/type/type_submit/submit/scroll use refs from latest content capture and return {action,state} -- navigate/back/forward/reload return fresh state -- list shows open browsers; pass include_content: true for one-call bulk read +Common args: `action`, `browser_id`, `url`, `ref`, `target_ref`, `text`, `selector`, `selectors`, `script`, `modifiers`, `keys`, `key`, `include_content`, `focus_popup`, `event_type`, `x`, `y`, `to_x`, `to_y`, `delta_x`, `delta_y`, `button`, `quality`, `full_page`, `path`, `paths`, `value`, `values`, `checked`, `width`, `height`, `calls`. -explicit vision workflow: -1. call browser with action: "screenshot" -2. call vision_load with the returned path -3. 
reason from the latest loaded screenshot, not an older screenshot +Workflow: +- `open` creates a tab and returns id/state. +- `content` returns markdown with refs like `[link 3]`, `[button 6]`, `[input text 8]`. +- Interactions use refs from the latest `content` capture. +- `navigate` reuses an existing `browser_id` and is preferred for serial browsing. +- Screenshots are explicit only; call `vision_load` with the returned path before reasoning visually. +- Keep the tab set small; close pages after extracting what you need. -screenshot: -- saves a JPEG by default and returns path, a0_path, mime, state, and a ready vision_load tool_args object -- pass quality 20..95, full_page true/false, or path -- PNG is used only when path ends in .png -- no base64 image data is returned in the tool message +`multi` is only a browser action: use `tool_name: "browser"` with `tool_args.action: "multi"`. Never use `tool_name: "multi"`. -pointer and raw input: -- hover moves to a ref center or x/y viewport CSS pixels -- double_click and right_click accept ref or x/y; double_click accepts button and modifiers -- drag moves from ref or x/y to target_ref or to_x/to_y -- wheel scrolls at x/y with delta_x and delta_y -- keyboard presses key or types text into the active page -- clipboard is copy, cut, or paste; for browser:clipboard pass action: "paste" and optional text -- set_viewport resizes the page viewport with width and height -- coordinates are Chromium viewport CSS pixels and match screenshots/Browser surface -- ref offsets are relative to the target element top-left; refs default to element center - -forms: -- use select_option for native select and safely detectable ARIA listbox/combobox controls -- use set_checked for checkbox, radio, switch, and toggle-like refs -- use upload_file for file input refs or associated labels; in container mode paths resolve inside the running container, while in host mode paths resolve on the A0 CLI host machine; verify the file exists in that 
environment before upload -- for complex forms, load browser-forms first with skills_tool:load - -modifier clicks: -- click accepts modifiers like ["Control"], ["Shift"], ["Alt"], ["Meta"] -- ctrl/meta-click opens link in new tab in background (Chrome rule) -- override with focus_popup: true (focus follows new tab) or false (always background) -- the new tab id is reported in action.opened_browser_ids; list shows all tabs - -popup awareness: -- tabs opened by site (window.open, target=_blank, ctrl-click) auto-register -- list returns every tab; last_interacted_browser_id tracks current focus - -background work (do not steal focus): -- operations on a non-active tab (read, click, type, evaluate, etc.) target that tab WITHOUT moving focus -- last_interacted_browser_id (and the WebUI viewer that follows it) only changes on: - - open (new tab created) - - explicit set_active action - - action on the already-active tab - - chrome popup-focus rule (plain click on target=_blank -> follow; ctrl-click -> stay) -- to switch focus deliberately: {"action":"set_active","browser_id":N} - -key_chord: -- presses keys in order, releases in reverse; safe across exceptions -- example: {"action":"key_chord","keys":["Control","a"]} selects all - -multi (parallel batch): -- run many actions concurrently across tabs in one tool call -- pass calls: array of action objects (each has its own action+args) -- different browser_ids run in parallel; same browser_id runs in submit order -- returns array of {"ok":true,"result":...} or {"ok":false,"error":"..."} matching input order -- ideal for: scrape N tabs at once, fan-out reads, parallel evaluate -- new v1 actions such as screenshot, hover, wheel, keyboard, select_option, set_checked, and upload_file are accepted -- avoid mutating same tab twice in one batch unless serial order is intended - -examples: +Example: ~~~json { - "tool_name": "browser", - "tool_args": { - "action": "open", - "url": "https://example.com" - } -} -~~~ - -~~~json -{ - 
"tool_name": "browser", - "tool_args": { - "action": "content", - "browser_id": 1 - } -} -~~~ - -~~~json -{ - "tool_name": "browser", - "tool_args": { - "action": "screenshot", - "browser_id": 1, - "quality": 80 - } -} -~~~ - -~~~json -{ - "tool_name": "vision_load", - "tool_args": { - "paths": ["/absolute/local/path.jpg"] - } -} -~~~ - -~~~json -{ - "tool_name": "browser", - "tool_args": { - "action": "select_option", - "browser_id": 1, - "ref": 8, - "value": "Canada" - } -} -~~~ - -~~~json -{ - "tool_name": "browser", - "tool_args": { - "action": "set_checked", - "browser_id": 1, - "ref": 9, - "checked": true - } -} -~~~ - -~~~json -{ - "tool_name": "browser", - "tool_args": { - "action": "upload_file", - "browser_id": 1, - "ref": 10, - "path": "/a0/usr/workdir/resume.pdf" - } -} -~~~ - -~~~json -{ - "tool_name": "browser", - "tool_args": { - "action": "multi", - "calls": [ - {"action": "content", "browser_id": 1}, - {"action": "screenshot", "browser_id": 2}, - {"action": "evaluate", "browser_id": 3, "script": "document.title"} - ] - } + "tool_name": "browser", + "tool_args": { + "action": "open", + "url": "https://example.com" + } } ~~~ diff --git a/plugins/_browser/skills/browser-tool/SKILL.md b/plugins/_browser/skills/browser-tool/SKILL.md new file mode 100644 index 000000000..ac0b0ee00 --- /dev/null +++ b/plugins/_browser/skills/browser-tool/SKILL.md @@ -0,0 +1,76 @@ +--- +name: browser-tool +description: Use for complex Agent Zero browser automation, including multi-tab browsing, screenshots, forms, uploads, raw pointer/keyboard actions, host-vs-container browser mode, and visual verification workflows. +--- + +# Browser Tool + +Use the `browser` tool for rendered pages, forms, logins, downloads, JavaScript-heavy sites, screenshots, and visual inspection. Prefer `search_engine` or `document_query` for plain text research. + +## Core Workflow + +1. `open` creates a browser tab and returns a `browser_id`. +2. 
`content` returns readable markdown plus typed refs like `[link 3]`, `[button 6]`, `[input text 8]`. +3. Interact with refs using `click`, `type`, `submit`, `scroll`, etc. +4. Use `navigate` on an existing `browser_id` for serial browsing. +5. Keep only a small working tab set; close pages when finished. + +## Modes + +The same tool may run in Docker container mode or A0 CLI host-browser mode, depending on project/plugin settings. + +- Container mode: browser and upload paths resolve inside the Agent Zero container. +- Host mode: browser and upload paths resolve on the connected A0 CLI host machine. + +In host mode, page content and screenshots may be blocked by host-content policy when remote models are active. + +## Screenshots And Vision + +Screenshots are explicit only; the browser does not automatically load images into model context. + +1. Call `browser` with `action: "screenshot"`. +2. Call `vision_load` with the returned path. +3. Reason from the latest loaded screenshot. + +Screenshot args include `quality`, `full_page`, and optional `path`. PNG is used when `path` ends with `.png`; otherwise JPEG is used. + +## Forms And Files + +- `select_option` works for native selects and detectable ARIA listbox/combobox controls. +- `set_checked` works for checkbox, radio, switch, and toggle-like refs. +- `upload_file` works for file input refs or associated labels; verify the file exists in the active browser environment. +- For fragile forms, load skill `browser-forms`. + +## Pointer And Keyboard + +- `hover`, `double_click`, `right_click`, and `drag` accept refs or viewport coordinates. +- Coordinates are Chromium viewport CSS pixels and match screenshots. +- `key_chord` presses keys in order and releases in reverse. +- `clipboard` actions are copy, cut, or paste. +- `set_viewport` resizes the page viewport. + +## Tabs And Popups + +- Popups and target-blank tabs are auto-registered. +- `list` shows open tabs; pass `include_content: true` sparingly. 
+- `set_active` deliberately changes focus. +- Operations on a non-active tab do not steal focus unless browser rules require it. + +## Browser Action Multi + +`multi` is only a browser action, never a top-level tool. Use: + +```json +{ + "tool_name": "browser", + "tool_args": { + "action": "multi", + "calls": [ + {"action": "content", "browser_id": 1}, + {"action": "screenshot", "browser_id": 2} + ] + } +} +``` + +Use browser action `multi` for parallel reads across tabs. Avoid mutating the same tab twice in one batch unless serial order is intended. diff --git a/plugins/_code_execution/tools/code_execution_tool.py b/plugins/_code_execution/tools/code_execution_tool.py index 188f03aa1..6e06dd461 100644 --- a/plugins/_code_execution/tools/code_execution_tool.py +++ b/plugins/_code_execution/tools/code_execution_tool.py @@ -278,7 +278,7 @@ class CodeExecution(Tool): if partial_output: PrintStyle(font_color="#85C1E9").stream(partial_output) truncated_output = self.fix_full_output(full_output) - self.set_progress(truncated_output) + await self.set_progress(truncated_output) heading = self.get_heading_from_output(truncated_output, 0) self.log.update(content=prefix + truncated_output, heading=heading) last_output_time = now @@ -397,7 +397,7 @@ class CodeExecution(Tool): return None raise truncated_output = self.fix_full_output(full_output) - self.set_progress(truncated_output) + await self.set_progress(truncated_output) heading = self.get_heading_from_output(truncated_output, 0) last_lines = ( diff --git a/plugins/_memory/tools/behaviour_adjustment.py b/plugins/_memory/tools/behaviour_adjustment.py index ef43d9c80..ace5f50a9 100644 --- a/plugins/_memory/tools/behaviour_adjustment.py +++ b/plugins/_memory/tools/behaviour_adjustment.py @@ -39,11 +39,12 @@ async def update_behaviour(agent: Agent, log_item: LogItem, adjustments: str): message=msg, callback=log_callback, ) + adjustments_merge = normalize_ruleset(adjustments_merge) # update rules file rules_file = 
get_custom_rules_file(agent) files.write_file(rules_file, adjustments_merge) - log_item.update(result="Behaviour updated") + log_item.update(ruleset=adjustments_merge, result="Behaviour updated") def get_custom_rules_file(agent: Agent): @@ -53,8 +54,47 @@ def get_custom_rules_file(agent: Agent): def read_rules(agent: Agent): rules_file = get_custom_rules_file(agent) if files.exists(rules_file): - rules = agent.read_prompt(rules_file) - return agent.read_prompt("agent.system.behaviour.md", rules=rules) + return agent.read_prompt(rules_file) else: - rules = agent.read_prompt("agent.system.behaviour_default.md") - return agent.read_prompt("agent.system.behaviour.md", rules=rules) + return agent.read_prompt("agent.system.behaviour_default.md") + + +def normalize_ruleset(ruleset: str): + text = str(ruleset or "").strip() + + if text.startswith("```") and text.endswith("```"): + lines = text.splitlines() + text = "\n".join(lines[1:-1]).strip() + + text = text.replace("\r\n", "\n").replace("\r", "\n") + text = text.replace("!!!", "") + text = text.replace(".## ", ".\n## ") + + normalized_lines = [] + seen_structural_lines = set() + previous_blank = False + + for raw_line in text.splitlines(): + line = raw_line.rstrip() + stripped = line.strip() + + if not stripped: + if normalized_lines and not previous_blank: + normalized_lines.append("") + previous_blank = True + continue + + if stripped.startswith("# ") and not stripped.startswith("## "): + stripped = "#" + stripped + line = stripped + + dedupe_key = stripped.casefold() + if stripped.startswith(("## ", "* ")) and dedupe_key in seen_structural_lines: + continue + if stripped.startswith(("## ", "* ")): + seen_structural_lines.add(dedupe_key) + + normalized_lines.append(line) + previous_blank = False + + return "\n".join(normalized_lines).strip() + "\n" diff --git a/plugins/_memory/tools/memory_forget.py b/plugins/_memory/tools/memory_forget.py index 3449815f9..39babb9fd 100644 --- a/plugins/_memory/tools/memory_forget.py 
+++ b/plugins/_memory/tools/memory_forget.py @@ -1,7 +1,7 @@ from helpers.tool import Tool, Response from plugins._memory.helpers.memory import Memory -from tools.memory_load import DEFAULT_THRESHOLD +from plugins._memory.tools.memory_load import DEFAULT_THRESHOLD class MemoryForget(Tool): diff --git a/plugins/_office/helpers/canvas_context.py b/plugins/_office/helpers/canvas_context.py index 395194269..8c1028354 100644 --- a/plugins/_office/helpers/canvas_context.py +++ b/plugins/_office/helpers/canvas_context.py @@ -13,12 +13,12 @@ def build_context(max_items: int = 6) -> str: return desktop_context lines = [ - "These document artifacts have active document sessions. Content is omitted; load skill `document-artifacts` for edit workflow, then use document_artifact:read before content-sensitive edits.", + "These document artifacts have active document sessions. Content is omitted; load skill `document-artifacts` for edit workflow, then use `document_artifact` with action `read` before content-sensitive edits.", ] for doc in documents: lines.append(format_document_line(doc)) lines.append( - "Use document_artifact:edit with file_id or path for saved edits; tool results refresh the document canvas." + "Use `document_artifact` with action `edit` and file_id or path for saved edits; tool results refresh the document canvas." 
) if desktop_context: lines.extend(["", desktop_context]) diff --git a/plugins/_office/prompts/agent.system.tool.document_artifact.md b/plugins/_office/prompts/agent.system.tool.document_artifact.md index 3c51ab3e0..d247543a9 100644 --- a/plugins/_office/prompts/agent.system.tool.document_artifact.md +++ b/plugins/_office/prompts/agent.system.tool.document_artifact.md @@ -2,10 +2,9 @@ create/open/read/edit reusable document artifacts in Agent Zero formats: md odt ods odp docx xlsx pptx default format: md -methods: create open read edit inspect export version_history restore_version status -common args: method action kind title format content path file_id +actions: create open read edit inspect export version_history restore_version status +common args: action kind title format content path file_id optional UI intent args: open_in_canvas open_in_desktop -`method` is accepted as an alias for action when the tool_name has no suffix create/read/edit results save or update artifacts only; they do not open a surface automatically unless the user explicitly asks to open the document UI use action `open`, `open_in_canvas: true`, or `open_in_desktop: true` only when the user explicitly asks to open the document/editor/Desktop created/updated artifacts are shown with explicit Download, Open Document, or Desktop edit message actions diff --git a/plugins/_office/skills/calc-spreadsheets/SKILL.md b/plugins/_office/skills/calc-spreadsheets/SKILL.md index bdc8f4044..43930e20b 100644 --- a/plugins/_office/skills/calc-spreadsheets/SKILL.md +++ b/plugins/_office/skills/calc-spreadsheets/SKILL.md @@ -33,8 +33,9 @@ Create a workbook: ```json { - "tool_name": "document_artifact:create", + "tool_name": "document_artifact", "tool_args": { + "action": "create", "kind": "spreadsheet", "title": "Budget", "format": "ods", @@ -49,8 +50,9 @@ Edit cells: ```json { - "tool_name": "document_artifact:edit", + "tool_name": "document_artifact", "tool_args": { + "action": "edit", "file_id": "abc123", 
"operation": "set_cells", "cells": { diff --git a/plugins/_office/skills/document-artifacts/SKILL.md b/plugins/_office/skills/document-artifacts/SKILL.md index bf492ada9..b6ccc9f8c 100644 --- a/plugins/_office/skills/document-artifacts/SKILL.md +++ b/plugins/_office/skills/document-artifacts/SKILL.md @@ -26,7 +26,7 @@ allowed_tools: Use `document_artifact` for substantial deliverables that should remain editable in the custom document editor or LibreOffice Desktop. Markdown remains the default for ordinary writing, notes, reports, briefs, and drafts when no binary office file is needed. For LibreOffice office files, ODF is first-class: use ODT for Writer, ODS for Spreadsheet/Calc, and ODP for Presentation/Impress. Use DOCX, XLSX, or PPTX only when the user explicitly asks for OOXML compatibility, provides an existing file in that format, or needs that compatibility format. -The document UI and Desktop are user-owned. Creating, reading, or editing an artifact must save the file and update its state, but it must not open a document modal or Desktop surface automatically if the user has not asked for that UI. Tool results provide explicit Download, Open Document, or Desktop edit actions for the user. Use `document_artifact:open`, `open_in_canvas: true`, or `open_in_desktop: true` only when the user explicitly asks to open the document/editor/Desktop. +The document UI and Desktop are user-owned. Creating, reading, or editing an artifact must save the file and update its state, but it must not open a document modal or Desktop surface automatically if the user has not asked for that UI. Tool results provide explicit Download, Open Document, or Desktop edit actions for the user. Use the `open` action, `open_in_canvas: true`, or `open_in_desktop: true` only when the user explicitly asks to open the document/editor/Desktop. 
For format-specific work, prefer the matching skill when available: @@ -37,9 +37,9 @@ For format-specific work, prefer the matching skill when available: ## Workflow -1. Create or open the artifact with `document_artifact:create` / `document_artifact:open`, or with `tool_name: "document_artifact"` plus `method: "create"` / `method: "open"`. -2. Before content-sensitive edits, call `document_artifact:read` with `file_id` or `path`. -3. Apply saved changes with `document_artifact:edit`. +1. Create or open the artifact with `tool_name: "document_artifact"` and `tool_args.action: "create"` or `"open"`. +2. Before content-sensitive edits, call the `read` action with `file_id` or `path`. +3. Apply saved changes with the `edit` action. 4. Use `version_history` or `restore_version` when the user asks to audit or roll back. Document context may list opened files with `file_id`, path, version, size, and timestamp. It intentionally omits full file contents; use `read` when the content matters. @@ -49,8 +49,9 @@ Document context may list opened files with `file_id`, path, version, size, and Create: ```json { - "tool_name": "document_artifact:create", + "tool_name": "document_artifact", "tool_args": { + "action": "create", "kind": "document", "title": "Project Brief", "format": "md", @@ -64,8 +65,9 @@ For spreadsheets, `content` can be CSV, TSV, or a Markdown table; the tool write Read: ```json { - "tool_name": "document_artifact:read", + "tool_name": "document_artifact", "tool_args": { + "action": "read", "file_id": "abc123" } } @@ -74,8 +76,9 @@ Read: Edit text in a Markdown, ODT, DOCX, ODP, or PPTX file: ```json { - "tool_name": "document_artifact:edit", + "tool_name": "document_artifact", "tool_args": { + "action": "edit", "file_id": "abc123", "operation": "replace_text", "find": "old phrase", @@ -87,8 +90,9 @@ Edit text in a Markdown, ODT, DOCX, ODP, or PPTX file: Set spreadsheet cells: ```json { - "tool_name": "document_artifact:edit", + "tool_name": "document_artifact", 
"tool_args": { + "action": "edit", "path": "/a0/usr/workdir/documents/Budget.ods", "operation": "set_cells", "cells": { @@ -102,8 +106,9 @@ Set spreadsheet cells: Create an embedded spreadsheet chart: ```json { - "tool_name": "document_artifact:edit", + "tool_name": "document_artifact", "tool_args": { + "action": "edit", "file_id": "abc123", "operation": "create_chart", "sheet": "Sheet1", diff --git a/plugins/_office/skills/impress-presentations/SKILL.md b/plugins/_office/skills/impress-presentations/SKILL.md index 7608347fe..457e5dc5e 100644 --- a/plugins/_office/skills/impress-presentations/SKILL.md +++ b/plugins/_office/skills/impress-presentations/SKILL.md @@ -33,8 +33,9 @@ Create: ```json { - "tool_name": "document_artifact:create", + "tool_name": "document_artifact", "tool_args": { + "action": "create", "kind": "presentation", "title": "Roadmap", "format": "odp", @@ -47,8 +48,9 @@ Edit slides: ```json { - "tool_name": "document_artifact:edit", + "tool_name": "document_artifact", "tool_args": { + "action": "edit", "file_id": "abc123", "operation": "set_slides", "slides": [ diff --git a/plugins/_office/skills/markdown-documents/SKILL.md b/plugins/_office/skills/markdown-documents/SKILL.md index f6cdbdbc4..1b07d9c04 100644 --- a/plugins/_office/skills/markdown-documents/SKILL.md +++ b/plugins/_office/skills/markdown-documents/SKILL.md @@ -25,16 +25,17 @@ The document editor is user-owned UI. Create or update the saved Markdown artifa ## Workflow 1. Decide whether a saved editable artifact is useful. Create one for substantial, reusable, or collaborative writing; do not create one for tiny one-shot edits or answers that can be completed cleanly in chat. -2. Create Markdown with `document_artifact:create` using `kind: "document"` and `format: "md"`. -3. For edits to an existing Markdown artifact, read first when content matters, then use `document_artifact:edit`. +2. 
Create Markdown with `document_artifact` using `action: "create"`, `kind: "document"`, and `format: "md"`. +3. For edits to an existing Markdown artifact, read first when content matters, then use the `edit` action. 4. Report the saved file path briefly. Do not say it was opened unless the user explicitly opened it. Minimal create: ```json { - "tool_name": "document_artifact:create", + "tool_name": "document_artifact", "tool_args": { + "action": "create", "kind": "document", "title": "Project Brief", "format": "md", diff --git a/plugins/_office/skills/writer-documents/SKILL.md b/plugins/_office/skills/writer-documents/SKILL.md index b2427e527..e30f9b85a 100644 --- a/plugins/_office/skills/writer-documents/SKILL.md +++ b/plugins/_office/skills/writer-documents/SKILL.md @@ -31,8 +31,9 @@ Create: ```json { - "tool_name": "document_artifact:create", + "tool_name": "document_artifact", "tool_args": { + "action": "create", "kind": "document", "title": "Board Memo", "format": "odt", @@ -43,8 +44,8 @@ Create: Edit: -1. Use `document_artifact:read` with `file_id` or `path` before content-sensitive edits. -2. Use `document_artifact:edit` for deterministic saved changes: `set_text`, `append_text`, `prepend_text`, `replace_text`, or `delete_text`. +1. Use the `read` action with `file_id` or `path` before content-sensitive edits. +2. Use the `edit` action for deterministic saved changes: `set_text`, `append_text`, `prepend_text`, `replace_text`, or `delete_text`. 3. Use the Desktop only when the user asks to see Writer or when layout cannot be handled reliably through structured edits. 
Practical rules: diff --git a/plugins/_text_editor/prompts/agent.system.tool.text_editor.md b/plugins/_text_editor/prompts/agent.system.tool.text_editor.md index a30868528..b92488a16 100644 --- a/plugins/_text_editor/prompts/agent.system.tool.text_editor.md +++ b/plugins/_text_editor/prompts/agent.system.tool.text_editor.md @@ -2,19 +2,23 @@ file read write patch with numbered lines not code execution rejects binary terminal (grep find sed) advance search/replace +actions: read write patch +common args: action path -#### text_editor:read +#### read read file with numbered lines args path line_from line_to (inclusive optional) -no range → first {{default_line_count}} lines +no range -> first {{default_line_count}} lines long lines cropped output may trim by token limit read surrounding context before patching usage: ~~~json { - ... - "tool_name": "text_editor:read", + "thoughts": ["I need file context before editing."], + "headline": "Reading file", + "tool_name": "text_editor", "tool_args": { + "action": "read", "path": "/path/file.py", "line_from": 1, "line_to": 50 @@ -22,22 +26,24 @@ usage: } ~~~ -#### text_editor:write +#### write create/overwrite file auto-creates dirs args path content usage: ~~~json { - ... - "tool_name": "text_editor:write", + "thoughts": ["I need to create or replace the file content."], + "headline": "Writing file", + "tool_name": "text_editor", "tool_args": { + "action": "write", "path": "/path/file.py", "content": "import os\nprint('hello')\n" } } ~~~ -#### text_editor:patch +#### patch edit existing file. prefer patch_text; use edits only right after read for tiny line edits args path plus exactly one of: patch_text string OR edits [{from to content}] patch_text uses current file content, no prior read required @@ -54,9 +60,11 @@ ensure valid syntax in content (all braces brackets tags closed) usage: ~~~json { - ... 
- "tool_name": "text_editor:patch", + "thoughts": ["A context patch is safer than line-number surgery here."], + "headline": "Patching file", + "tool_name": "text_editor", "tool_args": { + "action": "patch", "path": "/path/file.py", "patch_text": "*** Begin Patch\n*** Update File: file.py\n@@ def run():\n+ print('ready')\n*** End Patch" } diff --git a/plugins/_text_editor/tools/text_editor.py b/plugins/_text_editor/tools/text_editor.py index b7108d445..a943f2a04 100644 --- a/plugins/_text_editor/tools/text_editor.py +++ b/plugins/_text_editor/tools/text_editor.py @@ -28,14 +28,18 @@ _MTIME_KEY = LOCAL_FRESHNESS_KEY class TextEditor(Tool): async def execute(self, **kwargs): - if self.method == "read": + action = _current_action(self, kwargs) + if action == "read": return await self._read(**kwargs) - elif self.method == "write": + elif action == "write": return await self._write(**kwargs) - elif self.method == "patch": + elif action == "patch": return await self._patch(**kwargs) return Response( - message=f"unknown method '{self.name}:{self.method}'", + message=( + f"unknown action '{action or self.method or ''}'. " + "Supported actions: read, write, patch." 
+ ), break_loop=False, ) @@ -379,3 +383,18 @@ def _get_config(agent) -> dict: "default_line_count": int(config.get("default_line_count", 100)), "max_total_read_tokens": int(config.get("max_total_read_tokens", 4000)), } + + +def _current_action(tool: TextEditor, kwargs: dict) -> str: + """Return the normalized action, falling back to the legacy ``tool_name:method`` form.""" + return ( + str( + kwargs.get("action") + or tool.args.get("action") + or getattr(tool, "method", None) + or "" + ) + .strip() + .lower() + .replace("-", "_") + ) diff --git a/prompts/agent.system.main.communication.md b/prompts/agent.system.main.communication.md index 4c4221d7f..fde41cdbb 100644 --- a/prompts/agent.system.main.communication.md +++ b/prompts/agent.system.main.communication.md @@ -9,6 +9,8 @@ - headline: short headline summary of the response - tool_name: use tool name - tool_args: key value pairs tool arguments +- `tool_name` must be one listed tool name, never an action name such as `read`, `write`, `terminal`, or `multi` +- To do two operations, call one tool now, then call the next tool after the first result - No text output before or after the JSON object diff --git a/prompts/agent.system.skills.md b/prompts/agent.system.skills.md index 6e74baed3..1fadb48f4 100644 --- a/prompts/agent.system.skills.md +++ b/prompts/agent.system.skills.md @@ -1,4 +1,8 @@ ## skills -use `skills_tool:search` when the user's wording sounds like a task, trigger phrase, or keyword match for a skill -use `skills_tool:list` when you need a broader catalog view -use `skills_tool:load` before following a skill +use `skills_tool` action `search` when the user's wording sounds like a task, trigger phrase, or keyword match for a skill +use `skills_tool` action `list` when you need a broader catalog view +use `skills_tool` action `load` before following a skill +loaded skills may document beta/specialized tools not in the always-on tool list; use them only after loading the skill + +available: +{{skills}} diff --git a/prompts/agent.system.skills.relevant.md b/prompts/agent.system.skills.relevant.md index 05f73b8b3..c6105fd2c 
100644 --- a/prompts/agent.system.skills.relevant.md +++ b/prompts/agent.system.skills.relevant.md @@ -1,5 +1,6 @@ # relevant skills - the following skills matched the user's current request by lexical search, including trigger phrases -- use `skills_tool:load` to load one before following it +- if the current request depends on one of these skills, use `skills_tool` with action `load` before following it +- remote tool stubs are self-contained for routine use; load the matching remote skill for complex remote workflows {{skills}} diff --git a/prompts/agent.system.tool.notify_user.md b/prompts/agent.system.tool.notify_user.md index 2714a7a60..cb2193635 100644 --- a/prompts/agent.system.tool.notify_user.md +++ b/prompts/agent.system.tool.notify_user.md @@ -2,4 +2,5 @@ send an out-of-band notification without ending the current task args: `message`, optional `title`, `detail`, `type`, `priority`, `timeout` types: `info`, `success`, `warning`, `error`, `progress` +priority values: `20` high urgency, `10` normal urgency; omit for high use for progress or alerts, not as the final answer diff --git a/prompts/agent.system.tool.scheduler.md b/prompts/agent.system.tool.scheduler.md index 001f1d4c0..fff64e762 100644 --- a/prompts/agent.system.tool.scheduler.md +++ b/prompts/agent.system.tool.scheduler.md @@ -1,27 +1,13 @@ ### scheduler -manage saved tasks and schedules -rules: -- before `scheduler:create_*` or `scheduler:run_task`, inspect existing tasks with `scheduler:find_task_by_name` or `scheduler:list_tasks` -- do not manually run a task just because it is scheduled or planned unless user asks to run now -- do not create recursive task prompts that schedule more tasks -methods: -- `scheduler:list_tasks`: optional `state[]`, `type[]`, `next_run_within`, `next_run_after` -- `scheduler:find_task_by_name`: `name` -- `scheduler:show_task`: `uuid` -- `scheduler:run_task`: `uuid`, optional `context` -- `scheduler:delete_task`: `uuid` -- `scheduler:create_scheduled_task`: 
`name`, `system_prompt`, `prompt`, optional `attachments[]`, `schedule{minute,hour,day,month,weekday}`, optional `dedicated_context` -- `scheduler:create_adhoc_task`: `name`, `system_prompt`, `prompt`, optional `attachments[]`, optional `dedicated_context` -- `scheduler:create_planned_task`: `name`, `system_prompt`, `prompt`, optional `attachments[]`, `plan[]` iso datetimes like `2025-04-29T18:25:00`, optional `dedicated_context` -- `scheduler:wait_for_task`: `uuid`; works for dedicated-context tasks -example: -~~~json -{ - "thoughts": ["I should check for an existing task before I create or run anything."], - "headline": "Looking up scheduled task", - "tool_name": "scheduler:find_task_by_name", - "tool_args": { - "name": "daily backup" - } -} -~~~ +Manage saved tasks and schedules. For complex task work, load skill `scheduler-tasks`. + +Actions: `list_tasks`, `find_task_by_name`, `show_task`, `run_task`, `update_task`, `delete_task`, `create_scheduled_task`, `create_adhoc_task`, `create_planned_task`, `wait_for_task`. + +Common args: `action`, `name`, `uuid`, `system_prompt`, `prompt`, `attachments`, `schedule`, `timezone`, `plan`, `dedicated_context`. + +Rules: +- Before `create_*`, `update_task`, `delete_task`, or `run_task`, inspect existing tasks with `find_task_by_name` or `list_tasks`. +- Do not run scheduled/planned tasks unless the user asks to run now. +- Do not create recursive task prompts that schedule more tasks. +- New tasks use a dedicated context unless `dedicated_context` is `false`. +- Use IANA timezones like `Europe/Rome`; omit timezone to use the current user timezone. 
diff --git a/prompts/agent.system.tool.skills.md b/prompts/agent.system.tool.skills.md index 19839d912..cd449136b 100644 --- a/prompts/agent.system.tool.skills.md +++ b/prompts/agent.system.tool.skills.md @@ -1,9 +1,12 @@ ### skills_tool use skills only when relevant +actions: list search load read_file +common args: action skill_name query file_path workflow: -- `skills_tool:search`: find candidate skills by keywords or trigger phrases from the current task -- `skills_tool:list`: discover available skills -- `skills_tool:load`: load one skill by `skill_name` +- action `search`: find candidate skills by keywords or trigger phrases from the current task +- action `list`: discover available skills +- action `load`: load one skill by `skill_name` +- action `read_file`: open one file inside a loaded skill directory after loading a skill, follow its instructions and use referenced files or scripts with other tools reload a skill if its instructions are no longer in context example: @@ -11,8 +14,9 @@ example: { "thoughts": ["The user's request sounds like a skill trigger phrase, so I should search first."], "headline": "Searching for relevant skill", - "tool_name": "skills_tool:search", + "tool_name": "skills_tool", "tool_args": { + "action": "search", "query": "set up a0 cli connector" } } diff --git a/prompts/agent.system.tools.md b/prompts/agent.system.tools.md index 3f18e527e..2a083963c 100644 --- a/prompts/agent.system.tools.md +++ b/prompts/agent.system.tools.md @@ -1,3 +1,4 @@ ## available tools use ONLY the tools listed below. match names exactly. do NOT invent tool names. +Action names are not tool names. There is no top-level `multi` or batch tool; call one listed tool at a time. If a tool has an action named `multi`, keep that action inside `tool_args.action` for that specific tool. 
{{tools}} diff --git a/skills/computer-use-remote/SKILL.md b/skills/computer-use-remote/SKILL.md index fc19e4337..d8c860e3e 100644 --- a/skills/computer-use-remote/SKILL.md +++ b/skills/computer-use-remote/SKILL.md @@ -1,6 +1,6 @@ --- name: computer-use-remote -description: Detailed operating guide for using computer_use_remote on the connected local machine. Load this skill before using computer_use_remote for desktop control, screenshots, menus, browser chrome, or other native UI tasks. +description: Beta local desktop control through a connected A0 CLI host; use for screenshots, menus, browser chrome, and native UI tasks. version: 1.1.0 author: Agent Zero Team tags: ["computer-use", "desktop", "local-ui", "screenshots", "native-ui"] @@ -18,6 +18,8 @@ allowed_tools: # Computer Use Remote +This skill unlocks the beta `computer_use_remote` tool for connected local desktop control through A0 CLI. + ## When to Use Load this skill before using `computer_use_remote` for local desktop and native UI tasks on the connected machine. @@ -26,6 +28,31 @@ If the task is browser-only and the user is flexible, prefer direct browser tool If the task needs shell execution on the CLI host, load `code-execution-remote` separately rather than treating desktop control and shell execution as one affordance. +## Tool Contract + +Use: + +```json +{ + "tool_name": "computer_use_remote", + "tool_args": { + "action": "start_session" + } +} +``` + +Arguments: + +- `action`: `start_session`, `status`, `capture`, `move`, `click`, `scroll`, `key`, `type`, `stop_session` +- `session_id`: optional after `start_session` +- `move`: `x`, `y` normalized to `[0,1]` +- `click`: optional `x`, `y`, optional `button` (`left`, `right`, `middle`), optional `count` +- `scroll`: `dx`, `dy` +- `key`: `key` or `keys` +- `type`: `text`, optional `submit` boolean + +Availability, backend support, and trust mode are checked when the tool runs. 
If no CLI is connected or local computer use is disabled, tell the user what to enable instead of using the server environment. + ## Core Loop 1. Call `start_session` first. diff --git a/skills/scheduler-tasks/SKILL.md b/skills/scheduler-tasks/SKILL.md new file mode 100644 index 000000000..b05db681c --- /dev/null +++ b/skills/scheduler-tasks/SKILL.md @@ -0,0 +1,53 @@ +--- +name: scheduler-tasks +description: Use for complex Agent Zero scheduler work, including creating, updating, deleting, running, waiting for, timezone-correcting, or auditing scheduled, planned, and adhoc tasks. +--- + +# Scheduler Tasks + +Use the `scheduler` tool to manage saved tasks. Always inspect existing tasks before creating, updating, deleting, or running one. + +## Actions + +- `list_tasks`: optional `state[]`, `type[]`, `next_run_within`, `next_run_after` +- `find_task_by_name`: `name` +- `show_task`: `uuid` +- `run_task`: `uuid`, optional `context` +- `update_task`: `uuid`, optional `name`, `system_prompt`, `prompt`, `attachments[]`, `schedule`, `timezone`, `plan[]`, `state`, `dedicated_context` +- `delete_task`: `uuid` +- `create_scheduled_task`: `name`, `system_prompt`, `prompt`, optional `attachments[]`, `schedule`, `timezone`, `dedicated_context` +- `create_adhoc_task`: `name`, `system_prompt`, `prompt`, optional `attachments[]`, `dedicated_context` +- `create_planned_task`: `name`, `system_prompt`, `prompt`, optional `attachments[]`, `plan[]`, `dedicated_context` +- `wait_for_task`: `uuid` + +## Schedule Fields + +Schedules use cron-like fields: + +- `minute` +- `hour` +- `day` +- `month` +- `weekday` +- `timezone` + +Use IANA timezones such as `Europe/Rome`. Omit timezone to use the current user timezone. Planned task datetimes should be ISO strings such as `2026-05-09T18:25:00`. + +## Safety + +- Do not create recursive task prompts that schedule more tasks. +- Do not run a task just because it is scheduled; run only if the user asks. 
+- Created tasks use a dedicated context unless `dedicated_context` is explicitly `false`. +- For destructive operations, identify the task by UUID after lookup. + +## Example + +```json +{ + "tool_name": "scheduler", + "tool_args": { + "action": "find_task_by_name", + "name": "daily backup" + } +} +``` diff --git a/tests/test_a0_connector_prompt_gating.py b/tests/test_a0_connector_prompt_gating.py index 790ecde42..a1e53f1e1 100644 --- a/tests/test_a0_connector_prompt_gating.py +++ b/tests/test_a0_connector_prompt_gating.py @@ -1,5 +1,6 @@ import importlib.util import sys +import time import uuid from pathlib import Path @@ -113,7 +114,7 @@ def _subscribe( return sid -def test_remote_tool_stubs_absent_without_subscribed_cli(): +def test_legacy_dynamic_remote_tool_gate_is_noop(): prompt = _apply_gate(_context_id()) assert "text_editor_remote tool" not in prompt @@ -121,81 +122,156 @@ def test_remote_tool_stubs_absent_without_subscribed_cli(): assert "computer_use_remote tool" not in prompt -def test_file_only_cli_adds_text_editor_stub(): +def test_remote_file_and_exec_tools_are_standard_tool_prompts_independent_from_context(): + text_stub = (PROMPT_ROOT / "agent.system.tool.text_editor_remote.md").read_text(encoding="utf-8") + exec_stub = (PROMPT_ROOT / "agent.system.tool.code_execution_remote.md").read_text(encoding="utf-8") + + assert '"tool_name": "text_editor_remote"' in text_stub + assert '"tool_name": "code_execution_remote"' in exec_stub + assert "Availability and permissions are checked when the tool runs" in text_stub + assert "Availability and permissions are checked when the tool runs" in exec_stub + + +def test_beta_computer_use_remote_is_skill_only_not_standard_tool_prompt(): + skill = PROJECT_ROOT / "skills" / "computer-use-remote" / "SKILL.md" + + assert not (PROMPT_ROOT / "agent.system.tool.computer_use_remote.md").exists() + assert '"tool_name": "computer_use_remote"' in skill.read_text(encoding="utf-8") + + +def 
test_old_connector_prompt_files_removed(): + assert not (PROMPT_ROOT / "agent.connector_tool.text_editor_remote.md").exists() + assert not (PROMPT_ROOT / "agent.connector_tool.code_execution_remote.md").exists() + assert not (PROMPT_ROOT / "agent.connector_tool.computer_use_remote.md").exists() + + +def test_remote_tool_selection_prefers_context_cli_then_global_cli(): context_id = _context_id() - sid = _subscribe( - context_id, - remote_files={"enabled": True, "write_enabled": True}, + sid_context = _sid() + sid_global = _sid() + for sid in (sid_context, sid_global): + ws_runtime.register_sid(sid) + ws_runtime.store_sid_remote_exec_metadata(sid, {"enabled": True}) + ws_runtime.store_sid_remote_file_metadata( + sid, + {"enabled": True, "write_enabled": True, "mode": "read_write"}, + ) + ws_runtime.subscribe_sid_to_context(sid_context, context_id) + try: + assert ws_runtime.remote_tool_sids_for_context(context_id) == [ + sid_context, + sid_global, + ] + assert ws_runtime.select_remote_exec_target_sid(context_id) == sid_context + assert ( + ws_runtime.select_remote_exec_target_sid(context_id, require_writes=True) + == sid_context + ) + assert ws_runtime.select_remote_file_target_sid(context_id) == sid_context + finally: + ws_runtime.unregister_sid(sid_context) + ws_runtime.unregister_sid(sid_global) + + +def test_remote_tool_selection_falls_back_to_global_cli(): + context_id = _context_id() + sid = _sid() + ws_runtime.register_sid(sid) + ws_runtime.store_sid_remote_exec_metadata(sid, {"enabled": True}) + ws_runtime.store_sid_remote_file_metadata( + sid, + {"enabled": True, "write_enabled": True, "mode": "read_write"}, ) try: - prompt = _apply_gate(context_id) + assert ws_runtime.select_remote_exec_target_sid(context_id) == sid + assert ( + ws_runtime.select_remote_exec_target_sid(context_id, require_writes=True) + == sid + ) + assert ws_runtime.select_remote_file_target_sid(context_id) == sid finally: ws_runtime.unregister_sid(sid) - assert "text_editor_remote tool" 
in prompt - assert "Current access mode: `Read&Write`" in prompt - assert "code_execution_remote tool" not in prompt - assert "computer_use_remote tool" not in prompt - -def test_exec_enabled_cli_adds_execution_stub(): +def test_latest_remote_tree_falls_back_to_global_cli_snapshot(): context_id = _context_id() - sid = _subscribe( - context_id, - remote_exec={"enabled": True}, - ) - try: - prompt = _apply_gate(context_id) - finally: - ws_runtime.unregister_sid(sid) - - assert "code_execution_remote tool" in prompt - assert "text_editor_remote tool" not in prompt - assert "computer_use_remote tool" not in prompt - - -def test_read_only_mode_marks_mutating_operations_disabled(): - context_id = _context_id() - sid = _subscribe( - context_id, - remote_files={"enabled": True, "write_enabled": False, "mode": "read_only"}, - remote_exec={"enabled": True}, - ) - try: - prompt = _apply_gate(context_id) - finally: - ws_runtime.unregister_sid(sid) - - assert "text_editor_remote tool" in prompt - assert "code_execution_remote tool" in prompt - assert "Current access mode: `Read only`" in prompt - assert "Writes and patches are disabled" in prompt - assert "Mutating runtimes are disabled" in prompt - - -def test_computer_use_enabled_cli_adds_computer_stub(): - context_id = _context_id() - sid = _subscribe( - context_id, - computer_use={ - "supported": True, - "enabled": True, - "trust_mode": "ask", - "backend_id": "local", - "backend_family": "desktop", - "features": ["screenshots", "keyboard"], + sid = _sid() + ws_runtime.register_sid(sid) + ws_runtime.store_remote_tree_snapshot( + sid, + { + "root_path": "/home/example", + "tree": "README.md", + "generated_at": "2026-05-09T12:00:00Z", }, ) try: - prompt = _apply_gate(context_id) + snapshot = ws_runtime.latest_remote_tree_for_context( + context_id, + max_age_seconds=90, + ) finally: ws_runtime.unregister_sid(sid) - assert "computer_use_remote tool" in prompt - assert "Backend: `local/desktop`" in prompt - assert "Features: 
`screenshots, keyboard`" in prompt - assert "text_editor_remote tool" not in prompt - assert "code_execution_remote tool" not in prompt + assert snapshot is not None + assert snapshot["sid"] == sid + assert snapshot["tree"] == "README.md" + + +def test_latest_remote_tree_prefers_context_cli_snapshot(): + context_id = _context_id() + sid_context = _sid() + sid_global = _sid() + now = time.time() + for sid in (sid_context, sid_global): + ws_runtime.register_sid(sid) + ws_runtime.subscribe_sid_to_context(sid_context, context_id) + ws_runtime.store_remote_tree_snapshot( + sid_context, + { + "root_path": "/context", + "tree": "context.txt", + "generated_at": "2026-05-09T12:00:00Z", + }, + ) + ws_runtime.store_remote_tree_snapshot( + sid_global, + { + "root_path": "/global", + "tree": "global.txt", + "generated_at": "2026-05-09T12:00:01Z", + }, + ) + try: + # Make the global snapshot newer; context affinity should still win. + ws_runtime._remote_tree_snapshots[sid_global] = ws_runtime.RemoteTreeSnapshot( + sid=sid_global, + payload=ws_runtime._remote_tree_snapshots[sid_global].payload, + updated_at=now + 5, + ) + snapshot = ws_runtime.latest_remote_tree_for_context( + context_id, + max_age_seconds=90, + ) + finally: + ws_runtime.unregister_sid(sid_context) + ws_runtime.unregister_sid(sid_global) + + assert snapshot is not None + assert snapshot["sid"] == sid_context + assert snapshot["tree"] == "context.txt" + + +def test_remote_exec_mutating_runtime_requires_explicit_write_access(): + context_id = _context_id() + sid = _sid() + ws_runtime.register_sid(sid) + ws_runtime.store_sid_remote_exec_metadata(sid, {"enabled": True}) + try: + assert ws_runtime.select_remote_exec_target_sid(context_id) == sid + assert ws_runtime.select_remote_exec_target_sid(context_id, require_writes=True) is None + finally: + ws_runtime.unregister_sid(sid) def test_remote_affordance_skills_parse(): @@ -230,17 +306,29 @@ def test_remote_affordance_skills_parse(): assert not 
legacy_connector_skill.exists() assert text_editor_skill["name"] == "text-editor-remote" assert text_editor_skill["allowed_tools"] == ["text_editor_remote"] + assert "connected local files" in text_editor_skill["trigger_patterns"] + assert "not docker" in code_execution_skill["trigger_patterns"] + assert "connected local terminal" in code_execution_skill["trigger_patterns"] assert code_execution_skill["name"] == "code-execution-remote" assert code_execution_skill["allowed_tools"] == ["code_execution_remote"] assert computer_skill["name"] == "computer-use-remote" assert computer_skill["allowed_tools"] == ["computer_use_remote"] -def test_remote_tool_stubs_point_to_per_tool_skills(): - text_stub = (PROMPT_ROOT / "agent.connector_tool.text_editor_remote.md").read_text(encoding="utf-8") - exec_stub = (PROMPT_ROOT / "agent.connector_tool.code_execution_remote.md").read_text(encoding="utf-8") +def test_remote_tool_stubs_are_self_contained_and_reference_per_tool_skills(): + text_stub = (PROMPT_ROOT / "agent.system.tool.text_editor_remote.md").read_text(encoding="utf-8") + exec_stub = (PROMPT_ROOT / "agent.system.tool.code_execution_remote.md").read_text(encoding="utf-8") + computer_skill = (PROJECT_ROOT / "skills" / "computer-use-remote" / "SKILL.md").read_text(encoding="utf-8") - assert "Load `text-editor-remote`" in text_stub - assert "Load `code-execution-remote`" in exec_stub + assert "optionally load skill `text-editor-remote`" in text_stub + assert "optionally load skill `code-execution-remote`" in exec_stub + assert '"tool_name": "text_editor_remote"' in text_stub + assert '"tool_name": "code_execution_remote"' in exec_stub + assert '"tool_name": "computer_use_remote"' in computer_skill + assert "Availability, backend support, and trust mode are checked when the tool runs" in computer_skill + assert "not `code_execution_tool`" in exec_stub + assert "not to" in exec_stub + assert "Docker/server/container execution" in exec_stub assert "a0-cli-remote-workflows" not 
in text_stub assert "a0-cli-remote-workflows" not in exec_stub + assert "a0-cli-remote-workflows" not in computer_skill diff --git a/tests/test_default_prompt_budget.py b/tests/test_default_prompt_budget.py index 4d9ff1cb6..9da4ece4a 100644 --- a/tests/test_default_prompt_budget.py +++ b/tests/test_default_prompt_budget.py @@ -51,19 +51,21 @@ async def test_default_agent0_prompt_budget_and_guardrails(): assert "tool_args` must stay a json object" in system_text assert '"tool_name": "call_subordinate"' in system_text assert '"reset": true' in system_text - assert '"tool_name": "text_editor:read"' in system_text + assert '"tool_name": "text_editor"' in system_text + assert '"action": "read"' in system_text assert '"tool_name": "code_execution_tool"' in system_text assert '"tool_name": "memory_load"' in system_text assert "informative but tight" in system_text - assert "# code_execution_remote tool" not in system_text - assert "# text_editor_remote tool" not in system_text - assert "# computer_use_remote tool" not in system_text + assert '"tool_name": "code_execution_remote"' in system_text + assert '"tool_name": "text_editor_remote"' in system_text + assert '"tool_name": "computer_use_remote"' not in system_text + assert "computer-use-remote" in system_text def test_a0_small_profile_removed_and_prompt_text_generic(): assert not (PROJECT_ROOT / "agents" / "a0_small").exists() assert not (PROJECT_ROOT / "knowledge" / "main" / "a0_small_tool_call_examples.md").exists() - assert (PROJECT_ROOT / "knowledge" / "main" / "tool_call_reference_examples.md").exists() + assert not (PROJECT_ROOT / "knowledge" / "main" / "tool_call_reference_examples.md").exists() for path in _iter_prompt_files(): assert "a0_small" not in path.read_text(encoding="utf-8") diff --git a/tests/test_office_document_store.py b/tests/test_office_document_store.py index d6076224b..f44ee49eb 100644 --- a/tests/test_office_document_store.py +++ b/tests/test_office_document_store.py @@ -312,7 +312,7 @@ def 
test_odf_is_advertised_and_docx_remains_explicit_compatibility(office_state) assert "formats: md odt ods odp docx xlsx pptx" in prompt assert "ODF is first-class for LibreOffice" in prompt assert "DOCX/XLSX/PPTX are compatibility formats" in prompt - assert "`method` is accepted as an alias for action" in prompt + assert "`method` is accepted as an alias for action" not in prompt assert "they do not open a surface automatically" in prompt assert "explicit Download, Open Document, or Desktop edit message actions" in prompt doc = document_store.create_document("document", "Use ODT", "odt", "") diff --git a/tests/test_task_scheduler_timezone.py b/tests/test_task_scheduler_timezone.py new file mode 100644 index 000000000..6cde12dd0 --- /dev/null +++ b/tests/test_task_scheduler_timezone.py @@ -0,0 +1,65 @@ +from datetime import datetime, timezone +from pathlib import Path +import sys +from types import SimpleNamespace + +PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +from helpers import task_scheduler +from helpers.task_scheduler import ScheduledTask, TaskSchedule + + +class FixedDateTime(datetime): + @classmethod + def now(cls, tz=None): + value = cls(2026, 5, 9, 10, 0, tzinfo=timezone.utc) + if tz is None: + return value.replace(tzinfo=None) + return value.astimezone(tz) + + +def test_scheduled_task_next_run_uses_schedule_timezone(monkeypatch): + monkeypatch.setattr(task_scheduler, "datetime", FixedDateTime) + task = ScheduledTask.create( + name="rome morning", + system_prompt="", + prompt="remind me", + schedule=TaskSchedule( + minute="30", + hour="9", + day="10", + month="5", + weekday="*", + timezone="Europe/Rome", + ), + timezone="Europe/Rome", + ) + + assert task.get_next_run() == datetime(2026, 5, 10, 7, 30, tzinfo=timezone.utc) + + +def test_scheduled_task_normalizes_legacy_local_timezone(monkeypatch): + monkeypatch.setattr(task_scheduler, "datetime", FixedDateTime) + 
monkeypatch.setattr( + task_scheduler, + "Localization", + SimpleNamespace(get=lambda: SimpleNamespace(get_timezone=lambda: "Europe/Rome")), + ) + task = ScheduledTask.create( + name="legacy local", + system_prompt="", + prompt="remind me", + schedule=TaskSchedule( + minute="30", + hour="9", + day="10", + month="5", + weekday="*", + timezone="local", + ), + ) + + assert task.schedule.timezone == "Europe/Rome" + assert task.get_next_run() == datetime(2026, 5, 10, 7, 30, tzinfo=timezone.utc) diff --git a/tests/test_text_editor_context_patch.py b/tests/test_text_editor_context_patch.py index 03a47c226..a2f015b56 100644 --- a/tests/test_text_editor_context_patch.py +++ b/tests/test_text_editor_context_patch.py @@ -327,6 +327,11 @@ class _FakeAgent: self.data = {} def read_prompt(self, name: str, **kwargs) -> str: + if name.endswith("read_ok.md"): + return ( + f"{kwargs['path']} read {kwargs['total_lines']} lines\n" + f">>>\n{kwargs['content']}\n<<<" + ) if name.endswith("patch_ok.md"): return ( f"{kwargs['path']} patched {kwargs['edit_count']} edits applied " @@ -341,6 +346,7 @@ class _FakeAgent: def _load_text_editor_tool(monkeypatch: pytest.MonkeyPatch): calls: list[tuple[str, dict | None]] = [] + import helpers tool_stub = types.ModuleType("helpers.tool") tool_stub.Tool = _FakeTool @@ -370,6 +376,9 @@ def _load_text_editor_tool(monkeypatch: pytest.MonkeyPatch): monkeypatch.setitem(sys.modules, "helpers.extension", extension_stub) monkeypatch.setitem(sys.modules, "helpers.plugins", plugins_stub) monkeypatch.setitem(sys.modules, "helpers.runtime", runtime_stub) + monkeypatch.setattr(helpers, "extension", extension_stub, raising=False) + monkeypatch.setattr(helpers, "plugins", plugins_stub, raising=False) + monkeypatch.setattr(helpers, "runtime", runtime_stub, raising=False) sys.modules.pop("plugins._text_editor.tools.text_editor", None) module = importlib.import_module("plugins._text_editor.tools.text_editor") return module, calls @@ -426,6 +435,27 @@ def 
test_text_editor_patch_text_does_not_require_prior_read( assert calls[1][1]["mode"] == "patch_text" +def test_text_editor_execute_accepts_action_alias_for_read( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + module, _calls = _load_text_editor_tool(monkeypatch) + target = tmp_path / "sample.txt" + target.write_text("line-1\nline-2\n", encoding="utf-8") + tool = module.TextEditor( + _FakeAgent(), + "text_editor", + None, + {"action": "read", "path": str(target), "line_from": 1, "line_to": 1}, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "read 2 lines" in response.message + assert "line-1" in response.message + + def test_text_editor_patch_text_rejects_simultaneous_edits( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: diff --git a/tests/test_tool_action_contracts.py b/tests/test_tool_action_contracts.py new file mode 100644 index 000000000..96789e692 --- /dev/null +++ b/tests/test_tool_action_contracts.py @@ -0,0 +1,472 @@ +from __future__ import annotations + +import asyncio +import importlib +import sys +import types +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class _FakeResponse: + message: str + break_loop: bool + additional: dict | None = None + + +class _FakeTool: + def __init__( + self, + agent, + name: str, + method: str | None, + args: dict | None, + message: str, + loop_data=None, + **kwargs, + ) -> None: + self.agent = agent + self.name = name + self.method = method + self.args = args or {} + self.message = message + self.loop_data = loop_data + + +class _FakeAgent: + def __init__(self) -> None: + self.data = {} + self.context = types.SimpleNamespace(id="ctx") + + def read_prompt(self, _name: str, **kwargs) -> str: + return f"deleted {kwargs.get('memory_count', 0)}" + + +@dataclass +class _FakeSkill: + name: str + description: str + path: Path + version: str = "" + tags: list[str] | None = None + + +def _install_tool_stub(monkeypatch) -> None: + tool_stub = 
types.ModuleType("helpers.tool") + tool_stub.Tool = _FakeTool + tool_stub.Response = _FakeResponse + monkeypatch.setitem(sys.modules, "helpers.tool", tool_stub) + + +def _load_skills_tool(monkeypatch, skill_root: Path): + _install_tool_stub(monkeypatch) + + skills_stub = types.ModuleType("helpers.skills") + skills_stub.AGENT_DATA_NAME_LOADED_SKILLS = "loaded_skills" + skills_stub.MAX_ACTIVE_SKILLS = 20 + fake_skill = _FakeSkill( + name="browser-forms", + description="Use for complex browser forms.", + path=skill_root, + tags=[], + ) + skills_stub.list_skills = lambda *args, **kwargs: [fake_skill] + skills_stub.search_skills = lambda *args, **kwargs: [fake_skill] + skills_stub.find_skill = lambda *args, **kwargs: fake_skill + monkeypatch.setitem(sys.modules, "helpers.skills", skills_stub) + + print_style_stub = types.ModuleType("helpers.print_style") + print_style_stub.PrintStyle = lambda *args, **kwargs: types.SimpleNamespace( + print=lambda *a, **k: None + ) + monkeypatch.setitem(sys.modules, "helpers.print_style", print_style_stub) + + sys.modules.pop("tools.skills_tool", None) + return importlib.import_module("tools.skills_tool") + + +def test_skills_tool_accepts_action_alias_for_search(monkeypatch, tmp_path: Path): + module = _load_skills_tool(monkeypatch, tmp_path) + tool = module.SkillsTool( + _FakeAgent(), + "skills_tool", + None, + {"action": "search", "query": "browser forms"}, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "browser-forms" in response.message + + +def test_skills_tool_read_file_action_reads_inside_skill_dir( + monkeypatch, tmp_path: Path +): + skill_root = tmp_path / "browser-forms" + skill_root.mkdir() + (skill_root / "notes.md").write_text("Use labels before typing.\n", encoding="utf-8") + module = _load_skills_tool(monkeypatch, skill_root) + tool = module.SkillsTool( + _FakeAgent(), + "skills_tool", + None, + { + "action": "read_file", + "skill_name": "browser-forms", + "file_path": "notes.md", + }, + 
"", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "Skill file: browser-forms/notes.md" in response.message + assert "Use labels before typing." in response.message + + +def test_memory_forget_tool_imports_plugin_memory_load(monkeypatch): + _install_tool_stub(monkeypatch) + monkeypatch.syspath_prepend(str(Path(__file__).resolve().parents[1]))  # repo root, independent of cwd + + class FakeDb: + def __init__(self) -> None: + self.calls = [] + + async def delete_documents_by_query(self, **kwargs): + self.calls.append(kwargs) + return ["memory-1"] + + fake_db = FakeDb() + + async def get_memory(_agent): + return fake_db + + memory_stub = types.ModuleType("plugins._memory.helpers.memory") + memory_stub.Memory = types.SimpleNamespace(get=get_memory) + monkeypatch.setitem(sys.modules, "plugins._memory.helpers.memory", memory_stub) + + sys.modules.pop("plugins._memory.tools.memory_load", None) + sys.modules.pop("plugins._memory.tools.memory_forget", None) + module = importlib.import_module("plugins._memory.tools.memory_forget") + tool = module.MemoryForget( + _FakeAgent(), + "memory_forget", + None, + { + "query": "codex memory forget token", + "threshold": 0.99, + "filter": "area=='codex_sweep'", + }, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert response.message == "deleted 1" + assert fake_db.calls == [ + { + "query": "codex memory forget token", + "threshold": 0.99, + "filter": "area=='codex_sweep'", + } + ] + + +def test_behaviour_adjustment_normalizes_duplicate_rules(monkeypatch): + _install_tool_stub(monkeypatch) + monkeypatch.syspath_prepend(str(Path(__file__).resolve().parents[1]))  # repo root, independent of cwd + + agent_stub = types.ModuleType("agent") + agent_stub.Agent = object + monkeypatch.setitem(sys.modules, "agent", agent_stub) + + log_stub = types.ModuleType("helpers.log") + log_stub.LogItem = object + monkeypatch.setitem(sys.modules, "helpers.log", log_stub) + + memory_stub = types.ModuleType("plugins._memory.helpers.memory") + memory_stub.get_memory_subdir_abs = lambda agent: "/tmp" + 
monkeypatch.setitem(sys.modules, "plugins._memory.helpers.memory", memory_stub) + + sys.modules.pop("plugins._memory.tools.behaviour_adjustment", None) + module = importlib.import_module("plugins._memory.tools.behaviour_adjustment") + + rules = module.normalize_ruleset( + "## Behavioral rules\n" + "* Favor Linux commands.\n" + "* Token rule.## Behavioral rules\n" + "* Favor Linux commands.\n" + "* Token rule." + ) + + assert rules == "## Behavioral rules\n* Favor Linux commands.\n* Token rule.\n" + + +def test_notify_user_prompt_documents_numeric_priority_values(): + prompt = (Path(__file__).resolve().parents[1] / "prompts/agent.system.tool.notify_user.md").read_text(  # anchored to repo root, not cwd + encoding="utf-8" + ) + + assert "priority values: `20` high urgency, `10` normal urgency" in prompt + + +def test_tool_prompts_prevent_top_level_multi_tool(): + tools_prompt = (Path(__file__).resolve().parents[1] / "prompts/agent.system.tools.md").read_text(encoding="utf-8")  # anchored to repo root, not cwd + communication_prompt = (Path(__file__).resolve().parents[1] / "prompts/agent.system.main.communication.md").read_text( + encoding="utf-8" + ) + browser_prompt = (Path(__file__).resolve().parents[1] / "plugins/_browser/prompts/agent.system.tool.browser.md").read_text( + encoding="utf-8" + ) + + assert "There is no top-level `multi` or batch tool" in tools_prompt + assert "never an action name such as `read`, `write`, `terminal`, or `multi`" in communication_prompt + assert 'Never use `tool_name: "multi"`' in browser_prompt + + +def _load_scheduler_tool(monkeypatch): + _install_tool_stub(monkeypatch) + + scheduler_stub = types.ModuleType("helpers.task_scheduler") + scheduler_stub.TaskScheduler = object + scheduler_stub.ScheduledTask = type("ScheduledTask", (), {}) + scheduler_stub.AdHocTask = type("AdHocTask", (), {}) + scheduler_stub.PlannedTask = type("PlannedTask", (), {}) + scheduler_stub.serialize_task = lambda task: {} + scheduler_stub.parse_datetime = lambda value: None + scheduler_stub.parse_task_plan = lambda value: None + scheduler_stub.serialize_datetime = lambda value: value + scheduler_stub.TaskState = types.SimpleNamespace( + IDLE="idle", + 
RUNNING="running", + ) + scheduler_stub.TaskSchedule = type("TaskSchedule", (), {}) + scheduler_stub.TaskPlan = type("TaskPlan", (), {}) + monkeypatch.setitem(sys.modules, "helpers.task_scheduler", scheduler_stub) + + agent_stub = types.ModuleType("agent") + agent_stub.AgentContext = types.SimpleNamespace( + get=lambda *args, **kwargs: None, + remove=lambda *args, **kwargs: None, + ) + monkeypatch.setitem(sys.modules, "agent", agent_stub) + + persist_chat_stub = types.ModuleType("helpers.persist_chat") + persist_chat_stub.remove_chat = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "helpers.persist_chat", persist_chat_stub) + + projects_stub = types.ModuleType("helpers.projects") + projects_stub.get_context_project_name = lambda context: "" + projects_stub.load_basic_project_data = lambda project: {} + monkeypatch.setitem(sys.modules, "helpers.projects", projects_stub) + + sys.modules.pop("tools.scheduler", None) + return importlib.import_module("tools.scheduler") + + +def test_scheduler_accepts_action_alias(monkeypatch): + module = _load_scheduler_tool(monkeypatch) + tool = module.SchedulerTool( + _FakeAgent(), + "scheduler", + None, + {"action": "list_tasks"}, + "", + None, + ) + + async def list_tasks(**kwargs): + return module.Response("listed", False) + + tool.list_tasks = list_tasks + + response = asyncio.run(tool.execute(**tool.args)) + + assert response.message == "listed" + + +def test_scheduler_requires_action_field(monkeypatch): + module = _load_scheduler_tool(monkeypatch) + tool = module.SchedulerTool( + _FakeAgent(), + "scheduler", + "list_tasks", + {}, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "Unknown scheduler action" in response.message + + +def test_scheduler_create_defaults_to_dedicated_context(monkeypatch): + module = _load_scheduler_tool(monkeypatch) + + class FakeTaskSchedule: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def to_crontab(self): + return 
f"{self.minute} {self.hour} {self.day} {self.month} {self.weekday}" + + class FakeScheduledTask: + @classmethod + def create(cls, **kwargs): + task = cls() + task.uuid = "task-1" + task.context_id = kwargs.get("context_id") + task.schedule = kwargs.get("schedule") + return task + + class FakeScheduler: + def __init__(self): + self.added = None + + async def add_task(self, task): + self.added = task + + fake_scheduler = FakeScheduler() + module.TaskSchedule = FakeTaskSchedule + module.ScheduledTask = FakeScheduledTask + module.TaskScheduler = types.SimpleNamespace(get=lambda: fake_scheduler) + tool = module.SchedulerTool( + _FakeAgent(), + "scheduler", + None, + { + "action": "create_scheduled_task", + "name": "check stuff", + "prompt": "tell me if anything changed", + "schedule": {"minute": "0", "hour": "9", "day": "*", "month": "*", "weekday": "*"}, + }, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "created" in response.message + assert fake_scheduler.added.context_id is None + + +def test_scheduler_local_timezone_alias_uses_current_user_timezone(monkeypatch): + module = _load_scheduler_tool(monkeypatch) + + class FakeTaskSchedule: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + module.TaskSchedule = FakeTaskSchedule + module.Localization = types.SimpleNamespace( + get=lambda: types.SimpleNamespace(get_timezone=lambda: "Europe/Rome") + ) + + assert module._schedule_timezone({"schedule": {"timezone": "local"}}) == "Europe/Rome" + schedule = module._task_schedule_from_input( + {"minute": "30", "hour": "9", "day": "*", "month": "*", "weekday": "*", "timezone": "current"} + ) + + assert schedule.timezone == "Europe/Rome" + + +def test_scheduler_invalid_timezone_returns_repairable_message(monkeypatch): + module = _load_scheduler_tool(monkeypatch) + tool = module.SchedulerTool( + _FakeAgent(), + "scheduler", + None, + { + "action": "create_scheduled_task", + "name": "bad timezone", + "prompt": "tell me something", 
+ "schedule": { + "minute": "0", + "hour": "9", + "day": "*", + "month": "*", + "weekday": "*", + "timezone": "Mars/Base", + }, + }, + "", + None, + ) + + response = asyncio.run(tool.execute(**tool.args)) + + assert "Invalid timezone: Mars/Base" in response.message + + +def test_scheduler_prompt_includes_update_timezone_and_dedicated_context(): + project_root = Path(__file__).resolve().parents[1] + text = ( + project_root / "prompts/agent.system.tool.scheduler.md" + ).read_text(encoding="utf-8") + + assert "update_task" in text + assert "timezone" in text + assert "IANA" in text + assert "dedicated context" in text + + +def test_skills_prompt_renders_catalog_placeholder(): + project_root = Path(__file__).resolve().parents[1] + text = (project_root / "prompts/agent.system.skills.md").read_text( + encoding="utf-8" + ) + + assert "{{skills}}" in text + + +def test_corrected_tool_prompts_only_teach_action_contract(): + project_root = Path(__file__).resolve().parents[1] + prompt_paths = [ + project_root / "plugins/_text_editor/prompts/agent.system.tool.text_editor.md", + project_root / "prompts/agent.system.tool.skills.md", + project_root / "prompts/agent.system.tool.scheduler.md", + project_root / "plugins/_a0_connector/prompts/agent.system.tool.text_editor_remote.md", + project_root / "plugins/_office/prompts/agent.system.tool.document_artifact.md", + project_root / "plugins/_office/skills/document-artifacts/SKILL.md", + project_root / "plugins/_office/skills/markdown-documents/SKILL.md", + project_root / "plugins/_office/skills/writer-documents/SKILL.md", + project_root / "plugins/_office/skills/calc-spreadsheets/SKILL.md", + project_root / "plugins/_office/skills/impress-presentations/SKILL.md", + ] + forbidden = ( + "text_editor:", + "skills_tool:", + "scheduler:", + "document_artifact:", + "`method`", + "`op`", + "`operation`", + "alias", + ) + + for path in prompt_paths: + text = path.read_text(encoding="utf-8") + assert "action" in text + for token in forbidden: 
+ assert token not in text + + +def test_computer_use_remote_is_skill_gated(): + project_root = Path(__file__).resolve().parents[1] + prompt_path = ( + project_root + / "plugins/_a0_connector/prompts/agent.system.tool.computer_use_remote.md" + ) + skill_text = ( + project_root / "skills/computer-use-remote/SKILL.md" + ).read_text(encoding="utf-8") + + assert not prompt_path.exists() + assert '"tool_name": "computer_use_remote"' in skill_text + assert "Beta local desktop control" in skill_text diff --git a/tests/test_tool_request_normalization.py b/tests/test_tool_request_normalization.py index 1aa54f11b..81472c561 100644 --- a/tests/test_tool_request_normalization.py +++ b/tests/test_tool_request_normalization.py @@ -12,6 +12,13 @@ if str(PROJECT_ROOT) not in sys.path: from helpers.extract_tools import normalize_tool_request +def test_normalize_tool_request_accepts_canonical_keys() -> None: + assert normalize_tool_request({"tool_name": "response", "tool_args": {"text": "ok"}}) == ( + "response", + {"text": "ok"}, + ) + + def test_normalize_tool_request_accepts_fallback_keys() -> None: assert normalize_tool_request({"tool": "response", "args": {"text": "ok"}}) == ( "response", @@ -31,6 +38,30 @@ def test_normalize_tool_request_uses_fallback_when_canonical_args_are_invalid() ) == ("response", {"text": "ok"}) +def test_normalize_tool_request_translates_method_suffix_to_action() -> None: + assert normalize_tool_request( + {"tool_name": "text_editor:read", "tool_args": {"path": "README.md"}} + ) == ("text_editor", {"path": "README.md", "action": "read"}) + + +def test_normalize_tool_request_translates_method_arg_to_action() -> None: + assert normalize_tool_request( + {"tool_name": "scheduler", "tool_args": {"method": "list_tasks"}} + ) == ("scheduler", {"method": "list_tasks", "action": "list_tasks"}) + + +def test_normalize_tool_request_preserves_explicit_action_over_method() -> None: + assert normalize_tool_request( + { + "tool_name": "scheduler:delete_task", + 
"tool_args": {"method": "list_tasks", "action": "show_task"}, + } + ) == ( + "scheduler", + {"method": "list_tasks", "action": "show_task"}, + ) + + def test_normalize_tool_request_rejects_missing_args() -> None: with pytest.raises(ValueError, match="tool_args"): normalize_tool_request({"tool_name": "response"}) diff --git a/tools/scheduler.py b/tools/scheduler.py index 9181bbe1f..37d09f8d1 100644 --- a/tools/scheduler.py +++ b/tools/scheduler.py @@ -3,41 +3,153 @@ from datetime import datetime import json import random import re +from typing import Any +import pytz from helpers.tool import Tool, Response from helpers.task_scheduler import ( TaskScheduler, ScheduledTask, AdHocTask, PlannedTask, - serialize_task, TaskState, TaskSchedule, TaskPlan, parse_datetime, serialize_datetime + serialize_task, TaskState, TaskSchedule, TaskPlan, parse_datetime, + parse_task_plan, serialize_datetime ) from agent import AgentContext from helpers import persist_chat +from helpers.localization import Localization from helpers.projects import get_context_project_name, load_basic_project_data DEFAULT_WAIT_TIMEOUT = 300 +LOCAL_TIMEZONE_ALIASES = {"local", "user", "default", "current", "current_timezone"} + + +def _current_action(tool: Tool, kwargs: dict) -> str: + return ( + str( + kwargs.get("action") + or tool.args.get("action") + or "" + ) + .strip() + .lower() + .replace("-", "_") + ) + + +def _normalize_timezone(value: Any) -> str | None: + if value is None: + return None + timezone_name = str(value).strip() + if not timezone_name: + return None + if timezone_name.lower() in LOCAL_TIMEZONE_ALIASES: + return Localization.get().get_timezone() + try: + pytz.timezone(timezone_name) + except pytz.exceptions.UnknownTimeZoneError as exc: + raise ValueError( + f"Invalid timezone: {timezone_name}. Use an IANA timezone name such as Europe/Rome, " + "or omit timezone to use the current user timezone." 
+ ) from exc + return timezone_name + + +def _schedule_timezone(kwargs: dict) -> str | None: + schedule = kwargs.get("schedule") + if isinstance(schedule, dict) and schedule.get("timezone"): + return _normalize_timezone(schedule["timezone"]) + if kwargs.get("timezone"): + return _normalize_timezone(kwargs["timezone"]) + return None + + +def _task_schedule_from_input(schedule: Any, timezone: str | None = None) -> TaskSchedule: + if isinstance(schedule, str): + parts = schedule.split() + schedule_data: dict[str, Any] = { + "minute": parts[0] if len(parts) > 0 else "*", + "hour": parts[1] if len(parts) > 1 else "*", + "day": parts[2] if len(parts) > 2 else "*", + "month": parts[3] if len(parts) > 3 else "*", + "weekday": parts[4] if len(parts) > 4 else "*", + } + elif isinstance(schedule, dict): + schedule_data = dict(schedule) + else: + schedule_data = {} + + task_schedule_kwargs = { + "minute": str(schedule_data.get("minute", "*")), + "hour": str(schedule_data.get("hour", "*")), + "day": str(schedule_data.get("day", "*")), + "month": str(schedule_data.get("month", "*")), + "weekday": str(schedule_data.get("weekday", "*")), + } + normalized_timezone = _normalize_timezone(timezone if timezone is not None else schedule_data.get("timezone")) + if normalized_timezone: + task_schedule_kwargs["timezone"] = normalized_timezone + + return TaskSchedule(**task_schedule_kwargs) + + +def _validate_task_schedule(task_schedule: TaskSchedule) -> str: + # Validate cron expression, agent might hallucinate + cron_regex = r"^((((\d+,)+\d+|(\d+(\/|-|#)\d+)|\d+L?|\*(\/\d+)?|L(-\d+)?|\?|[A-Z]{3}(-[A-Z]{3})?) 
?){5,7})$" + crontab = task_schedule.to_crontab() + return "" if re.match(cron_regex, crontab) else f"Invalid cron expression: {crontab}" + + +def _task_plan_from_input(plan: Any) -> tuple[TaskPlan | None, str]: + if isinstance(plan, dict): + try: + return parse_task_plan(plan), "" + except Exception as exc: + return None, f"Invalid plan: {exc}" + + if not isinstance(plan, list): + return None, "Plan must be an array of ISO datetimes." + + todo: list[datetime] = [] + for item in plan: + dt = parse_datetime(str(item)) + if dt is None: + return None, f"Invalid datetime: {item}" + todo.append(dt) + + return TaskPlan.create(todo=todo, in_progress=None, done=[]), "" class SchedulerTool(Tool): async def execute(self, **kwargs): - if self.method == "list_tasks": + action = _current_action(self, kwargs) + if action == "list_tasks": return await self.list_tasks(**kwargs) - elif self.method == "find_task_by_name": + elif action == "find_task_by_name": return await self.find_task_by_name(**kwargs) - elif self.method == "show_task": + elif action == "show_task": return await self.show_task(**kwargs) - elif self.method == "run_task": + elif action == "run_task": return await self.run_task(**kwargs) - elif self.method == "delete_task": + elif action == "delete_task": return await self.delete_task(**kwargs) - elif self.method == "create_scheduled_task": + elif action == "update_task": + return await self.update_task(**kwargs) + elif action == "create_scheduled_task": return await self.create_scheduled_task(**kwargs) - elif self.method == "create_adhoc_task": + elif action == "create_adhoc_task": return await self.create_adhoc_task(**kwargs) - elif self.method == "create_planned_task": + elif action == "create_planned_task": return await self.create_planned_task(**kwargs) - elif self.method == "wait_for_task": + elif action == "wait_for_task": return await self.wait_for_task(**kwargs) else: - return Response(message=f"Unknown method '{self.name}:{self.method}'", break_loop=False) 
+ return Response( + message=( + f"Unknown scheduler action '{action or self.method or ''}'. " + "Supported actions: list_tasks, find_task_by_name, show_task, " + "run_task, delete_task, update_task, create_scheduled_task, " + "create_adhoc_task, create_planned_task, wait_for_task." + ), + break_loop=False, + ) def _resolve_project_metadata(self) -> tuple[str | None, str | None]: context = self.agent.context @@ -136,6 +248,59 @@ class SchedulerTool(Tool): else: return Response(message=f"Task failed to delete: {task_uuid}", break_loop=False) + async def update_task(self, **kwargs) -> Response: + task_uuid: str = kwargs.get("uuid", "") + if not task_uuid: + return Response(message="Task UUID is required", break_loop=False) + + scheduler = TaskScheduler.get() + await scheduler.reload() + task: ScheduledTask | AdHocTask | PlannedTask | None = scheduler.get_task_by_uuid(task_uuid) + if not task: + return Response(message=f"Task not found: {task_uuid}", break_loop=False) + + update_params: dict[str, Any] = {} + for field in ("name", "system_prompt", "prompt", "attachments"): + if field in kwargs: + update_params[field] = kwargs[field] + + if "state" in kwargs: + update_params["state"] = TaskState(kwargs.get("state", TaskState.IDLE)) + + if "dedicated_context" in kwargs: + dedicated_context = bool(kwargs.get("dedicated_context")) + update_params["context_id"] = task.uuid if dedicated_context else self.agent.context.id + + try: + timezone = _schedule_timezone(kwargs) + if isinstance(task, ScheduledTask) and ("schedule" in kwargs or timezone): + task_schedule = _task_schedule_from_input( + kwargs.get("schedule") or serialize_task(task).get("schedule") or {}, + timezone=timezone, + ) + if err := _validate_task_schedule(task_schedule): + return Response(message=err, break_loop=False) + update_params["schedule"] = task_schedule + except ValueError as exc: + return Response(message=str(exc), break_loop=False) + + if isinstance(task, ScheduledTask) and "schedule" in 
update_params: + task_schedule = update_params["schedule"] + if err := _validate_task_schedule(task_schedule): + return Response(message=err, break_loop=False) + elif isinstance(task, PlannedTask) and "plan" in kwargs: + task_plan, err = _task_plan_from_input(kwargs.get("plan") or []) + if err: + return Response(message=err, break_loop=False) + update_params["plan"] = task_plan + + updated_task = await scheduler.update_task(task_uuid, **update_params) + await scheduler.save() + if not updated_task: + return Response(message=f"Task failed to update: {task_uuid}", break_loop=False) + + return Response(message=json.dumps(serialize_task(updated_task), indent=4), break_loop=False) + async def create_scheduled_task(self, **kwargs) -> Response: # "name": "XXX", # "system_prompt": "You are a software developer", @@ -153,20 +318,15 @@ class SchedulerTool(Tool): prompt: str = kwargs.get("prompt", "") attachments: list[str] = kwargs.get("attachments", []) schedule: dict[str, str] = kwargs.get("schedule", {}) - dedicated_context: bool = kwargs.get("dedicated_context", False) + dedicated_context: bool = kwargs.get("dedicated_context", True) - task_schedule = TaskSchedule( - minute=schedule.get("minute", "*"), - hour=schedule.get("hour", "*"), - day=schedule.get("day", "*"), - month=schedule.get("month", "*"), - weekday=schedule.get("weekday", "*"), - ) + try: + task_schedule = _task_schedule_from_input(schedule, timezone=_schedule_timezone(kwargs)) + except ValueError as exc: + return Response(message=str(exc), break_loop=False) - # Validate cron expression, agent might hallucinate - cron_regex = "^((((\d+,)+\d+|(\d+(\/|-|#)\d+)|\d+L?|\*(\/\d+)?|L(-\d+)?|\?|[A-Z]{3}(-[A-Z]{3})?) 
?){5,7})$" - if not re.match(cron_regex, task_schedule.to_crontab()): - return Response(message="Invalid cron expression: " + task_schedule.to_crontab(), break_loop=False) + if err := _validate_task_schedule(task_schedule): + return Response(message=err, break_loop=False) project_slug, project_color = self._resolve_project_metadata() @@ -176,6 +336,7 @@ class SchedulerTool(Tool): prompt=prompt, attachments=attachments, schedule=task_schedule, + timezone=getattr(task_schedule, "timezone", None), context_id=None if dedicated_context else self.agent.context.id, project_name=project_slug, project_color=project_color, @@ -189,7 +350,7 @@ class SchedulerTool(Tool): prompt: str = kwargs.get("prompt", "") attachments: list[str] = kwargs.get("attachments", []) token: str = str(random.randint(1000000000000000000, 9999999999999999999)) - dedicated_context: bool = kwargs.get("dedicated_context", False) + dedicated_context: bool = kwargs.get("dedicated_context", True) project_slug, project_color = self._resolve_project_metadata() @@ -212,22 +373,12 @@ class SchedulerTool(Tool): prompt: str = kwargs.get("prompt", "") attachments: list[str] = kwargs.get("attachments", []) plan: list[str] = kwargs.get("plan", []) - dedicated_context: bool = kwargs.get("dedicated_context", False) + dedicated_context: bool = kwargs.get("dedicated_context", True) # Convert plan to list of datetimes in UTC - todo: list[datetime] = [] - for item in plan: - dt = parse_datetime(item) - if dt is None: - return Response(message=f"Invalid datetime: {item}", break_loop=False) - todo.append(dt) - - # Create task plan with todo list - task_plan = TaskPlan.create( - todo=todo, - in_progress=None, - done=[] - ) + task_plan, err = _task_plan_from_input(plan) + if err: + return Response(message=err, break_loop=False) project_slug, project_color = self._resolve_project_metadata() diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 8cb2f23bf..3c566fa67 100644 --- a/tools/skills_tool.py +++ 
b/tools/skills_tool.py @@ -1,5 +1,6 @@ from __future__ import annotations +from pathlib import Path from typing import List from helpers.tool import Tool, Response @@ -14,7 +15,7 @@ class SkillsTool(Tool): """ Manage and use SKILL.md-based Skills (Anthropic open standard). - Methods (tool_args.method): + Actions (tool_args.action): - list - search (query) - load (skill_name) @@ -23,11 +24,15 @@ class SkillsTool(Tool): Script execution is handled by code_execution_tool directly. """ - def _current_method(self) -> str: + def _current_action(self) -> str: return ( - (self.args.get("method") or self.method or "") + str( + self.args.get("action") + or "" + ) .strip() .lower() + .replace("-", "_") ) @staticmethod @@ -40,7 +45,7 @@ class SkillsTool(Tool): def get_log_object(self): import uuid - if self._current_method() == "load": + if self._current_action() == "load": skill_name = self._normalize_skill_name( str(self.args.get("skill_name") or "") ) @@ -60,14 +65,14 @@ class SkillsTool(Tool): return super().get_log_object() async def before_execution(self, **kwargs): - if self._current_method() != "load": + if self._current_action() != "load": await super().before_execution(**kwargs) return skill_name = self._normalize_skill_name( str(kwargs.get("skill_name") or self.args.get("skill_name") or "") ) - label = f"{self.name}:{self._current_method()}" + label = f"{self.name} action {self._current_action()}" if skill_name: PrintStyle( font_color="#1B4F72", @@ -85,32 +90,45 @@ class SkillsTool(Tool): self.log = self.get_log_object() async def execute(self, **kwargs) -> Response: - method = ( - (kwargs.get("method") or self.args.get("method") or self.method or "") + action = ( + str( + kwargs.get("action") + or self.args.get("action") + or "" + ) .strip() .lower() + .replace("-", "_") ) try: - if method == "list": + if action == "list": return Response(message=self._list(), break_loop=False) - if method == "search": - query = str(kwargs.get("query") or "").strip() + if action == 
"search": + query = str(kwargs.get("query") or self.args.get("query") or "").strip() return Response(message=self._search(query), break_loop=False) - if method == "load": + if action == "load": skill_name = self._normalize_skill_name( - str(kwargs.get("skill_name") or "") + str(kwargs.get("skill_name") or self.args.get("skill_name") or "") ) return Response(message=self._load(skill_name), break_loop=False) - # if method == "read_file": - # skill_name = str(kwargs.get("skill_name") or "").strip() - # file_path = str(kwargs.get("file_path") or "").strip() - # return Response( - # message=self._read_file(skill_name, file_path), break_loop=False - # ) + if action == "read_file": + skill_name = self._normalize_skill_name( + str(kwargs.get("skill_name") or self.args.get("skill_name") or "") + ) + file_path = str( + kwargs.get("file_path") or self.args.get("file_path") or "" + ).strip() + return Response( + message=self._read_file(skill_name, file_path), + break_loop=False, + ) return Response( - message="Error: missing/invalid 'method'. Supported: list, search, load.", + message=( + "Error: missing/invalid 'action'. Supported actions: " + "list, search, load, read_file." + ), break_loop=False, ) except ( @@ -139,12 +157,12 @@ class SkillsTool(Tool): desc = desc[:200].rstrip() + "…" lines.append(f"- {s.name}{ver}{tags}: {desc}") lines.append("") - lines.append("Tip: use skills_tool method=search or method=load for details.") + lines.append("Tip: use skills_tool action=search or action=load for details.") return "\n".join(lines) def _search(self, query: str) -> str: if not query: - return "Error: 'query' is required for method=search." + return "Error: 'query' is required for action=search." results = skills_helper.search_skills( query, @@ -163,7 +181,7 @@ class SkillsTool(Tool): lines.append(f"- {s.name}: {desc}") lines.append("") lines.append( - "Tip: use skills_tool method=load skill_name= to load full instructions." 
+ "Tip: use skills_tool action=load skill_name= to load full instructions." ) return "\n".join(lines) @@ -171,7 +189,7 @@ class SkillsTool(Tool): skill_name = self._normalize_skill_name(skill_name) if not skill_name: - return "Error: 'skill_name' is required for method=load." + return "Error: 'skill_name' is required for action=load." # Verify skill exists skill = skills_helper.find_skill( @@ -180,7 +198,7 @@ class SkillsTool(Tool): agent=self.agent, ) if not skill: - return f"Error: skill not found: {skill_name!r}. Try skills_tool method=list or method=search." + return f"Error: skill not found: {skill_name!r}. Try skills_tool action=list or action=search." # Store skill name for fresh loading each turn if not self.agent.data.get(DATA_NAME_LOADED_SKILLS): @@ -193,6 +211,43 @@ class SkillsTool(Tool): return f"Loaded skill '{skill.name}' into EXTRAS." + def _read_file(self, skill_name: str, file_path: str) -> str: + if not skill_name: + return "Error: 'skill_name' is required for action=read_file." + if not file_path: + return "Error: 'file_path' is required for action=read_file." + + skill = skills_helper.find_skill( + skill_name, + include_content=False, + agent=self.agent, + ) + if not skill: + return f"Error: skill not found: {skill_name!r}." + + skill_root = skill.path.resolve() + target = Path(file_path) + if not target.is_absolute(): + target = skill_root / target + + try: + resolved = target.resolve() + resolved.relative_to(skill_root) + except Exception: + return "Error: file_path must stay inside the skill directory." + + if not resolved.is_file(): + return f"Error: skill file not found: {file_path!r}." + + content = resolved.read_text(encoding="utf-8", errors="replace") + if len(content) > 24000: + content = content[:24000].rstrip() + "\n\n[truncated]" + + return ( + f"Skill file: {skill.name}/{resolved.relative_to(skill_root)}\n\n" + f"{content}" + ) + def max_loaded_skills() -> int: return skills_helper.MAX_ACTIVE_SKILLS