diff --git a/packages/core/src/services/backgroundShellRegistry.test.ts b/packages/core/src/services/backgroundShellRegistry.test.ts index 193492d27..e6c7b85c2 100644 --- a/packages/core/src/services/backgroundShellRegistry.test.ts +++ b/packages/core/src/services/backgroundShellRegistry.test.ts @@ -101,6 +101,38 @@ describe('BackgroundShellRegistry', () => { }); }); + describe('requestCancel', () => { + it('aborts the signal but leaves status running and endTime undefined', () => { + const reg = new BackgroundShellRegistry(); + const ac = new AbortController(); + reg.register(makeEntry({ shellId: 'a', abortController: ac })); + + reg.requestCancel('a'); + + const e = reg.get('a')!; + expect(e.status).toBe('running'); + expect(e.endTime).toBeUndefined(); + expect(ac.signal.aborted).toBe(true); + }); + + it('is a no-op on a terminal entry', () => { + const reg = new BackgroundShellRegistry(); + const ac = new AbortController(); + reg.register(makeEntry({ shellId: 'a', abortController: ac })); + reg.complete('a', 0, 1500); + + reg.requestCancel('a'); + + expect(reg.get('a')!.status).toBe('completed'); + expect(ac.signal.aborted).toBe(false); + }); + + it('is a no-op for unknown id', () => { + const reg = new BackgroundShellRegistry(); + expect(() => reg.requestCancel('missing')).not.toThrow(); + }); + }); + describe('abortAll', () => { it('cancels every running entry and leaves terminal entries alone', () => { const reg = new BackgroundShellRegistry(); diff --git a/packages/core/src/services/backgroundShellRegistry.ts b/packages/core/src/services/backgroundShellRegistry.ts index b8b2e5951..1b74c7be1 100644 --- a/packages/core/src/services/backgroundShellRegistry.ts +++ b/packages/core/src/services/backgroundShellRegistry.ts @@ -92,6 +92,29 @@ export class BackgroundShellRegistry { entry.abortController.abort(); } + /** + * Request cancellation without marking the entry terminal. + * + * Triggers the entry's AbortController so the spawn handler can tear the + * process down, but leaves `status='running'` until the settle path + * observes the abort and records the real exit moment + outcome via + * `complete()` / `fail()` / `cancel()`. This keeps the registry honest: + * a cancelled shell only shows its terminal `endTime` once the process + * has actually drained, and a cancel-vs-exit race can't permanently hide + * a real completed/failed result. + * + * Used by the `task_stop` tool path; the immediate-mark `cancel()` above + * is reserved for `abortAll()` / shutdown, where the CLI process is + * tearing down anyway and there is no settle handler to wait for. + * + * Idempotent: no-op on entries that aren't `running`. + */ + requestCancel(shellId: string): void { + const entry = this.entries.get(shellId); + if (!entry || entry.status !== 'running') return; + entry.abortController.abort(); + } + /** * Cancel every still-running entry. Called on session/Config shutdown so * background shells don't outlive the CLI process and leak orphaned diff --git a/packages/core/src/tools/task-stop.test.ts b/packages/core/src/tools/task-stop.test.ts index d5df835a3..acd0ca140 100644 --- a/packages/core/src/tools/task-stop.test.ts +++ b/packages/core/src/tools/task-stop.test.ts @@ -7,18 +7,22 @@ import { describe, it, expect, beforeEach } from 'vitest'; import { TaskStopTool } from './task-stop.js'; import { BackgroundTaskRegistry } from '../agents/background-tasks.js'; +import { BackgroundShellRegistry } from '../services/backgroundShellRegistry.js'; import type { Config } from '../config/config.js'; import { ToolErrorType } from './tool-error.js'; describe('TaskStopTool', () => { let registry: BackgroundTaskRegistry; + let shellRegistry: BackgroundShellRegistry; let config: Config; let tool: TaskStopTool; beforeEach(() => { registry = new BackgroundTaskRegistry(); + shellRegistry = new BackgroundShellRegistry(); config = { getBackgroundTaskRegistry: () => registry, + getBackgroundShellRegistry: () => shellRegistry, } as unknown as Config; tool = new TaskStopTool(config); }); @@ -91,4 +95,91 @@ describe('TaskStopTool', () => { expect(result.llmContent).toContain('Search for auth code'); expect(result.returnDisplay).toContain('Search for auth code'); }); + + describe('background shell support', () => { + it('cancels a running background shell', async () => { + const ac = new AbortController(); + shellRegistry.register({ + shellId: 'bg_a1b2c3d4', + command: 'npm run dev', + cwd: '/work', + status: 'running', + startTime: Date.now(), + outputPath: '/tmp/bg-out/shell-bg_a1b2c3d4.output', + abortController: ac, + }); + + const result = await tool.validateBuildAndExecute( + { task_id: 'bg_a1b2c3d4' }, + new AbortController().signal, + ); + + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain('background shell "bg_a1b2c3d4"'); + expect(result.llmContent).toContain('npm run dev'); + expect(result.llmContent).toContain('/tmp/bg-out/shell-bg_a1b2c3d4.output'); + // task_stop only requests cancellation — the entry stays `running` + // until the spawn handler observes the abort and settles the entry + // with the real exit moment. Without this guarantee, /tasks would + // report a terminal-but-still-draining shell. + expect(shellRegistry.get('bg_a1b2c3d4')!.status).toBe('running'); + expect(shellRegistry.get('bg_a1b2c3d4')!.endTime).toBeUndefined(); + expect(ac.signal.aborted).toBe(true); + }); + + it('returns NOT_RUNNING when the shell already exited', async () => { + shellRegistry.register({ + shellId: 'bg_done', + command: 'true', + cwd: '/work', + status: 'running', + startTime: Date.now() - 1000, + outputPath: '/tmp/bg-out/shell-bg_done.output', + abortController: new AbortController(), + }); + shellRegistry.complete('bg_done', 0, Date.now()); + + const result = await tool.validateBuildAndExecute( + { task_id: 'bg_done' }, + new AbortController().signal, + ); + + expect(result.error?.type).toBe(ToolErrorType.TASK_STOP_NOT_RUNNING); + expect(result.llmContent).toContain('Background shell "bg_done"'); + expect(result.llmContent).toContain('completed'); + }); + + it('prefers an agent over a shell when both have the same id (defensive)', async () => { + // IDs cannot collide in practice (different naming schemes), but the + // tool's lookup order should still be deterministic if they ever do. + const agentAc = new AbortController(); + const shellAc = new AbortController(); + registry.register({ + agentId: 'shared-id', + description: 'agent', + status: 'running', + startTime: Date.now(), + abortController: agentAc, + }); + shellRegistry.register({ + shellId: 'shared-id', + command: 'shell-cmd', + cwd: '/work', + status: 'running', + startTime: Date.now(), + outputPath: '/tmp/x.out', + abortController: shellAc, + }); + + const result = await tool.validateBuildAndExecute( + { task_id: 'shared-id' }, + new AbortController().signal, + ); + + expect(result.llmContent).toContain('background agent'); + expect(agentAc.signal.aborted).toBe(true); + expect(shellAc.signal.aborted).toBe(false); + expect(shellRegistry.get('shared-id')!.status).toBe('running'); + }); + }); }); diff --git a/packages/core/src/tools/task-stop.ts b/packages/core/src/tools/task-stop.ts index 655b70450..c67ed2153 100644 --- a/packages/core/src/tools/task-stop.ts +++ b/packages/core/src/tools/task-stop.ts @@ -40,45 +40,83 @@ class TaskStopInvocation extends BaseToolInvocation< } async execute(_signal: AbortSignal): Promise { - const registry = this.config.getBackgroundTaskRegistry(); - const entry = registry.get(this.params.task_id); + const taskId = this.params.task_id; - if (!entry) { + // Subagent registry first (Phase A control plane). Agent IDs follow the + // pattern `-`, so they cannot collide with shell + // IDs (which are `bg_<8 hex chars>` from the background shell pool). + const agentRegistry = this.config.getBackgroundTaskRegistry(); + const agentEntry = agentRegistry.get(taskId); + if (agentEntry) { + if (agentEntry.status !== 'running') { + return notRunningError('agent', taskId, agentEntry.status); + } + agentRegistry.cancel(taskId); + // The terminal task-notification is emitted by the agent's own handler + // (via registry.complete/fail) rather than cancel(), so the parent + // model still receives the agent's real partial/final result — not just + // a bare "cancelled" message — once the reasoning loop unwinds. + const desc = agentEntry.description; return { - llmContent: `Error: No background task found with ID "${this.params.task_id}".`, - returnDisplay: 'Task not found.', - error: { - message: `Task not found: ${this.params.task_id}`, - type: ToolErrorType.TASK_STOP_NOT_FOUND, - }, + llmContent: + `Cancellation requested for background agent "${taskId}". ` + + `A final task-notification carrying the agent's last result will ` + + `follow.\nDescription: ${desc}`, + returnDisplay: `Cancelled: ${desc}`, }; } - if (entry.status !== 'running') { + // Background shell registry (Phase B). Settles asynchronously when the + // child process exits in response to the AbortController; the registry + // entry's terminal state (`cancelled`) and final exit code/output stay + // observable via `/tasks` and the on-disk output file. + const shellRegistry = this.config.getBackgroundShellRegistry(); + const shellEntry = shellRegistry.get(taskId); + if (shellEntry) { + if (shellEntry.status !== 'running') { + return notRunningError('shell', taskId, shellEntry.status); + } + // requestCancel triggers the AbortController only — the registry's + // settle path records the real terminal status + endTime once the + // process actually drains. Calling cancel(id, Date.now()) here would + // mark the entry terminal immediately and lose the real exit info. + shellRegistry.requestCancel(taskId); return { - llmContent: `Error: Background task "${this.params.task_id}" is not running (status: ${entry.status}).`, - returnDisplay: `Task not running (${entry.status}).`, - error: { - message: `Task is ${entry.status}: ${this.params.task_id}`, - type: ToolErrorType.TASK_STOP_NOT_RUNNING, - }, + llmContent: + `Cancellation requested for background shell "${taskId}". ` + + `Final status will be visible via /tasks once the process drains; ` + + `captured output remains at ${shellEntry.outputPath}.\n` + + `Command: ${shellEntry.command}`, + returnDisplay: `Cancelled shell: ${shellEntry.command}`, }; } - registry.cancel(this.params.task_id); - - // The terminal task-notification is emitted by the task's own handler - // (via registry.complete/fail) rather than cancel(), so the parent model - // still receives the task's real partial/final result — not just a bare - // "cancelled" message — once the reasoning loop unwinds. - const desc = entry.description; return { - llmContent: `Cancellation requested for background task "${this.params.task_id}". A final task-notification carrying the task's last result will follow.\nDescription: ${desc}`, - returnDisplay: `Cancelled: ${desc}`, + llmContent: `Error: No background task found with ID "${taskId}".`, + returnDisplay: 'Task not found.', + error: { + message: `Task not found: ${taskId}`, + type: ToolErrorType.TASK_STOP_NOT_FOUND, + }, }; } } +function notRunningError( + kind: 'agent' | 'shell', + taskId: string, + status: string, +): ToolResult { + return { + llmContent: `Error: Background ${kind} "${taskId}" is not running (status: ${status}).`, + returnDisplay: `Task not running (${status}).`, + error: { + message: `${kind} is ${status}: ${taskId}`, + type: ToolErrorType.TASK_STOP_NOT_RUNNING, + }, + }; +} + export class TaskStopTool extends BaseDeclarativeTool< TaskStopParams, ToolResult