feat(core): wire background shells into the task_stop tool (#3687)

* feat(core): wire background shells into the task_stop tool Phase B follow-up #1 from #3634, unblocked by #3471 (control plane) merging in. The model can now cancel a managed background shell with the same `task_stop` tool it uses for subagents — no more falling back to `kill <pid>` via BashTool. Lookup order: subagent registry first (existing behavior), then the background shell registry as a fallback. Agent IDs follow `<subagentName>-<suffix>` and shell IDs follow `bg_<8 hex chars>`, so the two namespaces cannot collide in practice; the order is fixed for determinism (a defensive test pins agent-wins-over-shell). The shell cancel path resolves through the entry's own AbortController (which `BackgroundShellRegistry.cancel` triggers); the child process exit handler then settles the registry to `cancelled` and the on-disk output file is preserved for inspection via `/tasks` or a direct `Read`. This matches Phase B's "registry's own AbortController is the cancellation source of truth" design without needing the in-flight notification framework that subagents use. Tests: 7 task-stop tests (was 4) — added cancel-shell happy path, NOT_RUNNING for already-exited shell, and a defensive agent-takes-precedence-on-id-collision case. * fix(core): defer shell terminal transition until spawn handler settles @doudouOUC noticed that the previous task_stop path called `BackgroundShellRegistry.cancel(id, Date.now())`, which marked the entry `cancelled` immediately. The spawn handler's settle path only records real exit info via cancel/complete/fail when the entry is still `running`, so the cancel-vs-exit race could permanently hide a real completed/failed result and `/tasks` would show a terminal endTime while the process was still draining. Add a `requestCancel(id)` method to `BackgroundShellRegistry` that triggers the entry's AbortController only; status stays `running` until the settle path observes the abort and records the real terminal state. The immediate-mark `cancel(id, endTime)` is reserved for `abortAll()` / shutdown, where the CLI process is tearing down anyway and there is no settle handler to wait for. Tests updated: - `task-stop.test.ts` cancel-shell happy path now asserts the entry stays `running` with `endTime` undefined post-stop, and the abort signal fires (the settle path's contract, not task_stop's, is the one that flips status). - 3 new `requestCancel` tests in `backgroundShellRegistry.test.ts`: running → abort+still-running, terminal entry no-op, unknown id no-op. --------- Co-authored-by: wenshao <wenshao@U-K7F6PQY3-2157.local>
2026-05-05 23:42:03 +00:00 · 2026-04-29 10:10:41 +08:00 · 2026-04-29 10:10:41 +08:00 · 1b9e8ec45d
commit 1b9e8ec45d
parent 2ee014e347
4 changed files with 209 additions and 25 deletions
--- a/packages/core/src/services/backgroundShellRegistry.test.ts
+++ b/packages/core/src/services/backgroundShellRegistry.test.ts
@ -101,6 +101,38 @@ describe('BackgroundShellRegistry', () => {
    });
  });

+  describe('requestCancel', () => {
+    it('aborts the signal but leaves status running and endTime undefined', () => {
+      const reg = new BackgroundShellRegistry();
+      const ac = new AbortController();
+      reg.register(makeEntry({ shellId: 'a', abortController: ac }));
+
+      reg.requestCancel('a');
+
+      const e = reg.get('a')!;
+      expect(e.status).toBe('running');
+      expect(e.endTime).toBeUndefined();
+      expect(ac.signal.aborted).toBe(true);
+    });
+
+    it('is a no-op on a terminal entry', () => {
+      const reg = new BackgroundShellRegistry();
+      const ac = new AbortController();
+      reg.register(makeEntry({ shellId: 'a', abortController: ac }));
+      reg.complete('a', 0, 1500);
+
+      reg.requestCancel('a');
+
+      expect(reg.get('a')!.status).toBe('completed');
+      expect(ac.signal.aborted).toBe(false);
+    });
+
+    it('is a no-op for unknown id', () => {
+      const reg = new BackgroundShellRegistry();
+      expect(() => reg.requestCancel('missing')).not.toThrow();
+    });
+  });
+
  describe('abortAll', () => {
    it('cancels every running entry and leaves terminal entries alone', () => {
      const reg = new BackgroundShellRegistry();
--- a/packages/core/src/services/backgroundShellRegistry.ts
+++ b/packages/core/src/services/backgroundShellRegistry.ts
@ -92,6 +92,29 @@ export class BackgroundShellRegistry {
    entry.abortController.abort();
  }

+  /**
+   * Request cancellation without marking the entry terminal.
+   *
+   * Triggers the entry's AbortController so the spawn handler can tear the
+   * process down, but leaves `status='running'` until the settle path
+   * observes the abort and records the real exit moment + outcome via
+   * `complete()` / `fail()` / `cancel()`. This keeps the registry honest:
+   * a cancelled shell only shows its terminal `endTime` once the process
+   * has actually drained, and a cancel-vs-exit race can't permanently hide
+   * a real completed/failed result.
+   *
+   * Used by the `task_stop` tool path; the immediate-mark `cancel()` above
+   * is reserved for `abortAll()` / shutdown, where the CLI process is
+   * tearing down anyway and there is no settle handler to wait for.
+   *
+   * Idempotent: no-op on entries that aren't `running`.
+   */
+  requestCancel(shellId: string): void {
+    const entry = this.entries.get(shellId);
+    if (!entry || entry.status !== 'running') return;
+    entry.abortController.abort();
+  }
+
  /**
   * Cancel every still-running entry. Called on session/Config shutdown so
   * background shells don't outlive the CLI process and leak orphaned
--- a/packages/core/src/tools/task-stop.test.ts
+++ b/packages/core/src/tools/task-stop.test.ts
@ -7,18 +7,22 @@
 import { describe, it, expect, beforeEach } from 'vitest';
 import { TaskStopTool } from './task-stop.js';
 import { BackgroundTaskRegistry } from '../agents/background-tasks.js';
+import { BackgroundShellRegistry } from '../services/backgroundShellRegistry.js';
 import type { Config } from '../config/config.js';
 import { ToolErrorType } from './tool-error.js';

 describe('TaskStopTool', () => {
  let registry: BackgroundTaskRegistry;
+  let shellRegistry: BackgroundShellRegistry;
  let config: Config;
  let tool: TaskStopTool;

  beforeEach(() => {
    registry = new BackgroundTaskRegistry();
+    shellRegistry = new BackgroundShellRegistry();
    config = {
      getBackgroundTaskRegistry: () => registry,
+      getBackgroundShellRegistry: () => shellRegistry,
    } as unknown as Config;
    tool = new TaskStopTool(config);
  });
@ -91,4 +95,91 @@ describe('TaskStopTool', () => {
    expect(result.llmContent).toContain('Search for auth code');
    expect(result.returnDisplay).toContain('Search for auth code');
  });
+
+  describe('background shell support', () => {
+    it('cancels a running background shell', async () => {
+      const ac = new AbortController();
+      shellRegistry.register({
+        shellId: 'bg_a1b2c3d4',
+        command: 'npm run dev',
+        cwd: '/work',
+        status: 'running',
+        startTime: Date.now(),
+        outputPath: '/tmp/bg-out/shell-bg_a1b2c3d4.output',
+        abortController: ac,
+      });
+
+      const result = await tool.validateBuildAndExecute(
+        { task_id: 'bg_a1b2c3d4' },
+        new AbortController().signal,
+      );
+
+      expect(result.error).toBeUndefined();
+      expect(result.llmContent).toContain('background shell "bg_a1b2c3d4"');
+      expect(result.llmContent).toContain('npm run dev');
+      expect(result.llmContent).toContain('/tmp/bg-out/shell-bg_a1b2c3d4.output');
+      // task_stop only requests cancellation — the entry stays `running`
+      // until the spawn handler observes the abort and settles the entry
+      // with the real exit moment. Without this guarantee, /tasks would
+      // report a terminal-but-still-draining shell.
+      expect(shellRegistry.get('bg_a1b2c3d4')!.status).toBe('running');
+      expect(shellRegistry.get('bg_a1b2c3d4')!.endTime).toBeUndefined();
+      expect(ac.signal.aborted).toBe(true);
+    });
+
+    it('returns NOT_RUNNING when the shell already exited', async () => {
+      shellRegistry.register({
+        shellId: 'bg_done',
+        command: 'true',
+        cwd: '/work',
+        status: 'running',
+        startTime: Date.now() - 1000,
+        outputPath: '/tmp/bg-out/shell-bg_done.output',
+        abortController: new AbortController(),
+      });
+      shellRegistry.complete('bg_done', 0, Date.now());
+
+      const result = await tool.validateBuildAndExecute(
+        { task_id: 'bg_done' },
+        new AbortController().signal,
+      );
+
+      expect(result.error?.type).toBe(ToolErrorType.TASK_STOP_NOT_RUNNING);
+      expect(result.llmContent).toContain('Background shell "bg_done"');
+      expect(result.llmContent).toContain('completed');
+    });
+
+    it('prefers an agent over a shell when both have the same id (defensive)', async () => {
+      // IDs cannot collide in practice (different naming schemes), but the
+      // tool's lookup order should still be deterministic if they ever do.
+      const agentAc = new AbortController();
+      const shellAc = new AbortController();
+      registry.register({
+        agentId: 'shared-id',
+        description: 'agent',
+        status: 'running',
+        startTime: Date.now(),
+        abortController: agentAc,
+      });
+      shellRegistry.register({
+        shellId: 'shared-id',
+        command: 'shell-cmd',
+        cwd: '/work',
+        status: 'running',
+        startTime: Date.now(),
+        outputPath: '/tmp/x.out',
+        abortController: shellAc,
+      });
+
+      const result = await tool.validateBuildAndExecute(
+        { task_id: 'shared-id' },
+        new AbortController().signal,
+      );
+
+      expect(result.llmContent).toContain('background agent');
+      expect(agentAc.signal.aborted).toBe(true);
+      expect(shellAc.signal.aborted).toBe(false);
+      expect(shellRegistry.get('shared-id')!.status).toBe('running');
+    });
+  });
 });
--- a/packages/core/src/tools/task-stop.ts
+++ b/packages/core/src/tools/task-stop.ts
@ -40,45 +40,83 @@ class TaskStopInvocation extends BaseToolInvocation<
  }

  async execute(_signal: AbortSignal): Promise<ToolResult> {
-    const registry = this.config.getBackgroundTaskRegistry();
-    const entry = registry.get(this.params.task_id);
+    const taskId = this.params.task_id;

-    if (!entry) {
+    // Subagent registry first (Phase A control plane). Agent IDs follow the
+    // pattern `<subagentName>-<suffix>`, so they cannot collide with shell
+    // IDs (which are `bg_<8 hex chars>` from the background shell pool).
+    const agentRegistry = this.config.getBackgroundTaskRegistry();
+    const agentEntry = agentRegistry.get(taskId);
+    if (agentEntry) {
+      if (agentEntry.status !== 'running') {
+        return notRunningError('agent', taskId, agentEntry.status);
+      }
+      agentRegistry.cancel(taskId);
+      // The terminal task-notification is emitted by the agent's own handler
+      // (via registry.complete/fail) rather than cancel(), so the parent
+      // model still receives the agent's real partial/final result — not just
+      // a bare "cancelled" message — once the reasoning loop unwinds.
+      const desc = agentEntry.description;
      return {
-        llmContent: `Error: No background task found with ID "${this.params.task_id}".`,
-        returnDisplay: 'Task not found.',
-        error: {
-          message: `Task not found: ${this.params.task_id}`,
-          type: ToolErrorType.TASK_STOP_NOT_FOUND,
-        },
+        llmContent:
+          `Cancellation requested for background agent "${taskId}". ` +
+          `A final task-notification carrying the agent's last result will ` +
+          `follow.\nDescription: ${desc}`,
+        returnDisplay: `Cancelled: ${desc}`,
      };
    }

-    if (entry.status !== 'running') {
+    // Background shell registry (Phase B). Settles asynchronously when the
+    // child process exits in response to the AbortController; the registry
+    // entry's terminal state (`cancelled`) and final exit code/output stay
+    // observable via `/tasks` and the on-disk output file.
+    const shellRegistry = this.config.getBackgroundShellRegistry();
+    const shellEntry = shellRegistry.get(taskId);
+    if (shellEntry) {
+      if (shellEntry.status !== 'running') {
+        return notRunningError('shell', taskId, shellEntry.status);
+      }
+      // requestCancel triggers the AbortController only — the registry's
+      // settle path records the real terminal status + endTime once the
+      // process actually drains. Calling cancel(id, Date.now()) here would
+      // mark the entry terminal immediately and lose the real exit info.
+      shellRegistry.requestCancel(taskId);
      return {
-        llmContent: `Error: Background task "${this.params.task_id}" is not running (status: ${entry.status}).`,
-        returnDisplay: `Task not running (${entry.status}).`,
-        error: {
-          message: `Task is ${entry.status}: ${this.params.task_id}`,
-          type: ToolErrorType.TASK_STOP_NOT_RUNNING,
-        },
+        llmContent:
+          `Cancellation requested for background shell "${taskId}". ` +
+          `Final status will be visible via /tasks once the process drains; ` +
+          `captured output remains at ${shellEntry.outputPath}.\n` +
+          `Command: ${shellEntry.command}`,
+        returnDisplay: `Cancelled shell: ${shellEntry.command}`,
      };
    }

-    registry.cancel(this.params.task_id);
-
-    // The terminal task-notification is emitted by the task's own handler
-    // (via registry.complete/fail) rather than cancel(), so the parent model
-    // still receives the task's real partial/final result — not just a bare
-    // "cancelled" message — once the reasoning loop unwinds.
-    const desc = entry.description;
    return {
-      llmContent: `Cancellation requested for background task "${this.params.task_id}". A final task-notification carrying the task's last result will follow.\nDescription: ${desc}`,
-      returnDisplay: `Cancelled: ${desc}`,
+      llmContent: `Error: No background task found with ID "${taskId}".`,
+      returnDisplay: 'Task not found.',
+      error: {
+        message: `Task not found: ${taskId}`,
+        type: ToolErrorType.TASK_STOP_NOT_FOUND,
+      },
    };
  }
 }

+function notRunningError(
+  kind: 'agent' | 'shell',
+  taskId: string,
+  status: string,
+): ToolResult {
+  return {
+    llmContent: `Error: Background ${kind} "${taskId}" is not running (status: ${status}).`,
+    returnDisplay: `Task not running (${status}).`,
+    error: {
+      message: `${kind} is ${status}: ${taskId}`,
+      type: ToolErrorType.TASK_STOP_NOT_RUNNING,
+    },
+  };
+}
+
 export class TaskStopTool extends BaseDeclarativeTool<
  TaskStopParams,
  ToolResult