feat(core): detect tool validation retry loops and inject stop directive (#3178)

Primary change: prevent the model from burning tokens in an infinite retry loop when a tool call repeatedly fails schema validation with the same error (observed with ask_user_question and a malformed `questions` parameter retrying 10+ times with the same validation error). - Track consecutive validation failures per (tool name, error message) pair in CoreToolScheduler via a `validationRetryCounts` Map. - After 3 consecutive failures for the same (tool, error) pair, append a RETRY LOOP DETECTED directive to the error response instructing the model to stop, re-examine the schema, try a fundamentally different approach, or surface the issue to the user. - Reset per-tool counters when the tool invocation succeeds; reset globally when an incoming batch shares no tool name with any previously failing tool; reset the per-tool counter when the tool returns a different validation error so unrelated mistakes do not accumulate toward the threshold. - Distinct from LoopDetectionService, which tracks model-behavior loops (repeated thoughts, stagnant actions); this change catches tool-API misuse loops at the scheduler layer. Piggyback fixes bundled in the same PR: - packages/cli/index.ts, packages/core/src/services/shellExecutionService.ts: treat PTY `EAGAIN` on the read path as an expected read error alongside `EIO`, avoiding noisy surface-level failures from transient non-blocking reads. - scripts/build.js: switch the settings-schema generation step from `npx tsx` to `node --import tsx/esm` for Bun compatibility. Tests: - Unit tests in coreToolScheduler.test.ts cover: directive injection on the 3rd consecutive failure, counter reset when a different tool is called, and counter reset after a successful invocation of the same tool (fail → fail → succeed → fail → fail must not trip the directive).
2026-04-28 03:30:40 +00:00 · 2026-04-17 22:24:46 -04:00 · 2026-04-17 22:24:46 -04:00 · 5facd8738b
commit 5facd8738b
parent 699cb05206
5 changed files with 310 additions and 5 deletions
--- a/packages/cli/index.ts
+++ b/packages/cli/index.ts
@ -51,6 +51,14 @@ const isExpectedPtyRaceError = (error: unknown): boolean => {
    return true;
  }

+  // EAGAIN: transient non-blocking read error from PTY fd
+  if (
+    (code === 'EAGAIN' && message.includes('read')) ||
+    message.includes('read EAGAIN')
+  ) {
+    return true;
+  }
+
  // PTY-specific resize/exit race errors - require PTY context in message
  if (
    message.includes('ioctl(2) failed, EBADF') ||
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@ -3849,3 +3849,236 @@ describe('CoreToolScheduler IDE interaction', () => {
    expect(mockIdeClient.openDiff).not.toHaveBeenCalled();
  });
 });
+
+describe('CoreToolScheduler validation retry loop detection', () => {
+  const RETRY_LOOP_STOP_DIRECTIVE = 'RETRY LOOP DETECTED';
+
+  /** Tool with a schema that requires a string `value` param. */
+  class StrictStringTool extends BaseDeclarativeTool<
+    { value: string },
+    ToolResult
+  > {
+    static readonly Name = 'strictStringTool';
+
+    constructor() {
+      super(
+        StrictStringTool.Name,
+        'StrictStringTool',
+        'A tool that requires a string value param.',
+        Kind.Other,
+        {
+          type: 'object',
+          properties: { value: { type: 'string' } },
+          required: ['value'],
+        },
+      );
+    }
+
+    protected createInvocation(params: {
+      value: string;
+    }): ToolInvocation<{ value: string }, ToolResult> {
+      return new (class extends BaseToolInvocation<
+        { value: string },
+        ToolResult
+      > {
+        constructor(p: { value: string }) {
+          super(p);
+        }
+        getDescription(): string {
+          return 'strictStringTool invocation';
+        }
+        async execute(): Promise<ToolResult> {
+          return { llmContent: 'ok', returnDisplay: 'ok' };
+        }
+      })(params);
+    }
+  }
+
+  function createSchedulerWithTool(tool: StrictStringTool) {
+    const mockToolRegistry = {
+      getTool: () => tool,
+      getFunctionDeclarations: () => [],
+      tools: new Map(),
+      discovery: {},
+      registerTool: () => {},
+      getToolByName: () => tool,
+      getToolByDisplayName: () => tool,
+      getTools: () => [],
+      discoverTools: async () => {},
+      getAllTools: () => [],
+      getToolsByServer: () => [],
+    } as unknown as ToolRegistry;
+
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+      getDebugMode: () => false,
+      getApprovalMode: () => ApprovalMode.YOLO,
+      getPermissionsAllow: () => [],
+      getContentGeneratorConfig: () => ({
+        model: 'test-model',
+        authType: 'gemini',
+      }),
+      getShellExecutionConfig: () => ({
+        terminalWidth: 90,
+        terminalHeight: 30,
+      }),
+      storage: { getProjectTempDir: () => '/tmp' },
+      getTruncateToolOutputThreshold: () => 100,
+      getTruncateToolOutputLines: () => 10,
+      getToolRegistry: () => mockToolRegistry,
+      getUseModelRouter: () => false,
+      getGeminiClient: () => null,
+      isInteractive: () => true,
+      getIdeMode: () => false,
+      getExperimentalZedIntegration: () => false,
+      getChatRecordingService: () => undefined,
+      getMessageBus: vi.fn().mockReturnValue(undefined),
+      getDisableAllHooks: vi.fn().mockReturnValue(true),
+      setApprovalMode: vi.fn(),
+    } as unknown as Config;
+
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+
+    const scheduler = new CoreToolScheduler({
+      config: mockConfig,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+      getPreferredEditor: () => 'vscode',
+      onEditorClose: vi.fn(),
+    });
+
+    return { scheduler, onToolCallsUpdate, onAllToolCallsComplete };
+  }
+
+  function makeRequest(
+    callId: string,
+    name: string,
+    args: Record<string, unknown>,
+  ) {
+    return {
+      callId,
+      name,
+      args,
+      isClientInitiated: false,
+      prompt_id: `prompt-${callId}`,
+    };
+  }
+
+  function getLastErrorMessage(onToolCallsUpdate: Mock): string | undefined {
+    const calls = onToolCallsUpdate.mock.calls;
+    for (let i = calls.length - 1; i >= 0; i--) {
+      const toolCalls = calls[i][0] as ToolCall[];
+      for (const call of toolCalls) {
+        if (call.status === 'error' && call.response?.responseParts) {
+          for (const part of call.response.responseParts) {
+            if ('functionResponse' in part) {
+              const resp = part.functionResponse as {
+                response?: { error?: string };
+              };
+              if (resp.response?.error) return resp.response.error;
+            }
+          }
+        }
+      }
+    }
+    return undefined;
+  }
+
+  it('should inject RETRY LOOP DETECTED directive after 3 consecutive validation failures', async () => {
+    const tool = new StrictStringTool();
+    const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool);
+
+    // Turn 1: bad params (value is number, not string)
+    await scheduler.schedule(
+      [makeRequest('c1', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    let msg = getLastErrorMessage(onToolCallsUpdate);
+    expect(msg).toBeDefined();
+    expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE);
+
+    // Turn 2: same bad params
+    await scheduler.schedule(
+      [makeRequest('c2', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    msg = getLastErrorMessage(onToolCallsUpdate);
+    expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE);
+
+    // Turn 3: same bad params — should trigger directive
+    await scheduler.schedule(
+      [makeRequest('c3', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    msg = getLastErrorMessage(onToolCallsUpdate);
+    expect(msg).toContain(RETRY_LOOP_STOP_DIRECTIVE);
+  });
+
+  it('should reset retry counter when a different tool is called', async () => {
+    const tool = new StrictStringTool();
+    const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool);
+
+    // Turn 1-2: tool fails twice
+    await scheduler.schedule(
+      [makeRequest('c1', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    await scheduler.schedule(
+      [makeRequest('c2', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+
+    // Turn 3: switch to a different tool that also fails
+    // We simulate by calling with a tool name that won't be found
+    await scheduler.schedule(
+      [makeRequest('c3', 'nonexistentTool', {})],
+      new AbortController().signal,
+    );
+
+    // Turn 4: back to tool — should be count 1 again (no directive)
+    await scheduler.schedule(
+      [makeRequest('c4', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    const msg = getLastErrorMessage(onToolCallsUpdate);
+    expect(msg).toBeDefined();
+    expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE);
+  });
+
+  it('should reset retry counter after a successful invocation of the same tool', async () => {
+    const tool = new StrictStringTool();
+    const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool);
+
+    // Two validation failures with the same error.
+    await scheduler.schedule(
+      [makeRequest('c1', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    await scheduler.schedule(
+      [makeRequest('c2', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+
+    // A valid invocation succeeds, which must clear the per-tool counter.
+    await scheduler.schedule(
+      [makeRequest('c3', 'strictStringTool', { value: 'ok' })],
+      new AbortController().signal,
+    );
+
+    // Two more failures — count should restart at 1, not jump to 3+.
+    await scheduler.schedule(
+      [makeRequest('c4', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+    await scheduler.schedule(
+      [makeRequest('c5', 'strictStringTool', { value: 123 })],
+      new AbortController().signal,
+    );
+
+    const msg = getLastErrorMessage(onToolCallsUpdate);
+    expect(msg).toBeDefined();
+    expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE);
+  });
+});
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@ -299,6 +299,15 @@ function toParts(input: PartListUnion): Part[] {
  return parts;
 }

+const VALIDATION_RETRY_LOOP_THRESHOLD = 3;
+
+/** Directive injected when a tool call repeatedly fails validation. */
+const RETRY_LOOP_STOP_DIRECTIVE =
+  '\n\n⚠️ RETRY LOOP DETECTED: This tool call has failed validation multiple times with the same error. ' +
+  'STOP retrying the same approach. Re-examine the tool schema and parameter requirements, then try a ' +
+  'fundamentally different approach. If you cannot resolve the validation error, explain the issue to the user ' +
+  'instead of retrying.';
+
 const createErrorResponse = (
  request: ToolCallRequestInfo,
  error: Error,
@ -397,6 +406,7 @@ export class CoreToolScheduler {
  private chatRecordingService?: ChatRecordingService;
  private isFinalizingToolCalls = false;
  private isScheduling = false;
+  private validationRetryCounts = new Map<string, number>();
  private requestQueue: Array<{
    request: ToolCallRequestInfo | ToolCallRequestInfo[];
    signal: AbortSignal;
@ -463,6 +473,8 @@ export class CoreToolScheduler {

      switch (newStatus) {
        case 'success': {
+          // Successful execution only resets retry state for this tool
+          this.clearRetryCountsForTool(currentCall.request.name);
          const durationMs = existingStartTime
            ? Date.now() - existingStartTime
            : undefined;
@ -751,6 +763,20 @@ export class CoreToolScheduler {
    return this._schedule(request, signal);
  }

+  /**
+   * Removes all validation retry counters for the given tool. Keys are
+   * "<toolName>:<errorMessage>", so a plain `Map.delete(toolName)` would not
+   * match anything.
+   */
+  private clearRetryCountsForTool(toolName: string): void {
+    const prefix = `${toolName}:`;
+    for (const key of this.validationRetryCounts.keys()) {
+      if (key.startsWith(prefix)) {
+        this.validationRetryCounts.delete(key);
+      }
+    }
+  }
+
  private async _schedule(
    request: ToolCallRequestInfo | ToolCallRequestInfo[],
    signal: AbortSignal,
@ -764,6 +790,23 @@ export class CoreToolScheduler {
      }
      const requestsToProcess = Array.isArray(request) ? request : [request];

+      // Check if this batch continues a validation retry loop.
+      // Keys are "<toolName>:<errorMessage>"; if no request reuses a tool name
+      // that previously failed validation, reset the tracker.
+      if (this.validationRetryCounts.size > 0) {
+        const prevTools = new Set<string>();
+        for (const key of this.validationRetryCounts.keys()) {
+          const sep = key.indexOf(':');
+          prevTools.add(sep === -1 ? key : key.slice(0, sep));
+        }
+        const hasPrevFailingTool = requestsToProcess.some((r) =>
+          prevTools.has(r.name),
+        );
+        if (!hasPrevFailingTool) {
+          this.validationRetryCounts.clear();
+        }
+      }
+
      const newToolCalls: ToolCall[] = [];
      for (const reqInfo of requestsToProcess) {
        // Check if the tool is excluded due to permissions/environment restrictions
@ -839,18 +882,36 @@ export class CoreToolScheduler {
          reqInfo.callId,
        );
        if (invocationOrError instanceof Error) {
-          const error = reqInfo.wasOutputTruncated
+          const baseError = reqInfo.wasOutputTruncated
            ? new Error(
                `${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`,
              )
            : invocationOrError;
+
+          // Track validation retry for loop detection. Counts accumulate per
+          // (tool, error message) pair so a different validation mistake on
+          // the same tool starts fresh rather than tripping the threshold.
+          const errorKey = `${reqInfo.name}:${baseError.message}`;
+          const count = (this.validationRetryCounts.get(errorKey) ?? 0) + 1;
+          for (const key of this.validationRetryCounts.keys()) {
+            if (key.startsWith(`${reqInfo.name}:`) && key !== errorKey) {
+              this.validationRetryCounts.delete(key);
+            }
+          }
+          this.validationRetryCounts.set(errorKey, count);
+
+          const finalError =
+            count >= VALIDATION_RETRY_LOOP_THRESHOLD
+              ? new Error(`${baseError.message}${RETRY_LOOP_STOP_DIRECTIVE}`)
+              : baseError;
+
          newToolCalls.push({
            status: 'error',
            request: reqInfo,
            tool: toolInstance,
            response: createErrorResponse(
              reqInfo,
-              error,
+              finalError,
              ToolErrorType.INVALID_TOOL_PARAMS,
            ),
            durationMs: 0,
@ -858,6 +919,9 @@ export class CoreToolScheduler {
          continue;
        }

+        // Reset all validation retry counters for this tool since it passed validation
+        this.clearRetryCountsForTool(reqInfo.name);
+
        // Reject file-modifying calls when truncated to prevent
        // writing incomplete content.
        if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) {
--- a/packages/core/src/services/shellExecutionService.ts
+++ b/packages/core/src/services/shellExecutionService.ts
@ -198,12 +198,12 @@ const getErrorMessage = (error: unknown): string =>

 const isExpectedPtyReadExitError = (error: unknown): boolean => {
  const code = getErrnoCode(error);
-  if (code === 'EIO') {
+  if (code === 'EIO' || code === 'EAGAIN') {
    return true;
  }

  const message = getErrorMessage(error);
-  return message.includes('read EIO');
+  return message.includes('read EIO') || message.includes('EAGAIN');
 };

 const isExpectedPtyExitRaceError = (error: unknown): boolean => {
--- a/scripts/build.js
+++ b/scripts/build.js
@ -65,7 +65,7 @@ for (const workspace of buildOrder) {
  // After cli is built, generate the JSON Schema for settings
  // so the vscode-ide-companion extension can provide IntelliSense
  if (workspace === 'packages/cli') {
-    execSync('npx tsx scripts/generate-settings-schema.ts', {
+    execSync('node --import tsx/esm scripts/generate-settings-schema.ts', {
      stdio: 'inherit',
      cwd: root,
    });