diff --git a/packages/cli/index.ts b/packages/cli/index.ts index bcf8034e6..192eb75bf 100644 --- a/packages/cli/index.ts +++ b/packages/cli/index.ts @@ -51,6 +51,14 @@ const isExpectedPtyRaceError = (error: unknown): boolean => { return true; } + // EAGAIN: transient non-blocking read error from PTY fd + if ( + (code === 'EAGAIN' && message.includes('read')) || + message.includes('read EAGAIN') + ) { + return true; + } + // PTY-specific resize/exit race errors - require PTY context in message if ( message.includes('ioctl(2) failed, EBADF') || diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 77fdb9397..6efa17c32 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -3849,3 +3849,236 @@ describe('CoreToolScheduler IDE interaction', () => { expect(mockIdeClient.openDiff).not.toHaveBeenCalled(); }); }); + +describe('CoreToolScheduler validation retry loop detection', () => { + const RETRY_LOOP_STOP_DIRECTIVE = 'RETRY LOOP DETECTED'; + + /** Tool with a schema that requires a string `value` param. */ + class StrictStringTool extends BaseDeclarativeTool< + { value: string }, + ToolResult + > { + static readonly Name = 'strictStringTool'; + + constructor() { + super( + StrictStringTool.Name, + 'StrictStringTool', + 'A tool that requires a string value param.', + Kind.Other, + { + type: 'object', + properties: { value: { type: 'string' } }, + required: ['value'], + }, + ); + } + + protected createInvocation(params: { + value: string; + }): ToolInvocation<{ value: string }, ToolResult> { + return new (class extends BaseToolInvocation< + { value: string }, + ToolResult + > { + constructor(p: { value: string }) { + super(p); + } + getDescription(): string { + return 'strictStringTool invocation'; + } + async execute(): Promise { + return { llmContent: 'ok', returnDisplay: 'ok' }; + } + })(params); + } + } + + function createSchedulerWithTool(tool: StrictStringTool) { + const mockToolRegistry = { + getTool: () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.YOLO, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({ + model: 'test-model', + authType: 'gemini', + }), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getTruncateToolOutputThreshold: () => 100, + getTruncateToolOutputLines: () => 10, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + setApprovalMode: vi.fn(), + } as unknown as Config; + + const onAllToolCallsComplete = vi.fn(); + const onToolCallsUpdate = vi.fn(); + + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete, + onToolCallsUpdate, + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + + return { scheduler, onToolCallsUpdate, onAllToolCallsComplete }; + } + + function makeRequest( + callId: string, + name: string, + args: Record, + ) { + return { + callId, + name, + args, + isClientInitiated: false, + prompt_id: `prompt-${callId}`, + }; + } + + function getLastErrorMessage(onToolCallsUpdate: Mock): string | undefined { + const calls = onToolCallsUpdate.mock.calls; + for (let i = calls.length - 1; i >= 0; i--) { + const toolCalls = calls[i][0] as ToolCall[]; + for (const call of toolCalls) { + if (call.status === 'error' && call.response?.responseParts) { + for (const part of call.response.responseParts) { + if ('functionResponse' in part) { + const resp = part.functionResponse as { + response?: { error?: string }; + }; + if (resp.response?.error) return resp.response.error; + } + } + } + } + } + return undefined; + } + + it('should inject RETRY LOOP DETECTED directive after 3 consecutive validation failures', async () => { + const tool = new StrictStringTool(); + const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool); + + // Turn 1: bad params (value is number, not string) + await scheduler.schedule( + [makeRequest('c1', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + let msg = getLastErrorMessage(onToolCallsUpdate); + expect(msg).toBeDefined(); + expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE); + + // Turn 2: same bad params + await scheduler.schedule( + [makeRequest('c2', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + msg = getLastErrorMessage(onToolCallsUpdate); + expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE); + + // Turn 3: same bad params — should trigger directive + await scheduler.schedule( + [makeRequest('c3', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + msg = getLastErrorMessage(onToolCallsUpdate); + expect(msg).toContain(RETRY_LOOP_STOP_DIRECTIVE); + }); + + it('should reset retry counter when a different tool is called', async () => { + const tool = new StrictStringTool(); + const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool); + + // Turn 1-2: tool fails twice + await scheduler.schedule( + [makeRequest('c1', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + await scheduler.schedule( + [makeRequest('c2', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + + // Turn 3: switch to a different tool that also fails + // We simulate by calling with a tool name that won't be found + await scheduler.schedule( + [makeRequest('c3', 'nonexistentTool', {})], + new AbortController().signal, + ); + + // Turn 4: back to tool — should be count 1 again (no directive) + await scheduler.schedule( + [makeRequest('c4', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + const msg = getLastErrorMessage(onToolCallsUpdate); + expect(msg).toBeDefined(); + expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE); + }); + + it('should reset retry counter after a successful invocation of the same tool', async () => { + const tool = new StrictStringTool(); + const { scheduler, onToolCallsUpdate } = createSchedulerWithTool(tool); + + // Two validation failures with the same error. + await scheduler.schedule( + [makeRequest('c1', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + await scheduler.schedule( + [makeRequest('c2', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + + // A valid invocation succeeds, which must clear the per-tool counter. + await scheduler.schedule( + [makeRequest('c3', 'strictStringTool', { value: 'ok' })], + new AbortController().signal, + ); + + // Two more failures — count should restart at 1, not jump to 3+. + await scheduler.schedule( + [makeRequest('c4', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + await scheduler.schedule( + [makeRequest('c5', 'strictStringTool', { value: 123 })], + new AbortController().signal, + ); + + const msg = getLastErrorMessage(onToolCallsUpdate); + expect(msg).toBeDefined(); + expect(msg).not.toContain(RETRY_LOOP_STOP_DIRECTIVE); + }); +}); diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 6e2098dd0..8269770de 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -299,6 +299,15 @@ function toParts(input: PartListUnion): Part[] { return parts; } +const VALIDATION_RETRY_LOOP_THRESHOLD = 3; + +/** Directive injected when a tool call repeatedly fails validation. */ +const RETRY_LOOP_STOP_DIRECTIVE = + '\n\n⚠️ RETRY LOOP DETECTED: This tool call has failed validation multiple times with the same error. ' + + 'STOP retrying the same approach. Re-examine the tool schema and parameter requirements, then try a ' + + 'fundamentally different approach. If you cannot resolve the validation error, explain the issue to the user ' + + 'instead of retrying.'; + const createErrorResponse = ( request: ToolCallRequestInfo, error: Error, @@ -397,6 +406,7 @@ export class CoreToolScheduler { private chatRecordingService?: ChatRecordingService; private isFinalizingToolCalls = false; private isScheduling = false; + private validationRetryCounts = new Map(); private requestQueue: Array<{ request: ToolCallRequestInfo | ToolCallRequestInfo[]; signal: AbortSignal; @@ -463,6 +473,8 @@ export class CoreToolScheduler { switch (newStatus) { case 'success': { + // Successful execution only resets retry state for this tool + this.clearRetryCountsForTool(currentCall.request.name); const durationMs = existingStartTime ? Date.now() - existingStartTime : undefined; @@ -751,6 +763,20 @@ export class CoreToolScheduler { return this._schedule(request, signal); } + /** + * Removes all validation retry counters for the given tool. Keys are + * ":", so a plain `Map.delete(toolName)` would not + * match anything. + */ + private clearRetryCountsForTool(toolName: string): void { + const prefix = `${toolName}:`; + for (const key of this.validationRetryCounts.keys()) { + if (key.startsWith(prefix)) { + this.validationRetryCounts.delete(key); + } + } + } + private async _schedule( request: ToolCallRequestInfo | ToolCallRequestInfo[], signal: AbortSignal, @@ -764,6 +790,23 @@ export class CoreToolScheduler { } const requestsToProcess = Array.isArray(request) ? request : [request]; + // Check if this batch continues a validation retry loop. + // Keys are ":"; if no request reuses a tool name + // that previously failed validation, reset the tracker. + if (this.validationRetryCounts.size > 0) { + const prevTools = new Set(); + for (const key of this.validationRetryCounts.keys()) { + const sep = key.indexOf(':'); + prevTools.add(sep === -1 ? key : key.slice(0, sep)); + } + const hasPrevFailingTool = requestsToProcess.some((r) => + prevTools.has(r.name), + ); + if (!hasPrevFailingTool) { + this.validationRetryCounts.clear(); + } + } + const newToolCalls: ToolCall[] = []; for (const reqInfo of requestsToProcess) { // Check if the tool is excluded due to permissions/environment restrictions @@ -839,18 +882,36 @@ export class CoreToolScheduler { reqInfo.callId, ); if (invocationOrError instanceof Error) { - const error = reqInfo.wasOutputTruncated + const baseError = reqInfo.wasOutputTruncated ? new Error( `${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`, ) : invocationOrError; + + // Track validation retry for loop detection. Counts accumulate per + // (tool, error message) pair so a different validation mistake on + // the same tool starts fresh rather than tripping the threshold. + const errorKey = `${reqInfo.name}:${baseError.message}`; + const count = (this.validationRetryCounts.get(errorKey) ?? 0) + 1; + for (const key of this.validationRetryCounts.keys()) { + if (key.startsWith(`${reqInfo.name}:`) && key !== errorKey) { + this.validationRetryCounts.delete(key); + } + } + this.validationRetryCounts.set(errorKey, count); + + const finalError = + count >= VALIDATION_RETRY_LOOP_THRESHOLD + ? new Error(`${baseError.message}${RETRY_LOOP_STOP_DIRECTIVE}`) + : baseError; + newToolCalls.push({ status: 'error', request: reqInfo, tool: toolInstance, response: createErrorResponse( reqInfo, - error, + finalError, ToolErrorType.INVALID_TOOL_PARAMS, ), durationMs: 0, @@ -858,6 +919,9 @@ export class CoreToolScheduler { continue; } + // Reset all validation retry counters for this tool since it passed validation + this.clearRetryCountsForTool(reqInfo.name); + // Reject file-modifying calls when truncated to prevent // writing incomplete content. if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) { diff --git a/packages/core/src/services/shellExecutionService.ts b/packages/core/src/services/shellExecutionService.ts index 88b42d4bb..e2b178805 100644 --- a/packages/core/src/services/shellExecutionService.ts +++ b/packages/core/src/services/shellExecutionService.ts @@ -198,12 +198,12 @@ const getErrorMessage = (error: unknown): string => const isExpectedPtyReadExitError = (error: unknown): boolean => { const code = getErrnoCode(error); - if (code === 'EIO') { + if (code === 'EIO' || code === 'EAGAIN') { return true; } const message = getErrorMessage(error); - return message.includes('read EIO'); + return message.includes('read EIO') || message.includes('EAGAIN'); }; const isExpectedPtyExitRaceError = (error: unknown): boolean => { diff --git a/scripts/build.js b/scripts/build.js index 342e21b6c..f471bf25e 100644 --- a/scripts/build.js +++ b/scripts/build.js @@ -65,7 +65,7 @@ for (const workspace of buildOrder) { // After cli is built, generate the JSON Schema for settings // so the vscode-ide-companion extension can provide IntelliSense if (workspace === 'packages/cli') { - execSync('npx tsx scripts/generate-settings-schema.ts', { + execSync('node --import tsx/esm scripts/generate-settings-schema.ts', { stdio: 'inherit', cwd: root, });