From 685296e97843f2846fada9cbc0d4179598f89ca2 Mon Sep 17 00:00:00 2001 From: jinye Date: Wed, 22 Apr 2026 15:01:42 +0800 Subject: [PATCH] fix(core): reject truncated subagent write_file calls (#3505) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(core): reject truncated subagent write_file calls Propagate MAX_TOKENS truncation from subagent responses into tool requests and reject truncated edit calls before schema validation can surface misleading missing-parameter errors. * fix(core): reset per-attempt stream state on retry in agent-core When a streaming response hits MAX_TOKENS and is retried, accumulated state variables (functionCalls, wasOutputTruncated, roundText, etc.) were not cleared. This caused a successful retry to inherit the stale wasOutputTruncated=true flag from the failed attempt, incorrectly rejecting all Edit tool calls with a truncation error. Reset all per-attempt state on retry events, matching the existing behaviour in turn.ts. Add a regression test covering the truncated-then-retried-successfully scenario. 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code) --------- Co-authored-by: Qwen-Coder --- .../core/src/agents/runtime/agent-core.ts | 18 +- .../src/agents/runtime/agent-headless.test.ts | 214 ++++++++++++++++++ .../core/src/core/coreToolScheduler.test.ts | 57 ++++- packages/core/src/core/coreToolScheduler.ts | 36 +-- 4 files changed, 305 insertions(+), 20 deletions(-) diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index 8e88a82f7..763594540 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -31,6 +31,7 @@ import type { ToolCallConfirmationDetails, } from '../../tools/tools.js'; import { getInitialChatHistory } from '../../utils/environmentContext.js'; +import { FinishReason } from '@google/genai'; import type { Content, Part, @@ -475,6 +476,7 @@ export class AgentCore { let lastUsage: GenerateContentResponseUsageMetadata | undefined = undefined; let currentResponseId: string | undefined = undefined; + let wasOutputTruncated = false; for await (const streamEvent of responseStream) { if (roundAbortController.signal.aborted) { @@ -486,8 +488,16 @@ export class AgentCore { }; } - // Handle retry events + // Handle retry events — reset all per-attempt state so a successful + // retry does not inherit stale data (e.g. wasOutputTruncated) from a + // previous attempt that may have hit MAX_TOKENS. if (streamEvent.type === 'retry') { + functionCalls.length = 0; + roundText = ''; + roundThoughtText = ''; + lastUsage = undefined; + currentResponseId = undefined; + wasOutputTruncated = false; continue; } @@ -499,6 +509,9 @@ export class AgentCore { currentResponseId = resp.responseId; } if (resp.functionCalls) functionCalls.push(...resp.functionCalls); + if (resp.candidates?.[0]?.finishReason === FinishReason.MAX_TOKENS) { + wasOutputTruncated = true; + } const content = resp.candidates?.[0]?.content; const parts = content?.parts || []; for (const p of parts) { @@ -552,6 +565,7 @@ export class AgentCore { turnCounter, toolsList, currentResponseId, + wasOutputTruncated, ); } else { // No tool calls — treat this as the model's final answer. @@ -619,6 +633,7 @@ export class AgentCore { currentRound: number, toolsList: FunctionDeclaration[], responseId?: string, + wasOutputTruncated = false, ): Promise { const toolResponseParts: Part[] = []; @@ -848,6 +863,7 @@ export class AgentCore { isClientInitiated: true, prompt_id: promptId, response_id: responseId, + wasOutputTruncated, }; const description = this.getToolDescription(toolName, args); diff --git a/packages/core/src/agents/runtime/agent-headless.test.ts b/packages/core/src/agents/runtime/agent-headless.test.ts index f377a12a3..16e806c9a 100644 --- a/packages/core/src/agents/runtime/agent-headless.test.ts +++ b/packages/core/src/agents/runtime/agent-headless.test.ts @@ -48,6 +48,7 @@ import type { ToolConfig, } from './agent-types.js'; import { AgentTerminateMode } from './agent-types.js'; +import { WriteFileTool } from '../../tools/write-file.js'; vi.mock('../../core/geminiChat.js'); vi.mock('../../core/contentGenerator.js', async (importOriginal) => { @@ -1227,6 +1228,219 @@ describe('subagent.ts', () => { expect(readResult).toBeDefined(); expect(readResult!.success).toBe(true); }); + + it('should mark truncated subagent write_file calls as output-truncated errors', async () => { + const writeFileToolDef: FunctionDeclaration = { + name: WriteFileTool.Name, + description: 'Writes a file', + parameters: { type: Type.OBJECT, properties: {} }, + }; + + const { config } = await createMockConfig({ + getFunctionDeclarationsFiltered: vi + .fn() + .mockReturnValue([writeFileToolDef]), + getTool: vi.fn().mockImplementation((name: string) => { + if (name === WriteFileTool.Name) { + return new WriteFileTool(config); + } + return undefined; + }), + }); + + const toolConfig: ToolConfig = { tools: [WriteFileTool.Name] }; + const toolResultEvents: AgentToolResultEvent[] = []; + const eventEmitter = new AgentEventEmitter(); + eventEmitter.on(AgentEventType.TOOL_RESULT, (event: unknown) => { + toolResultEvents.push(event as AgentToolResultEvent); + }); + + mockSendMessageStream.mockImplementation(async () => + (async function* () { + yield { + type: 'chunk', + value: { + functionCalls: [ + { + id: 'call_write', + name: WriteFileTool.Name, + args: { file_path: '/tmp/truncated.txt' }, + }, + ], + }, + }; + yield { + type: 'chunk', + value: { + candidates: [ + { + finishReason: 'MAX_TOKENS', + content: { parts: [] }, + }, + ], + }, + }; + yield { + type: 'chunk', + value: { + candidates: [ + { + content: { + parts: [{ text: 'done' }], + }, + }, + ], + }, + }; + })(), + ); + + const scope = await AgentHeadless.create( + 'test-agent', + config, + promptConfig, + defaultModelConfig, + defaultRunConfig, + toolConfig, + eventEmitter, + ); + + await scope.execute(new ContextState()); + + const writeResult = toolResultEvents.find( + (event) => event.name === WriteFileTool.Name, + ); + expect(writeResult).toBeDefined(); + expect(writeResult!.success).toBe(false); + expect(writeResult!.error).toContain( + 'truncated due to max_tokens limit', + ); + expect(writeResult!.error).toContain( + 'rejected to prevent writing truncated content', + ); + expect(writeResult!.error).not.toContain( + "params must have required property 'content'", + ); + }); + + it('should NOT reject write_file when truncated attempt is followed by successful retry', async () => { + const writeFileToolDef: FunctionDeclaration = { + name: WriteFileTool.Name, + description: 'Writes a file', + parameters: { type: Type.OBJECT, properties: {} }, + }; + + const { config } = await createMockConfig({ + getFunctionDeclarationsFiltered: vi + .fn() + .mockReturnValue([writeFileToolDef]), + getTool: vi.fn().mockImplementation((name: string) => { + if (name === WriteFileTool.Name) { + return new WriteFileTool(config); + } + return undefined; + }), + }); + + const toolConfig: ToolConfig = { tools: [WriteFileTool.Name] }; + const toolResultEvents: AgentToolResultEvent[] = []; + const eventEmitter = new AgentEventEmitter(); + eventEmitter.on(AgentEventType.TOOL_RESULT, (event: unknown) => { + toolResultEvents.push(event as AgentToolResultEvent); + }); + + // First call: truncated (MAX_TOKENS). Retry resets state, second call: + // complete write_file. The scheduler should see wasOutputTruncated=false + // for the retried response and allow the tool to proceed. + let callCount = 0; + mockSendMessageStream.mockImplementation(async () => { + callCount++; + if (callCount === 1) { + // First round: truncated response with incomplete write_file args + return (async function* () { + yield { + type: 'chunk', + value: { + functionCalls: [ + { + id: 'call_write_truncated', + name: WriteFileTool.Name, + args: { file_path: '/tmp/retry-test.txt' }, + }, + ], + }, + }; + yield { + type: 'retry', + }; + // After retry, complete response with all required args + yield { + type: 'chunk', + value: { + functionCalls: [ + { + id: 'call_write_complete', + name: WriteFileTool.Name, + args: { + file_path: '/tmp/retry-test.txt', + content: 'hello', + }, + }, + ], + }, + }; + yield { + type: 'chunk', + value: { + candidates: [ + { finishReason: 'STOP', content: { parts: [] } }, + ], + }, + }; + })(); + } + // Second round: plain text response to end the agent loop + return (async function* () { + yield { + type: 'chunk', + value: { + candidates: [ + { + finishReason: 'STOP', + content: { parts: [{ text: 'done' }] }, + }, + ], + }, + }; + })(); + }); + + const scope = await AgentHeadless.create( + 'test-agent', + config, + promptConfig, + defaultModelConfig, + defaultRunConfig, + toolConfig, + eventEmitter, + ); + + await scope.execute(new ContextState()); + + const writeResult = toolResultEvents.find( + (event) => event.name === WriteFileTool.Name, + ); + expect(writeResult).toBeDefined(); + // After retry the wasOutputTruncated flag must have been cleared, so + // the call should NOT be rejected with a truncation error — even if + // execution fails for unrelated reasons (e.g. mock filesystem). + expect(writeResult!.error).not.toContain( + 'truncated due to max_tokens limit', + ); + expect(writeResult!.error).not.toContain( + 'rejected to prevent writing truncated content', + ); + }); }); }); }); diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 1c2f9fa2a..1c917df12 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -7,6 +7,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { Mock } from 'vitest'; import type { + AnyDeclarativeTool, Config, ToolCallConfirmationDetails, ToolConfirmationPayload, @@ -43,6 +44,7 @@ import type { HookExecutionResponse } from '../confirmation-bus/types.js'; import { type NotificationType } from '../hooks/types.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; import { IdeClient } from '../ide/ide-client.js'; +import { WriteFileTool } from '../tools/write-file.js'; vi.mock('fs/promises', () => ({ writeFile: vi.fn(), @@ -1822,7 +1824,7 @@ describe('CoreToolScheduler request queueing', () => { describe('CoreToolScheduler truncated output protection', () => { function createTruncationTestScheduler( - tool: TestApprovalTool | MockTool, + tool: AnyDeclarativeTool, toolNames: string[], ) { const onAllToolCallsComplete = vi.fn(); @@ -1990,6 +1992,59 @@ describe('CoreToolScheduler truncated output protection', () => { // Non-Edit tools should still execute even when output was truncated expect(completedCalls[0].status).toBe('success'); }); + + it('should prefer truncation rejection over validation errors for truncated write_file calls', async () => { + const writeFileConfig = { + getProjectRoot: () => '/tmp', + getTargetDir: () => '/tmp', + getFileSystemService: () => ({ + readTextFile: vi.fn(), + writeTextFile: vi.fn(), + }), + getDefaultFileEncoding: () => undefined, + setApprovalMode: vi.fn(), + } as unknown as Config; + const writeFileTool = new WriteFileTool(writeFileConfig); + const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler( + writeFileTool, + [WriteFileTool.Name], + ); + + await scheduler.schedule( + [ + { + callId: '1', + name: WriteFileTool.Name, + args: { file_path: '/tmp/test.txt' }, + isClientInitiated: false, + prompt_id: 'prompt-id-write-file-truncated', + wasOutputTruncated: true, + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const completedCalls = onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(1); + const completedCall = completedCalls[0]; + expect(completedCall.status).toBe('error'); + + if (completedCall.status === 'error') { + const errorMessage = completedCall.response.error?.message; + expect(errorMessage).toContain('truncated due to max_tokens limit'); + expect(errorMessage).toContain( + 'rejected to prevent writing truncated content', + ); + expect(errorMessage).not.toContain( + "params must have required property 'content'", + ); + } + }); }); describe('CoreToolScheduler Sequential Execution', () => { diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 78bccb21c..09a6c4ccf 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -890,6 +890,24 @@ export class CoreToolScheduler { continue; } + // Reject file-modifying calls when truncated to prevent + // writing incomplete content, even if params failed schema validation. + if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) { + const truncationError = new Error(TRUNCATION_EDIT_REJECTION); + newToolCalls.push({ + status: 'error', + request: reqInfo, + tool: toolInstance, + response: createErrorResponse( + reqInfo, + truncationError, + ToolErrorType.OUTPUT_TRUNCATED, + ), + durationMs: 0, + }); + continue; + } + const invocationOrError = this.buildInvocation( toolInstance, reqInfo.args, @@ -936,24 +954,6 @@ export class CoreToolScheduler { // Reset all validation retry counters for this tool since it passed validation this.clearRetryCountsForTool(reqInfo.name); - // Reject file-modifying calls when truncated to prevent - // writing incomplete content. - if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) { - const truncationError = new Error(TRUNCATION_EDIT_REJECTION); - newToolCalls.push({ - status: 'error', - request: reqInfo, - tool: toolInstance, - response: createErrorResponse( - reqInfo, - truncationError, - ToolErrorType.OUTPUT_TRUNCATED, - ), - durationMs: 0, - }); - continue; - } - newToolCalls.push({ status: 'validating', request: reqInfo,