From a6cbb8e1142f93bf124d8baea7e7cdcea052084e Mon Sep 17 00:00:00 2001
From: SunDapeng
Date: Sat, 28 Feb 2026 15:47:25 +0800
Subject: [PATCH] fix: detect and protect against truncated tool call output

When LLM streaming output exceeds the token limit, the JSON arguments
of a tool call can be truncated mid-stream. This causes parameter
validation errors, or silent data corruption when the truncated JSON
still passes validation and incomplete content is written to a file.

The fix adds truncation detection at the streaming parser level and
overrides misleading finish_reason values from providers (e.g.,
DashScope/Qwen reporting 'stop' instead of 'length'). This ensures
downstream code correctly identifies truncated responses and gives the
LLM clear guidance to retry, splitting the content if needed.

Changes:
- turn.ts: Add wasOutputTruncated flag to ToolCallRequestInfo
- coreToolScheduler.ts: Reject truncated Kind.Edit tool calls; append
  retry guidance to parameter validation errors
- converter.ts: Override finish_reason when the streaming parser
  detects incomplete JSON
- streamingToolCallParser.ts: Add hasIncompleteToolCalls() method
- Tests: Cover truncation detection in the scheduler, converter,
  parser, and turn

Co-authored-by: Qwen-Coder
---
 .../core/src/core/coreToolScheduler.test.ts   | 169 +++++++++++++
 packages/core/src/core/coreToolScheduler.ts   |  42 +++-
 .../openaiContentGenerator/converter.test.ts  | 235 ++++++++++++++++++
 .../core/openaiContentGenerator/converter.ts  |  18 +-
 .../streamingToolCallParser.test.ts           |  66 +++++
 .../streamingToolCallParser.ts                |  28 +++
 packages/core/src/core/turn.test.ts           | 137 ++++++++++
 packages/core/src/core/turn.ts                |  24 +-
 packages/core/src/tools/tool-error.ts         |   3 +
 9 files changed, 712 insertions(+), 10 deletions(-)

diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts
index 4a19aec2f..1f810430f 100644
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@@ -1859,6 +1859,175 @@ describe('CoreToolScheduler request queueing', () => {
   });
 });
 
+describe('CoreToolScheduler truncated output protection', () => {
+  function createTruncationTestScheduler(
+    tool: TestApprovalTool | MockTool,
+    toolNames: string[],
+  ) {
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+
+    const mockToolRegistry = {
+      getTool: () => tool,
+      getAllToolNames: () => toolNames,
+      getFunctionDeclarations: () => [],
+      tools: new Map(),
+    } as unknown as ToolRegistry;
+
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+      getDebugMode: () => false,
+      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
+      getAllowedTools: () => [],
+      getExcludeTools: () => undefined,
+      getContentGeneratorConfig: () => ({
+        model: 'test-model',
+        authType: 'gemini',
+      }),
+      getShellExecutionConfig: () => ({
+        terminalWidth: 90,
+        terminalHeight: 30,
+      }),
+      storage: {
+        getProjectTempDir: () => '/tmp',
+      },
+      getTruncateToolOutputThreshold: () =>
+        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
+      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+      getToolRegistry: () => mockToolRegistry,
+      getUseModelRouter: () => false,
+      getGeminiClient: () => null,
+      getChatRecordingService: () => undefined,
+      isInteractive: () => true,
+    } as unknown as Config;
+
+    const scheduler = new CoreToolScheduler({
+      config: mockConfig,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+      getPreferredEditor: () => 'vscode',
+      onEditorClose: vi.fn(),
+    });
+
+    return { scheduler, onAllToolCallsComplete };
+  }
+  
it('should reject Kind.Edit tool calls when wasOutputTruncated is true', async () => { + const declarativeTool = new TestApprovalTool({ + getApprovalMode: () => ApprovalMode.AUTO_EDIT, + } as unknown as Config); + const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler( + declarativeTool, + [TestApprovalTool.Name], + ); + + await scheduler.schedule( + [ + { + callId: '1', + name: TestApprovalTool.Name, + args: { id: 'test-truncated' }, + isClientInitiated: false, + prompt_id: 'prompt-id-truncated', + wasOutputTruncated: true, + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const completedCalls = onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(1); + const completedCall = completedCalls[0]; + expect(completedCall.status).toBe('error'); + + if (completedCall.status === 'error') { + const errorMessage = completedCall.response.error?.message; + expect(errorMessage).toContain('truncated due to max_tokens limit'); + expect(errorMessage).toContain( + 'rejected to prevent writing truncated content', + ); + } + }); + + it('should allow Kind.Edit tool calls when wasOutputTruncated is false', async () => { + const declarativeTool = new TestApprovalTool({ + getApprovalMode: () => ApprovalMode.AUTO_EDIT, + } as unknown as Config); + const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler( + declarativeTool, + [TestApprovalTool.Name], + ); + + await scheduler.schedule( + [ + { + callId: '1', + name: TestApprovalTool.Name, + args: { id: 'test-normal' }, + isClientInitiated: false, + prompt_id: 'prompt-id-normal', + wasOutputTruncated: false, + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const completedCalls = onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(1); + // Should succeed (not error) since wasOutputTruncated is false + expect(completedCalls[0].status).toBe('success'); + }); + + it('should allow non-Edit tools when wasOutputTruncated is true', async () => { + const mockTool = new MockTool({ + name: 'mockReadTool', + execute: async () => ({ + llmContent: 'read result', + returnDisplay: 'read result', + }), + }); + const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler( + mockTool, + ['mockReadTool'], + ); + + await scheduler.schedule( + [ + { + callId: '1', + name: 'mockReadTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-read-truncated', + wasOutputTruncated: true, + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const completedCalls = onAllToolCallsComplete.mock + .calls[0][0] as ToolCall[]; + expect(completedCalls).toHaveLength(1); + // Non-Edit tools should still execute even when output was truncated + expect(completedCalls[0].status).toBe('success'); + }); +}); + describe('CoreToolScheduler Sequential Execution', () => { it('should execute tool calls in a batch sequentially', async () => { // Arrange diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index fc0455a8a..9f330ad20 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -32,6 +32,7 @@ import { logToolOutputTruncated, ToolOutputTruncatedEvent, InputFormat, + Kind, SkillTool, } from 
'../index.js'; import type { @@ -55,6 +56,23 @@ import levenshtein from 'fast-levenshtein'; import { getPlanModeSystemReminder } from './prompts.js'; import { ShellToolInvocation } from '../tools/shell.js'; +const TRUNCATION_PARAM_GUIDANCE = + 'Note: Your previous response was truncated due to max_tokens limit, ' + + 'which likely caused incomplete tool call parameters. ' + + 'Please retry the tool call with complete parameters. ' + + 'If the content is too large for a single response, ' + + 'consider splitting it into smaller parts.'; + +const TRUNCATION_EDIT_REJECTION = + 'Your previous response was truncated due to max_tokens limit, ' + + 'which likely produced incomplete file content. ' + + 'The tool call has been rejected to prevent writing ' + + 'truncated content to the file. ' + + 'Please retry the tool call with complete content. ' + + 'If the content is too large for a single response, ' + + 'consider splitting it into smaller parts ' + + '(e.g., write_file for initial content, then edit for additions).'; + export type ValidatingToolCall = { status: 'validating'; request: ToolCallRequestInfo; @@ -773,19 +791,41 @@ export class CoreToolScheduler { reqInfo.args, ); if (invocationOrError instanceof Error) { + const error = reqInfo.wasOutputTruncated + ? new Error( + `${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`, + ) + : invocationOrError; return { status: 'error', request: reqInfo, tool: toolInstance, response: createErrorResponse( reqInfo, - invocationOrError, + error, ToolErrorType.INVALID_TOOL_PARAMS, ), durationMs: 0, }; } + // Reject file-modifying calls when truncated to prevent + // writing incomplete content. + if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) { + const truncationError = new Error(TRUNCATION_EDIT_REJECTION); + return { + status: 'error', + request: reqInfo, + tool: toolInstance, + response: createErrorResponse( + reqInfo, + truncationError, + ToolErrorType.OUTPUT_TRUNCATED, + ), + durationMs: 0, + }; + } + return { status: 'validating', request: reqInfo, diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts index 36bbc812d..32b1f5440 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.test.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.test.ts @@ -9,6 +9,7 @@ import { OpenAIContentConverter } from './converter.js'; import type { StreamingToolCallParser } from './streamingToolCallParser.js'; import { Type, + FinishReason, type GenerateContentParameters, type Content, type Part, @@ -1957,3 +1958,237 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', () expect(contentArray[1].image_url?.url).toContain('data:image/png'); }); }); + +describe('Truncated tool call detection in streaming', () => { + let converter: OpenAIContentConverter; + + beforeEach(() => { + converter = new OpenAIContentConverter('test-model'); + }); + + /** + * Helper: feed streaming chunks then a final chunk with finish_reason, + * and return the Gemini response for the final chunk. 
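+   *
+   * Each toolCallChunks entry is sent as its own delta with
+   * finish_reason=null; the finish reason then arrives on a separate,
+   * empty-delta chunk, mirroring how OpenAI-compatible providers
+   * terminate a stream.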
+ */ + function feedToolCallChunks( + conv: OpenAIContentConverter, + toolCallChunks: Array<{ + index: number; + id?: string; + name?: string; + arguments: string; + }>, + finishReason: string, + ) { + // Feed argument chunks (no finish_reason yet) + for (const tc of toolCallChunks) { + conv.convertOpenAIChunkToGemini({ + object: 'chat.completion.chunk', + id: 'chunk-stream', + created: 100, + model: 'test-model', + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: tc.index, + id: tc.id, + type: 'function' as const, + function: { + name: tc.name, + arguments: tc.arguments, + }, + }, + ], + }, + finish_reason: null, + logprobs: null, + }, + ], + } as unknown as OpenAI.Chat.ChatCompletionChunk); + } + + // Final chunk with finish_reason + return conv.convertOpenAIChunkToGemini({ + object: 'chat.completion.chunk', + id: 'chunk-final', + created: 101, + model: 'test-model', + choices: [ + { + index: 0, + delta: {}, + finish_reason: finishReason, + logprobs: null, + }, + ], + } as unknown as OpenAI.Chat.ChatCompletionChunk); + } + + it('should override finishReason to MAX_TOKENS when tool call JSON is truncated and provider reports "stop"', () => { + // Simulate: write_file call truncated mid-JSON, provider says "stop" + const result = feedToolCallChunks( + converter, + [ + { + index: 0, + id: 'call_1', + name: 'write_file', + arguments: '{"file_path": "/tmp/test.cpp"', + // Missing closing brace and content field — truncated + }, + ], + 'stop', + ); + + expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS); + }); + + it('should override finishReason to MAX_TOKENS when provider reports "tool_calls" but JSON is truncated', () => { + const result = feedToolCallChunks( + converter, + [ + { + index: 0, + id: 'call_1', + name: 'write_file', + arguments: '{"file_path": "/tmp/test.cpp", "content": "partial content', + // Truncated mid-string + }, + ], + 'tool_calls', + ); + + expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS); + }); + + it('should preserve finishReason STOP when tool call JSON is complete', () => { + const result = feedToolCallChunks( + converter, + [ + { + index: 0, + id: 'call_1', + name: 'write_file', + arguments: '{"file_path": "/tmp/test.cpp", "content": "hello"}', + }, + ], + 'stop', + ); + + expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.STOP); + }); + + it('should preserve finishReason MAX_TOKENS when provider already reports "length"', () => { + const result = feedToolCallChunks( + converter, + [ + { + index: 0, + id: 'call_1', + name: 'write_file', + arguments: '{"file_path": "/tmp/test.cpp"', + }, + ], + 'length', + ); + + expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS); + }); + + it('should still emit the (repaired) function call even when truncated', () => { + const result = feedToolCallChunks( + converter, + [ + { + index: 0, + id: 'call_1', + name: 'write_file', + arguments: '{"file_path": "/tmp/test.cpp"', + }, + ], + 'stop', + ); + + const parts = result.candidates?.[0]?.content?.parts ?? 
[]; + const fnCall = parts.find((p: Part) => p.functionCall); + expect(fnCall).toBeDefined(); + expect(fnCall?.functionCall?.name).toBe('write_file'); + expect(fnCall?.functionCall?.args).toEqual({ + file_path: '/tmp/test.cpp', + }); + }); + + it('should detect truncation with multi-chunk streaming arguments', () => { + // Feed arguments in multiple small chunks like real streaming + const conv = new OpenAIContentConverter('test-model'); + + // Chunk 1: start of JSON with tool metadata + conv.convertOpenAIChunkToGemini({ + object: 'chat.completion.chunk', + id: 'c1', + created: 100, + model: 'test-model', + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_1', + type: 'function' as const, + function: { name: 'write_file', arguments: '{"file_' }, + }, + ], + }, + finish_reason: null, + logprobs: null, + }, + ], + } as unknown as OpenAI.Chat.ChatCompletionChunk); + + // Chunk 2: more arguments + conv.convertOpenAIChunkToGemini({ + object: 'chat.completion.chunk', + id: 'c2', + created: 100, + model: 'test-model', + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'path": "/tmp/f.txt", "conten' }, + }, + ], + }, + finish_reason: null, + logprobs: null, + }, + ], + } as unknown as OpenAI.Chat.ChatCompletionChunk); + + // Final chunk: finish_reason "stop" but JSON is still incomplete + const result = conv.convertOpenAIChunkToGemini({ + object: 'chat.completion.chunk', + id: 'c3', + created: 101, + model: 'test-model', + choices: [ + { + index: 0, + delta: {}, + finish_reason: 'stop', + logprobs: null, + }, + ], + } as unknown as OpenAI.Chat.ChatCompletionChunk); + + expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS); + }); +}); diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts index 2ca7428bd..50c2ab625 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.ts @@ -911,7 +911,14 @@ export class OpenAIContentConverter { } // Only emit function calls when streaming is complete (finish_reason is present) + let toolCallsTruncated = false; if (choice.finish_reason) { + // Detect truncation the provider may not report correctly. + // Some providers (e.g. DashScope/Qwen) send "stop" or "tool_calls" + // even when output was cut off mid-JSON due to max_tokens. + toolCallsTruncated = + this.streamingToolCallParser.hasIncompleteToolCalls(); + const completedToolCalls = this.streamingToolCallParser.getCompletedToolCalls(); @@ -933,6 +940,13 @@ export class OpenAIContentConverter { this.streamingToolCallParser.reset(); } + // If tool call JSON was truncated, override to "length" so downstream + // (turn.ts) correctly sets wasOutputTruncated=true. + const effectiveFinishReason = + toolCallsTruncated && choice.finish_reason !== 'length' + ? 
'length' + : choice.finish_reason; + // Only include finishReason key if finish_reason is present const candidate: Candidate = { content: { @@ -942,9 +956,9 @@ export class OpenAIContentConverter { index: 0, safetyRatings: [], }; - if (choice.finish_reason) { + if (effectiveFinishReason) { candidate.finishReason = this.mapOpenAIFinishReasonToGemini( - choice.finish_reason, + effectiveFinishReason, ); } response.candidates = [candidate]; diff --git a/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.test.ts b/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.test.ts index 14da87d7e..dc4d696d5 100644 --- a/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.test.ts +++ b/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.test.ts @@ -790,4 +790,70 @@ describe('StreamingToolCallParser', () => { expect(call2?.args).toEqual({ param2: 'value2' }); }); }); + + describe('hasIncompleteToolCalls', () => { + it('should return false when no tool calls exist', () => { + expect(parser.hasIncompleteToolCalls()).toBe(false); + }); + + it('should return false when all tool calls have complete JSON', () => { + parser.addChunk(0, '{"key": "value"}', 'call_1', 'write_file'); + expect(parser.hasIncompleteToolCalls()).toBe(false); + }); + + it('should return true when a tool call has depth > 0 (unclosed braces)', () => { + parser.addChunk( + 0, + '{"file_path": "/tmp/test.txt", "content": "partial', + 'call_1', + 'write_file', + ); + expect(parser.hasIncompleteToolCalls()).toBe(true); + }); + + it('should return true when a tool call is inside a string literal', () => { + // Simulate truncation mid-string: {"file_path": "/tmp/test.txt", "content": "some text + parser.addChunk(0, '{"file_path": "/tmp/test.txt"', 'call_1', 'write_file'); + parser.addChunk(0, ', "content": "some text'); + const state = parser.getState(0); + expect(state.inString).toBe(true); + expect(parser.hasIncompleteToolCalls()).toBe(true); + }); + + it('should return false for tool calls without name metadata', () => { + // Tool calls without a name should be ignored + parser.addChunk(0, '{"key": "incomplete', undefined, undefined); + expect(parser.hasIncompleteToolCalls()).toBe(false); + }); + + it('should detect incomplete among multiple tool calls', () => { + // First tool call is complete + parser.addChunk(0, '{"key": "value"}', 'call_1', 'func_a'); + // Second tool call is incomplete + parser.addChunk(1, '{"key": "val', 'call_2', 'func_b'); + expect(parser.hasIncompleteToolCalls()).toBe(true); + }); + + it('should return false after reset', () => { + parser.addChunk(0, '{"key": "incomplete', 'call_1', 'write_file'); + expect(parser.hasIncompleteToolCalls()).toBe(true); + parser.reset(); + expect(parser.hasIncompleteToolCalls()).toBe(false); + }); + + it('should detect real-world truncation: write_file with only file_path', () => { + // Reproduces the actual bug: LLM output truncated mid-JSON, + // only file_path key received, content never arrived. 
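+      // For comparison, a complete payload would also carry the content
+      // field and the closing brace, e.g.:
+      //   {"file_path": "/path/to/file.cpp", "content": "..."}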
+ // Buffer: {"file_path": "/path/to/file.cpp" + // depth=1 because outer brace is unclosed + parser.addChunk( + 0, + '{"file_path": "/path/to/file.cpp"', + 'call_1', + 'write_file', + ); + expect(parser.hasIncompleteToolCalls()).toBe(true); + expect(parser.getState(0).depth).toBe(1); + }); + }); }); diff --git a/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.ts b/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.ts index 31fe75283..19a659ab3 100644 --- a/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.ts +++ b/packages/core/src/core/openaiContentGenerator/streamingToolCallParser.ts @@ -411,4 +411,32 @@ export class StreamingToolCallParser { escape: this.escapes.get(index) || false, }; } + + /** + * Checks whether any buffered tool call has incomplete JSON at stream end. + * + * A tool call is considered incomplete when its JSON parsing state indicates + * the buffer was truncated mid-stream: + * - depth > 0: unclosed braces/brackets remain + * - inString === true: still inside a string literal + * + * This is critical for detecting output truncation that the LLM provider + * may not report correctly via finish_reason (e.g. reporting "stop" or + * "tool_calls" instead of "length" when output was actually cut off). + * + * @returns true if at least one tool call buffer has incomplete JSON + */ + hasIncompleteToolCalls(): boolean { + for (const [index] of this.buffers.entries()) { + const meta = this.toolCallMeta.get(index); + if (!meta?.name) continue; + + const depth = this.depths.get(index) || 0; + const inString = this.inStrings.get(index) || false; + if (depth > 0 || inString) { + return true; + } + } + return false; + } } diff --git a/packages/core/src/core/turn.test.ts b/packages/core/src/core/turn.test.ts index 7d687a17b..148a19d63 100644 --- a/packages/core/src/core/turn.test.ts +++ b/packages/core/src/core/turn.test.ts @@ -873,4 +873,141 @@ describe('Turn', () => { expect(turn.getDebugResponses()).toEqual([resp1, resp2]); }); }); + + describe('wasOutputTruncated flag', () => { + it('should set wasOutputTruncated=true on pending tool calls when finishReason is MAX_TOKENS', async () => { + const mockResponseStream = (async function* () { + // Yield a tool call request + yield { + type: StreamEventType.CHUNK, + value: { + functionCalls: [ + { + name: 'write_file', + args: { file_path: '/test.txt', content: 'hello' }, + }, + ], + } as unknown as GenerateContentResponse, + }; + // Yield finish with MAX_TOKENS + yield { + type: StreamEventType.CHUNK, + value: { + candidates: [ + { + finishReason: 'MAX_TOKENS', + content: { parts: [] }, + }, + ], + } as unknown as GenerateContentResponse, + }; + })(); + mockSendMessageStream.mockResolvedValue(mockResponseStream); + + const reqParts: Part[] = [{ text: 'Test prompt' }]; + const events = []; + for await (const event of turn.run( + 'test-model', + reqParts, + new AbortController().signal, + )) { + events.push(event); + } + + // Verify that pending tool calls have wasOutputTruncated flag set + expect(turn.pendingToolCalls).toHaveLength(1); + expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true); + expect(turn.pendingToolCalls[0].name).toBe('write_file'); + }); + + it('should NOT set wasOutputTruncated when finishReason is STOP', async () => { + const mockResponseStream = (async function* () { + yield { + type: StreamEventType.CHUNK, + value: { + functionCalls: [ + { + name: 'read_file', + args: { file_path: '/test.txt' }, + }, + ], + } as unknown as 
GenerateContentResponse, + }; + // Yield finish with STOP (normal completion) + yield { + type: StreamEventType.CHUNK, + value: { + candidates: [ + { + finishReason: 'STOP', + content: { parts: [] }, + }, + ], + } as unknown as GenerateContentResponse, + }; + })(); + mockSendMessageStream.mockResolvedValue(mockResponseStream); + + const reqParts: Part[] = [{ text: 'Test prompt' }]; + for await (const _ of turn.run( + 'test-model', + reqParts, + new AbortController().signal, + )) { + // consume stream + } + + // Verify that pending tool calls do NOT have wasOutputTruncated flag + expect(turn.pendingToolCalls).toHaveLength(1); + expect(turn.pendingToolCalls[0].wasOutputTruncated).toBeUndefined(); + }); + + it('should handle multiple pending tool calls with MAX_TOKENS', async () => { + const mockResponseStream = (async function* () { + // Yield two tool calls + yield { + type: StreamEventType.CHUNK, + value: { + functionCalls: [ + { + name: 'write_file', + args: { file_path: '/test1.txt', content: 'content1' }, + }, + { + name: 'edit', + args: { file_path: '/test2.txt', original_text: 'old' }, + }, + ], + } as unknown as GenerateContentResponse, + }; + // Yield finish with MAX_TOKENS + yield { + type: StreamEventType.CHUNK, + value: { + candidates: [ + { + finishReason: 'MAX_TOKENS', + content: { parts: [] }, + }, + ], + } as unknown as GenerateContentResponse, + }; + })(); + mockSendMessageStream.mockResolvedValue(mockResponseStream); + + const reqParts: Part[] = [{ text: 'Test prompt' }]; + for await (const _ of turn.run( + 'test-model', + reqParts, + new AbortController().signal, + )) { + // consume stream + } + + // Verify both tool calls have wasOutputTruncated flag set + expect(turn.pendingToolCalls).toHaveLength(2); + expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true); + expect(turn.pendingToolCalls[1].wasOutputTruncated).toBe(true); + }); + }); }); diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index 17c6c47de..99eb983de 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -4,14 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { - Part, - PartListUnion, - GenerateContentResponse, - FunctionCall, - FunctionDeclaration, +import { FinishReason, - GenerateContentResponseUsageMetadata, + type Part, + type PartListUnion, + type GenerateContentResponse, + type FunctionCall, + type FunctionDeclaration, + type GenerateContentResponseUsageMetadata, } from '@google/genai'; import type { ToolCallConfirmationDetails, @@ -98,6 +98,8 @@ export interface ToolCallRequestInfo { isClientInitiated: boolean; prompt_id: string; response_id?: string; + /** Set to true when the LLM response was truncated due to max_tokens. */ + wasOutputTruncated?: boolean; } export interface ToolCallResponseInfo { @@ -306,6 +308,14 @@ export class Turn { // This is the key change: Only yield 'Finished' if there is a finishReason. if (finishReason) { + // Mark pending tool calls so downstream can distinguish + // truncation from real parameter errors. 
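+        // coreToolScheduler consumes this flag: truncated Kind.Edit
+        // calls are rejected before execution, and parameter validation
+        // errors get retry guidance appended.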
+ if (finishReason === FinishReason.MAX_TOKENS) { + for (const tc of this.pendingToolCalls) { + tc.wasOutputTruncated = true; + } + } + if (this.pendingCitations.size > 0) { yield { type: GeminiEventType.Citation, diff --git a/packages/core/src/tools/tool-error.ts b/packages/core/src/tools/tool-error.ts index a07de4777..96581602f 100644 --- a/packages/core/src/tools/tool-error.ts +++ b/packages/core/src/tools/tool-error.ts @@ -66,4 +66,7 @@ export enum ToolErrorType { // WebSearch-specific Errors WEB_SEARCH_FAILED = 'web_search_failed', + + // Truncation Errors + OUTPUT_TRUNCATED = 'output_truncated', }