Merge pull request #2021 from sundapeng/fix/truncated-tool-call-protection

fix: detect and protect against truncated tool call output
This commit is contained in:
tanzhenxin 2026-03-02 20:59:36 +08:00 committed by GitHub
commit f770be495f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 713 additions and 10 deletions

View file

@@ -1859,6 +1859,175 @@ describe('CoreToolScheduler request queueing', () => {
});
});
// Exercises the scheduler's defense against tool calls whose arguments were
// truncated mid-stream (LLM hit max_tokens): file-modifying (Kind.Edit) calls
// flagged with wasOutputTruncated must be rejected so incomplete content is
// never written, while non-Edit tools may still execute.
describe('CoreToolScheduler truncated output protection', () => {
  // Builds a CoreToolScheduler wired to stub Config/ToolRegistry objects
  // that resolve every lookup to `tool`; returns the scheduler together with
  // the onAllToolCallsComplete spy so tests can await completion.
  function createTruncationTestScheduler(
    tool: TestApprovalTool | MockTool,
    toolNames: string[],
  ) {
    const onAllToolCallsComplete = vi.fn();
    const onToolCallsUpdate = vi.fn();
    // Minimal registry stub: every tool name resolves to the test tool.
    const mockToolRegistry = {
      getTool: () => tool,
      getAllToolNames: () => toolNames,
      getFunctionDeclarations: () => [],
      tools: new Map(),
    } as unknown as ToolRegistry;
    const mockConfig = {
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => true,
      getDebugMode: () => false,
      // AUTO_EDIT auto-approves edit-kind tools, so any rejection observed
      // in these tests comes from the truncation guard alone.
      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
      getAllowedTools: () => [],
      getExcludeTools: () => undefined,
      getContentGeneratorConfig: () => ({
        model: 'test-model',
        authType: 'gemini',
      }),
      getShellExecutionConfig: () => ({
        terminalWidth: 90,
        terminalHeight: 30,
      }),
      storage: {
        getProjectTempDir: () => '/tmp',
      },
      getTruncateToolOutputThreshold: () =>
        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
      getToolRegistry: () => mockToolRegistry,
      getUseModelRouter: () => false,
      getGeminiClient: () => null,
      getChatRecordingService: () => undefined,
      isInteractive: () => true,
    } as unknown as Config;
    const scheduler = new CoreToolScheduler({
      config: mockConfig,
      onAllToolCallsComplete,
      onToolCallsUpdate,
      getPreferredEditor: () => 'vscode',
      onEditorClose: vi.fn(),
    });
    return { scheduler, onAllToolCallsComplete };
  }

  it('should reject Kind.Edit tool calls when wasOutputTruncated is true', async () => {
    // TestApprovalTool is an edit-kind tool, so the truncation guard applies.
    const declarativeTool = new TestApprovalTool({
      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
    } as unknown as Config);
    const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
      declarativeTool,
      [TestApprovalTool.Name],
    );
    await scheduler.schedule(
      [
        {
          callId: '1',
          name: TestApprovalTool.Name,
          args: { id: 'test-truncated' },
          isClientInitiated: false,
          prompt_id: 'prompt-id-truncated',
          wasOutputTruncated: true,
        },
      ],
      new AbortController().signal,
    );
    await vi.waitFor(() => {
      expect(onAllToolCallsComplete).toHaveBeenCalled();
    });
    const completedCalls = onAllToolCallsComplete.mock
      .calls[0][0] as ToolCall[];
    expect(completedCalls).toHaveLength(1);
    const completedCall = completedCalls[0];
    // The call must fail with the truncation-rejection message rather than
    // execute and write incomplete file content.
    expect(completedCall.status).toBe('error');
    if (completedCall.status === 'error') {
      const errorMessage = completedCall.response.error?.message;
      expect(errorMessage).toContain('truncated due to max_tokens limit');
      expect(errorMessage).toContain(
        'rejected to prevent writing truncated content',
      );
    }
  });

  it('should allow Kind.Edit tool calls when wasOutputTruncated is false', async () => {
    const declarativeTool = new TestApprovalTool({
      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
    } as unknown as Config);
    const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
      declarativeTool,
      [TestApprovalTool.Name],
    );
    await scheduler.schedule(
      [
        {
          callId: '1',
          name: TestApprovalTool.Name,
          args: { id: 'test-normal' },
          isClientInitiated: false,
          prompt_id: 'prompt-id-normal',
          wasOutputTruncated: false,
        },
      ],
      new AbortController().signal,
    );
    await vi.waitFor(() => {
      expect(onAllToolCallsComplete).toHaveBeenCalled();
    });
    const completedCalls = onAllToolCallsComplete.mock
      .calls[0][0] as ToolCall[];
    expect(completedCalls).toHaveLength(1);
    // Should succeed (not error) since wasOutputTruncated is false
    expect(completedCalls[0].status).toBe('success');
  });

  it('should allow non-Edit tools when wasOutputTruncated is true', async () => {
    // A plain MockTool (non-Edit kind): the guard must only block
    // file-modifying tools, not read-style tools.
    const mockTool = new MockTool({
      name: 'mockReadTool',
      execute: async () => ({
        llmContent: 'read result',
        returnDisplay: 'read result',
      }),
    });
    const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
      mockTool,
      ['mockReadTool'],
    );
    await scheduler.schedule(
      [
        {
          callId: '1',
          name: 'mockReadTool',
          args: {},
          isClientInitiated: false,
          prompt_id: 'prompt-id-read-truncated',
          wasOutputTruncated: true,
        },
      ],
      new AbortController().signal,
    );
    await vi.waitFor(() => {
      expect(onAllToolCallsComplete).toHaveBeenCalled();
    });
    const completedCalls = onAllToolCallsComplete.mock
      .calls[0][0] as ToolCall[];
    expect(completedCalls).toHaveLength(1);
    // Non-Edit tools should still execute even when output was truncated
    expect(completedCalls[0].status).toBe('success');
  });
});
describe('CoreToolScheduler Sequential Execution', () => {
it('should execute tool calls in a batch sequentially', async () => {
// Arrange

View file

@@ -32,6 +32,7 @@ import {
logToolOutputTruncated,
ToolOutputTruncatedEvent,
InputFormat,
Kind,
SkillTool,
} from '../index.js';
import type {
@@ -55,6 +56,23 @@ import levenshtein from 'fast-levenshtein';
import { getPlanModeSystemReminder } from './prompts.js';
import { ShellToolInvocation } from '../tools/shell.js';
// Guidance appended to parameter-validation errors when the model's output
// was cut off by max_tokens: the bad parameters are most likely a truncation
// artifact, so ask the model to retry (possibly in smaller pieces).
const TRUNCATION_PARAM_GUIDANCE = [
  'Note: Your previous response was truncated due to max_tokens limit,',
  'which likely caused incomplete tool call parameters.',
  'Please retry the tool call with complete parameters.',
  'If the content is too large for a single response,',
  'consider splitting it into smaller parts.',
].join(' ');

// Error message used when a file-modifying (Kind.Edit) tool call is rejected
// outright because its arguments arrived truncated — writing them would
// corrupt the target file with partial content.
const TRUNCATION_EDIT_REJECTION = [
  'Your previous response was truncated due to max_tokens limit,',
  'which likely produced incomplete file content.',
  'The tool call has been rejected to prevent writing',
  'truncated content to the file.',
  'Please retry the tool call with complete content.',
  'If the content is too large for a single response,',
  'consider splitting it into smaller parts',
  '(e.g., write_file for initial content, then edit for additions).',
].join(' ');
export type ValidatingToolCall = {
status: 'validating';
request: ToolCallRequestInfo;
@@ -773,19 +791,41 @@ export class CoreToolScheduler {
reqInfo.args,
);
if (invocationOrError instanceof Error) {
const error = reqInfo.wasOutputTruncated
? new Error(
`${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`,
)
: invocationOrError;
return {
status: 'error',
request: reqInfo,
tool: toolInstance,
response: createErrorResponse(
reqInfo,
invocationOrError,
error,
ToolErrorType.INVALID_TOOL_PARAMS,
),
durationMs: 0,
};
}
// Reject file-modifying calls when truncated to prevent
// writing incomplete content.
if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) {
const truncationError = new Error(TRUNCATION_EDIT_REJECTION);
return {
status: 'error',
request: reqInfo,
tool: toolInstance,
response: createErrorResponse(
reqInfo,
truncationError,
ToolErrorType.OUTPUT_TRUNCATED,
),
durationMs: 0,
};
}
return {
status: 'validating',
request: reqInfo,

View file

@@ -9,6 +9,7 @@ import { OpenAIContentConverter } from './converter.js';
import type { StreamingToolCallParser } from './streamingToolCallParser.js';
import {
Type,
FinishReason,
type GenerateContentParameters,
type Content,
type Part,
@@ -1968,6 +1969,241 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
});
});
// Verifies that the OpenAI→Gemini chunk converter detects tool-call JSON cut
// off mid-stream and reports the candidate as MAX_TOKENS, even when the
// provider mislabels the finish reason (e.g. "stop" or "tool_calls").
describe('Truncated tool call detection in streaming', () => {
  let converter: OpenAIContentConverter;
  beforeEach(() => {
    converter = new OpenAIContentConverter('test-model');
  });
  /**
   * Helper: feed streaming chunks then a final chunk with finish_reason,
   * and return the Gemini response for the final chunk.
   */
  function feedToolCallChunks(
    conv: OpenAIContentConverter,
    toolCallChunks: Array<{
      index: number;
      id?: string;
      name?: string;
      arguments: string;
    }>,
    finishReason: string,
  ) {
    // Feed argument chunks (no finish_reason yet)
    for (const tc of toolCallChunks) {
      conv.convertOpenAIChunkToGemini({
        object: 'chat.completion.chunk',
        id: 'chunk-stream',
        created: 100,
        model: 'test-model',
        choices: [
          {
            index: 0,
            delta: {
              tool_calls: [
                {
                  index: tc.index,
                  id: tc.id,
                  type: 'function' as const,
                  function: {
                    name: tc.name,
                    arguments: tc.arguments,
                  },
                },
              ],
            },
            finish_reason: null,
            logprobs: null,
          },
        ],
      } as unknown as OpenAI.Chat.ChatCompletionChunk);
    }
    // Final chunk with finish_reason
    return conv.convertOpenAIChunkToGemini({
      object: 'chat.completion.chunk',
      id: 'chunk-final',
      created: 101,
      model: 'test-model',
      choices: [
        {
          index: 0,
          delta: {},
          finish_reason: finishReason,
          logprobs: null,
        },
      ],
    } as unknown as OpenAI.Chat.ChatCompletionChunk);
  }

  it('should override finishReason to MAX_TOKENS when tool call JSON is truncated and provider reports "stop"', () => {
    // Simulate: write_file call truncated mid-JSON, provider says "stop"
    const result = feedToolCallChunks(
      converter,
      [
        {
          index: 0,
          id: 'call_1',
          name: 'write_file',
          arguments: '{"file_path": "/tmp/test.cpp"',
          // Missing closing brace and content field — truncated
        },
      ],
      'stop',
    );
    expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
  });

  it('should override finishReason to MAX_TOKENS when provider reports "tool_calls" but JSON is truncated', () => {
    const result = feedToolCallChunks(
      converter,
      [
        {
          index: 0,
          id: 'call_1',
          name: 'write_file',
          arguments:
            '{"file_path": "/tmp/test.cpp", "content": "partial content',
          // Truncated mid-string
        },
      ],
      'tool_calls',
    );
    expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
  });

  it('should preserve finishReason STOP when tool call JSON is complete', () => {
    // Complete, well-formed arguments: no override should occur.
    const result = feedToolCallChunks(
      converter,
      [
        {
          index: 0,
          id: 'call_1',
          name: 'write_file',
          arguments: '{"file_path": "/tmp/test.cpp", "content": "hello"}',
        },
      ],
      'stop',
    );
    expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
  });

  it('should preserve finishReason MAX_TOKENS when provider already reports "length"', () => {
    // Provider reports the truncation correctly; mapping must still be
    // MAX_TOKENS (no double handling).
    const result = feedToolCallChunks(
      converter,
      [
        {
          index: 0,
          id: 'call_1',
          name: 'write_file',
          arguments: '{"file_path": "/tmp/test.cpp"',
        },
      ],
      'length',
    );
    expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
  });

  it('should still emit the (repaired) function call even when truncated', () => {
    // Truncation must not swallow the call entirely: downstream needs the
    // partial args to produce a useful rejection/retry message.
    const result = feedToolCallChunks(
      converter,
      [
        {
          index: 0,
          id: 'call_1',
          name: 'write_file',
          arguments: '{"file_path": "/tmp/test.cpp"',
        },
      ],
      'stop',
    );
    const parts = result.candidates?.[0]?.content?.parts ?? [];
    const fnCall = parts.find((p: Part) => p.functionCall);
    expect(fnCall).toBeDefined();
    expect(fnCall?.functionCall?.name).toBe('write_file');
    expect(fnCall?.functionCall?.args).toEqual({
      file_path: '/tmp/test.cpp',
    });
  });

  it('should detect truncation with multi-chunk streaming arguments', () => {
    // Feed arguments in multiple small chunks like real streaming
    const conv = new OpenAIContentConverter('test-model');
    // Chunk 1: start of JSON with tool metadata
    conv.convertOpenAIChunkToGemini({
      object: 'chat.completion.chunk',
      id: 'c1',
      created: 100,
      model: 'test-model',
      choices: [
        {
          index: 0,
          delta: {
            tool_calls: [
              {
                index: 0,
                id: 'call_1',
                type: 'function' as const,
                function: { name: 'write_file', arguments: '{"file_' },
              },
            ],
          },
          finish_reason: null,
          logprobs: null,
        },
      ],
    } as unknown as OpenAI.Chat.ChatCompletionChunk);
    // Chunk 2: more arguments
    conv.convertOpenAIChunkToGemini({
      object: 'chat.completion.chunk',
      id: 'c2',
      created: 100,
      model: 'test-model',
      choices: [
        {
          index: 0,
          delta: {
            tool_calls: [
              {
                index: 0,
                function: { arguments: 'path": "/tmp/f.txt", "conten' },
              },
            ],
          },
          finish_reason: null,
          logprobs: null,
        },
      ],
    } as unknown as OpenAI.Chat.ChatCompletionChunk);
    // Final chunk: finish_reason "stop" but JSON is still incomplete
    const result = conv.convertOpenAIChunkToGemini({
      object: 'chat.completion.chunk',
      id: 'c3',
      created: 101,
      model: 'test-model',
      choices: [
        {
          index: 0,
          delta: {},
          finish_reason: 'stop',
          logprobs: null,
        },
      ],
    } as unknown as OpenAI.Chat.ChatCompletionChunk);
    expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
  });
});
describe('modality filtering', () => {
function makeRequest(parts: Part[]): GenerateContentParameters {
return {

View file

@@ -973,7 +973,14 @@ export class OpenAIContentConverter {
}
// Only emit function calls when streaming is complete (finish_reason is present)
let toolCallsTruncated = false;
if (choice.finish_reason) {
// Detect truncation the provider may not report correctly.
// Some providers (e.g. DashScope/Qwen) send "stop" or "tool_calls"
// even when output was cut off mid-JSON due to max_tokens.
toolCallsTruncated =
this.streamingToolCallParser.hasIncompleteToolCalls();
const completedToolCalls =
this.streamingToolCallParser.getCompletedToolCalls();
@@ -995,6 +1002,13 @@
this.streamingToolCallParser.reset();
}
// If tool call JSON was truncated, override to "length" so downstream
// (turn.ts) correctly sets wasOutputTruncated=true.
const effectiveFinishReason =
toolCallsTruncated && choice.finish_reason !== 'length'
? 'length'
: choice.finish_reason;
// Only include finishReason key if finish_reason is present
const candidate: Candidate = {
content: {
@@ -1004,9 +1018,9 @@
index: 0,
safetyRatings: [],
};
if (choice.finish_reason) {
if (effectiveFinishReason) {
candidate.finishReason = this.mapOpenAIFinishReasonToGemini(
choice.finish_reason,
effectiveFinishReason,
);
}
response.candidates = [candidate];

View file

@@ -790,4 +790,70 @@ describe('StreamingToolCallParser', () => {
expect(call2?.args).toEqual({ param2: 'value2' });
});
});
// Unit tests for StreamingToolCallParser.hasIncompleteToolCalls(): the JSON
// parse state (brace depth, in-string flag) is used to decide whether a
// tool-call buffer was truncated mid-stream. `parser` is the instance
// created in the enclosing describe's beforeEach.
describe('hasIncompleteToolCalls', () => {
  it('should return false when no tool calls exist', () => {
    expect(parser.hasIncompleteToolCalls()).toBe(false);
  });

  it('should return false when all tool calls have complete JSON', () => {
    parser.addChunk(0, '{"key": "value"}', 'call_1', 'write_file');
    expect(parser.hasIncompleteToolCalls()).toBe(false);
  });

  it('should return true when a tool call has depth > 0 (unclosed braces)', () => {
    parser.addChunk(
      0,
      '{"file_path": "/tmp/test.txt", "content": "partial',
      'call_1',
      'write_file',
    );
    expect(parser.hasIncompleteToolCalls()).toBe(true);
  });

  it('should return true when a tool call is inside a string literal', () => {
    // Simulate truncation mid-string: {"file_path": "/tmp/test.txt", "content": "some text
    parser.addChunk(0, '{"file_path": "/tmp/test.txt"', 'call_1', 'write_file');
    parser.addChunk(0, ', "content": "some text');
    const state = parser.getState(0);
    expect(state.inString).toBe(true);
    expect(parser.hasIncompleteToolCalls()).toBe(true);
  });

  it('should return false for tool calls without name metadata', () => {
    // Tool calls without a name should be ignored
    parser.addChunk(0, '{"key": "incomplete', undefined, undefined);
    expect(parser.hasIncompleteToolCalls()).toBe(false);
  });

  it('should detect incomplete among multiple tool calls', () => {
    // First tool call is complete
    parser.addChunk(0, '{"key": "value"}', 'call_1', 'func_a');
    // Second tool call is incomplete
    parser.addChunk(1, '{"key": "val', 'call_2', 'func_b');
    expect(parser.hasIncompleteToolCalls()).toBe(true);
  });

  it('should return false after reset', () => {
    // reset() must clear all buffered parse state.
    parser.addChunk(0, '{"key": "incomplete', 'call_1', 'write_file');
    expect(parser.hasIncompleteToolCalls()).toBe(true);
    parser.reset();
    expect(parser.hasIncompleteToolCalls()).toBe(false);
  });

  it('should detect real-world truncation: write_file with only file_path', () => {
    // Reproduces the actual bug: LLM output truncated mid-JSON,
    // only file_path key received, content never arrived.
    // Buffer: {"file_path": "/path/to/file.cpp"
    // depth=1 because outer brace is unclosed
    parser.addChunk(
      0,
      '{"file_path": "/path/to/file.cpp"',
      'call_1',
      'write_file',
    );
    expect(parser.hasIncompleteToolCalls()).toBe(true);
    expect(parser.getState(0).depth).toBe(1);
  });
});
});

View file

@@ -411,4 +411,32 @@ export class StreamingToolCallParser {
escape: this.escapes.get(index) || false,
};
}
/**
 * Checks whether any buffered tool call has incomplete JSON at stream end.
 *
 * A tool call is considered incomplete when its JSON parsing state indicates
 * the buffer was truncated mid-stream:
 * - depth > 0: unclosed braces/brackets remain
 * - inString === true: still inside a string literal
 *
 * This is critical for detecting output truncation that the LLM provider
 * may not report correctly via finish_reason (e.g. reporting "stop" or
 * "tool_calls" instead of "length" when output was actually cut off).
 *
 * @returns true if at least one tool call buffer has incomplete JSON
 */
hasIncompleteToolCalls(): boolean {
  // Iterate over keys() directly instead of destructuring entries() —
  // only the index is needed, not the buffered text.
  for (const index of this.buffers.keys()) {
    // Buffers without name metadata never become function calls; skip them.
    const meta = this.toolCallMeta.get(index);
    if (!meta?.name) continue;
    // `??` (not `||`) so only a missing Map entry falls back to the default.
    const depth = this.depths.get(index) ?? 0;
    const inString = this.inStrings.get(index) ?? false;
    if (depth > 0 || inString) {
      return true;
    }
  }
  return false;
}
}

View file

@@ -873,4 +873,141 @@ describe('Turn', () => {
expect(turn.getDebugResponses()).toEqual([resp1, resp2]);
});
});
// Verifies that Turn.run() marks pending tool calls with wasOutputTruncated
// when the model stream finishes with MAX_TOKENS, so the scheduler can later
// reject potentially incomplete edits. `turn` and `mockSendMessageStream`
// come from the enclosing describe's setup.
describe('wasOutputTruncated flag', () => {
  it('should set wasOutputTruncated=true on pending tool calls when finishReason is MAX_TOKENS', async () => {
    const mockResponseStream = (async function* () {
      // Yield a tool call request
      yield {
        type: StreamEventType.CHUNK,
        value: {
          functionCalls: [
            {
              name: 'write_file',
              args: { file_path: '/test.txt', content: 'hello' },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
      // Yield finish with MAX_TOKENS
      yield {
        type: StreamEventType.CHUNK,
        value: {
          candidates: [
            {
              finishReason: 'MAX_TOKENS',
              content: { parts: [] },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
    })();
    mockSendMessageStream.mockResolvedValue(mockResponseStream);
    const reqParts: Part[] = [{ text: 'Test prompt' }];
    // Drain the turn's event stream so the finish chunk is processed.
    const events = [];
    for await (const event of turn.run(
      'test-model',
      reqParts,
      new AbortController().signal,
    )) {
      events.push(event);
    }
    // Verify that pending tool calls have wasOutputTruncated flag set
    expect(turn.pendingToolCalls).toHaveLength(1);
    expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true);
    expect(turn.pendingToolCalls[0].name).toBe('write_file');
  });

  it('should NOT set wasOutputTruncated when finishReason is STOP', async () => {
    const mockResponseStream = (async function* () {
      yield {
        type: StreamEventType.CHUNK,
        value: {
          functionCalls: [
            {
              name: 'read_file',
              args: { file_path: '/test.txt' },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
      // Yield finish with STOP (normal completion)
      yield {
        type: StreamEventType.CHUNK,
        value: {
          candidates: [
            {
              finishReason: 'STOP',
              content: { parts: [] },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
    })();
    mockSendMessageStream.mockResolvedValue(mockResponseStream);
    const reqParts: Part[] = [{ text: 'Test prompt' }];
    for await (const _ of turn.run(
      'test-model',
      reqParts,
      new AbortController().signal,
    )) {
      // consume stream
    }
    // Verify that pending tool calls do NOT have wasOutputTruncated flag
    expect(turn.pendingToolCalls).toHaveLength(1);
    expect(turn.pendingToolCalls[0].wasOutputTruncated).toBeUndefined();
  });

  it('should handle multiple pending tool calls with MAX_TOKENS', async () => {
    const mockResponseStream = (async function* () {
      // Yield two tool calls
      yield {
        type: StreamEventType.CHUNK,
        value: {
          functionCalls: [
            {
              name: 'write_file',
              args: { file_path: '/test1.txt', content: 'content1' },
            },
            {
              name: 'edit',
              args: { file_path: '/test2.txt', original_text: 'old' },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
      // Yield finish with MAX_TOKENS
      yield {
        type: StreamEventType.CHUNK,
        value: {
          candidates: [
            {
              finishReason: 'MAX_TOKENS',
              content: { parts: [] },
            },
          ],
        } as unknown as GenerateContentResponse,
      };
    })();
    mockSendMessageStream.mockResolvedValue(mockResponseStream);
    const reqParts: Part[] = [{ text: 'Test prompt' }];
    for await (const _ of turn.run(
      'test-model',
      reqParts,
      new AbortController().signal,
    )) {
      // consume stream
    }
    // Verify both tool calls have wasOutputTruncated flag set
    expect(turn.pendingToolCalls).toHaveLength(2);
    expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true);
    expect(turn.pendingToolCalls[1].wasOutputTruncated).toBe(true);
  });
});
});

View file

@@ -4,14 +4,14 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type {
Part,
PartListUnion,
GenerateContentResponse,
FunctionCall,
FunctionDeclaration,
import {
FinishReason,
GenerateContentResponseUsageMetadata,
type Part,
type PartListUnion,
type GenerateContentResponse,
type FunctionCall,
type FunctionDeclaration,
type GenerateContentResponseUsageMetadata,
} from '@google/genai';
import type {
ToolCallConfirmationDetails,
@@ -98,6 +98,8 @@ export interface ToolCallRequestInfo {
isClientInitiated: boolean;
prompt_id: string;
response_id?: string;
/** Set to true when the LLM response was truncated due to max_tokens. */
wasOutputTruncated?: boolean;
}
export interface ToolCallResponseInfo {
@@ -306,6 +308,14 @@ export class Turn {
// This is the key change: Only yield 'Finished' if there is a finishReason.
if (finishReason) {
// Mark pending tool calls so downstream can distinguish
// truncation from real parameter errors.
if (finishReason === FinishReason.MAX_TOKENS) {
for (const tc of this.pendingToolCalls) {
tc.wasOutputTruncated = true;
}
}
if (this.pendingCitations.size > 0) {
yield {
type: GeminiEventType.Citation,

View file

@@ -66,4 +66,7 @@ export enum ToolErrorType {
// WebSearch-specific Errors
WEB_SEARCH_FAILED = 'web_search_failed',
// Truncation Errors
OUTPUT_TRUNCATED = 'output_truncated',
}