mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-10 03:59:33 +00:00
Merge pull request #2021 from sundapeng/fix/truncated-tool-call-protection
fix: detect and protect against truncated tool call output
This commit is contained in:
commit
f770be495f
9 changed files with 713 additions and 10 deletions
|
|
@ -1859,6 +1859,175 @@ describe('CoreToolScheduler request queueing', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('CoreToolScheduler truncated output protection', () => {
|
||||
function createTruncationTestScheduler(
|
||||
tool: TestApprovalTool | MockTool,
|
||||
toolNames: string[],
|
||||
) {
|
||||
const onAllToolCallsComplete = vi.fn();
|
||||
const onToolCallsUpdate = vi.fn();
|
||||
|
||||
const mockToolRegistry = {
|
||||
getTool: () => tool,
|
||||
getAllToolNames: () => toolNames,
|
||||
getFunctionDeclarations: () => [],
|
||||
tools: new Map(),
|
||||
} as unknown as ToolRegistry;
|
||||
|
||||
const mockConfig = {
|
||||
getSessionId: () => 'test-session-id',
|
||||
getUsageStatisticsEnabled: () => true,
|
||||
getDebugMode: () => false,
|
||||
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
|
||||
getAllowedTools: () => [],
|
||||
getExcludeTools: () => undefined,
|
||||
getContentGeneratorConfig: () => ({
|
||||
model: 'test-model',
|
||||
authType: 'gemini',
|
||||
}),
|
||||
getShellExecutionConfig: () => ({
|
||||
terminalWidth: 90,
|
||||
terminalHeight: 30,
|
||||
}),
|
||||
storage: {
|
||||
getProjectTempDir: () => '/tmp',
|
||||
},
|
||||
getTruncateToolOutputThreshold: () =>
|
||||
DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
|
||||
getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
|
||||
getToolRegistry: () => mockToolRegistry,
|
||||
getUseModelRouter: () => false,
|
||||
getGeminiClient: () => null,
|
||||
getChatRecordingService: () => undefined,
|
||||
isInteractive: () => true,
|
||||
} as unknown as Config;
|
||||
|
||||
const scheduler = new CoreToolScheduler({
|
||||
config: mockConfig,
|
||||
onAllToolCallsComplete,
|
||||
onToolCallsUpdate,
|
||||
getPreferredEditor: () => 'vscode',
|
||||
onEditorClose: vi.fn(),
|
||||
});
|
||||
|
||||
return { scheduler, onAllToolCallsComplete };
|
||||
}
|
||||
|
||||
it('should reject Kind.Edit tool calls when wasOutputTruncated is true', async () => {
|
||||
const declarativeTool = new TestApprovalTool({
|
||||
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
|
||||
} as unknown as Config);
|
||||
const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
|
||||
declarativeTool,
|
||||
[TestApprovalTool.Name],
|
||||
);
|
||||
|
||||
await scheduler.schedule(
|
||||
[
|
||||
{
|
||||
callId: '1',
|
||||
name: TestApprovalTool.Name,
|
||||
args: { id: 'test-truncated' },
|
||||
isClientInitiated: false,
|
||||
prompt_id: 'prompt-id-truncated',
|
||||
wasOutputTruncated: true,
|
||||
},
|
||||
],
|
||||
new AbortController().signal,
|
||||
);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(onAllToolCallsComplete).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
const completedCalls = onAllToolCallsComplete.mock
|
||||
.calls[0][0] as ToolCall[];
|
||||
expect(completedCalls).toHaveLength(1);
|
||||
const completedCall = completedCalls[0];
|
||||
expect(completedCall.status).toBe('error');
|
||||
|
||||
if (completedCall.status === 'error') {
|
||||
const errorMessage = completedCall.response.error?.message;
|
||||
expect(errorMessage).toContain('truncated due to max_tokens limit');
|
||||
expect(errorMessage).toContain(
|
||||
'rejected to prevent writing truncated content',
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('should allow Kind.Edit tool calls when wasOutputTruncated is false', async () => {
|
||||
const declarativeTool = new TestApprovalTool({
|
||||
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
|
||||
} as unknown as Config);
|
||||
const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
|
||||
declarativeTool,
|
||||
[TestApprovalTool.Name],
|
||||
);
|
||||
|
||||
await scheduler.schedule(
|
||||
[
|
||||
{
|
||||
callId: '1',
|
||||
name: TestApprovalTool.Name,
|
||||
args: { id: 'test-normal' },
|
||||
isClientInitiated: false,
|
||||
prompt_id: 'prompt-id-normal',
|
||||
wasOutputTruncated: false,
|
||||
},
|
||||
],
|
||||
new AbortController().signal,
|
||||
);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(onAllToolCallsComplete).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
const completedCalls = onAllToolCallsComplete.mock
|
||||
.calls[0][0] as ToolCall[];
|
||||
expect(completedCalls).toHaveLength(1);
|
||||
// Should succeed (not error) since wasOutputTruncated is false
|
||||
expect(completedCalls[0].status).toBe('success');
|
||||
});
|
||||
|
||||
it('should allow non-Edit tools when wasOutputTruncated is true', async () => {
|
||||
const mockTool = new MockTool({
|
||||
name: 'mockReadTool',
|
||||
execute: async () => ({
|
||||
llmContent: 'read result',
|
||||
returnDisplay: 'read result',
|
||||
}),
|
||||
});
|
||||
const { scheduler, onAllToolCallsComplete } = createTruncationTestScheduler(
|
||||
mockTool,
|
||||
['mockReadTool'],
|
||||
);
|
||||
|
||||
await scheduler.schedule(
|
||||
[
|
||||
{
|
||||
callId: '1',
|
||||
name: 'mockReadTool',
|
||||
args: {},
|
||||
isClientInitiated: false,
|
||||
prompt_id: 'prompt-id-read-truncated',
|
||||
wasOutputTruncated: true,
|
||||
},
|
||||
],
|
||||
new AbortController().signal,
|
||||
);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(onAllToolCallsComplete).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
const completedCalls = onAllToolCallsComplete.mock
|
||||
.calls[0][0] as ToolCall[];
|
||||
expect(completedCalls).toHaveLength(1);
|
||||
// Non-Edit tools should still execute even when output was truncated
|
||||
expect(completedCalls[0].status).toBe('success');
|
||||
});
|
||||
});
|
||||
|
||||
describe('CoreToolScheduler Sequential Execution', () => {
|
||||
it('should execute tool calls in a batch sequentially', async () => {
|
||||
// Arrange
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import {
|
|||
logToolOutputTruncated,
|
||||
ToolOutputTruncatedEvent,
|
||||
InputFormat,
|
||||
Kind,
|
||||
SkillTool,
|
||||
} from '../index.js';
|
||||
import type {
|
||||
|
|
@ -55,6 +56,23 @@ import levenshtein from 'fast-levenshtein';
|
|||
import { getPlanModeSystemReminder } from './prompts.js';
|
||||
import { ShellToolInvocation } from '../tools/shell.js';
|
||||
|
||||
// Appended to parameter-validation error messages when the model's response
// was cut off by max_tokens: the malformed arguments are most likely a
// truncation artifact, so the model is told to retry with complete
// parameters instead of "fixing" arguments it never finished emitting.
const TRUNCATION_PARAM_GUIDANCE =
  'Note: Your previous response was truncated due to max_tokens limit, ' +
  'which likely caused incomplete tool call parameters. ' +
  'Please retry the tool call with complete parameters. ' +
  'If the content is too large for a single response, ' +
  'consider splitting it into smaller parts.';

// Error text used when a file-modifying (Kind.Edit) tool call is rejected
// outright because the response was truncated — executing it could write
// partial content to disk. Suggests a split write/edit strategy as a remedy.
const TRUNCATION_EDIT_REJECTION =
  'Your previous response was truncated due to max_tokens limit, ' +
  'which likely produced incomplete file content. ' +
  'The tool call has been rejected to prevent writing ' +
  'truncated content to the file. ' +
  'Please retry the tool call with complete content. ' +
  'If the content is too large for a single response, ' +
  'consider splitting it into smaller parts ' +
  '(e.g., write_file for initial content, then edit for additions).';
|
||||
|
||||
export type ValidatingToolCall = {
|
||||
status: 'validating';
|
||||
request: ToolCallRequestInfo;
|
||||
|
|
@ -773,19 +791,41 @@ export class CoreToolScheduler {
|
|||
reqInfo.args,
|
||||
);
|
||||
if (invocationOrError instanceof Error) {
|
||||
const error = reqInfo.wasOutputTruncated
|
||||
? new Error(
|
||||
`${invocationOrError.message} ${TRUNCATION_PARAM_GUIDANCE}`,
|
||||
)
|
||||
: invocationOrError;
|
||||
return {
|
||||
status: 'error',
|
||||
request: reqInfo,
|
||||
tool: toolInstance,
|
||||
response: createErrorResponse(
|
||||
reqInfo,
|
||||
invocationOrError,
|
||||
error,
|
||||
ToolErrorType.INVALID_TOOL_PARAMS,
|
||||
),
|
||||
durationMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Reject file-modifying calls when truncated to prevent
|
||||
// writing incomplete content.
|
||||
if (reqInfo.wasOutputTruncated && toolInstance.kind === Kind.Edit) {
|
||||
const truncationError = new Error(TRUNCATION_EDIT_REJECTION);
|
||||
return {
|
||||
status: 'error',
|
||||
request: reqInfo,
|
||||
tool: toolInstance,
|
||||
response: createErrorResponse(
|
||||
reqInfo,
|
||||
truncationError,
|
||||
ToolErrorType.OUTPUT_TRUNCATED,
|
||||
),
|
||||
durationMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
status: 'validating',
|
||||
request: reqInfo,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { OpenAIContentConverter } from './converter.js';
|
|||
import type { StreamingToolCallParser } from './streamingToolCallParser.js';
|
||||
import {
|
||||
Type,
|
||||
FinishReason,
|
||||
type GenerateContentParameters,
|
||||
type Content,
|
||||
type Part,
|
||||
|
|
@ -1968,6 +1969,241 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
|
|||
});
|
||||
});
|
||||
|
||||
describe('Truncated tool call detection in streaming', () => {
|
||||
let converter: OpenAIContentConverter;
|
||||
|
||||
beforeEach(() => {
|
||||
converter = new OpenAIContentConverter('test-model');
|
||||
});
|
||||
|
||||
/**
|
||||
* Helper: feed streaming chunks then a final chunk with finish_reason,
|
||||
* and return the Gemini response for the final chunk.
|
||||
*/
|
||||
function feedToolCallChunks(
|
||||
conv: OpenAIContentConverter,
|
||||
toolCallChunks: Array<{
|
||||
index: number;
|
||||
id?: string;
|
||||
name?: string;
|
||||
arguments: string;
|
||||
}>,
|
||||
finishReason: string,
|
||||
) {
|
||||
// Feed argument chunks (no finish_reason yet)
|
||||
for (const tc of toolCallChunks) {
|
||||
conv.convertOpenAIChunkToGemini({
|
||||
object: 'chat.completion.chunk',
|
||||
id: 'chunk-stream',
|
||||
created: 100,
|
||||
model: 'test-model',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
tool_calls: [
|
||||
{
|
||||
index: tc.index,
|
||||
id: tc.id,
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name: tc.name,
|
||||
arguments: tc.arguments,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
finish_reason: null,
|
||||
logprobs: null,
|
||||
},
|
||||
],
|
||||
} as unknown as OpenAI.Chat.ChatCompletionChunk);
|
||||
}
|
||||
|
||||
// Final chunk with finish_reason
|
||||
return conv.convertOpenAIChunkToGemini({
|
||||
object: 'chat.completion.chunk',
|
||||
id: 'chunk-final',
|
||||
created: 101,
|
||||
model: 'test-model',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: finishReason,
|
||||
logprobs: null,
|
||||
},
|
||||
],
|
||||
} as unknown as OpenAI.Chat.ChatCompletionChunk);
|
||||
}
|
||||
|
||||
it('should override finishReason to MAX_TOKENS when tool call JSON is truncated and provider reports "stop"', () => {
|
||||
// Simulate: write_file call truncated mid-JSON, provider says "stop"
|
||||
const result = feedToolCallChunks(
|
||||
converter,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
name: 'write_file',
|
||||
arguments: '{"file_path": "/tmp/test.cpp"',
|
||||
// Missing closing brace and content field — truncated
|
||||
},
|
||||
],
|
||||
'stop',
|
||||
);
|
||||
|
||||
expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
|
||||
});
|
||||
|
||||
it('should override finishReason to MAX_TOKENS when provider reports "tool_calls" but JSON is truncated', () => {
|
||||
const result = feedToolCallChunks(
|
||||
converter,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
name: 'write_file',
|
||||
arguments:
|
||||
'{"file_path": "/tmp/test.cpp", "content": "partial content',
|
||||
// Truncated mid-string
|
||||
},
|
||||
],
|
||||
'tool_calls',
|
||||
);
|
||||
|
||||
expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
|
||||
});
|
||||
|
||||
it('should preserve finishReason STOP when tool call JSON is complete', () => {
|
||||
const result = feedToolCallChunks(
|
||||
converter,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
name: 'write_file',
|
||||
arguments: '{"file_path": "/tmp/test.cpp", "content": "hello"}',
|
||||
},
|
||||
],
|
||||
'stop',
|
||||
);
|
||||
|
||||
expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
|
||||
});
|
||||
|
||||
it('should preserve finishReason MAX_TOKENS when provider already reports "length"', () => {
|
||||
const result = feedToolCallChunks(
|
||||
converter,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
name: 'write_file',
|
||||
arguments: '{"file_path": "/tmp/test.cpp"',
|
||||
},
|
||||
],
|
||||
'length',
|
||||
);
|
||||
|
||||
expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
|
||||
});
|
||||
|
||||
it('should still emit the (repaired) function call even when truncated', () => {
|
||||
const result = feedToolCallChunks(
|
||||
converter,
|
||||
[
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
name: 'write_file',
|
||||
arguments: '{"file_path": "/tmp/test.cpp"',
|
||||
},
|
||||
],
|
||||
'stop',
|
||||
);
|
||||
|
||||
const parts = result.candidates?.[0]?.content?.parts ?? [];
|
||||
const fnCall = parts.find((p: Part) => p.functionCall);
|
||||
expect(fnCall).toBeDefined();
|
||||
expect(fnCall?.functionCall?.name).toBe('write_file');
|
||||
expect(fnCall?.functionCall?.args).toEqual({
|
||||
file_path: '/tmp/test.cpp',
|
||||
});
|
||||
});
|
||||
|
||||
it('should detect truncation with multi-chunk streaming arguments', () => {
|
||||
// Feed arguments in multiple small chunks like real streaming
|
||||
const conv = new OpenAIContentConverter('test-model');
|
||||
|
||||
// Chunk 1: start of JSON with tool metadata
|
||||
conv.convertOpenAIChunkToGemini({
|
||||
object: 'chat.completion.chunk',
|
||||
id: 'c1',
|
||||
created: 100,
|
||||
model: 'test-model',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
tool_calls: [
|
||||
{
|
||||
index: 0,
|
||||
id: 'call_1',
|
||||
type: 'function' as const,
|
||||
function: { name: 'write_file', arguments: '{"file_' },
|
||||
},
|
||||
],
|
||||
},
|
||||
finish_reason: null,
|
||||
logprobs: null,
|
||||
},
|
||||
],
|
||||
} as unknown as OpenAI.Chat.ChatCompletionChunk);
|
||||
|
||||
// Chunk 2: more arguments
|
||||
conv.convertOpenAIChunkToGemini({
|
||||
object: 'chat.completion.chunk',
|
||||
id: 'c2',
|
||||
created: 100,
|
||||
model: 'test-model',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {
|
||||
tool_calls: [
|
||||
{
|
||||
index: 0,
|
||||
function: { arguments: 'path": "/tmp/f.txt", "conten' },
|
||||
},
|
||||
],
|
||||
},
|
||||
finish_reason: null,
|
||||
logprobs: null,
|
||||
},
|
||||
],
|
||||
} as unknown as OpenAI.Chat.ChatCompletionChunk);
|
||||
|
||||
// Final chunk: finish_reason "stop" but JSON is still incomplete
|
||||
const result = conv.convertOpenAIChunkToGemini({
|
||||
object: 'chat.completion.chunk',
|
||||
id: 'c3',
|
||||
created: 101,
|
||||
model: 'test-model',
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: {},
|
||||
finish_reason: 'stop',
|
||||
logprobs: null,
|
||||
},
|
||||
],
|
||||
} as unknown as OpenAI.Chat.ChatCompletionChunk);
|
||||
|
||||
expect(result.candidates?.[0]?.finishReason).toBe(FinishReason.MAX_TOKENS);
|
||||
});
|
||||
});
|
||||
|
||||
describe('modality filtering', () => {
|
||||
function makeRequest(parts: Part[]): GenerateContentParameters {
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -973,7 +973,14 @@ export class OpenAIContentConverter {
|
|||
}
|
||||
|
||||
// Only emit function calls when streaming is complete (finish_reason is present)
|
||||
let toolCallsTruncated = false;
|
||||
if (choice.finish_reason) {
|
||||
// Detect truncation the provider may not report correctly.
|
||||
// Some providers (e.g. DashScope/Qwen) send "stop" or "tool_calls"
|
||||
// even when output was cut off mid-JSON due to max_tokens.
|
||||
toolCallsTruncated =
|
||||
this.streamingToolCallParser.hasIncompleteToolCalls();
|
||||
|
||||
const completedToolCalls =
|
||||
this.streamingToolCallParser.getCompletedToolCalls();
|
||||
|
||||
|
|
@ -995,6 +1002,13 @@ export class OpenAIContentConverter {
|
|||
this.streamingToolCallParser.reset();
|
||||
}
|
||||
|
||||
// If tool call JSON was truncated, override to "length" so downstream
|
||||
// (turn.ts) correctly sets wasOutputTruncated=true.
|
||||
const effectiveFinishReason =
|
||||
toolCallsTruncated && choice.finish_reason !== 'length'
|
||||
? 'length'
|
||||
: choice.finish_reason;
|
||||
|
||||
// Only include finishReason key if finish_reason is present
|
||||
const candidate: Candidate = {
|
||||
content: {
|
||||
|
|
@ -1004,9 +1018,9 @@ export class OpenAIContentConverter {
|
|||
index: 0,
|
||||
safetyRatings: [],
|
||||
};
|
||||
if (choice.finish_reason) {
|
||||
if (effectiveFinishReason) {
|
||||
candidate.finishReason = this.mapOpenAIFinishReasonToGemini(
|
||||
choice.finish_reason,
|
||||
effectiveFinishReason,
|
||||
);
|
||||
}
|
||||
response.candidates = [candidate];
|
||||
|
|
|
|||
|
|
@ -790,4 +790,70 @@ describe('StreamingToolCallParser', () => {
|
|||
expect(call2?.args).toEqual({ param2: 'value2' });
|
||||
});
|
||||
});
|
||||
|
||||
describe('hasIncompleteToolCalls', () => {
|
||||
it('should return false when no tool calls exist', () => {
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when all tool calls have complete JSON', () => {
|
||||
parser.addChunk(0, '{"key": "value"}', 'call_1', 'write_file');
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(false);
|
||||
});
|
||||
|
||||
it('should return true when a tool call has depth > 0 (unclosed braces)', () => {
|
||||
parser.addChunk(
|
||||
0,
|
||||
'{"file_path": "/tmp/test.txt", "content": "partial',
|
||||
'call_1',
|
||||
'write_file',
|
||||
);
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(true);
|
||||
});
|
||||
|
||||
it('should return true when a tool call is inside a string literal', () => {
|
||||
// Simulate truncation mid-string: {"file_path": "/tmp/test.txt", "content": "some text
|
||||
parser.addChunk(0, '{"file_path": "/tmp/test.txt"', 'call_1', 'write_file');
|
||||
parser.addChunk(0, ', "content": "some text');
|
||||
const state = parser.getState(0);
|
||||
expect(state.inString).toBe(true);
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false for tool calls without name metadata', () => {
|
||||
// Tool calls without a name should be ignored
|
||||
parser.addChunk(0, '{"key": "incomplete', undefined, undefined);
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(false);
|
||||
});
|
||||
|
||||
it('should detect incomplete among multiple tool calls', () => {
|
||||
// First tool call is complete
|
||||
parser.addChunk(0, '{"key": "value"}', 'call_1', 'func_a');
|
||||
// Second tool call is incomplete
|
||||
parser.addChunk(1, '{"key": "val', 'call_2', 'func_b');
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false after reset', () => {
|
||||
parser.addChunk(0, '{"key": "incomplete', 'call_1', 'write_file');
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(true);
|
||||
parser.reset();
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(false);
|
||||
});
|
||||
|
||||
it('should detect real-world truncation: write_file with only file_path', () => {
|
||||
// Reproduces the actual bug: LLM output truncated mid-JSON,
|
||||
// only file_path key received, content never arrived.
|
||||
// Buffer: {"file_path": "/path/to/file.cpp"
|
||||
// depth=1 because outer brace is unclosed
|
||||
parser.addChunk(
|
||||
0,
|
||||
'{"file_path": "/path/to/file.cpp"',
|
||||
'call_1',
|
||||
'write_file',
|
||||
);
|
||||
expect(parser.hasIncompleteToolCalls()).toBe(true);
|
||||
expect(parser.getState(0).depth).toBe(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -411,4 +411,32 @@ export class StreamingToolCallParser {
|
|||
escape: this.escapes.get(index) || false,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether any buffered tool call has incomplete JSON at stream end.
|
||||
*
|
||||
* A tool call is considered incomplete when its JSON parsing state indicates
|
||||
* the buffer was truncated mid-stream:
|
||||
* - depth > 0: unclosed braces/brackets remain
|
||||
* - inString === true: still inside a string literal
|
||||
*
|
||||
* This is critical for detecting output truncation that the LLM provider
|
||||
* may not report correctly via finish_reason (e.g. reporting "stop" or
|
||||
* "tool_calls" instead of "length" when output was actually cut off).
|
||||
*
|
||||
* @returns true if at least one tool call buffer has incomplete JSON
|
||||
*/
|
||||
hasIncompleteToolCalls(): boolean {
|
||||
for (const [index] of this.buffers.entries()) {
|
||||
const meta = this.toolCallMeta.get(index);
|
||||
if (!meta?.name) continue;
|
||||
|
||||
const depth = this.depths.get(index) || 0;
|
||||
const inString = this.inStrings.get(index) || false;
|
||||
if (depth > 0 || inString) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -873,4 +873,141 @@ describe('Turn', () => {
|
|||
expect(turn.getDebugResponses()).toEqual([resp1, resp2]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('wasOutputTruncated flag', () => {
|
||||
it('should set wasOutputTruncated=true on pending tool calls when finishReason is MAX_TOKENS', async () => {
|
||||
const mockResponseStream = (async function* () {
|
||||
// Yield a tool call request
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
functionCalls: [
|
||||
{
|
||||
name: 'write_file',
|
||||
args: { file_path: '/test.txt', content: 'hello' },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
// Yield finish with MAX_TOKENS
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
candidates: [
|
||||
{
|
||||
finishReason: 'MAX_TOKENS',
|
||||
content: { parts: [] },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
})();
|
||||
mockSendMessageStream.mockResolvedValue(mockResponseStream);
|
||||
|
||||
const reqParts: Part[] = [{ text: 'Test prompt' }];
|
||||
const events = [];
|
||||
for await (const event of turn.run(
|
||||
'test-model',
|
||||
reqParts,
|
||||
new AbortController().signal,
|
||||
)) {
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
// Verify that pending tool calls have wasOutputTruncated flag set
|
||||
expect(turn.pendingToolCalls).toHaveLength(1);
|
||||
expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true);
|
||||
expect(turn.pendingToolCalls[0].name).toBe('write_file');
|
||||
});
|
||||
|
||||
it('should NOT set wasOutputTruncated when finishReason is STOP', async () => {
|
||||
const mockResponseStream = (async function* () {
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
functionCalls: [
|
||||
{
|
||||
name: 'read_file',
|
||||
args: { file_path: '/test.txt' },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
// Yield finish with STOP (normal completion)
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
candidates: [
|
||||
{
|
||||
finishReason: 'STOP',
|
||||
content: { parts: [] },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
})();
|
||||
mockSendMessageStream.mockResolvedValue(mockResponseStream);
|
||||
|
||||
const reqParts: Part[] = [{ text: 'Test prompt' }];
|
||||
for await (const _ of turn.run(
|
||||
'test-model',
|
||||
reqParts,
|
||||
new AbortController().signal,
|
||||
)) {
|
||||
// consume stream
|
||||
}
|
||||
|
||||
// Verify that pending tool calls do NOT have wasOutputTruncated flag
|
||||
expect(turn.pendingToolCalls).toHaveLength(1);
|
||||
expect(turn.pendingToolCalls[0].wasOutputTruncated).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle multiple pending tool calls with MAX_TOKENS', async () => {
|
||||
const mockResponseStream = (async function* () {
|
||||
// Yield two tool calls
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
functionCalls: [
|
||||
{
|
||||
name: 'write_file',
|
||||
args: { file_path: '/test1.txt', content: 'content1' },
|
||||
},
|
||||
{
|
||||
name: 'edit',
|
||||
args: { file_path: '/test2.txt', original_text: 'old' },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
// Yield finish with MAX_TOKENS
|
||||
yield {
|
||||
type: StreamEventType.CHUNK,
|
||||
value: {
|
||||
candidates: [
|
||||
{
|
||||
finishReason: 'MAX_TOKENS',
|
||||
content: { parts: [] },
|
||||
},
|
||||
],
|
||||
} as unknown as GenerateContentResponse,
|
||||
};
|
||||
})();
|
||||
mockSendMessageStream.mockResolvedValue(mockResponseStream);
|
||||
|
||||
const reqParts: Part[] = [{ text: 'Test prompt' }];
|
||||
for await (const _ of turn.run(
|
||||
'test-model',
|
||||
reqParts,
|
||||
new AbortController().signal,
|
||||
)) {
|
||||
// consume stream
|
||||
}
|
||||
|
||||
// Verify both tool calls have wasOutputTruncated flag set
|
||||
expect(turn.pendingToolCalls).toHaveLength(2);
|
||||
expect(turn.pendingToolCalls[0].wasOutputTruncated).toBe(true);
|
||||
expect(turn.pendingToolCalls[1].wasOutputTruncated).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,14 +4,14 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type {
|
||||
Part,
|
||||
PartListUnion,
|
||||
GenerateContentResponse,
|
||||
FunctionCall,
|
||||
FunctionDeclaration,
|
||||
import {
|
||||
FinishReason,
|
||||
GenerateContentResponseUsageMetadata,
|
||||
type Part,
|
||||
type PartListUnion,
|
||||
type GenerateContentResponse,
|
||||
type FunctionCall,
|
||||
type FunctionDeclaration,
|
||||
type GenerateContentResponseUsageMetadata,
|
||||
} from '@google/genai';
|
||||
import type {
|
||||
ToolCallConfirmationDetails,
|
||||
|
|
@ -98,6 +98,8 @@ export interface ToolCallRequestInfo {
|
|||
isClientInitiated: boolean;
|
||||
prompt_id: string;
|
||||
response_id?: string;
|
||||
/** Set to true when the LLM response was truncated due to max_tokens. */
|
||||
wasOutputTruncated?: boolean;
|
||||
}
|
||||
|
||||
export interface ToolCallResponseInfo {
|
||||
|
|
@ -306,6 +308,14 @@ export class Turn {
|
|||
|
||||
// This is the key change: Only yield 'Finished' if there is a finishReason.
|
||||
if (finishReason) {
|
||||
// Mark pending tool calls so downstream can distinguish
|
||||
// truncation from real parameter errors.
|
||||
if (finishReason === FinishReason.MAX_TOKENS) {
|
||||
for (const tc of this.pendingToolCalls) {
|
||||
tc.wasOutputTruncated = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.pendingCitations.size > 0) {
|
||||
yield {
|
||||
type: GeminiEventType.Citation,
|
||||
|
|
|
|||
|
|
@ -66,4 +66,7 @@ export enum ToolErrorType {
|
|||
|
||||
// WebSearch-specific Errors
|
||||
WEB_SEARCH_FAILED = 'web_search_failed',
|
||||
|
||||
// Truncation Errors
|
||||
OUTPUT_TRUNCATED = 'output_truncated',
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue