From ea3a2d6f5b3a985edadf87e4be1fe198c1348b4f Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Wed, 25 Mar 2026 15:13:10 +0800 Subject: [PATCH 1/2] fix: improve /compress split point selection for tool-heavy conversations When conversation history is near the context window limit and dominated by tool call/response cycles, findCompressSplitPoint would return a near-zero split point because it only considered non-functionResponse user messages as valid split points. This caused /compress to send almost no history to the compression API (e.g. 29 tokens), producing a useless summary that inflated token count instead of reducing it. Changes: - Track tool completion boundaries (positions after functionResponse) as fallback split points in findCompressSplitPoint - Add user-with-functionResponse to the compress-everything safety check - Use Math.max of primary and fallback split points for better coverage - Add minimum content guard (5% threshold) to prevent futile API calls - Add 4 new test cases covering tool-heavy conversation scenarios Fixes #2647 --- .../services/chatCompressionService.test.ts | 169 ++++++++++++++++++ .../src/services/chatCompressionService.ts | 50 +++++- 2 files changed, 217 insertions(+), 2 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index f3f490214..95d83c354 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -100,6 +100,175 @@ describe('findCompressSplitPoint', () => { ]; expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2); }); + + it('should compress everything when last message is a functionResponse', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this bug' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 
'readFile', + response: { result: 'file content' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'writeFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'writeFile', + response: { result: 'ok' }, + }, + }, + ], + }, + ]; + // Last message is functionResponse -> safe to compress everything + expect(findCompressSplitPoint(history, 0.7)).toBe(5); + }); + + it('should use tool completion split point when only one user query exists', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: 'a'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: 'b'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0 + // Last message has functionCall -> can't compress everything + // But tool completion split points exist (at indices 3 and 5) + // Should use lastToolCompletionSplitPoint=5 instead of lastSplitPoint=0 + expect(findCompressSplitPoint(history, 0.7)).toBe(5); + }); + + it('should prefer larger split point between primary and tool completion', () => { + const longContent = 'a'.repeat(10000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix bug A' }] }, + { role: 'model', parts: [{ text: 'OK' }] }, + { role: 'user', parts: [{ text: 'Fix bug B' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: longContent }, + }, + }, + ], + }, + { + 
role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: longContent }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Primary split points at 0 and 2 (early, before the bulky tool outputs) + // Tool completion split points at 5 and 7 (after tool call pairs) + // Last message has functionCall -> can't compress everything + // Should use max(lastSplitPoint=2, lastToolCompletionSplitPoint=7) = 7 + expect(findCompressSplitPoint(history, 0.7)).toBe(7); + }); + + it('should still prefer primary split point when it is better', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'resp1' }] }, + { + role: 'user', + parts: [{ text: 'msg2 with some substantial content here' }], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool1', + response: { result: 'short' }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'resp3' }] }, + { role: 'user', parts: [{ text: 'msg4' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool2', args: {} } }], + }, + ]; + // Primary split points: 0, 2, 5, 7 + // Tool completion: 5 + // Last message has functionCall -> can't compress everything + // At 0.99 fraction, lastSplitPoint should be 7 which is > lastToolCompletion=5 + expect(findCompressSplitPoint(history, 0.99)).toBe(7); + }); }); describe('ChatCompressionService', () => { diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 610445fb4..84836f72e 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -48,6 +48,11 @@ export 
function findCompressSplitPoint( const targetCharCount = totalCharCount * fraction; let lastSplitPoint = 0; // 0 is always valid (compress nothing) + // In tool-heavy conversations, non-functionResponse user messages are rare. + // Track positions after tool call completions (functionResponse) as fallback + // split points, ensuring all function calls in the compressed portion have + // matching responses. + let lastToolCompletionSplitPoint = 0; let cumulativeCharCount = 0; for (let i = 0; i < contents.length; i++) { const content = contents[i]; @@ -60,6 +65,14 @@ export function findCompressSplitPoint( } lastSplitPoint = i; } + // After a functionResponse, all preceding function calls are properly + // paired, making the next position (i+1) a safe place to split. + if ( + content.role === 'user' && + content.parts?.some((part) => !!part.functionResponse) + ) { + lastToolCompletionSplitPoint = i + 1; + } cumulativeCharCount += charCounts[i]; } @@ -72,9 +85,20 @@ export function findCompressSplitPoint( ) { return contents.length; } + // Also safe to compress everything if the last message completes a tool call + // sequence (all function calls have matching responses). + if ( + lastContent?.role === 'user' && + lastContent?.parts?.some((part) => !!part.functionResponse) + ) { + return contents.length; + } - // Can't compress everything so just compress at last splitpoint. - return lastSplitPoint; + // Use the split point that provides the most content to compress. + // In tool-heavy conversations with few user queries, lastSplitPoint may be + // near the beginning of the history while lastToolCompletionSplitPoint + // provides much better coverage. + return Math.max(lastSplitPoint, lastToolCompletionSplitPoint); } export class ChatCompressionService { @@ -157,6 +181,28 @@ export class ChatCompressionService { }; } + // Guard: if historyToCompress is too small relative to the total history, + // skip compression. 
This prevents futile API calls where the model receives + // almost no context and generates a useless "summary" that inflates tokens. + const compressCharCount = historyToCompress.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + const totalCharCount = curatedHistory.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + if (totalCharCount > 0 && compressCharCount / totalCharCount < 0.05) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + const summaryResponse = await config.getContentGenerator().generateContent( { model, From 501f80a514acea9fbac5c756de9da61513f1296e Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 30 Mar 2026 18:00:27 +0800 Subject: [PATCH 2/2] fix: handle orphaned funcCall and improve compression logic for tool-heavy conversations - Strip trailing orphaned funcCall (force=true) before split point calculation, so normal compression logic runs cleanly on the remaining history instead of requiring ad-hoc special-casing - Remove redundant lastToolCompletionSplitPoint machinery: after fixing the i+2 index bug, lastSplitPoint already subsumes it, making Math.max redundant - Add MIN_COMPRESSION_FRACTION constant (0.05) to guard against futile API calls when historyToCompress is too small relative to total history - Add tests for orphaned funcCall handling (force=true compresses, force=false NOOP) - Add test for MIN_COMPRESSION_FRACTION guard Fixes #2647 --- .../services/chatCompressionService.test.ts | 206 +++++++++++++++++- .../src/services/chatCompressionService.ts | 61 ++++-- 2 files changed, 233 insertions(+), 34 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 95d83c354..55a1e2723 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ 
b/packages/core/src/services/chatCompressionService.test.ts @@ -139,7 +139,7 @@ describe('findCompressSplitPoint', () => { expect(findCompressSplitPoint(history, 0.7)).toBe(5); }); - it('should use tool completion split point when only one user query exists', () => { + it('should return primary split point when tool completions have no subsequent regular user message', () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'Fix this' }] }, { @@ -179,12 +179,12 @@ describe('findCompressSplitPoint', () => { ]; // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0 // Last message has functionCall -> can't compress everything - // But tool completion split points exist (at indices 3 and 5) - // Should use lastToolCompletionSplitPoint=5 instead of lastSplitPoint=0 - expect(findCompressSplitPoint(history, 0.7)).toBe(5); + // historyToKeep must start with a regular user message, so split at 0 + // (compress nothing) is the only valid option. + expect(findCompressSplitPoint(history, 0.7)).toBe(0); }); - it('should prefer larger split point between primary and tool completion', () => { + it('should prefer primary split point when tool completions yield no valid user-starting split', () => { const longContent = 'a'.repeat(10000); const history: Content[] = [ { role: 'user', parts: [{ text: 'Fix bug A' }] }, @@ -225,11 +225,10 @@ describe('findCompressSplitPoint', () => { parts: [{ functionCall: { name: 'write1', args: {} } }], }, ]; - // Primary split points at 0 and 2 (early, before the bulky tool outputs) - // Tool completion split points at 5 and 7 (after tool call pairs) + // Primary split points at 0 and 2 (regular user messages before the bulky tool outputs) // Last message has functionCall -> can't compress everything - // Should use max(lastSplitPoint=2, lastToolCompletionSplitPoint=7) = 7 - expect(findCompressSplitPoint(history, 0.7)).toBe(7); + // Should return lastSplitPoint=2 (last valid primary split point) + 
expect(findCompressSplitPoint(history, 0.7)).toBe(2); }); it('should still prefer primary split point when it is better', () => { @@ -264,9 +263,8 @@ describe('findCompressSplitPoint', () => { }, ]; // Primary split points: 0, 2, 5, 7 - // Tool completion: 5 // Last message has functionCall -> can't compress everything - // At 0.99 fraction, lastSplitPoint should be 7 which is > lastToolCompletion=5 + // At 0.99 fraction, lastSplitPoint should be 7 expect(findCompressSplitPoint(history, 0.99)).toBe(7); }); }); @@ -409,6 +407,47 @@ describe('ChatCompressionService', () => { expect(tokenLimit).not.toHaveBeenCalled(); }); + it('should return NOOP when historyToCompress is below MIN_COMPRESSION_FRACTION of total', async () => { + // Construct a history where the split point lands on the 2nd regular user + // message (index 2), but indices 0-1 are tiny relative to the huge content + // at index 2. historyToCompress = [0,1] will be << 5% of totalCharCount. + const hugeContent = 'x'.repeat(100000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'hello' }] }, + { role: 'model', parts: [{ text: 'world' }] }, + // Huge user message pushes the cumulative well past the split threshold + { role: 'user', parts: [{ text: hugeContent }] }, + // Pending functionCall prevents returning contents.length, + // so the fallback split at index 2 is used + { + role: 'model', + parts: [{ functionCall: { name: 'process', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + // force=true bypasses the token threshold gate so we exercise the 5% guard + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + 
mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + it('should compress if over token threshold', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'msg1' }] }, @@ -1103,4 +1142,149 @@ describe('ChatCompressionService', () => { expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); }); }); + + describe('orphaned trailing funcCall handling', () => { + it('should compress everything when force=true and last message is an orphaned funcCall', async () => { + // Issue #2647: tool-heavy conversation interrupted/crashed while a tool + // was still running. The funcCall will never get a response since the agent + // is idle. Manual /compress strips the orphaned funcCall, then compresses + // the remaining history normally. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'readFile', + response: { result: 'code...' 
}, + }, + }, + ], + }, + // orphaned funcCall — agent was interrupted before getting a response + { + role: 'model', + parts: [{ functionCall: { name: 'editFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 100, + ); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Summary of all work done' }] } }, + ], + usageMetadata: { + promptTokenCount: 1100, + candidatesTokenCount: 50, + totalTokenCount: 1150, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, // force=true (manual /compress) + mockModel, + mockConfig, + false, + ); + + // Should compress successfully — orphaned funcCall is stripped first, then + // normal compression runs on the remaining history, historyToKeep is empty + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + // Reconstructed history: [User(summary), Model("Got it...")] — valid structure + expect(result.newHistory).toHaveLength(2); + expect(result.newHistory![0].role).toBe('user'); + expect(result.newHistory![1].role).toBe('model'); + // The orphaned funcCall is stripped before compression, so only the first 5 + // messages are sent, plus the compression instruction (+1) = history.length total. + const callArg = mockGenerateContent.mock.calls[0][0]; + expect(callArg.contents.length).toBe(history.length); // (history.length - 1) messages + 1 instruction + }); + + it('should NOT compress orphaned funcCall when force=false (auto-compress)', async () => { + // Auto-compress fires BEFORE the matching funcResponse is sent back to the + // model. 
Compressing the funcCall away would orphan the upcoming funcResponse + // and cause an API error. So force=false must NOT take this path. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + // Pending funcCall: tool is currently executing, funcResponse is coming + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + // Use a token count above threshold to ensure auto-compress isn't skipped + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, // force=false (auto-compress) + mockModel, + mockConfig, + false, + ); + + // Must return NOOP — compressing would orphan the upcoming funcResponse + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 84836f72e..528a57b44 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -29,6 +29,13 @@ export const COMPRESSION_TOKEN_THRESHOLD = 0.7; */ export const COMPRESSION_PRESERVE_THRESHOLD = 0.3; +/** + * Minimum fraction of 
history (by character count) that must be compressible + * to proceed with a compression API call. Prevents futile calls where the + * model receives almost no context and generates a useless summary. + */ +export const MIN_COMPRESSION_FRACTION = 0.05; + /** * Returns the index of the oldest item to keep when compressing. May return * contents.length which indicates that everything should be compressed. @@ -48,11 +55,6 @@ export function findCompressSplitPoint( const targetCharCount = totalCharCount * fraction; let lastSplitPoint = 0; // 0 is always valid (compress nothing) - // In tool-heavy conversations, non-functionResponse user messages are rare. - // Track positions after tool call completions (functionResponse) as fallback - // split points, ensuring all function calls in the compressed portion have - // matching responses. - let lastToolCompletionSplitPoint = 0; let cumulativeCharCount = 0; for (let i = 0; i < contents.length; i++) { const content = contents[i]; @@ -65,14 +67,6 @@ export function findCompressSplitPoint( } lastSplitPoint = i; } - // After a functionResponse, all preceding function calls are properly - // paired, making the next position (i+1) a safe place to split. - if ( - content.role === 'user' && - content.parts?.some((part) => !!part.functionResponse) - ) { - lastToolCompletionSplitPoint = i + 1; - } cumulativeCharCount += charCounts[i]; } @@ -94,11 +88,7 @@ export function findCompressSplitPoint( return contents.length; } - // Use the split point that provides the most content to compress. - // In tool-heavy conversations with few user queries, lastSplitPoint may be - // near the beginning of the history while lastToolCompletionSplitPoint - // provides much better coverage. 
- return Math.max(lastSplitPoint, lastToolCompletionSplitPoint); + return lastSplitPoint; } export class ChatCompressionService { @@ -162,13 +152,31 @@ export class ChatCompressionService { } } + // For manual /compress (force=true), if the last message is an orphaned model + // funcCall (agent interrupted/crashed before the response arrived), strip it + // before computing the split point. After stripping, the history ends cleanly + // (typically with a user funcResponse) and findCompressSplitPoint handles it + // through its normal logic — no special-casing needed. + // + // auto-compress (force=false) must NOT strip: it fires inside + // sendMessageStream() before the matching funcResponse is pushed onto the + // history, so the trailing funcCall is still active, not orphaned. + const lastMessage = curatedHistory[curatedHistory.length - 1]; + const hasOrphanedFuncCall = + force && + lastMessage?.role === 'model' && + lastMessage.parts?.some((p) => !!p.functionCall); + const historyForSplit = hasOrphanedFuncCall + ? curatedHistory.slice(0, -1) + : curatedHistory; + const splitPoint = findCompressSplitPoint( - curatedHistory, + historyForSplit, 1 - COMPRESSION_PRESERVE_THRESHOLD, ); - const historyToCompress = curatedHistory.slice(0, splitPoint); - const historyToKeep = curatedHistory.slice(splitPoint); + const historyToCompress = historyForSplit.slice(0, splitPoint); + const historyToKeep = historyForSplit.slice(splitPoint); if (historyToCompress.length === 0) { return { @@ -184,15 +192,22 @@ export class ChatCompressionService { // Guard: if historyToCompress is too small relative to the total history, // skip compression. This prevents futile API calls where the model receives // almost no context and generates a useless "summary" that inflates tokens. + // + // Note: findCompressSplitPoint already computes charCounts internally but + // returns only the split index. 
We intentionally recompute here to keep + // the function signature simple; this is a minor, acceptable duplication. const compressCharCount = historyToCompress.reduce( (sum, c) => sum + JSON.stringify(c).length, 0, ); - const totalCharCount = curatedHistory.reduce( + const totalCharCount = historyForSplit.reduce( (sum, c) => sum + JSON.stringify(c).length, 0, ); - if (totalCharCount > 0 && compressCharCount / totalCharCount < 0.05) { + if ( + totalCharCount > 0 && + compressCharCount / totalCharCount < MIN_COMPRESSION_FRACTION + ) { return { newHistory: null, info: {