Merge pull request #2659 from QwenLM/fix/compress-split-point-tool-heavy-conversations

fix: make /compress handle tool-heavy conversations correctly
This commit is contained in:
tanzhenxin 2026-04-01 15:36:56 +08:00 committed by GitHub
commit 92132ec8fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 418 additions and 4 deletions

View file

@@ -100,6 +100,173 @@ describe('findCompressSplitPoint', () => {
];
expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2);
});
it('should compress everything when last message is a functionResponse', () => {
  // Fixture builders: a model turn that issues a tool call, and the
  // matching user turn that carries the tool's response.
  const toolCall = (name: string): Content => ({
    role: 'model',
    parts: [{ functionCall: { name, args: {} } }],
  });
  const toolResponse = (name: string, result: string): Content => ({
    role: 'user',
    parts: [{ functionResponse: { name, response: { result } } }],
  });

  const history: Content[] = [
    { role: 'user', parts: [{ text: 'Fix this bug' }] },
    toolCall('readFile'),
    toolResponse('readFile', 'file content'),
    toolCall('writeFile'),
    toolResponse('writeFile', 'ok'),
  ];

  // The history ends on a functionResponse, so no tool call is left
  // dangling -> safe to compress everything (split point = length 5).
  expect(findCompressSplitPoint(history, 0.7)).toBe(5);
});
it('should return primary split point when tool completions have no subsequent regular user message', () => {
  // Fixture builders for the repeated tool-call / tool-response turns.
  const toolCall = (name: string): Content => ({
    role: 'model',
    parts: [{ functionCall: { name, args: {} } }],
  });
  const toolResponse = (name: string, result: string): Content => ({
    role: 'user',
    parts: [{ functionResponse: { name, response: { result } } }],
  });

  const history: Content[] = [
    { role: 'user', parts: [{ text: 'Fix this' }] },
    toolCall('read1'),
    toolResponse('read1', 'a'.repeat(1000)),
    toolCall('read2'),
    toolResponse('read2', 'b'.repeat(1000)),
    toolCall('write1'),
  ];

  // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0
  // Last message has functionCall -> can't compress everything
  // historyToKeep must start with a regular user message, so split at 0
  // (compress nothing) is the only valid option.
  expect(findCompressSplitPoint(history, 0.7)).toBe(0);
});
it('should prefer primary split point when tool completions yield no valid user-starting split', () => {
  const longContent = 'a'.repeat(10000);

  // Fixture builders: every tool response in this scenario carries the
  // same bulky payload, so only the tool name varies.
  const toolCall = (name: string): Content => ({
    role: 'model',
    parts: [{ functionCall: { name, args: {} } }],
  });
  const bulkyResponse = (name: string): Content => ({
    role: 'user',
    parts: [{ functionResponse: { name, response: { result: longContent } } }],
  });

  const history: Content[] = [
    { role: 'user', parts: [{ text: 'Fix bug A' }] },
    { role: 'model', parts: [{ text: 'OK' }] },
    { role: 'user', parts: [{ text: 'Fix bug B' }] },
    toolCall('read1'),
    bulkyResponse('read1'),
    toolCall('read2'),
    bulkyResponse('read2'),
    toolCall('write1'),
  ];

  // Primary split points at 0 and 2 (regular user messages before the bulky tool outputs)
  // Last message has functionCall -> can't compress everything
  // Should return lastSplitPoint=2 (last valid primary split point)
  expect(findCompressSplitPoint(history, 0.7)).toBe(2);
});
it('should still prefer primary split point when it is better', () => {
  // A mixed conversation: plain user/model exchanges interleaved with one
  // completed tool round-trip, ending on a pending tool call.
  const history: Content[] = [
    { role: 'user', parts: [{ text: 'msg1' }] },
    { role: 'model', parts: [{ text: 'resp1' }] },
    {
      role: 'user',
      parts: [{ text: 'msg2 with some substantial content here' }],
    },
    {
      role: 'model',
      parts: [{ functionCall: { name: 'tool1', args: {} } }],
    },
    {
      role: 'user',
      parts: [
        {
          functionResponse: { name: 'tool1', response: { result: 'short' } },
        },
      ],
    },
    { role: 'user', parts: [{ text: 'msg3' }] },
    { role: 'model', parts: [{ text: 'resp3' }] },
    { role: 'user', parts: [{ text: 'msg4' }] },
    {
      role: 'model',
      parts: [{ functionCall: { name: 'tool2', args: {} } }],
    },
  ];

  // Primary split points: 0, 2, 5, 7
  // Last message has functionCall -> can't compress everything
  // At 0.99 fraction, lastSplitPoint should be 7
  expect(findCompressSplitPoint(history, 0.99)).toBe(7);
});
});
describe('ChatCompressionService', () => {
@@ -240,6 +407,47 @@ describe('ChatCompressionService', () => {
expect(tokenLimit).not.toHaveBeenCalled();
});
it('should return NOOP when historyToCompress is below MIN_COMPRESSION_FRACTION of total', async () => {
// Construct a history where the split point lands on the 2nd regular user
// message (index 2), but indices 0-1 are tiny relative to the huge content
// at index 2. historyToCompress = [0,1] will be << 5% of totalCharCount.
const hugeContent = 'x'.repeat(100000);
const history: Content[] = [
{ role: 'user', parts: [{ text: 'hello' }] },
{ role: 'model', parts: [{ text: 'world' }] },
// Huge user message pushes the cumulative well past the split threshold
{ role: 'user', parts: [{ text: hugeContent }] },
// Pending functionCall prevents returning contents.length,
// so the fallback split at index 2 is used
{
role: 'model',
parts: [{ functionCall: { name: 'process', args: {} } }],
},
];
// Wire the mocks (fixtures come from the suite's beforeEach): history,
// current token usage, and model token limit.
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100);
vi.mocked(tokenLimit).mockReturnValue(1000);
const mockGenerateContent = vi.fn();
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
} as unknown as ContentGenerator);
// force=true bypasses the token threshold gate so we exercise the 5% guard
const result = await service.compress(
mockChat,
mockPromptId,
true,
mockModel,
mockConfig,
false,
);
// The guard must bail out with NOOP before making any model call.
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
expect(result.newHistory).toBeNull();
expect(mockGenerateContent).not.toHaveBeenCalled();
});
it('should compress if over token threshold', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },
@@ -934,4 +1142,149 @@ describe('ChatCompressionService', () => {
expect(mockFirePreCompactEvent).not.toHaveBeenCalled();
});
});
// Regression tests for issue #2647: /compress on a conversation whose last
// entry is a model functionCall that will never receive a response.
describe('orphaned trailing funcCall handling', () => {
it('should compress everything when force=true and last message is an orphaned funcCall', async () => {
// Issue #2647: tool-heavy conversation interrupted/crashed while a tool
// was still running. The funcCall will never get a response since the agent
// is idle. Manual /compress strips the orphaned funcCall, then compresses
// the remaining history normally.
const history: Content[] = [
{ role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] },
{
role: 'model',
parts: [{ functionCall: { name: 'glob', args: {} } }],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 'glob',
response: { result: 'files...' },
},
},
],
},
{
role: 'model',
parts: [{ functionCall: { name: 'readFile', args: {} } }],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 'readFile',
response: { result: 'code...' },
},
},
],
},
// orphaned funcCall — agent was interrupted before getting a response
{
role: 'model',
parts: [{ functionCall: { name: 'editFile', args: {} } }],
},
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
100,
);
vi.mocked(tokenLimit).mockReturnValue(1000);
// Stub the summarization call: the model returns a single-part summary.
const mockGenerateContent = vi.fn().mockResolvedValue({
candidates: [
{ content: { parts: [{ text: 'Summary of all work done' }] } },
],
usageMetadata: {
promptTokenCount: 1100,
candidatesTokenCount: 50,
totalTokenCount: 1150,
},
} as unknown as GenerateContentResponse);
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
} as unknown as ContentGenerator);
const result = await service.compress(
mockChat,
mockPromptId,
true, // force=true (manual /compress)
mockModel,
mockConfig,
false,
);
// Should compress successfully — orphaned funcCall is stripped first, then
// normal compression runs on the remaining history, historyToKeep is empty
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.newHistory).not.toBeNull();
// Reconstructed history: [User(summary), Model("Got it...")] — valid structure
expect(result.newHistory).toHaveLength(2);
expect(result.newHistory![0].role).toBe('user');
expect(result.newHistory![1].role).toBe('model');
// The orphaned funcCall is stripped before compression, so only the first 5
// messages are sent, plus the compression instruction (+1) = history.length total.
const callArg = mockGenerateContent.mock.calls[0][0];
expect(callArg.contents.length).toBe(history.length); // (history.length - 1) messages + 1 instruction
});
it('should NOT compress orphaned funcCall when force=false (auto-compress)', async () => {
// Auto-compress fires BEFORE the matching funcResponse is sent back to the
// model. Compressing the funcCall away would orphan the upcoming funcResponse
// and cause an API error. So force=false must NOT take this path.
const history: Content[] = [
{ role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] },
{
role: 'model',
parts: [{ functionCall: { name: 'glob', args: {} } }],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 'glob',
response: { result: 'files...' },
},
},
],
},
// Pending funcCall: tool is currently executing, funcResponse is coming
{
role: 'model',
parts: [{ functionCall: { name: 'readFile', args: {} } }],
},
];
vi.mocked(mockChat.getHistory).mockReturnValue(history);
// Use a token count above threshold to ensure auto-compress isn't skipped
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
800,
);
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
model: 'gemini-pro',
contextWindowSize: 1000,
} as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
const mockGenerateContent = vi.fn();
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
} as unknown as ContentGenerator);
const result = await service.compress(
mockChat,
mockPromptId,
false, // force=false (auto-compress)
mockModel,
mockConfig,
false,
);
// Must return NOOP — compressing would orphan the upcoming funcResponse
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
expect(result.newHistory).toBeNull();
expect(mockGenerateContent).not.toHaveBeenCalled();
});
});
});

View file

@@ -29,6 +29,13 @@ export const COMPRESSION_TOKEN_THRESHOLD = 0.7;
*/
export const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
/**
 * Minimum fraction of the curated history (measured by JSON character count)
 * that must fall on the compressed side of the split point for compression
 * to proceed. Below this threshold (5%), the summarization API call is
 * skipped as futile: the model would receive almost no context and generate
 * a useless summary that can even inflate the token count.
 */
export const MIN_COMPRESSION_FRACTION = 0.05;
/**
* Returns the index of the oldest item to keep when compressing. May return
* contents.length which indicates that everything should be compressed.
@@ -72,8 +79,15 @@ export function findCompressSplitPoint(
) {
return contents.length;
}
// Also safe to compress everything if the last message completes a tool call
// sequence (all function calls have matching responses).
if (
lastContent?.role === 'user' &&
lastContent?.parts?.some((part) => !!part.functionResponse)
) {
return contents.length;
}
// Can't compress everything so just compress at last splitpoint.
return lastSplitPoint;
}
@@ -138,13 +152,31 @@ export class ChatCompressionService {
}
}
// For manual /compress (force=true), if the last message is an orphaned model
// funcCall (agent interrupted/crashed before the response arrived), strip it
// before computing the split point. After stripping, the history ends cleanly
// (typically with a user funcResponse) and findCompressSplitPoint handles it
// through its normal logic — no special-casing needed.
//
// auto-compress (force=false) must NOT strip: it fires inside
// sendMessageStream() before the matching funcResponse is pushed onto the
// history, so the trailing funcCall is still active, not orphaned.
const lastMessage = curatedHistory[curatedHistory.length - 1];
const hasOrphanedFuncCall =
force &&
lastMessage?.role === 'model' &&
lastMessage.parts?.some((p) => !!p.functionCall);
const historyForSplit = hasOrphanedFuncCall
? curatedHistory.slice(0, -1)
: curatedHistory;
const splitPoint = findCompressSplitPoint(
curatedHistory,
historyForSplit,
1 - COMPRESSION_PRESERVE_THRESHOLD,
);
const historyToCompress = curatedHistory.slice(0, splitPoint);
const historyToKeep = curatedHistory.slice(splitPoint);
const historyToCompress = historyForSplit.slice(0, splitPoint);
const historyToKeep = historyForSplit.slice(splitPoint);
if (historyToCompress.length === 0) {
return {
@@ -157,6 +189,35 @@ };
};
}
// Guard: if historyToCompress is too small relative to the total history,
// skip compression. This prevents futile API calls where the model receives
// almost no context and generates a useless "summary" that inflates tokens.
//
// Note: findCompressSplitPoint already computes charCounts internally but
// returns only the split index. We intentionally recompute here to keep
// the function signature simple; this is a minor, acceptable duplication.
const compressCharCount = historyToCompress.reduce(
(sum, c) => sum + JSON.stringify(c).length,
0,
);
const totalCharCount = historyForSplit.reduce(
(sum, c) => sum + JSON.stringify(c).length,
0,
);
if (
totalCharCount > 0 &&
compressCharCount / totalCharCount < MIN_COMPRESSION_FRACTION
) {
return {
newHistory: null,
info: {
originalTokenCount,
newTokenCount: originalTokenCount,
compressionStatus: CompressionStatus.NOOP,
},
};
}
const summaryResponse = await config.getContentGenerator().generateContent(
{
model,