mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-05 15:31:27 +00:00
Merge pull request #2659 from QwenLM/fix/compress-split-point-tool-heavy-conversations
fix: make /compress handle tool-heavy conversations correctly
This commit is contained in:
commit
92132ec8fa
2 changed files with 418 additions and 4 deletions
|
|
@ -100,6 +100,173 @@ describe('findCompressSplitPoint', () => {
|
|||
];
|
||||
expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2);
|
||||
});
|
||||
|
||||
it('should compress everything when last message is a functionResponse', () => {
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'Fix this bug' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'readFile', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'readFile',
|
||||
response: { result: 'file content' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'writeFile', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'writeFile',
|
||||
response: { result: 'ok' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
// Last message is functionResponse -> safe to compress everything
|
||||
expect(findCompressSplitPoint(history, 0.7)).toBe(5);
|
||||
});
|
||||
|
||||
it('should return primary split point when tool completions have no subsequent regular user message', () => {
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'Fix this' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'read1', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'read1',
|
||||
response: { result: 'a'.repeat(1000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'read2', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'read2',
|
||||
response: { result: 'b'.repeat(1000) },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'write1', args: {} } }],
|
||||
},
|
||||
];
|
||||
// Only one non-functionResponse user message (index 0) -> lastSplitPoint=0
|
||||
// Last message has functionCall -> can't compress everything
|
||||
// historyToKeep must start with a regular user message, so split at 0
|
||||
// (compress nothing) is the only valid option.
|
||||
expect(findCompressSplitPoint(history, 0.7)).toBe(0);
|
||||
});
|
||||
|
||||
it('should prefer primary split point when tool completions yield no valid user-starting split', () => {
|
||||
const longContent = 'a'.repeat(10000);
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'Fix bug A' }] },
|
||||
{ role: 'model', parts: [{ text: 'OK' }] },
|
||||
{ role: 'user', parts: [{ text: 'Fix bug B' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'read1', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'read1',
|
||||
response: { result: longContent },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'read2', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'read2',
|
||||
response: { result: longContent },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'write1', args: {} } }],
|
||||
},
|
||||
];
|
||||
// Primary split points at 0 and 2 (regular user messages before the bulky tool outputs)
|
||||
// Last message has functionCall -> can't compress everything
|
||||
// Should return lastSplitPoint=2 (last valid primary split point)
|
||||
expect(findCompressSplitPoint(history, 0.7)).toBe(2);
|
||||
});
|
||||
|
||||
it('should still prefer primary split point when it is better', () => {
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{ role: 'model', parts: [{ text: 'resp1' }] },
|
||||
{
|
||||
role: 'user',
|
||||
parts: [{ text: 'msg2 with some substantial content here' }],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'tool1', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'tool1',
|
||||
response: { result: 'short' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'msg3' }] },
|
||||
{ role: 'model', parts: [{ text: 'resp3' }] },
|
||||
{ role: 'user', parts: [{ text: 'msg4' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'tool2', args: {} } }],
|
||||
},
|
||||
];
|
||||
// Primary split points: 0, 2, 5, 7
|
||||
// Last message has functionCall -> can't compress everything
|
||||
// At 0.99 fraction, lastSplitPoint should be 7
|
||||
expect(findCompressSplitPoint(history, 0.99)).toBe(7);
|
||||
});
|
||||
});
|
||||
|
||||
describe('ChatCompressionService', () => {
|
||||
|
|
@ -240,6 +407,47 @@ describe('ChatCompressionService', () => {
|
|||
expect(tokenLimit).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return NOOP when historyToCompress is below MIN_COMPRESSION_FRACTION of total', async () => {
|
||||
// Construct a history where the split point lands on the 2nd regular user
|
||||
// message (index 2), but indices 0-1 are tiny relative to the huge content
|
||||
// at index 2. historyToCompress = [0,1] will be << 5% of totalCharCount.
|
||||
const hugeContent = 'x'.repeat(100000);
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'hello' }] },
|
||||
{ role: 'model', parts: [{ text: 'world' }] },
|
||||
// Huge user message pushes the cumulative well past the split threshold
|
||||
{ role: 'user', parts: [{ text: hugeContent }] },
|
||||
// Pending functionCall prevents returning contents.length,
|
||||
// so the fallback split at index 2 is used
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'process', args: {} } }],
|
||||
},
|
||||
];
|
||||
vi.mocked(mockChat.getHistory).mockReturnValue(history);
|
||||
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100);
|
||||
vi.mocked(tokenLimit).mockReturnValue(1000);
|
||||
|
||||
const mockGenerateContent = vi.fn();
|
||||
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
|
||||
generateContent: mockGenerateContent,
|
||||
} as unknown as ContentGenerator);
|
||||
|
||||
// force=true bypasses the token threshold gate so we exercise the 5% guard
|
||||
const result = await service.compress(
|
||||
mockChat,
|
||||
mockPromptId,
|
||||
true,
|
||||
mockModel,
|
||||
mockConfig,
|
||||
false,
|
||||
);
|
||||
|
||||
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
|
||||
expect(result.newHistory).toBeNull();
|
||||
expect(mockGenerateContent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should compress if over token threshold', async () => {
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
|
|
@ -934,4 +1142,149 @@ describe('ChatCompressionService', () => {
|
|||
expect(mockFirePreCompactEvent).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('orphaned trailing funcCall handling', () => {
|
||||
it('should compress everything when force=true and last message is an orphaned funcCall', async () => {
|
||||
// Issue #2647: tool-heavy conversation interrupted/crashed while a tool
|
||||
// was still running. The funcCall will never get a response since the agent
|
||||
// is idle. Manual /compress strips the orphaned funcCall, then compresses
|
||||
// the remaining history normally.
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'glob', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'glob',
|
||||
response: { result: 'files...' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'readFile', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'readFile',
|
||||
response: { result: 'code...' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// orphaned funcCall — agent was interrupted before getting a response
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'editFile', args: {} } }],
|
||||
},
|
||||
];
|
||||
vi.mocked(mockChat.getHistory).mockReturnValue(history);
|
||||
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
|
||||
100,
|
||||
);
|
||||
vi.mocked(tokenLimit).mockReturnValue(1000);
|
||||
|
||||
const mockGenerateContent = vi.fn().mockResolvedValue({
|
||||
candidates: [
|
||||
{ content: { parts: [{ text: 'Summary of all work done' }] } },
|
||||
],
|
||||
usageMetadata: {
|
||||
promptTokenCount: 1100,
|
||||
candidatesTokenCount: 50,
|
||||
totalTokenCount: 1150,
|
||||
},
|
||||
} as unknown as GenerateContentResponse);
|
||||
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
|
||||
generateContent: mockGenerateContent,
|
||||
} as unknown as ContentGenerator);
|
||||
|
||||
const result = await service.compress(
|
||||
mockChat,
|
||||
mockPromptId,
|
||||
true, // force=true (manual /compress)
|
||||
mockModel,
|
||||
mockConfig,
|
||||
false,
|
||||
);
|
||||
|
||||
// Should compress successfully — orphaned funcCall is stripped first, then
|
||||
// normal compression runs on the remaining history, historyToKeep is empty
|
||||
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
|
||||
expect(result.newHistory).not.toBeNull();
|
||||
// Reconstructed history: [User(summary), Model("Got it...")] — valid structure
|
||||
expect(result.newHistory).toHaveLength(2);
|
||||
expect(result.newHistory![0].role).toBe('user');
|
||||
expect(result.newHistory![1].role).toBe('model');
|
||||
// The orphaned funcCall is stripped before compression, so only the first 5
|
||||
// messages are sent, plus the compression instruction (+1) = history.length total.
|
||||
const callArg = mockGenerateContent.mock.calls[0][0];
|
||||
expect(callArg.contents.length).toBe(history.length); // (history.length - 1) messages + 1 instruction
|
||||
});
|
||||
|
||||
it('should NOT compress orphaned funcCall when force=false (auto-compress)', async () => {
|
||||
// Auto-compress fires BEFORE the matching funcResponse is sent back to the
|
||||
// model. Compressing the funcCall away would orphan the upcoming funcResponse
|
||||
// and cause an API error. So force=false must NOT take this path.
|
||||
const history: Content[] = [
|
||||
{ role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'glob', args: {} } }],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
name: 'glob',
|
||||
response: { result: 'files...' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Pending funcCall: tool is currently executing, funcResponse is coming
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ functionCall: { name: 'readFile', args: {} } }],
|
||||
},
|
||||
];
|
||||
vi.mocked(mockChat.getHistory).mockReturnValue(history);
|
||||
// Use a token count above threshold to ensure auto-compress isn't skipped
|
||||
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
|
||||
800,
|
||||
);
|
||||
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
|
||||
model: 'gemini-pro',
|
||||
contextWindowSize: 1000,
|
||||
} as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
|
||||
|
||||
const mockGenerateContent = vi.fn();
|
||||
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
|
||||
generateContent: mockGenerateContent,
|
||||
} as unknown as ContentGenerator);
|
||||
|
||||
const result = await service.compress(
|
||||
mockChat,
|
||||
mockPromptId,
|
||||
false, // force=false (auto-compress)
|
||||
mockModel,
|
||||
mockConfig,
|
||||
false,
|
||||
);
|
||||
|
||||
// Must return NOOP — compressing would orphan the upcoming funcResponse
|
||||
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP);
|
||||
expect(result.newHistory).toBeNull();
|
||||
expect(mockGenerateContent).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -29,6 +29,13 @@ export const COMPRESSION_TOKEN_THRESHOLD = 0.7;
|
|||
*/
|
||||
export const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
|
||||
|
||||
/**
|
||||
* Minimum fraction of history (by character count) that must be compressible
|
||||
* to proceed with a compression API call. Prevents futile calls where the
|
||||
* model receives almost no context and generates a useless summary.
|
||||
*/
|
||||
export const MIN_COMPRESSION_FRACTION = 0.05;
|
||||
|
||||
/**
|
||||
* Returns the index of the oldest item to keep when compressing. May return
|
||||
* contents.length which indicates that everything should be compressed.
|
||||
|
|
@ -72,8 +79,15 @@ export function findCompressSplitPoint(
|
|||
) {
|
||||
return contents.length;
|
||||
}
|
||||
// Also safe to compress everything if the last message completes a tool call
|
||||
// sequence (all function calls have matching responses).
|
||||
if (
|
||||
lastContent?.role === 'user' &&
|
||||
lastContent?.parts?.some((part) => !!part.functionResponse)
|
||||
) {
|
||||
return contents.length;
|
||||
}
|
||||
|
||||
// Can't compress everything so just compress at last splitpoint.
|
||||
return lastSplitPoint;
|
||||
}
|
||||
|
||||
|
|
@ -138,13 +152,31 @@ export class ChatCompressionService {
|
|||
}
|
||||
}
|
||||
|
||||
// For manual /compress (force=true), if the last message is an orphaned model
|
||||
// funcCall (agent interrupted/crashed before the response arrived), strip it
|
||||
// before computing the split point. After stripping, the history ends cleanly
|
||||
// (typically with a user funcResponse) and findCompressSplitPoint handles it
|
||||
// through its normal logic — no special-casing needed.
|
||||
//
|
||||
// auto-compress (force=false) must NOT strip: it fires inside
|
||||
// sendMessageStream() before the matching funcResponse is pushed onto the
|
||||
// history, so the trailing funcCall is still active, not orphaned.
|
||||
const lastMessage = curatedHistory[curatedHistory.length - 1];
|
||||
const hasOrphanedFuncCall =
|
||||
force &&
|
||||
lastMessage?.role === 'model' &&
|
||||
lastMessage.parts?.some((p) => !!p.functionCall);
|
||||
const historyForSplit = hasOrphanedFuncCall
|
||||
? curatedHistory.slice(0, -1)
|
||||
: curatedHistory;
|
||||
|
||||
const splitPoint = findCompressSplitPoint(
|
||||
curatedHistory,
|
||||
historyForSplit,
|
||||
1 - COMPRESSION_PRESERVE_THRESHOLD,
|
||||
);
|
||||
|
||||
const historyToCompress = curatedHistory.slice(0, splitPoint);
|
||||
const historyToKeep = curatedHistory.slice(splitPoint);
|
||||
const historyToCompress = historyForSplit.slice(0, splitPoint);
|
||||
const historyToKeep = historyForSplit.slice(splitPoint);
|
||||
|
||||
if (historyToCompress.length === 0) {
|
||||
return {
|
||||
|
|
@ -157,6 +189,35 @@ export class ChatCompressionService {
|
|||
};
|
||||
}
|
||||
|
||||
// Guard: if historyToCompress is too small relative to the total history,
|
||||
// skip compression. This prevents futile API calls where the model receives
|
||||
// almost no context and generates a useless "summary" that inflates tokens.
|
||||
//
|
||||
// Note: findCompressSplitPoint already computes charCounts internally but
|
||||
// returns only the split index. We intentionally recompute here to keep
|
||||
// the function signature simple; this is a minor, acceptable duplication.
|
||||
const compressCharCount = historyToCompress.reduce(
|
||||
(sum, c) => sum + JSON.stringify(c).length,
|
||||
0,
|
||||
);
|
||||
const totalCharCount = historyForSplit.reduce(
|
||||
(sum, c) => sum + JSON.stringify(c).length,
|
||||
0,
|
||||
);
|
||||
if (
|
||||
totalCharCount > 0 &&
|
||||
compressCharCount / totalCharCount < MIN_COMPRESSION_FRACTION
|
||||
) {
|
||||
return {
|
||||
newHistory: null,
|
||||
info: {
|
||||
originalTokenCount,
|
||||
newTokenCount: originalTokenCount,
|
||||
compressionStatus: CompressionStatus.NOOP,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const summaryResponse = await config.getContentGenerator().generateContent(
|
||||
{
|
||||
model,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue