Merge pull request #2897 from QwenLM/feat/thinking-cross-turn-retention-idle-cleanup

feat(core): thinking block cross-turn retention with idle cleanup
This commit is contained in:
tanzhenxin 2026-04-08 15:26:53 +08:00 committed by GitHub
commit 3c23952ef7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 434 additions and 2 deletions

View file

@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
| `context.gapThresholdMinutes` | number | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL. | `5` |
#### Troubleshooting File Search Performance

View file

@ -1069,6 +1069,7 @@ export async function loadCliConfig(
telemetry: telemetrySettings,
usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
fileFiltering: settings.context?.fileFiltering,
thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
checkpointing:
argv.checkpointing || settings.general?.checkpointing?.enabled,
proxy:

View file

@ -924,6 +924,16 @@ const SETTINGS_SCHEMA = {
},
},
},
gapThresholdMinutes: {
type: 'number',
label: 'Thinking Block Idle Threshold (minutes)',
category: 'Context',
requiresRestart: false,
default: 5,
description:
'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
showInDialog: false,
},
},
},

View file

@ -371,6 +371,8 @@ export interface ConfigParameters {
model?: string;
outputLanguageFilePath?: string;
maxSessionTurns?: number;
/** Minutes of inactivity before clearing retained thinking blocks. */
thinkingIdleThresholdMinutes?: number;
sessionTokenLimit?: number;
experimentalZedIntegration?: boolean;
cronEnabled?: boolean;
@ -559,6 +561,7 @@ export class Config {
private ideMode: boolean;
private readonly maxSessionTurns: number;
private readonly thinkingIdleThresholdMs: number;
private readonly sessionTokenLimit: number;
private readonly listExtensions: boolean;
private readonly overrideExtensions?: string[];
@ -685,6 +688,8 @@ export class Config {
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
this.bugCommand = params.bugCommand;
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.thinkingIdleThresholdMs =
(params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
this.experimentalZedIntegration =
params.experimentalZedIntegration ?? false;
@ -1331,6 +1336,10 @@ export class Config {
return this.maxSessionTurns;
}
/**
 * Returns the idle threshold, in milliseconds, after which retained
 * thinking blocks are cleared. The value is computed in the constructor
 * from `thinkingIdleThresholdMinutes` (5 minutes when that parameter is
 * not supplied).
 *
 * @returns The threshold in milliseconds.
 */
getThinkingIdleThresholdMs(): number {
return this.thinkingIdleThresholdMs;
}
/**
 * Returns the session token limit supplied at construction time; the
 * constructor stores -1 when no `sessionTokenLimit` parameter was provided.
 *
 * @returns The stored session token limit.
 */
getSessionTokenLimit(): number {
return this.sessionTokenLimit;
}

View file

@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
getFileService: vi.fn().mockReturnValue(fileService),
getMaxSessionTurns: vi.fn().mockReturnValue(0),
getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
getSessionTokenLimit: vi.fn().mockReturnValue(32000),
getNoBrowser: vi.fn().mockReturnValue(false),
getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@ -427,6 +428,119 @@ describe('Gemini Client (client.ts)', () => {
});
});
describe('thinking block idle cleanup and latch', () => {
  const MINUTE_MS = 60 * 1000;
  let mockChat: Partial<GeminiChat>;

  // Issues one user query and consumes every event from the returned stream,
  // so that all post-stream bookkeeping in sendMessageStream has run.
  const sendAndDrain = async (promptId: string): Promise<void> => {
    const events = client.sendMessageStream(
      [{ text: 'Hello' }],
      new AbortController().signal,
      promptId,
      { type: SendMessageType.UserQuery },
    );
    for await (const _ of events) {
      /* drain */
    }
  };

  beforeEach(() => {
    // A turn that yields exactly one content event and then finishes.
    async function* singleContentEvent() {
      yield {
        type: GeminiEventType.Content,
        value: 'response',
      };
    }
    mockTurnRunFn.mockReturnValue(singleContentEvent());

    mockChat = {
      addHistory: vi.fn(),
      getHistory: vi.fn().mockReturnValue([]),
      stripThoughtsFromHistory: vi.fn(),
      stripThoughtsFromHistoryKeepRecent: vi.fn(),
    };
    client['chat'] = mockChat as GeminiChat;
  });

  it('should not strip thoughts on active session (< 5min idle)', async () => {
    // Last completion was 2 minutes ago: inside the default 5 minute window.
    client['lastApiCompletionTimestamp'] = Date.now() - 2 * MINUTE_MS;
    client['thinkingClearLatched'] = false;

    await sendAndDrain('prompt-1');

    expect(
      mockChat.stripThoughtsFromHistoryKeepRecent,
    ).not.toHaveBeenCalled();
  });

  it('should latch and strip thoughts after > 5min idle', async () => {
    // Last completion was 10 minutes ago: beyond the default 5 minute window.
    client['lastApiCompletionTimestamp'] = Date.now() - 10 * MINUTE_MS;
    client['thinkingClearLatched'] = false;

    await sendAndDrain('prompt-2');

    expect(client['thinkingClearLatched']).toBe(true);
    expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
      1,
    );
  });

  it('should keep stripping once latched even if idle < 5min', async () => {
    // Latch is already set; the recent timestamp (2 minutes ago) must not undo it.
    client['lastApiCompletionTimestamp'] = Date.now() - 2 * MINUTE_MS;
    client['thinkingClearLatched'] = true;

    await sendAndDrain('prompt-3');

    expect(client['thinkingClearLatched']).toBe(true);
    expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
      1,
    );
  });

  it('should update lastApiCompletionTimestamp after API call', async () => {
    client['lastApiCompletionTimestamp'] = null;
    const before = Date.now();

    await sendAndDrain('prompt-4');

    expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual(
      before,
    );
  });

  it('should reset latch and timestamp on resetChat', async () => {
    client['lastApiCompletionTimestamp'] = Date.now();
    client['thinkingClearLatched'] = true;

    await client.resetChat();

    expect(client['thinkingClearLatched']).toBe(false);
    expect(client['lastApiCompletionTimestamp']).toBeNull();
  });
});
describe('tryCompressChat', () => {
const mockGetHistory = vi.fn();
@ -436,6 +550,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
});
@ -457,6 +572,7 @@ describe('Gemini Client (client.ts)', () => {
getHistory: vi.fn((_curated?: boolean) => chatHistory),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockOriginalChat as GeminiChat;
@ -1149,6 +1265,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
client['chat'] = mockChat;
@ -1204,6 +1321,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1260,6 +1378,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1326,6 +1445,7 @@ hello
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1365,6 +1485,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1410,6 +1531,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1498,6 +1620,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1555,6 +1678,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1636,6 +1760,7 @@ Other open files:
{ role: 'user', parts: [{ text: 'previous message' }] },
]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
@ -1889,6 +2014,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]), // Default empty history
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2228,6 +2354,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2265,6 +2392,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2305,6 +2433,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2329,6 +2458,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2361,6 +2491,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2405,6 +2536,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});

View file

@ -126,6 +126,25 @@ export class GeminiClient {
*/
private hasFailedCompressionAttempt = false;
/**
* Timestamp (epoch ms) of the last completed API call.
* Used to detect idle periods for thinking block cleanup.
* Starts as null — on the first query there is no prior thinking to clean,
* so the idle check is skipped until the first API call completes.
*/
private lastApiCompletionTimestamp: number | null = null;
/**
* Sticky-on latch for clearing thinking blocks from prior turns.
* Triggered when idle exceeds the configured threshold (default 5 min,
* aligned with provider prompt-cache TTL). Once latched, stays true to
* prevent oscillation: without it, thinking would accumulate → get
* stripped → accumulate again, causing the message prefix to change
* repeatedly (bad for provider-side prompt caching and wastes context).
* Reset on /clear (resetChat).
*/
private thinkingClearLatched = false;
constructor(private readonly config: Config) {
this.loopDetector = new LoopDetectionService(config);
}
@ -199,6 +218,9 @@ export class GeminiClient {
}
/**
 * Clears the thinking-cleanup state and then awaits `startChat()`.
 *
 * Both the idle latch and the last-completion timestamp are reset before
 * the new chat is started. With the timestamp back at null, the idle check
 * in the send path is skipped until an API call completes again.
 */
async resetChat(): Promise<void> {
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
this.thinkingClearLatched = false;
this.lastApiCompletionTimestamp = null;
await this.startChat();
}
@ -537,8 +559,27 @@ export class GeminiClient {
// record user message for session management
this.config.getChatRecordingService()?.recordUserMessage(request);
// strip thoughts from history before sending the message
this.stripThoughtsFromHistory();
// Thinking block cross-turn retention with idle cleanup:
// - Active session (< threshold idle): keep thinking blocks for reasoning coherence
// - Idle > threshold: clear old thinking, keep only last 1 turn to free context
// - Latch: once triggered, never revert — prevents oscillation
if (
!this.thinkingClearLatched &&
this.lastApiCompletionTimestamp !== null
) {
const thresholdMs = this.config.getThinkingIdleThresholdMs();
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
if (idleMs > thresholdMs) {
this.thinkingClearLatched = true;
debugLogger.debug(
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
);
}
}
if (this.thinkingClearLatched) {
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
}
}
if (messageType !== SendMessageType.Retry) {
this.sessionTurnCount++;
@ -680,6 +721,7 @@ export class GeminiClient {
if (arenaAgentClient) {
await arenaAgentClient.reportError('Loop detected');
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
@ -698,9 +740,14 @@ export class GeminiClient {
: 'Unknown error';
await arenaAgentClient.reportError(errorMsg);
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
// Track API completion time for thinking block idle cleanup
this.lastApiCompletionTimestamp = Date.now();
// Fire Stop hook through MessageBus (only if hooks are enabled and registered)
// This must be done before any early returns to ensure hooks are always triggered
if (

View file

@ -1923,6 +1923,150 @@ describe('GeminiChat', async () => {
});
});
// Exercises GeminiChat.stripThoughtsFromHistoryKeepRecent: which model turns
// lose their thought parts, which keep them, and how emptied turns and
// thoughtSignature keys are handled.
describe('stripThoughtsFromHistoryKeepRecent', () => {
it('should keep the most recent N model turns with thoughts', () => {
// Three model turns, each with one thought part followed by a response.
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'mid thinking', thought: true },
{ text: 'response2' },
],
},
{ role: 'user', parts: [{ text: 'msg3' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First two model turns should have thoughts stripped
expect(history[1]!.parts).toEqual([{ text: 'response1' }]);
expect(history[3]!.parts).toEqual([{ text: 'response2' }]);
// Last model turn should keep thoughts
expect(history[5]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
]);
});
it('should not strip anything when keepTurns >= model turns with thoughts', () => {
// Only one thought-bearing model turn exists, so keepTurns=1 covers it.
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([
{ text: 'thinking', thought: true },
{ text: 'response' },
]);
});
it('should remove model content objects that become empty after stripping', () => {
// The older model turn consists solely of a thought part.
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'only thinking', thought: true }],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// The first model turn (only thoughts) should be removed entirely
expect(history).toHaveLength(3);
expect(history[0]!.parts).toEqual([{ text: 'msg1' }]);
expect(history[1]!.parts).toEqual([{ text: 'msg2' }]);
expect(history[2]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response' },
]);
});
it('should also strip thoughtSignature from stripped turns', () => {
// The older model turn mixes a thought part with a non-thought part that
// carries a thoughtSignature key.
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{
text: 'with sig',
thoughtSignature: 'sig1',
} as unknown as { text: string; thoughtSignature: string },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response2' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First model turn: thought stripped, thoughtSignature stripped
expect(history[1]!.parts).toEqual([
{ text: 'with sig' },
{ text: 'response1' },
]);
// Explicitly confirm the signature key is gone from the surviving part.
expect(
(history[1]!.parts![0] as { thoughtSignature?: string })
.thoughtSignature,
).toBeUndefined();
});
it('should handle keepTurns=0 by stripping all thoughts', () => {
// With nothing retained, even the only thought-bearing turn is stripped.
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(0);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([{ text: 'response' }]);
});
});
describe('stripOrphanedUserEntriesFromHistory', () => {
it('should pop a single trailing user entry', () => {
chat.setHistory([

View file

@ -625,6 +625,89 @@ export class GeminiChat {
.filter((content) => content.parts && content.parts.length > 0);
}
/**
 * Drops thinking ("thought") parts from older model turns while leaving the
 * newest `keepTurns` thought-bearing model turns exactly as they are.
 *
 * Only model turns that actually contain a thought part are counted, so a
 * trailing run of thought-free model turns never pushes the latest
 * reasoning chain out of the retained window.
 *
 * For each turn selected for stripping, thought parts are removed and any
 * `thoughtSignature` key is deleted from copies of the remaining parts.
 * When at least one turn is stripped, history entries that end up with no
 * parts are removed as well. When no turn qualifies, the history is left
 * untouched.
 *
 * Used for idle cleanup: once the configured idle threshold is exceeded,
 * old thinking blocks no longer help reasoning coherence but still consume
 * context tokens.
 *
 * @param keepTurns Number of most recent thought-bearing model turns to
 *   preserve. Non-finite values are treated as 0; finite values are floored
 *   and clamped to be non-negative.
 */
stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
  const retainCount = Number.isFinite(keepTurns)
    ? Math.max(0, Math.floor(keepTurns))
    : 0;

  // True for object parts whose `thought` property is truthy.
  const isThoughtPart = (part: unknown): boolean =>
    typeof part === 'object' &&
    part !== null &&
    'thought' in part &&
    Boolean((part as { thought?: unknown }).thought);

  // Positions of model turns that carry at least one thought part,
  // in chronological order.
  const thoughtTurnPositions: number[] = [];
  this.history.forEach((entry, position) => {
    if (entry.role === 'model' && entry.parts?.some(isThoughtPart)) {
      thoughtTurnPositions.push(position);
    }
  });

  // Everything except the trailing `retainCount` positions gets stripped.
  const stripCount = thoughtTurnPositions.length - retainCount;
  if (stripCount <= 0) return;
  const positionsToStrip = new Set(thoughtTurnPositions.slice(0, stripCount));

  const rewritten = this.history.map((entry, position) => {
    const parts = entry.parts;
    if (!parts || !positionsToStrip.has(position)) return entry;

    const keptParts = parts
      .filter((part) => !isThoughtPart(part))
      .map((part) => {
        if (!(part && typeof part === 'object' && 'thoughtSignature' in part)) {
          return part;
        }
        // Work on a shallow copy so the original part object is not mutated.
        const unsignedPart = { ...part };
        delete (unsignedPart as { thoughtSignature?: string })
          .thoughtSignature;
        return unsignedPart;
      });

    return { ...entry, parts: keptParts };
  });

  // Remove Content objects that have no parts left after filtering
  this.history = rewritten.filter(
    (entry) => entry.parts && entry.parts.length > 0,
  );
}
/**
* Pop all orphaned trailing user entries from chat history.
* In a valid conversation the last entry is always a model response;

View file

@ -388,6 +388,11 @@
"default": true
}
}
},
"gapThresholdMinutes": {
"description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
"type": "number",
"default": 5
}
}
},