From 03d91f1ef1720e66aaf926c4b891e784041cf7ef Mon Sep 17 00:00:00 2001 From: wenshao Date: Sun, 5 Apr 2026 06:35:42 +0800 Subject: [PATCH 1/5] feat(core): thinking block cross-turn retention with idle cleanup Previously, all thinking blocks were unconditionally stripped from history on every new user query. This caused loss of reasoning coherence in active multi-turn sessions where thinking context was still valuable. Now thinking blocks are preserved during active sessions and only cleaned up after >1h idle (cache TTL expired), keeping the most recent 1 turn. A sticky-on latch prevents the cleanup from reverting, protecting the newly-warmed cache prefix from invalidation. - Add `stripThoughtsFromHistoryKeepRecent(keepTurns)` to GeminiChat - Add `lastApiCompletionTimestamp` and `thinkingClearLatched` to GeminiClient - Replace unconditional strip with idle-aware logic in sendMessageStream - Track API completion timestamp on all exit paths (success/error/loop) - Reset latch and timestamp on resetChat() - Add 5 unit tests for the new method, update 18 mock objects Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/core/client.test.ts | 131 ++++++++++++++++++++ packages/core/src/core/client.ts | 55 ++++++++- packages/core/src/core/geminiChat.test.ts | 144 ++++++++++++++++++++++ packages/core/src/core/geminiChat.ts | 77 ++++++++++++ 4 files changed, 405 insertions(+), 2 deletions(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 19dd88bcf..b23b0ece9 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -427,6 +427,119 @@ describe('Gemini Client (client.ts)', () => { }); }); + describe('thinking block idle cleanup and latch', () => { + let mockChat: Partial<GeminiChat>; + + beforeEach(() => { + const mockStream = (async function* () { + yield { + type: GeminiEventType.Content, + value: 'response', + }; + })(); + mockTurnRunFn.mockReturnValue(mockStream); + + mockChat = { + 
addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), + }; + client['chat'] = mockChat as GeminiChat; + }); + + it('should not strip thoughts on active session (< 1h idle)', async () => { + // Simulate a recent API completion (5 minutes ago) + client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000; + client['thinkingClearLatched'] = false; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-1', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect( + mockChat.stripThoughtsFromHistoryKeepRecent, + ).not.toHaveBeenCalled(); + }); + + it('should latch and strip thoughts after > 1h idle', async () => { + // Simulate an old API completion (2 hours ago) + client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000; + client['thinkingClearLatched'] = false; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-2', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['thinkingClearLatched']).toBe(true); + expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith( + 1, + ); + }); + + it('should keep stripping once latched even if idle < 1h', async () => { + // Pre-set latch with a recent timestamp + client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000; + client['thinkingClearLatched'] = true; + + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-3', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['thinkingClearLatched']).toBe(true); + expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith( + 1, + ); + }); + + it('should update lastApiCompletionTimestamp after API call', async () => { + 
client['lastApiCompletionTimestamp'] = null; + + const before = Date.now(); + const gen = client.sendMessageStream( + [{ text: 'Hello' }], + new AbortController().signal, + 'prompt-4', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of gen) { + /* drain */ + } + + expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual( + before, + ); + }); + + it('should reset latch and timestamp on resetChat', async () => { + client['lastApiCompletionTimestamp'] = Date.now(); + client['thinkingClearLatched'] = true; + + await client.resetChat(); + + expect(client['thinkingClearLatched']).toBe(false); + expect(client['lastApiCompletionTimestamp']).toBeNull(); + }); + }); + describe('tryCompressChat', () => { const mockGetHistory = vi.fn(); @@ -436,6 +549,7 @@ describe('Gemini Client (client.ts)', () => { addHistory: vi.fn(), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), } as unknown as GeminiChat; }); @@ -457,6 +571,7 @@ describe('Gemini Client (client.ts)', () => { getHistory: vi.fn((_curated?: boolean) => chatHistory), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockOriginalChat as GeminiChat; @@ -1149,6 +1264,7 @@ describe('Gemini Client (client.ts)', () => { addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), } as unknown as GeminiChat; client['chat'] = mockChat; @@ -1204,6 +1320,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1260,6 +1377,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1326,6 
+1444,7 @@ hello addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1365,6 +1484,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1410,6 +1530,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1498,6 +1619,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1555,6 +1677,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -1636,6 +1759,7 @@ Other open files: { role: 'user', parts: [{ text: 'previous message' }] }, ]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; }); @@ -1889,6 +2013,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), // Default empty history setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2228,6 +2353,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2265,6 +2391,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + 
stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2305,6 +2432,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2329,6 +2457,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), stripOrphanedUserEntriesFromHistory: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2361,6 +2490,7 @@ Other open files: getHistory: vi.fn().mockReturnValue([]), setHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), stripOrphanedUserEntriesFromHistory: vi.fn(), }; client['chat'] = mockChat as GeminiChat; @@ -2405,6 +2535,7 @@ Other open files: addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), stripThoughtsFromHistory: vi.fn(), + stripThoughtsFromHistoryKeepRecent: vi.fn(), }; client['chat'] = mockChat as GeminiChat; }); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 0f985364b..128e8f8ba 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -111,6 +111,13 @@ export interface SendMessageOptions { }; } +/** + * Idle threshold for thinking block cleanup. After this period without any + * API call the old thinking blocks are unlikely to aid reasoning coherence + * and only waste context tokens. + */ +const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour + export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; @@ -126,6 +133,25 @@ export class GeminiClient { */ private hasFailedCompressionAttempt = false; + /** + * Timestamp (epoch ms) of the last completed API call. + * Used to detect idle periods for thinking block cleanup. 
+ * Starts as null — on the first query there is no prior thinking to clean, + * so the idle check is skipped until the first API call completes. + */ + private lastApiCompletionTimestamp: number | null = null; + + /** + * Sticky-on latch for clearing thinking blocks from prior turns. + * Triggered when >1h since last API call — old thinking is no longer + * useful for reasoning coherence. Once latched, stays true to prevent + * oscillation: without it, thinking would accumulate → get stripped → + * accumulate again, causing the message prefix to change repeatedly + * (bad for any provider-side prompt caching and wastes context). + * Reset on /clear (resetChat). + */ + private thinkingClearLatched = false; + constructor(private readonly config: Config) { this.loopDetector = new LoopDetectionService(config); } @@ -199,6 +225,9 @@ export class GeminiClient { } async resetChat(): Promise<void> { + // Reset thinking clear latch — fresh chat, no prior thinking to clean up + this.thinkingClearLatched = false; + this.lastApiCompletionTimestamp = null; await this.startChat(); } @@ -537,8 +566,24 @@ export class GeminiClient { // record user message for session management this.config.getChatRecordingService()?.recordUserMessage(request); - // strip thoughts from history before sending the message - this.stripThoughtsFromHistory(); + // Thinking block cross-turn retention with idle cleanup: + // - Active session (< 1h idle): keep thinking blocks for reasoning coherence + // - Idle > 1h: clear old thinking, keep only last 1 turn to free context + // - Latch: once triggered, never revert — prevents oscillation + if ( + !this.thinkingClearLatched && + this.lastApiCompletionTimestamp !== null + ) { + if ( + Date.now() - this.lastApiCompletionTimestamp > + THINKING_IDLE_THRESHOLD_MS + ) { + this.thinkingClearLatched = true; + } + } + if (this.thinkingClearLatched) { + this.getChat().stripThoughtsFromHistoryKeepRecent(1); + } } if (messageType !== SendMessageType.Retry) { 
this.sessionTurnCount++; @@ -680,6 +725,7 @@ export class GeminiClient { if (arenaAgentClient) { await arenaAgentClient.reportError('Loop detected'); } + this.lastApiCompletionTimestamp = Date.now(); return turn; } } @@ -698,9 +744,14 @@ export class GeminiClient { : 'Unknown error'; await arenaAgentClient.reportError(errorMsg); } + this.lastApiCompletionTimestamp = Date.now(); return turn; } } + + // Track API completion time for thinking block idle cleanup + this.lastApiCompletionTimestamp = Date.now(); + // Fire Stop hook through MessageBus (only if hooks are enabled and registered) // This must be done before any early returns to ensure hooks are always triggered if ( diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 4a4781388..9d9b45caf 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -1767,6 +1767,150 @@ describe('GeminiChat', async () => { }); }); + describe('stripThoughtsFromHistoryKeepRecent', () => { + it('should keep the most recent N model turns with thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [ + { text: 'old thinking', thought: true }, + { text: 'response1' }, + ], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'mid thinking', thought: true }, + { text: 'response2' }, + ], + }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response3' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // First two model turns should have thoughts stripped + expect(history[1]!.parts).toEqual([{ text: 'response1' }]); + expect(history[3]!.parts).toEqual([{ text: 'response2' }]); + // Last model turn should keep thoughts + expect(history[5]!.parts).toEqual([ + { text: 'recent thinking', thought: true 
}, + { text: 'response3' }, + ]); + }); + + it('should not strip anything when keepTurns >= model turns with thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'thinking', thought: true }, { text: 'response' }], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + expect(history[1]!.parts).toEqual([ + { text: 'thinking', thought: true }, + { text: 'response' }, + ]); + }); + + it('should remove model content objects that become empty after stripping', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'only thinking', thought: true }], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // The first model turn (only thoughts) should be removed entirely + expect(history).toHaveLength(3); + expect(history[0]!.parts).toEqual([{ text: 'msg1' }]); + expect(history[1]!.parts).toEqual([{ text: 'msg2' }]); + expect(history[2]!.parts).toEqual([ + { text: 'recent thinking', thought: true }, + { text: 'response' }, + ]); + }); + + it('should also strip thoughtSignature from stripped turns', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [ + { text: 'old thinking', thought: true }, + { + text: 'with sig', + thoughtSignature: 'sig1', + } as unknown as { text: string; thoughtSignature: string }, + { text: 'response1' }, + ], + }, + { role: 'user', parts: [{ text: 'msg2' }] }, + { + role: 'model', + parts: [ + { text: 'recent thinking', thought: true }, + { text: 'response2' }, + ], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(1); + + const history = chat.getHistory(); + // First model turn: thought stripped, thoughtSignature 
stripped + expect(history[1]!.parts).toEqual([ + { text: 'with sig' }, + { text: 'response1' }, + ]); + expect( + (history[1]!.parts![0] as { thoughtSignature?: string }) + .thoughtSignature, + ).toBeUndefined(); + }); + + it('should handle keepTurns=0 by stripping all thoughts', () => { + chat.setHistory([ + { role: 'user', parts: [{ text: 'msg1' }] }, + { + role: 'model', + parts: [{ text: 'thinking', thought: true }, { text: 'response' }], + }, + ]); + + chat.stripThoughtsFromHistoryKeepRecent(0); + + const history = chat.getHistory(); + expect(history[1]!.parts).toEqual([{ text: 'response' }]); + }); + }); + describe('stripOrphanedUserEntriesFromHistory', () => { it('should pop a single trailing user entry', () => { chat.setHistory([ diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 12dfcb080..f622e16ca 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -576,6 +576,83 @@ export class GeminiChat { .filter((content) => content.parts && content.parts.length > 0); } + /** + * Strip thought parts from history, keeping the most recent `keepTurns` + * model turns' thinking blocks intact. + * + * Used for idle cleanup: after >1h idle the old thinking blocks are no + * longer useful for reasoning coherence but still consume context tokens. + * Keeping the most recent turn preserves the latest reasoning chain. 
+ */ + stripThoughtsFromHistoryKeepRecent(keepTurns: number): void { + keepTurns = Math.max(0, Math.floor(keepTurns)); + + // Find indices of model turns that contain thought parts + const modelTurnIndices: number[] = []; + for (let i = 0; i < this.history.length; i++) { + const content = this.history[i]; + if ( + content.role === 'model' && + content.parts?.some( + (part) => + part && + typeof part === 'object' && + 'thought' in part && + part.thought, + ) + ) { + modelTurnIndices.push(i); + } + } + + // Determine which model turns to strip (all but the most recent `keepTurns`) + const turnsToStrip = new Set( + modelTurnIndices.slice( + 0, + Math.max(0, modelTurnIndices.length - keepTurns), + ), + ); + + if (turnsToStrip.size === 0) return; + + this.history = this.history + .map((content, index) => { + if (!turnsToStrip.has(index) || !content.parts) return content; + + // Strip thought parts from this turn + const filteredParts = content.parts + .filter( + (part) => + !( + part && + typeof part === 'object' && + 'thought' in part && + part.thought + ), + ) + .map((part) => { + if ( + part && + typeof part === 'object' && + 'thoughtSignature' in part + ) { + const newPart = { ...part }; + delete (newPart as { thoughtSignature?: string }) + .thoughtSignature; + return newPart; + } + return part; + }); + + return { + ...content, + parts: filteredParts, + }; + }) + // Remove Content objects that have no parts left after filtering + .filter((content) => content.parts && content.parts.length > 0); + } + /** * Pop all orphaned trailing user entries from chat history. 
* In a valid conversation the last entry is always a model response; From e383bcebb26f2b65083266664b6ac05d33cfb082 Mon Sep 17 00:00:00 2001 From: wenshao Date: Sun, 5 Apr 2026 07:20:01 +0800 Subject: [PATCH 2/5] docs: clarify stripThoughtsFromHistoryKeepRecent JSDoc semantics Explicitly document that keepTurns selects from thought-containing model turns (not all model turns) to ensure the most recent reasoning chain is preserved even if later turns have no thinking. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/core/geminiChat.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index f622e16ca..4c3f39c7e 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -578,11 +578,14 @@ export class GeminiChat { /** * Strip thought parts from history, keeping the most recent `keepTurns` - * model turns' thinking blocks intact. + * model turns that contain thinking blocks intact. + * + * Selection is based on thought-containing turns specifically (not all + * model turns) so the most recent reasoning chain is always preserved + * even if later model turns happen to have no thinking. * * Used for idle cleanup: after >1h idle the old thinking blocks are no * longer useful for reasoning coherence but still consume context tokens. - * Keeping the most recent turn preserves the latest reasoning chain. 
*/ stripThoughtsFromHistoryKeepRecent(keepTurns: number): void { keepTurns = Math.max(0, Math.floor(keepTurns)); From 04ce8faff4215843638ccb445b5a2a1cc18bdf4f Mon Sep 17 00:00:00 2001 From: wenshao Date: Sun, 5 Apr 2026 11:03:55 +0800 Subject: [PATCH 3/5] fix: handle NaN/non-finite keepTurns in stripThoughtsFromHistoryKeepRecent Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/core/geminiChat.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 4c3f39c7e..e9a0a1ecc 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -588,7 +588,9 @@ export class GeminiChat { * longer useful for reasoning coherence but still consume context tokens. */ stripThoughtsFromHistoryKeepRecent(keepTurns: number): void { - keepTurns = Math.max(0, Math.floor(keepTurns)); + keepTurns = Number.isFinite(keepTurns) + ? Math.max(0, Math.floor(keepTurns)) + : 0; // Find indices of model turns that contain thought parts const modelTurnIndices: number[] = []; From 4bbdfee911562fba3bb31422c7d7a64525193436 Mon Sep 17 00:00:00 2001 From: wenshao Date: Sun, 5 Apr 2026 12:14:42 +0800 Subject: [PATCH 4/5] chore: add debug logging for thinking block idle cleanup Log when the latch triggers and when old thinking blocks are stripped, making the behavior observable via QWEN_CODE_DEBUG=CLIENT. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/core/client.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 128e8f8ba..2d730e771 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -574,15 +574,17 @@ export class GeminiClient { !this.thinkingClearLatched && this.lastApiCompletionTimestamp !== null ) { - if ( - Date.now() - this.lastApiCompletionTimestamp > - THINKING_IDLE_THRESHOLD_MS - ) { + const idleMs = Date.now() - this.lastApiCompletionTimestamp; + if (idleMs > THINKING_IDLE_THRESHOLD_MS) { this.thinkingClearLatched = true; + debugLogger.debug( + `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`, + ); } } if (this.thinkingClearLatched) { this.getChat().stripThoughtsFromHistoryKeepRecent(1); + debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)'); } } if (messageType !== SendMessageType.Retry) { From 6a55a9aeea1ec9cdb1f71567b937970c0b68e5b6 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 8 Apr 2026 14:21:06 +0800 Subject: [PATCH 5/5] feat(config): make thinking idle threshold configurable and lower default to 5min Align with observed provider prompt-cache TTL (~5 min). Add `context.gapThresholdMinutes` setting so users can tune the threshold for providers with different cache TTLs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/users/configuration/settings.md | 1 + packages/cli/src/config/config.ts | 1 + packages/cli/src/config/settingsSchema.ts | 10 +++++++ packages/core/src/config/config.ts | 9 +++++++ packages/core/src/core/client.test.ts | 19 +++++++------- packages/core/src/core/client.ts | 26 +++++++------------ packages/core/src/core/geminiChat.ts | 5 ++-- .../schemas/settings.schema.json | 5 ++++ 8 files changed, 49 insertions(+), 27 deletions(-) diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index 9389ba8f5..f27140fec 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s | `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` | | `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` | | `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` | +| `context.gapThresholdMinutes` | number | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL. | `5` | #### Troubleshooting File Search Performance diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 3d4ed84e2..2b64685f7 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1069,6 +1069,7 @@ export async function loadCliConfig( telemetry: telemetrySettings, usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? 
true, fileFiltering: settings.context?.fileFiltering, + thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes, checkpointing: argv.checkpointing || settings.general?.checkpointing?.enabled, proxy: diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index e765dd801..4645d5803 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -914,6 +914,16 @@ const SETTINGS_SCHEMA = { }, }, }, + gapThresholdMinutes: { + type: 'number', + label: 'Thinking Block Idle Threshold (minutes)', + category: 'Context', + requiresRestart: false, + default: 5, + description: + 'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.', + showInDialog: false, + }, }, }, diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index c54b55705..a9e47f99f 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -370,6 +370,8 @@ export interface ConfigParameters { model?: string; outputLanguageFilePath?: string; maxSessionTurns?: number; + /** Minutes of inactivity before clearing retained thinking blocks. */ + thinkingIdleThresholdMinutes?: number; sessionTokenLimit?: number; experimentalZedIntegration?: boolean; cronEnabled?: boolean; @@ -557,6 +559,7 @@ export class Config { private ideMode: boolean; private readonly maxSessionTurns: number; + private readonly thinkingIdleThresholdMs: number; private readonly sessionTokenLimit: number; private readonly listExtensions: boolean; private readonly overrideExtensions?: string[]; @@ -683,6 +686,8 @@ export class Config { this.fileDiscoveryService = params.fileDiscoveryService ?? null; this.bugCommand = params.bugCommand; this.maxSessionTurns = params.maxSessionTurns ?? -1; + this.thinkingIdleThresholdMs = + (params.thinkingIdleThresholdMinutes ?? 
5) * 60 * 1000; this.sessionTokenLimit = params.sessionTokenLimit ?? -1; this.experimentalZedIntegration = params.experimentalZedIntegration ?? false; @@ -1329,6 +1334,10 @@ export class Config { return this.maxSessionTurns; } + getThinkingIdleThresholdMs(): number { + return this.thinkingIdleThresholdMs; + } + getSessionTokenLimit(): number { return this.sessionTokenLimit; } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index b23b0ece9..9f7ead5c6 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => { getWorkingDir: vi.fn().mockReturnValue('/test/dir'), getFileService: vi.fn().mockReturnValue(fileService), getMaxSessionTurns: vi.fn().mockReturnValue(0), + getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000), getSessionTokenLimit: vi.fn().mockReturnValue(32000), getNoBrowser: vi.fn().mockReturnValue(false), getUsageStatisticsEnabled: vi.fn().mockReturnValue(true), @@ -448,9 +449,9 @@ describe('Gemini Client (client.ts)', () => { client['chat'] = mockChat as GeminiChat; }); - it('should not strip thoughts on active session (< 1h idle)', async () => { - // Simulate a recent API completion (5 minutes ago) - client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000; + it('should not strip thoughts on active session (< 5min idle)', async () => { + // Simulate a recent API completion (2 minutes ago — within default 5 min threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000; client['thinkingClearLatched'] = false; const gen = client.sendMessageStream( @@ -468,9 +469,9 @@ describe('Gemini Client (client.ts)', () => { ).not.toHaveBeenCalled(); }); - it('should latch and strip thoughts after > 1h idle', async () => { - // Simulate an old API completion (2 hours ago) - client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000; + it('should latch and strip thoughts 
after > 5min idle', async () => { + // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000; client['thinkingClearLatched'] = false; const gen = client.sendMessageStream( @@ -489,9 +490,9 @@ describe('Gemini Client (client.ts)', () => { ); }); - it('should keep stripping once latched even if idle < 1h', async () => { - // Pre-set latch with a recent timestamp - client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000; + it('should keep stripping once latched even if idle < 5min', async () => { + // Pre-set latch with a recent timestamp (2 minutes ago — within threshold) + client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000; client['thinkingClearLatched'] = true; const gen = client.sendMessageStream( diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 2d730e771..13fc86aaa 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -111,13 +111,6 @@ export interface SendMessageOptions { }; } -/** - * Idle threshold for thinking block cleanup. After this period without any - * API call the old thinking blocks are unlikely to aid reasoning coherence - * and only waste context tokens. - */ -const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour - export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; @@ -143,11 +136,11 @@ export class GeminiClient { /** * Sticky-on latch for clearing thinking blocks from prior turns. - * Triggered when >1h since last API call — old thinking is no longer - * useful for reasoning coherence. Once latched, stays true to prevent - * oscillation: without it, thinking would accumulate → get stripped → - * accumulate again, causing the message prefix to change repeatedly - * (bad for any provider-side prompt caching and wastes context). 
+ * Triggered when idle exceeds the configured threshold (default 5 min, + * aligned with provider prompt-cache TTL). Once latched, stays true to + * prevent oscillation: without it, thinking would accumulate → get + * stripped → accumulate again, causing the message prefix to change + * repeatedly (bad for provider-side prompt caching and wastes context). * Reset on /clear (resetChat). */ private thinkingClearLatched = false; @@ -567,18 +560,19 @@ export class GeminiClient { this.config.getChatRecordingService()?.recordUserMessage(request); // Thinking block cross-turn retention with idle cleanup: - // - Active session (< 1h idle): keep thinking blocks for reasoning coherence - // - Idle > 1h: clear old thinking, keep only last 1 turn to free context + // - Active session (< threshold idle): keep thinking blocks for reasoning coherence + // - Idle > threshold: clear old thinking, keep only last 1 turn to free context // - Latch: once triggered, never revert — prevents oscillation if ( !this.thinkingClearLatched && this.lastApiCompletionTimestamp !== null ) { + const thresholdMs = this.config.getThinkingIdleThresholdMs(); const idleMs = Date.now() - this.lastApiCompletionTimestamp; - if (idleMs > THINKING_IDLE_THRESHOLD_MS) { + if (idleMs > thresholdMs) { this.thinkingClearLatched = true; debugLogger.debug( - `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`, + `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`, ); } } diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index e9a0a1ecc..5fd6caf03 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -584,8 +584,9 @@ export class GeminiChat { * model turns) so the most recent reasoning chain is always preserved * even if later model turns happen to have no thinking. 
* - * Used for idle cleanup: after >1h idle the old thinking blocks are no - * longer useful for reasoning coherence but still consume context tokens. + * Used for idle cleanup: after exceeding the configured idle threshold + * the old thinking blocks are no longer useful for reasoning coherence + * but still consume context tokens. */ stripThoughtsFromHistoryKeepRecent(keepTurns: number): void { keepTurns = Number.isFinite(keepTurns) diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index 4f92b74d7..fdd4fbbb1 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -383,6 +383,11 @@ "default": true } } + }, + "gapThresholdMinutes": { + "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.", + "type": "number", + "default": 5 } } },