From 03d91f1ef1720e66aaf926c4b891e784041cf7ef Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Sun, 5 Apr 2026 06:35:42 +0800
Subject: [PATCH 1/5] feat(core): thinking block cross-turn retention with idle
 cleanup

Previously, all thinking blocks were unconditionally stripped from history
on every new user query. This caused loss of reasoning coherence in active
multi-turn sessions where thinking context was still valuable.

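Now thinking blocks are preserved during active sessions and are only
cleaned up after >1h idle (cache TTL expired), keeping the thinking of
the most recent thought-containing model turn. Once triggered, a
sticky-on latch keeps the cleanup active on every later user query
until resetChat(), so the newly-warmed cache prefix is not invalidated
by thinking accumulating and being stripped again.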

- Add `stripThoughtsFromHistoryKeepRecent(keepTurns)` to GeminiChat
- Add `lastApiCompletionTimestamp` and `thinkingClearLatched` to GeminiClient
- Replace unconditional strip with idle-aware logic in sendMessageStream
- Track API completion timestamp on all exit paths (success/error/loop)
- Reset latch and timestamp on resetChat()
- Add 5 unit tests for the new method, update 18 mock objects

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/core/client.test.ts     | 131 ++++++++++++++++++++
 packages/core/src/core/client.ts          |  55 ++++++++-
 packages/core/src/core/geminiChat.test.ts | 144 ++++++++++++++++++++++
 packages/core/src/core/geminiChat.ts      |  77 ++++++++++++
 4 files changed, 405 insertions(+), 2 deletions(-)
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index 19dd88bcf..b23b0ece9 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -427,6 +427,119 @@ describe('Gemini Client (client.ts)', () => {
     });
   });
 
+  describe('thinking block idle cleanup and latch', () => {
+    let mockChat: Partial<GeminiChat>;
+
+    beforeEach(() => {
+      const mockStream = (async function* () {
+        yield {
+          type: GeminiEventType.Content,
+          value: 'response',
+        };
+      })();
+      mockTurnRunFn.mockReturnValue(mockStream);
+
+      mockChat = {
+        addHistory: vi.fn(),
+        getHistory: vi.fn().mockReturnValue([]),
+        stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
+      };
+      client['chat'] = mockChat as GeminiChat;
+    });
+
+    it('should not strip thoughts on active session (< 1h idle)', async () => {
+      // Simulate a recent API completion (5 minutes ago)
+      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+      client['thinkingClearLatched'] = false;
+
+      const gen = client.sendMessageStream(
+        [{ text: 'Hello' }],
+        new AbortController().signal,
+        'prompt-1',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of gen) {
+        /* drain */
+      }
+
+      expect(
+        mockChat.stripThoughtsFromHistoryKeepRecent,
+      ).not.toHaveBeenCalled();
+    });
+
+    it('should latch and strip thoughts after > 1h idle', async () => {
+      // Simulate an old API completion (2 hours ago)
+      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000;
+      client['thinkingClearLatched'] = false;
+
+      const gen = client.sendMessageStream(
+        [{ text: 'Hello' }],
+        new AbortController().signal,
+        'prompt-2',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of gen) {
+        /* drain */
+      }
+
+      expect(client['thinkingClearLatched']).toBe(true);
+      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
+        1,
+      );
+    });
+
+    it('should keep stripping once latched even if idle < 1h', async () => {
+      // Pre-set latch with a recent timestamp
+      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+      client['thinkingClearLatched'] = true;
+
+      const gen = client.sendMessageStream(
+        [{ text: 'Hello' }],
+        new AbortController().signal,
+        'prompt-3',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of gen) {
+        /* drain */
+      }
+
+      expect(client['thinkingClearLatched']).toBe(true);
+      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
+        1,
+      );
+    });
+
+    it('should update lastApiCompletionTimestamp after API call', async () => {
+      client['lastApiCompletionTimestamp'] = null;
+
+      const before = Date.now();
+      const gen = client.sendMessageStream(
+        [{ text: 'Hello' }],
+        new AbortController().signal,
+        'prompt-4',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of gen) {
+        /* drain */
+      }
+
+      expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual(
+        before,
+      );
+    });
+
+    it('should reset latch and timestamp on resetChat', async () => {
+      client['lastApiCompletionTimestamp'] = Date.now();
+      client['thinkingClearLatched'] = true;
+
+      await client.resetChat();
+
+      expect(client['thinkingClearLatched']).toBe(false);
+      expect(client['lastApiCompletionTimestamp']).toBeNull();
+    });
+  });
+
   describe('tryCompressChat', () => {
     const mockGetHistory = vi.fn();
 
@@ -436,6 +549,7 @@ describe('Gemini Client (client.ts)', () => {
         addHistory: vi.fn(),
         setHistory: vi.fn(),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       } as unknown as GeminiChat;
     });
 
@@ -457,6 +571,7 @@ describe('Gemini Client (client.ts)', () => {
         getHistory: vi.fn((_curated?: boolean) => chatHistory),
         setHistory: vi.fn(),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockOriginalChat as GeminiChat;
 
@@ -1149,6 +1264,7 @@ describe('Gemini Client (client.ts)', () => {
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       } as unknown as GeminiChat;
       client['chat'] = mockChat;
 
@@ -1204,6 +1320,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1260,6 +1377,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1326,6 +1444,7 @@ hello
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1365,6 +1484,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1410,6 +1530,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1498,6 +1619,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1555,6 +1677,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -1636,6 +1759,7 @@ Other open files:
               { role: 'user', parts: [{ text: 'previous message' }] },
             ]),
           stripThoughtsFromHistory: vi.fn(),
+          stripThoughtsFromHistoryKeepRecent: vi.fn(),
         };
         client['chat'] = mockChat as GeminiChat;
       });
@@ -1889,6 +2013,7 @@ Other open files:
           getHistory: vi.fn().mockReturnValue([]), // Default empty history
           setHistory: vi.fn(),
           stripThoughtsFromHistory: vi.fn(),
+          stripThoughtsFromHistoryKeepRecent: vi.fn(),
         };
         client['chat'] = mockChat as GeminiChat;
 
@@ -2228,6 +2353,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -2265,6 +2391,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -2305,6 +2432,7 @@ Other open files:
         addHistory: vi.fn(),
         getHistory: vi.fn().mockReturnValue([]),
         stripThoughtsFromHistory: vi.fn(),
+        stripThoughtsFromHistoryKeepRecent: vi.fn(),
       };
       client['chat'] = mockChat as GeminiChat;
 
@@ -2329,6 +2457,7 @@ Other open files:
           getHistory: vi.fn().mockReturnValue([]),
           setHistory: vi.fn(),
           stripThoughtsFromHistory: vi.fn(),
+          stripThoughtsFromHistoryKeepRecent: vi.fn(),
           stripOrphanedUserEntriesFromHistory: vi.fn(),
         };
         client['chat'] = mockChat as GeminiChat;
@@ -2361,6 +2490,7 @@ Other open files:
           getHistory: vi.fn().mockReturnValue([]),
           setHistory: vi.fn(),
           stripThoughtsFromHistory: vi.fn(),
+          stripThoughtsFromHistoryKeepRecent: vi.fn(),
           stripOrphanedUserEntriesFromHistory: vi.fn(),
         };
         client['chat'] = mockChat as GeminiChat;
@@ -2405,6 +2535,7 @@ Other open files:
           addHistory: vi.fn(),
           getHistory: vi.fn().mockReturnValue([]),
           stripThoughtsFromHistory: vi.fn(),
+          stripThoughtsFromHistoryKeepRecent: vi.fn(),
         };
         client['chat'] = mockChat as GeminiChat;
       });
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 0f985364b..128e8f8ba 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -111,6 +111,13 @@ export interface SendMessageOptions {
   };
 }
 
+/**
+ * Idle threshold for thinking block cleanup. After this period without any
+ * API call the old thinking blocks are unlikely to aid reasoning coherence
+ * and only waste context tokens.
+ */
+const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
+
 export class GeminiClient {
   private chat?: GeminiChat;
   private sessionTurnCount = 0;
@@ -126,6 +133,25 @@ export class GeminiClient {
    */
   private hasFailedCompressionAttempt = false;
 
+  /**
+   * Timestamp (epoch ms) of the last completed API call.
+   * Used to detect idle periods for thinking block cleanup.
+   * Starts as null — on the first query there is no prior thinking to clean,
+   * so the idle check is skipped until the first API call completes.
+   */
+  private lastApiCompletionTimestamp: number | null = null;
+
+  /**
+   * Sticky-on latch for clearing thinking blocks from prior turns.
+   * Triggered when >1h since last API call — old thinking is no longer
+   * useful for reasoning coherence. Once latched, stays true to prevent
+   * oscillation: without it, thinking would accumulate → get stripped →
+   * accumulate again, causing the message prefix to change repeatedly
+   * (bad for any provider-side prompt caching and wastes context).
+   * Reset on /clear (resetChat).
+   */
+  private thinkingClearLatched = false;
+
   constructor(private readonly config: Config) {
     this.loopDetector = new LoopDetectionService(config);
   }
@@ -199,6 +225,9 @@ export class GeminiClient {
   }
 
   async resetChat(): Promise<void> {
+    // Reset thinking clear latch — fresh chat, no prior thinking to clean up
+    this.thinkingClearLatched = false;
+    this.lastApiCompletionTimestamp = null;
     await this.startChat();
   }
 
@@ -537,8 +566,24 @@ export class GeminiClient {
       // record user message for session management
       this.config.getChatRecordingService()?.recordUserMessage(request);
 
-      // strip thoughts from history before sending the message
-      this.stripThoughtsFromHistory();
+      // Thinking block cross-turn retention with idle cleanup:
+      // - Active session (< 1h idle): keep thinking blocks for reasoning coherence
+      // - Idle > 1h: clear old thinking, keep only last 1 turn to free context
+      // - Latch: once triggered, never revert — prevents oscillation
+      if (
+        !this.thinkingClearLatched &&
+        this.lastApiCompletionTimestamp !== null
+      ) {
+        if (
+          Date.now() - this.lastApiCompletionTimestamp >
+          THINKING_IDLE_THRESHOLD_MS
+        ) {
+          this.thinkingClearLatched = true;
+        }
+      }
+      if (this.thinkingClearLatched) {
+        this.getChat().stripThoughtsFromHistoryKeepRecent(1);
+      }
     }
     if (messageType !== SendMessageType.Retry) {
       this.sessionTurnCount++;
@@ -680,6 +725,7 @@ export class GeminiClient {
           if (arenaAgentClient) {
             await arenaAgentClient.reportError('Loop detected');
           }
+          this.lastApiCompletionTimestamp = Date.now();
           return turn;
         }
       }
@@ -698,9 +744,14 @@ export class GeminiClient {
               : 'Unknown error';
           await arenaAgentClient.reportError(errorMsg);
         }
+        this.lastApiCompletionTimestamp = Date.now();
         return turn;
       }
     }
+
+    // Track API completion time for thinking block idle cleanup
+    this.lastApiCompletionTimestamp = Date.now();
+
     // Fire Stop hook through MessageBus (only if hooks are enabled and registered)
     // This must be done before any early returns to ensure hooks are always triggered
     if (
diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts
index 4a4781388..9d9b45caf 100644
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -1767,6 +1767,150 @@ describe('GeminiChat', async () => {
     });
   });
 
+  describe('stripThoughtsFromHistoryKeepRecent', () => {
+    it('should keep the most recent N model turns with thoughts', () => {
+      chat.setHistory([
+        { role: 'user', parts: [{ text: 'msg1' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'old thinking', thought: true },
+            { text: 'response1' },
+          ],
+        },
+        { role: 'user', parts: [{ text: 'msg2' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'mid thinking', thought: true },
+            { text: 'response2' },
+          ],
+        },
+        { role: 'user', parts: [{ text: 'msg3' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'recent thinking', thought: true },
+            { text: 'response3' },
+          ],
+        },
+      ]);
+
+      chat.stripThoughtsFromHistoryKeepRecent(1);
+
+      const history = chat.getHistory();
+      // First two model turns should have thoughts stripped
+      expect(history[1]!.parts).toEqual([{ text: 'response1' }]);
+      expect(history[3]!.parts).toEqual([{ text: 'response2' }]);
+      // Last model turn should keep thoughts
+      expect(history[5]!.parts).toEqual([
+        { text: 'recent thinking', thought: true },
+        { text: 'response3' },
+      ]);
+    });
+
+    it('should not strip anything when keepTurns >= model turns with thoughts', () => {
+      chat.setHistory([
+        { role: 'user', parts: [{ text: 'msg1' }] },
+        {
+          role: 'model',
+          parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
+        },
+      ]);
+
+      chat.stripThoughtsFromHistoryKeepRecent(1);
+
+      const history = chat.getHistory();
+      expect(history[1]!.parts).toEqual([
+        { text: 'thinking', thought: true },
+        { text: 'response' },
+      ]);
+    });
+
+    it('should remove model content objects that become empty after stripping', () => {
+      chat.setHistory([
+        { role: 'user', parts: [{ text: 'msg1' }] },
+        {
+          role: 'model',
+          parts: [{ text: 'only thinking', thought: true }],
+        },
+        { role: 'user', parts: [{ text: 'msg2' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'recent thinking', thought: true },
+            { text: 'response' },
+          ],
+        },
+      ]);
+
+      chat.stripThoughtsFromHistoryKeepRecent(1);
+
+      const history = chat.getHistory();
+      // The first model turn (only thoughts) should be removed entirely
+      expect(history).toHaveLength(3);
+      expect(history[0]!.parts).toEqual([{ text: 'msg1' }]);
+      expect(history[1]!.parts).toEqual([{ text: 'msg2' }]);
+      expect(history[2]!.parts).toEqual([
+        { text: 'recent thinking', thought: true },
+        { text: 'response' },
+      ]);
+    });
+
+    it('should also strip thoughtSignature from stripped turns', () => {
+      chat.setHistory([
+        { role: 'user', parts: [{ text: 'msg1' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'old thinking', thought: true },
+            {
+              text: 'with sig',
+              thoughtSignature: 'sig1',
+            } as unknown as { text: string; thoughtSignature: string },
+            { text: 'response1' },
+          ],
+        },
+        { role: 'user', parts: [{ text: 'msg2' }] },
+        {
+          role: 'model',
+          parts: [
+            { text: 'recent thinking', thought: true },
+            { text: 'response2' },
+          ],
+        },
+      ]);
+
+      chat.stripThoughtsFromHistoryKeepRecent(1);
+
+      const history = chat.getHistory();
+      // First model turn: thought stripped, thoughtSignature stripped
+      expect(history[1]!.parts).toEqual([
+        { text: 'with sig' },
+        { text: 'response1' },
+      ]);
+      expect(
+        (history[1]!.parts![0] as { thoughtSignature?: string })
+          .thoughtSignature,
+      ).toBeUndefined();
+    });
+
+    it('should handle keepTurns=0 by stripping all thoughts', () => {
+      chat.setHistory([
+        { role: 'user', parts: [{ text: 'msg1' }] },
+        {
+          role: 'model',
+          parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
+        },
+      ]);
+
+      chat.stripThoughtsFromHistoryKeepRecent(0);
+
+      const history = chat.getHistory();
+      expect(history[1]!.parts).toEqual([{ text: 'response' }]);
+    });
+  });
+
   describe('stripOrphanedUserEntriesFromHistory', () => {
     it('should pop a single trailing user entry', () => {
       chat.setHistory([
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 12dfcb080..f622e16ca 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -576,6 +576,83 @@ export class GeminiChat {
       .filter((content) => content.parts && content.parts.length > 0);
   }
 
+  /**
+   * Strip thought parts from history, keeping the most recent `keepTurns`
+   * model turns' thinking blocks intact.
+   *
+   * Used for idle cleanup: after >1h idle the old thinking blocks are no
+   * longer useful for reasoning coherence but still consume context tokens.
+   * Keeping the most recent turn preserves the latest reasoning chain.
+   */
+  stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
+    keepTurns = Math.max(0, Math.floor(keepTurns));
+
+    // Find indices of model turns that contain thought parts
+    const modelTurnIndices: number[] = [];
+    for (let i = 0; i < this.history.length; i++) {
+      const content = this.history[i];
+      if (
+        content.role === 'model' &&
+        content.parts?.some(
+          (part) =>
+            part &&
+            typeof part === 'object' &&
+            'thought' in part &&
+            part.thought,
+        )
+      ) {
+        modelTurnIndices.push(i);
+      }
+    }
+
+    // Determine which model turns to keep (the most recent `keepTurns`)
+    const turnsToStrip = new Set(
+      modelTurnIndices.slice(
+        0,
+        Math.max(0, modelTurnIndices.length - keepTurns),
+      ),
+    );
+
+    if (turnsToStrip.size === 0) return;
+
+    this.history = this.history
+      .map((content, index) => {
+        if (!turnsToStrip.has(index) || !content.parts) return content;
+
+        // Strip thought parts from this turn
+        const filteredParts = content.parts
+          .filter(
+            (part) =>
+              !(
+                part &&
+                typeof part === 'object' &&
+                'thought' in part &&
+                part.thought
+              ),
+          )
+          .map((part) => {
+            if (
+              part &&
+              typeof part === 'object' &&
+              'thoughtSignature' in part
+            ) {
+              const newPart = { ...part };
+              delete (newPart as { thoughtSignature?: string })
+                .thoughtSignature;
+              return newPart;
+            }
+            return part;
+          });
+
+        return {
+          ...content,
+          parts: filteredParts,
+        };
+      })
+      // Remove Content objects that have no parts left after filtering
+      .filter((content) => content.parts && content.parts.length > 0);
+  }
+
   /**
    * Pop all orphaned trailing user entries from chat history.
    * In a valid conversation the last entry is always a model response;

From e383bcebb26f2b65083266664b6ac05d33cfb082 Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Sun, 5 Apr 2026 07:20:01 +0800
Subject: [PATCH 2/5] docs: clarify stripThoughtsFromHistoryKeepRecent JSDoc
 semantics

Explicitly document that keepTurns counts only thought-containing model
turns (not all model turns), so the most recent reasoning chain is
preserved even if later model turns have no thinking.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/core/geminiChat.ts | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index f622e16ca..4c3f39c7e 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -578,11 +578,14 @@ export class GeminiChat {
 
   /**
    * Strip thought parts from history, keeping the most recent `keepTurns`
-   * model turns' thinking blocks intact.
+   * model turns that contain thinking blocks intact.
+   *
+   * Selection is based on thought-containing turns specifically (not all
+   * model turns) so the most recent reasoning chain is always preserved
+   * even if later model turns happen to have no thinking.
    *
    * Used for idle cleanup: after >1h idle the old thinking blocks are no
    * longer useful for reasoning coherence but still consume context tokens.
-   * Keeping the most recent turn preserves the latest reasoning chain.
    */
   stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
     keepTurns = Math.max(0, Math.floor(keepTurns));

From 04ce8faff4215843638ccb445b5a2a1cc18bdf4f Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Sun, 5 Apr 2026 11:03:55 +0800
Subject: [PATCH 3/5] fix: handle NaN/non-finite keepTurns in
 stripThoughtsFromHistoryKeepRecent

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/core/geminiChat.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 4c3f39c7e..e9a0a1ecc 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -588,7 +588,9 @@ export class GeminiChat {
    * longer useful for reasoning coherence but still consume context tokens.
    */
   stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
-    keepTurns = Math.max(0, Math.floor(keepTurns));
+    keepTurns = Number.isFinite(keepTurns)
+      ? Math.max(0, Math.floor(keepTurns))
+      : 0;
 
     // Find indices of model turns that contain thought parts
     const modelTurnIndices: number[] = [];

From 4bbdfee911562fba3bb31422c7d7a64525193436 Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Sun, 5 Apr 2026 12:14:42 +0800
Subject: [PATCH 4/5] chore: add debug logging for thinking block idle cleanup

Log when the latch triggers and when old thinking blocks are stripped,
making the behavior observable via QWEN_CODE_DEBUG=CLIENT.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/core/client.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 128e8f8ba..2d730e771 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -574,15 +574,17 @@ export class GeminiClient {
         !this.thinkingClearLatched &&
         this.lastApiCompletionTimestamp !== null
       ) {
-        if (
-          Date.now() - this.lastApiCompletionTimestamp >
-          THINKING_IDLE_THRESHOLD_MS
-        ) {
+        const idleMs = Date.now() - this.lastApiCompletionTimestamp;
+        if (idleMs > THINKING_IDLE_THRESHOLD_MS) {
           this.thinkingClearLatched = true;
+          debugLogger.debug(
+            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`,
+          );
         }
       }
       if (this.thinkingClearLatched) {
         this.getChat().stripThoughtsFromHistoryKeepRecent(1);
+        debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
       }
     }
     if (messageType !== SendMessageType.Retry) {

From 6a55a9aeea1ec9cdb1f71567b937970c0b68e5b6 Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Wed, 8 Apr 2026 14:21:06 +0800
Subject: [PATCH 5/5] feat(config): make thinking idle threshold configurable
 and lower default to 5min

Align with observed provider prompt-cache TTL (~5 min). Add
`context.gapThresholdMinutes` setting so users can tune the threshold
for providers with different cache TTLs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/users/configuration/settings.md          |  1 +
 packages/cli/src/config/config.ts             |  1 +
 packages/cli/src/config/settingsSchema.ts     | 10 +++++++
 packages/core/src/config/config.ts            |  9 +++++++
 packages/core/src/core/client.test.ts         | 19 +++++++-------
 packages/core/src/core/client.ts              | 26 +++++++------------
 packages/core/src/core/geminiChat.ts          |  5 ++--
 .../schemas/settings.schema.json              |  5 ++++
 8 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
index 9389ba8f5..f27140fec 100644
--- a/docs/users/configuration/settings.md
+++ b/docs/users/configuration/settings.md
@@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s
 | `context.fileFiltering.respectQwenIgnore`         | boolean                    | Respect .qwenignore files when searching.                                                                                                                                                                                                                                                                                                                             | `true`      |
 | `context.fileFiltering.enableRecursiveFileSearch` | boolean                    | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt.                                                                                                                                                                                                                                              | `true`      |
 | `context.fileFiltering.enableFuzzySearch`         | boolean                    | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files.                                                                                                                                                                                                              | `true`      |
+| `context.gapThresholdMinutes`                     | number                     | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL.                                                                                                                                                                     | `5`         |
 
 #### Troubleshooting File Search Performance
 
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 3d4ed84e2..2b64685f7 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -1069,6 +1069,7 @@ export async function loadCliConfig(
     telemetry: telemetrySettings,
     usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
     fileFiltering: settings.context?.fileFiltering,
+    thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
     checkpointing:
       argv.checkpointing || settings.general?.checkpointing?.enabled,
     proxy:
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index e765dd801..4645d5803 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -914,6 +914,16 @@ const SETTINGS_SCHEMA = {
           },
         },
       },
+      gapThresholdMinutes: {
+        type: 'number',
+        label: 'Thinking Block Idle Threshold (minutes)',
+        category: 'Context',
+        requiresRestart: false,
+        default: 5,
+        description:
+          'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
+        showInDialog: false,
+      },
     },
   },
 
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index c54b55705..a9e47f99f 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -370,6 +370,8 @@ export interface ConfigParameters {
   model?: string;
   outputLanguageFilePath?: string;
   maxSessionTurns?: number;
+  /** Minutes of inactivity before clearing retained thinking blocks. */
+  thinkingIdleThresholdMinutes?: number;
   sessionTokenLimit?: number;
   experimentalZedIntegration?: boolean;
   cronEnabled?: boolean;
@@ -557,6 +559,7 @@ export class Config {
   private ideMode: boolean;
 
   private readonly maxSessionTurns: number;
+  private readonly thinkingIdleThresholdMs: number;
   private readonly sessionTokenLimit: number;
   private readonly listExtensions: boolean;
   private readonly overrideExtensions?: string[];
@@ -683,6 +686,8 @@ export class Config {
     this.fileDiscoveryService = params.fileDiscoveryService ?? null;
     this.bugCommand = params.bugCommand;
     this.maxSessionTurns = params.maxSessionTurns ?? -1;
+    this.thinkingIdleThresholdMs =
+      (params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
     this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
     this.experimentalZedIntegration =
       params.experimentalZedIntegration ?? false;
@@ -1329,6 +1334,10 @@ export class Config {
     return this.maxSessionTurns;
   }
 
+  getThinkingIdleThresholdMs(): number {
+    return this.thinkingIdleThresholdMs;
+  }
+
   getSessionTokenLimit(): number {
     return this.sessionTokenLimit;
   }
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index b23b0ece9..9f7ead5c6 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
       getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
       getFileService: vi.fn().mockReturnValue(fileService),
       getMaxSessionTurns: vi.fn().mockReturnValue(0),
+      getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
       getSessionTokenLimit: vi.fn().mockReturnValue(32000),
       getNoBrowser: vi.fn().mockReturnValue(false),
       getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@@ -448,9 +449,9 @@ describe('Gemini Client (client.ts)', () => {
       client['chat'] = mockChat as GeminiChat;
     });
 
-    it('should not strip thoughts on active session (< 1h idle)', async () => {
-      // Simulate a recent API completion (5 minutes ago)
-      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+    it('should not strip thoughts on active session (< 5min idle)', async () => {
+      // Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
       client['thinkingClearLatched'] = false;
 
       const gen = client.sendMessageStream(
@@ -468,9 +469,9 @@ describe('Gemini Client (client.ts)', () => {
       ).not.toHaveBeenCalled();
     });
 
-    it('should latch and strip thoughts after > 1h idle', async () => {
-      // Simulate an old API completion (2 hours ago)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000;
+    it('should latch and strip thoughts after > 5min idle', async () => {
+      // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
       client['thinkingClearLatched'] = false;
 
       const gen = client.sendMessageStream(
@@ -489,9 +490,9 @@ describe('Gemini Client (client.ts)', () => {
       );
     });
 
-    it('should keep stripping once latched even if idle < 1h', async () => {
-      // Pre-set latch with a recent timestamp
-      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+    it('should keep stripping once latched even if idle < 5min', async () => {
+      // Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
       client['thinkingClearLatched'] = true;
 
       const gen = client.sendMessageStream(
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 2d730e771..13fc86aaa 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -111,13 +111,6 @@ export interface SendMessageOptions {
   };
 }
 
-/**
- * Idle threshold for thinking block cleanup. After this period without any
- * API call the old thinking blocks are unlikely to aid reasoning coherence
- * and only waste context tokens.
- */
-const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
-
 export class GeminiClient {
   private chat?: GeminiChat;
   private sessionTurnCount = 0;
@@ -143,11 +136,11 @@ export class GeminiClient {
 
   /**
    * Sticky-on latch for clearing thinking blocks from prior turns.
-   * Triggered when >1h since last API call — old thinking is no longer
-   * useful for reasoning coherence. Once latched, stays true to prevent
-   * oscillation: without it, thinking would accumulate → get stripped →
-   * accumulate again, causing the message prefix to change repeatedly
-   * (bad for any provider-side prompt caching and wastes context).
+   * Triggered when idle time exceeds the configured threshold (default 5 min,
+   * aligned with provider prompt-cache TTL). Once latched, stays true to
+   * prevent oscillation: without it, thinking would accumulate → get
+   * stripped → accumulate again, causing the message prefix to change
+   * repeatedly (bad for provider-side prompt caching and wastes context).
    * Reset on /clear (resetChat).
    */
   private thinkingClearLatched = false;
@@ -567,18 +560,19 @@ export class GeminiClient {
       this.config.getChatRecordingService()?.recordUserMessage(request);
 
       // Thinking block cross-turn retention with idle cleanup:
-      // - Active session (< 1h idle): keep thinking blocks for reasoning coherence
-      // - Idle > 1h: clear old thinking, keep only last 1 turn to free context
+      // - Active session (idle <= threshold): keep thinking blocks for reasoning coherence
+      // - Idle > threshold: clear old thinking, keep only the most recent turn to free context
       // - Latch: once triggered, never revert — prevents oscillation
       if (
         !this.thinkingClearLatched &&
         this.lastApiCompletionTimestamp !== null
       ) {
+        const thresholdMs = this.config.getThinkingIdleThresholdMs();
         const idleMs = Date.now() - this.lastApiCompletionTimestamp;
-        if (idleMs > THINKING_IDLE_THRESHOLD_MS) {
+        if (idleMs > thresholdMs) {
           this.thinkingClearLatched = true;
           debugLogger.debug(
-            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`,
+            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
           );
         }
       }
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index e9a0a1ecc..5fd6caf03 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -584,8 +584,9 @@ export class GeminiChat {
    * model turns) so the most recent reasoning chain is always preserved
    * even if later model turns happen to have no thinking.
    *
-   * Used for idle cleanup: after >1h idle the old thinking blocks are no
-   * longer useful for reasoning coherence but still consume context tokens.
+   * Used for idle cleanup: once the session has been idle for longer than
+   * the configured threshold, old thinking blocks no longer help reasoning
+   * coherence but still consume context tokens.
    */
   stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
     keepTurns = Number.isFinite(keepTurns)
diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json
index 4f92b74d7..fdd4fbbb1 100644
--- a/packages/vscode-ide-companion/schemas/settings.schema.json
+++ b/packages/vscode-ide-companion/schemas/settings.schema.json
@@ -383,6 +383,11 @@
               "default": true
             }
           }
+        },
+        "gapThresholdMinutes": {
+          "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
+          "type": "number",
+          "default": 5
         }
       }
     },