fix(core): preserve reasoning_content during session resume and active sessions (GH#3579) (#3590)

* fix(core): preserve reasoning_content during session resume and active sessions (GH#3579)
* chore(core): remove dead thinkingThresholdMinutes config after latch removal (GH#3579)
2026-04-28 03:30:40 +00:00 · 2026-04-24 17:49:05 +08:00 · 2026-04-24 17:49:05 +08:00 · 93cbad24b1
commit 93cbad24b1
parent e47b22806b
9 changed files with 126 additions and 126 deletions
--- a/docs/users/configuration/settings.md
+++ b/docs/users/configuration/settings.md
@@ -223,7 +223,6 @@ The `extra_body` field allows you to add custom parameters to the request body s
 | `context.fileFiltering.respectQwenIgnore`                | boolean                    | Respect .qwenignore files when searching.                                                                                                                                                                                                                                                                                                                             | `true`      |
 | `context.fileFiltering.enableRecursiveFileSearch`        | boolean                    | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt.                                                                                                                                                                                                                                              | `true`      |
 | `context.fileFiltering.enableFuzzySearch`                | boolean                    | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files.                                                                                                                                                                                                              | `true`      |
-| `context.clearContextOnIdle.thinkingThresholdMinutes`    | number                     | Minutes of inactivity before clearing old thinking blocks to free context tokens. Aligns with typical provider prompt-cache TTL. Use `-1` to disable.                                                                                                                                                                                                                 | `5`         |
 | `context.clearContextOnIdle.toolResultsThresholdMinutes` | number                     | Minutes of inactivity before clearing old tool result content. Use `-1` to disable.                                                                                                                                                                                                                                                                                   | `60`        |
 | `context.clearContextOnIdle.toolResultsNumToKeep`        | number                     | Number of most-recent compactable tool results to preserve when clearing. Floor at 1.                                                                                                                                                                                                                                                                                 | `5`         |

--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -1012,16 +1012,6 @@ const SETTINGS_SCHEMA = {
          'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.',
        showInDialog: false,
        properties: {
-          thinkingThresholdMinutes: {
-            type: 'number',
-            label: 'Thinking Idle Threshold (minutes)',
-            category: 'Context',
-            requiresRestart: false,
-            default: 5 as number,
-            description:
-              'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.',
-            showInDialog: false,
-          },
          toolResultsThresholdMinutes: {
            type: 'number',
            label: 'Tool Results Idle Threshold (minutes)',
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -203,8 +203,6 @@ export interface ChatCompressionSettings {
 * Threshold values of -1 mean "never clear" (disabled).
 */
 export interface ClearContextOnIdleSettings {
-  /** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */
-  thinkingThresholdMinutes?: number;
  /** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */
  toolResultsThresholdMinutes?: number;
  /** Number of most-recent tool results to preserve. Default 5. */
@@ -766,8 +764,6 @@ export class Config {
    this.bugCommand = params.bugCommand;
    this.maxSessionTurns = params.maxSessionTurns ?? -1;
    this.clearContextOnIdle = {
-      thinkingThresholdMinutes:
-        params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5,
      toolResultsThresholdMinutes:
        params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60,
      toolResultsNumToKeep:
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -347,7 +347,6 @@ describe('Gemini Client (client.ts)', () => {
      getFileService: vi.fn().mockReturnValue(fileService),
      getMaxSessionTurns: vi.fn().mockReturnValue(0),
      getClearContextOnIdle: vi.fn().mockReturnValue({
-        thinkingThresholdMinutes: 5,
        toolResultsThresholdMinutes: 60,
        toolResultsNumToKeep: 5,
      }),
@@ -473,73 +472,10 @@ describe('Gemini Client (client.ts)', () => {
        addHistory: vi.fn(),
        getHistory: vi.fn().mockReturnValue([]),
        stripThoughtsFromHistory: vi.fn(),
-        stripThoughtsFromHistoryKeepRecent: vi.fn(),
      };
      client['chat'] = mockChat as GeminiChat;
    });

-    it('should not strip thoughts on active session (< 5min idle)', async () => {
-      // Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
-      client['thinkingClearLatched'] = false;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-1',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(
-        mockChat.stripThoughtsFromHistoryKeepRecent,
-      ).not.toHaveBeenCalled();
-    });
-
-    it('should latch and strip thoughts after > 5min idle', async () => {
-      // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
-      client['thinkingClearLatched'] = false;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-2',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(client['thinkingClearLatched']).toBe(true);
-      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
-        1,
-      );
-    });
-
-    it('should keep stripping once latched even if idle < 5min', async () => {
-      // Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
-      client['thinkingClearLatched'] = true;
-
-      const gen = client.sendMessageStream(
-        [{ text: 'Hello' }],
-        new AbortController().signal,
-        'prompt-3',
-        { type: SendMessageType.UserQuery },
-      );
-      for await (const _ of gen) {
-        /* drain */
-      }
-
-      expect(client['thinkingClearLatched']).toBe(true);
-      expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
-        1,
-      );
-    });
-
    it('should update lastApiCompletionTimestamp after API call', async () => {
      client['lastApiCompletionTimestamp'] = null;

@@ -559,13 +495,11 @@ describe('Gemini Client (client.ts)', () => {
      );
    });

-    it('should reset latch and timestamp on resetChat', async () => {
+    it('should reset lastApiCompletionTimestamp on resetChat', async () => {
      client['lastApiCompletionTimestamp'] = Date.now();
-      client['thinkingClearLatched'] = true;

      await client.resetChat();

-      expect(client['thinkingClearLatched']).toBe(false);
      expect(client['lastApiCompletionTimestamp']).toBeNull();
    });
  });
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -156,17 +156,6 @@ export class GeminiClient {
   */
  private lastApiCompletionTimestamp: number | null = null;

-  /**
-   * Sticky-on latch for clearing thinking blocks from prior turns.
-   * Triggered when idle exceeds the configured threshold (default 5 min,
-   * aligned with provider prompt-cache TTL). Once latched, stays true to
-   * prevent oscillation: without it, thinking would accumulate → get
-   * stripped → accumulate again, causing the message prefix to change
-   * repeatedly (bad for provider-side prompt caching and wastes context).
-   * Reset on /clear (resetChat).
-   */
-  private thinkingClearLatched = false;
-
  constructor(private readonly config: Config) {
    this.loopDetector = new LoopDetectionService(config);
  }
@@ -242,8 +231,6 @@ export class GeminiClient {

  async resetChat(): Promise<void> {
    this.surfacedRelevantAutoMemoryPaths.clear();
-    // Reset thinking clear latch — fresh chat, no prior thinking to clean up
-    this.thinkingClearLatched = false;
    this.lastApiCompletionTimestamp = null;
    await this.startChat();
  }
@@ -695,29 +682,6 @@ export class GeminiClient {
        this.config.getChatRecordingService()?.recordUserMessage(request);
      }

-      // Idle cleanup: clear stale thinking blocks after idle period.
-      // Latch: once triggered, never revert — prevents oscillation.
-      const idleConfig = this.config.getClearContextOnIdle();
-      const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5;
-      if (
-        thinkingThresholdMin >= 0 &&
-        !this.thinkingClearLatched &&
-        this.lastApiCompletionTimestamp !== null
-      ) {
-        const thresholdMs = thinkingThresholdMin * 60 * 1000;
-        const idleMs = Date.now() - this.lastApiCompletionTimestamp;
-        if (idleMs > thresholdMs) {
-          this.thinkingClearLatched = true;
-          debugLogger.debug(
-            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
-          );
-        }
-      }
-      if (this.thinkingClearLatched) {
-        this.getChat().stripThoughtsFromHistoryKeepRecent(1);
-        debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
-      }
-
      // Idle cleanup: clear old tool results when idle > threshold.
      // Runs on user and cron messages (not tool result submissions or
      // retries/hooks) so that model latency during a tool-call loop
--- a/packages/core/src/services/microcompaction/microcompact.test.ts
+++ b/packages/core/src/services/microcompaction/microcompact.test.ts
@@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {
 }

 const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {
-  thinkingThresholdMinutes: 5,
  toolResultsThresholdMinutes: 5,
  toolResultsNumToKeep: 1,
 };
--- a/packages/core/src/services/sessionService.test.ts
+++ b/packages/core/src/services/sessionService.test.ts
@@ -783,5 +783,126 @@ describe('SessionService', () => {
        postCompressionRecord.message,
      ]);
    });
+
+    it('should preserve thought parts by default (stripThoughtsFromHistory=false)', () => {
+      const modelWithThought: ChatRecord = {
+        uuid: 't1',
+        parentUuid: 'a1',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-01T01:00:00Z',
+        type: 'assistant',
+        message: {
+          role: 'model',
+          parts: [
+            { text: 'reasoning step', thought: true },
+            { text: 'final answer' },
+          ],
+        },
+        cwd: '/test/project/root',
+        version: '1.0.0',
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-01T01:00:00Z',
+        messages: [recordA1, modelWithThought],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation);
+
+      // Thought parts should be preserved by default
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([
+        { text: 'reasoning step', thought: true },
+        { text: 'final answer' },
+      ]);
+    });
+
+    it('should strip thought parts when stripThoughtsFromHistory=true', () => {
+      const modelWithThought: ChatRecord = {
+        uuid: 't1',
+        parentUuid: 'a1',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-01T01:00:00Z',
+        type: 'assistant',
+        message: {
+          role: 'model',
+          parts: [
+            { text: 'reasoning step', thought: true },
+            { text: 'final answer' },
+          ],
+        },
+        cwd: '/test/project/root',
+        version: '1.0.0',
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-01T01:00:00Z',
+        messages: [recordA1, modelWithThought],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation, {
+        stripThoughtsFromHistory: true,
+      });
+
+      // Thought parts should be stripped
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([{ text: 'final answer' }]);
+    });
+
+    it('should preserve thought parts in compressed history by default', () => {
+      const compressionRecord: ChatRecord = {
+        uuid: 'c1',
+        parentUuid: 'b2',
+        sessionId: sessionIdA,
+        timestamp: '2024-01-02T03:00:00Z',
+        type: 'system',
+        subtype: 'chat_compression',
+        cwd: '/test/project/root',
+        version: '1.0.0',
+        gitBranch: 'main',
+        systemPayload: {
+          info: {
+            originalTokenCount: 100,
+            newTokenCount: 50,
+            compressionStatus: CompressionStatus.COMPRESSED,
+          },
+          compressedHistory: [
+            { role: 'user', parts: [{ text: 'summary' }] },
+            {
+              role: 'model',
+              parts: [
+                { text: 'deep thinking', thought: true },
+                { text: 'final answer' },
+              ],
+            },
+          ],
+        },
+      };
+
+      const conversation: ConversationRecord = {
+        sessionId: sessionIdA,
+        projectHash: 'test-project-hash',
+        startTime: '2024-01-01T00:00:00Z',
+        lastUpdated: '2024-01-02T03:00:00Z',
+        messages: [recordA1, recordB2, compressionRecord],
+      };
+
+      const history = buildApiHistoryFromConversation(conversation);
+
+      // Thought parts should be preserved in compressed history by default.
+      // The compressedHistory has 2 entries (user, model), and no messages
+      // exist after the compression record, so the result is 2 items.
+      expect(history).toHaveLength(2);
+      expect(history[1].parts).toEqual([
+        { text: 'deep thinking', thought: true },
+        { text: 'final answer' },
+      ]);
+    });
  });
 });
--- a/packages/core/src/services/sessionService.ts
+++ b/packages/core/src/services/sessionService.ts
@@ -865,7 +865,9 @@ export interface BuildApiHistoryOptions {
  /**
   * Whether to strip thought parts from the history.
   * Thought parts are content parts that have `thought: true`.
-   * @default true
+   * Keeping thoughts ensures `reasoning_content` from reasoning models
+   * (e.g. DeepSeek) is properly passed back in subsequent API calls.
+   * @default false
   */
  stripThoughtsFromHistory?: boolean;
 }
@@ -906,7 +908,7 @@ export function buildApiHistoryFromConversation(
  conversation: ConversationRecord,
  options: BuildApiHistoryOptions = {},
 ): Content[] {
-  const { stripThoughtsFromHistory = true } = options;
+  const { stripThoughtsFromHistory = false } = options;
  const { messages } = conversation;

  let lastCompressionIndex = -1;
--- a/packages/vscode-ide-companion/schemas/settings.schema.json
+++ b/packages/vscode-ide-companion/schemas/settings.schema.json
@@ -402,11 +402,6 @@
          "description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.",
          "type": "object",
          "properties": {
-            "thinkingThresholdMinutes": {
-              "description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.",
-              "type": "number",
-              "default": 5
-            },
            "toolResultsThresholdMinutes": {
              "description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.",
              "type": "number",