fix(core): preserve reasoning_content during session resume and active sessions (GH#3579) (#3590)

* fix(core): preserve reasoning_content during session resume and active sessions (GH#3579)

* chore(core): remove dead thinkingThresholdMinutes config after latch removal (GH#3579)
This commit is contained in:
Fu Yuchen 2026-04-24 17:49:05 +08:00 committed by GitHub
parent e47b22806b
commit 93cbad24b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 126 additions and 126 deletions

View file

@ -223,7 +223,6 @@ The `extra_body` field allows you to add custom parameters to the request body s
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
| `context.clearContextOnIdle.thinkingThresholdMinutes` | number | Minutes of inactivity before clearing old thinking blocks to free context tokens. Aligns with typical provider prompt-cache TTL. Use `-1` to disable. | `5` |
| `context.clearContextOnIdle.toolResultsThresholdMinutes` | number | Minutes of inactivity before clearing old tool result content. Use `-1` to disable. | `60` |
| `context.clearContextOnIdle.toolResultsNumToKeep` | number | Number of most-recent compactable tool results to preserve when clearing. Floor at 1. | `5` |

View file

@ -1012,16 +1012,6 @@ const SETTINGS_SCHEMA = {
'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.',
showInDialog: false,
properties: {
thinkingThresholdMinutes: {
type: 'number',
label: 'Thinking Idle Threshold (minutes)',
category: 'Context',
requiresRestart: false,
default: 5 as number,
description:
'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.',
showInDialog: false,
},
toolResultsThresholdMinutes: {
type: 'number',
label: 'Tool Results Idle Threshold (minutes)',

View file

@ -203,8 +203,6 @@ export interface ChatCompressionSettings {
* Threshold values of -1 mean "never clear" (disabled).
*/
export interface ClearContextOnIdleSettings {
/** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */
thinkingThresholdMinutes?: number;
/** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */
toolResultsThresholdMinutes?: number;
/** Number of most-recent tool results to preserve. Default 5. */
@ -766,8 +764,6 @@ export class Config {
this.bugCommand = params.bugCommand;
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.clearContextOnIdle = {
thinkingThresholdMinutes:
params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5,
toolResultsThresholdMinutes:
params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60,
toolResultsNumToKeep:

View file

@ -347,7 +347,6 @@ describe('Gemini Client (client.ts)', () => {
getFileService: vi.fn().mockReturnValue(fileService),
getMaxSessionTurns: vi.fn().mockReturnValue(0),
getClearContextOnIdle: vi.fn().mockReturnValue({
thinkingThresholdMinutes: 5,
toolResultsThresholdMinutes: 60,
toolResultsNumToKeep: 5,
}),
@ -473,73 +472,10 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
// Active-session case: the previous API completion is recent and the latch is
// off, so sending a message must not request any thought stripping.
it('should not strip thoughts on active session (< 5min idle)', async () => {
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-1',
{ type: SendMessageType.UserQuery },
);
// Consume the whole stream so the send path has fully run before asserting.
for await (const _ of gen) {
/* drain */
}
// The mocked chat's keep-recent stripping helper must never have been invoked.
expect(
mockChat.stripThoughtsFromHistoryKeepRecent,
).not.toHaveBeenCalled();
});
// Idle case: the previous API completion is older than the threshold, so the
// send is expected to set the latch and strip thoughts, keeping 1 recent turn.
it('should latch and strip thoughts after > 5min idle', async () => {
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-2',
{ type: SendMessageType.UserQuery },
);
// Consume the whole stream so the send path has fully run before asserting.
for await (const _ of gen) {
/* drain */
}
// The latch flips to true and the stripping helper is called with keep-count 1.
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
// Sticky-latch case: the latch is already set, so stripping is still expected
// even though the previous API completion is recent.
it('should keep stripping once latched even if idle < 5min', async () => {
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = true;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-3',
{ type: SendMessageType.UserQuery },
);
// Consume the whole stream so the send path has fully run before asserting.
for await (const _ of gen) {
/* drain */
}
// The latch stays true and the stripping helper is still called with keep-count 1.
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
it('should update lastApiCompletionTimestamp after API call', async () => {
client['lastApiCompletionTimestamp'] = null;
@ -559,13 +495,11 @@ describe('Gemini Client (client.ts)', () => {
);
});
it('should reset latch and timestamp on resetChat', async () => {
it('should reset lastApiCompletionTimestamp on resetChat', async () => {
client['lastApiCompletionTimestamp'] = Date.now();
client['thinkingClearLatched'] = true;
await client.resetChat();
expect(client['thinkingClearLatched']).toBe(false);
expect(client['lastApiCompletionTimestamp']).toBeNull();
});
});

View file

@ -156,17 +156,6 @@ export class GeminiClient {
*/
private lastApiCompletionTimestamp: number | null = null;
/**
* Sticky-on latch for clearing thinking blocks from prior turns.
* Triggered when idle exceeds the configured threshold (default 5 min,
* aligned with provider prompt-cache TTL). Once latched, stays true to
* prevent oscillation: without it, thinking would accumulate → get
* stripped → accumulate again, causing the message prefix to change
* repeatedly (bad for provider-side prompt caching and wastes context).
* Reset on /clear (resetChat).
*/
private thinkingClearLatched = false;
constructor(private readonly config: Config) {
this.loopDetector = new LoopDetectionService(config);
}
@ -242,8 +231,6 @@ export class GeminiClient {
async resetChat(): Promise<void> {
this.surfacedRelevantAutoMemoryPaths.clear();
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
this.thinkingClearLatched = false;
this.lastApiCompletionTimestamp = null;
await this.startChat();
}
@ -695,29 +682,6 @@ export class GeminiClient {
this.config.getChatRecordingService()?.recordUserMessage(request);
}
// Idle cleanup: clear stale thinking blocks after idle period.
// Latch: once triggered, never revert — prevents oscillation.
const idleConfig = this.config.getClearContextOnIdle();
const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5;
if (
thinkingThresholdMin >= 0 &&
!this.thinkingClearLatched &&
this.lastApiCompletionTimestamp !== null
) {
const thresholdMs = thinkingThresholdMin * 60 * 1000;
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
if (idleMs > thresholdMs) {
this.thinkingClearLatched = true;
debugLogger.debug(
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
);
}
}
if (this.thinkingClearLatched) {
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
}
// Idle cleanup: clear old tool results when idle > threshold.
// Runs on user and cron messages (not tool result submissions or
// retries/hooks) so that model latency during a tool-call loop

View file

@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {
}
const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {
thinkingThresholdMinutes: 5,
toolResultsThresholdMinutes: 5,
toolResultsNumToKeep: 1,
};

View file

@ -783,5 +783,126 @@ describe('SessionService', () => {
postCompressionRecord.message,
]);
});
// Default-options case: calling buildApiHistoryFromConversation without an
// options argument must return model parts flagged `thought: true` unchanged.
it('should preserve thought parts by default (stripThoughtsFromHistory=false)', () => {
// Assistant record whose model message holds one thought part and one regular part.
const modelWithThought: ChatRecord = {
uuid: 't1',
parentUuid: 'a1',
sessionId: sessionIdA,
timestamp: '2024-01-01T01:00:00Z',
type: 'assistant',
message: {
role: 'model',
parts: [
{ text: 'reasoning step', thought: true },
{ text: 'final answer' },
],
},
cwd: '/test/project/root',
version: '1.0.0',
};
// Two-record conversation: the shared recordA1 fixture followed by the record above.
const conversation: ConversationRecord = {
sessionId: sessionIdA,
projectHash: 'test-project-hash',
startTime: '2024-01-01T00:00:00Z',
lastUpdated: '2024-01-01T01:00:00Z',
messages: [recordA1, modelWithThought],
};
const history = buildApiHistoryFromConversation(conversation);
// Thought parts should be preserved by default
expect(history).toHaveLength(2);
expect(history[1].parts).toEqual([
{ text: 'reasoning step', thought: true },
{ text: 'final answer' },
]);
});
// Opt-in case: passing `stripThoughtsFromHistory: true` must remove parts
// flagged `thought: true` and leave the remaining parts in place.
it('should strip thought parts when stripThoughtsFromHistory=true', () => {
// Assistant record whose model message holds one thought part and one regular part.
const modelWithThought: ChatRecord = {
uuid: 't1',
parentUuid: 'a1',
sessionId: sessionIdA,
timestamp: '2024-01-01T01:00:00Z',
type: 'assistant',
message: {
role: 'model',
parts: [
{ text: 'reasoning step', thought: true },
{ text: 'final answer' },
],
},
cwd: '/test/project/root',
version: '1.0.0',
};
// Two-record conversation: the shared recordA1 fixture followed by the record above.
const conversation: ConversationRecord = {
sessionId: sessionIdA,
projectHash: 'test-project-hash',
startTime: '2024-01-01T00:00:00Z',
lastUpdated: '2024-01-01T01:00:00Z',
messages: [recordA1, modelWithThought],
};
const history = buildApiHistoryFromConversation(conversation, {
stripThoughtsFromHistory: true,
});
// Thought parts should be stripped
expect(history).toHaveLength(2);
expect(history[1].parts).toEqual([{ text: 'final answer' }]);
});
// Compression case: when the conversation ends with a chat_compression record,
// thought parts inside its `compressedHistory` payload must also be preserved
// under the default options.
it('should preserve thought parts in compressed history by default', () => {
// System record of subtype chat_compression; its payload carries a two-entry
// compressed history whose model entry includes a thought part.
const compressionRecord: ChatRecord = {
uuid: 'c1',
parentUuid: 'b2',
sessionId: sessionIdA,
timestamp: '2024-01-02T03:00:00Z',
type: 'system',
subtype: 'chat_compression',
cwd: '/test/project/root',
version: '1.0.0',
gitBranch: 'main',
systemPayload: {
info: {
originalTokenCount: 100,
newTokenCount: 50,
compressionStatus: CompressionStatus.COMPRESSED,
},
compressedHistory: [
{ role: 'user', parts: [{ text: 'summary' }] },
{
role: 'model',
parts: [
{ text: 'deep thinking', thought: true },
{ text: 'final answer' },
],
},
],
},
};
// The compression record is placed last, after the recordA1 and recordB2 fixtures.
const conversation: ConversationRecord = {
sessionId: sessionIdA,
projectHash: 'test-project-hash',
startTime: '2024-01-01T00:00:00Z',
lastUpdated: '2024-01-02T03:00:00Z',
messages: [recordA1, recordB2, compressionRecord],
};
const history = buildApiHistoryFromConversation(conversation);
// Thought parts should be preserved in compressed history by default.
// The compressedHistory has 2 entries (user, model), and no messages
// exist after the compression record, so the result is 2 items.
expect(history).toHaveLength(2);
expect(history[1].parts).toEqual([
{ text: 'deep thinking', thought: true },
{ text: 'final answer' },
]);
});
});
});

View file

@ -865,7 +865,9 @@ export interface BuildApiHistoryOptions {
/**
* Whether to strip thought parts from the history.
* Thought parts are content parts that have `thought: true`.
* @default true
* Keeping thoughts ensures `reasoning_content` from reasoning models
* (e.g. DeepSeek) is properly passed back in subsequent API calls.
* @default false
*/
stripThoughtsFromHistory?: boolean;
}
@ -906,7 +908,7 @@ export function buildApiHistoryFromConversation(
conversation: ConversationRecord,
options: BuildApiHistoryOptions = {},
): Content[] {
const { stripThoughtsFromHistory = true } = options;
const { stripThoughtsFromHistory = false } = options;
const { messages } = conversation;
let lastCompressionIndex = -1;

View file

@ -402,11 +402,6 @@
"description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.",
"type": "object",
"properties": {
"thinkingThresholdMinutes": {
"description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.",
"type": "number",
"default": 5
},
"toolResultsThresholdMinutes": {
"description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.",
"type": "number",