mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-28 03:30:40 +00:00
fix(core): preserve reasoning_content during session resume and active sessions (GH#3579) (#3590)
* fix(core): preserve reasoning_content during session resume and active sessions (GH#3579)
* chore(core): remove dead thinkingThresholdMinutes config after latch removal (GH#3579)
This commit is contained in:
parent
e47b22806b
commit
93cbad24b1
9 changed files with 126 additions and 126 deletions
|
|
@ -223,7 +223,6 @@ The `extra_body` field allows you to add custom parameters to the request body s
|
|||
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
|
||||
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
|
||||
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
|
||||
| `context.clearContextOnIdle.thinkingThresholdMinutes` | number | Minutes of inactivity before clearing old thinking blocks to free context tokens. Aligns with typical provider prompt-cache TTL. Use `-1` to disable. | `5` |
|
||||
| `context.clearContextOnIdle.toolResultsThresholdMinutes` | number | Minutes of inactivity before clearing old tool result content. Use `-1` to disable. | `60` |
|
||||
| `context.clearContextOnIdle.toolResultsNumToKeep` | number | Number of most-recent compactable tool results to preserve when clearing. Floor at 1. | `5` |
|
||||
|
||||
|
|
|
|||
|
|
@ -1012,16 +1012,6 @@ const SETTINGS_SCHEMA = {
|
|||
'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.',
|
||||
showInDialog: false,
|
||||
properties: {
|
||||
thinkingThresholdMinutes: {
|
||||
type: 'number',
|
||||
label: 'Thinking Idle Threshold (minutes)',
|
||||
category: 'Context',
|
||||
requiresRestart: false,
|
||||
default: 5 as number,
|
||||
description:
|
||||
'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.',
|
||||
showInDialog: false,
|
||||
},
|
||||
toolResultsThresholdMinutes: {
|
||||
type: 'number',
|
||||
label: 'Tool Results Idle Threshold (minutes)',
|
||||
|
|
|
|||
|
|
@ -203,8 +203,6 @@ export interface ChatCompressionSettings {
|
|||
* Threshold values of -1 mean "never clear" (disabled).
|
||||
*/
|
||||
export interface ClearContextOnIdleSettings {
|
||||
/** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */
|
||||
thinkingThresholdMinutes?: number;
|
||||
/** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */
|
||||
toolResultsThresholdMinutes?: number;
|
||||
/** Number of most-recent tool results to preserve. Default 5. */
|
||||
|
|
@ -766,8 +764,6 @@ export class Config {
|
|||
this.bugCommand = params.bugCommand;
|
||||
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
||||
this.clearContextOnIdle = {
|
||||
thinkingThresholdMinutes:
|
||||
params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5,
|
||||
toolResultsThresholdMinutes:
|
||||
params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60,
|
||||
toolResultsNumToKeep:
|
||||
|
|
|
|||
|
|
@ -347,7 +347,6 @@ describe('Gemini Client (client.ts)', () => {
|
|||
getFileService: vi.fn().mockReturnValue(fileService),
|
||||
getMaxSessionTurns: vi.fn().mockReturnValue(0),
|
||||
getClearContextOnIdle: vi.fn().mockReturnValue({
|
||||
thinkingThresholdMinutes: 5,
|
||||
toolResultsThresholdMinutes: 60,
|
||||
toolResultsNumToKeep: 5,
|
||||
}),
|
||||
|
|
@ -473,73 +472,10 @@ describe('Gemini Client (client.ts)', () => {
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
});
|
||||
|
||||
it('should not strip thoughts on active session (< 5min idle)', async () => {
|
||||
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = false;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-1',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(
|
||||
mockChat.stripThoughtsFromHistoryKeepRecent,
|
||||
).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should latch and strip thoughts after > 5min idle', async () => {
|
||||
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = false;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-2',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(true);
|
||||
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
|
||||
1,
|
||||
);
|
||||
});
|
||||
|
||||
it('should keep stripping once latched even if idle < 5min', async () => {
|
||||
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = true;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-3',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(true);
|
||||
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
|
||||
1,
|
||||
);
|
||||
});
|
||||
|
||||
it('should update lastApiCompletionTimestamp after API call', async () => {
|
||||
client['lastApiCompletionTimestamp'] = null;
|
||||
|
||||
|
|
@ -559,13 +495,11 @@ describe('Gemini Client (client.ts)', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should reset latch and timestamp on resetChat', async () => {
|
||||
it('should reset lastApiCompletionTimestamp on resetChat', async () => {
|
||||
client['lastApiCompletionTimestamp'] = Date.now();
|
||||
client['thinkingClearLatched'] = true;
|
||||
|
||||
await client.resetChat();
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(false);
|
||||
expect(client['lastApiCompletionTimestamp']).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -156,17 +156,6 @@ export class GeminiClient {
|
|||
*/
|
||||
private lastApiCompletionTimestamp: number | null = null;
|
||||
|
||||
/**
|
||||
* Sticky-on latch for clearing thinking blocks from prior turns.
|
||||
* Triggered when idle exceeds the configured threshold (default 5 min,
|
||||
* aligned with provider prompt-cache TTL). Once latched, stays true to
|
||||
* prevent oscillation: without it, thinking would accumulate → get
|
||||
* stripped → accumulate again, causing the message prefix to change
|
||||
* repeatedly (bad for provider-side prompt caching and wastes context).
|
||||
* Reset on /clear (resetChat).
|
||||
*/
|
||||
private thinkingClearLatched = false;
|
||||
|
||||
constructor(private readonly config: Config) {
|
||||
this.loopDetector = new LoopDetectionService(config);
|
||||
}
|
||||
|
|
@ -242,8 +231,6 @@ export class GeminiClient {
|
|||
|
||||
async resetChat(): Promise<void> {
|
||||
this.surfacedRelevantAutoMemoryPaths.clear();
|
||||
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
|
||||
this.thinkingClearLatched = false;
|
||||
this.lastApiCompletionTimestamp = null;
|
||||
await this.startChat();
|
||||
}
|
||||
|
|
@ -695,29 +682,6 @@ export class GeminiClient {
|
|||
this.config.getChatRecordingService()?.recordUserMessage(request);
|
||||
}
|
||||
|
||||
// Idle cleanup: clear stale thinking blocks after idle period.
|
||||
// Latch: once triggered, never revert — prevents oscillation.
|
||||
const idleConfig = this.config.getClearContextOnIdle();
|
||||
const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5;
|
||||
if (
|
||||
thinkingThresholdMin >= 0 &&
|
||||
!this.thinkingClearLatched &&
|
||||
this.lastApiCompletionTimestamp !== null
|
||||
) {
|
||||
const thresholdMs = thinkingThresholdMin * 60 * 1000;
|
||||
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
|
||||
if (idleMs > thresholdMs) {
|
||||
this.thinkingClearLatched = true;
|
||||
debugLogger.debug(
|
||||
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (this.thinkingClearLatched) {
|
||||
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
|
||||
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
|
||||
}
|
||||
|
||||
// Idle cleanup: clear old tool results when idle > threshold.
|
||||
// Runs on user and cron messages (not tool result submissions or
|
||||
// retries/hooks) so that model latency during a tool-call loop
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ function makeModelMessage(text: string): Content {
|
|||
}
|
||||
|
||||
const DEFAULT_SETTINGS: ClearContextOnIdleSettings = {
|
||||
thinkingThresholdMinutes: 5,
|
||||
toolResultsThresholdMinutes: 5,
|
||||
toolResultsNumToKeep: 1,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -783,5 +783,126 @@ describe('SessionService', () => {
|
|||
postCompressionRecord.message,
|
||||
]);
|
||||
});
|
||||
|
||||
it('should preserve thought parts by default (stripThoughtsFromHistory=false)', () => {
|
||||
const modelWithThought: ChatRecord = {
|
||||
uuid: 't1',
|
||||
parentUuid: 'a1',
|
||||
sessionId: sessionIdA,
|
||||
timestamp: '2024-01-01T01:00:00Z',
|
||||
type: 'assistant',
|
||||
message: {
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'reasoning step', thought: true },
|
||||
{ text: 'final answer' },
|
||||
],
|
||||
},
|
||||
cwd: '/test/project/root',
|
||||
version: '1.0.0',
|
||||
};
|
||||
|
||||
const conversation: ConversationRecord = {
|
||||
sessionId: sessionIdA,
|
||||
projectHash: 'test-project-hash',
|
||||
startTime: '2024-01-01T00:00:00Z',
|
||||
lastUpdated: '2024-01-01T01:00:00Z',
|
||||
messages: [recordA1, modelWithThought],
|
||||
};
|
||||
|
||||
const history = buildApiHistoryFromConversation(conversation);
|
||||
|
||||
// Thought parts should be preserved by default
|
||||
expect(history).toHaveLength(2);
|
||||
expect(history[1].parts).toEqual([
|
||||
{ text: 'reasoning step', thought: true },
|
||||
{ text: 'final answer' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should strip thought parts when stripThoughtsFromHistory=true', () => {
|
||||
const modelWithThought: ChatRecord = {
|
||||
uuid: 't1',
|
||||
parentUuid: 'a1',
|
||||
sessionId: sessionIdA,
|
||||
timestamp: '2024-01-01T01:00:00Z',
|
||||
type: 'assistant',
|
||||
message: {
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'reasoning step', thought: true },
|
||||
{ text: 'final answer' },
|
||||
],
|
||||
},
|
||||
cwd: '/test/project/root',
|
||||
version: '1.0.0',
|
||||
};
|
||||
|
||||
const conversation: ConversationRecord = {
|
||||
sessionId: sessionIdA,
|
||||
projectHash: 'test-project-hash',
|
||||
startTime: '2024-01-01T00:00:00Z',
|
||||
lastUpdated: '2024-01-01T01:00:00Z',
|
||||
messages: [recordA1, modelWithThought],
|
||||
};
|
||||
|
||||
const history = buildApiHistoryFromConversation(conversation, {
|
||||
stripThoughtsFromHistory: true,
|
||||
});
|
||||
|
||||
// Thought parts should be stripped
|
||||
expect(history).toHaveLength(2);
|
||||
expect(history[1].parts).toEqual([{ text: 'final answer' }]);
|
||||
});
|
||||
|
||||
it('should preserve thought parts in compressed history by default', () => {
|
||||
const compressionRecord: ChatRecord = {
|
||||
uuid: 'c1',
|
||||
parentUuid: 'b2',
|
||||
sessionId: sessionIdA,
|
||||
timestamp: '2024-01-02T03:00:00Z',
|
||||
type: 'system',
|
||||
subtype: 'chat_compression',
|
||||
cwd: '/test/project/root',
|
||||
version: '1.0.0',
|
||||
gitBranch: 'main',
|
||||
systemPayload: {
|
||||
info: {
|
||||
originalTokenCount: 100,
|
||||
newTokenCount: 50,
|
||||
compressionStatus: CompressionStatus.COMPRESSED,
|
||||
},
|
||||
compressedHistory: [
|
||||
{ role: 'user', parts: [{ text: 'summary' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'deep thinking', thought: true },
|
||||
{ text: 'final answer' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
const conversation: ConversationRecord = {
|
||||
sessionId: sessionIdA,
|
||||
projectHash: 'test-project-hash',
|
||||
startTime: '2024-01-01T00:00:00Z',
|
||||
lastUpdated: '2024-01-02T03:00:00Z',
|
||||
messages: [recordA1, recordB2, compressionRecord],
|
||||
};
|
||||
|
||||
const history = buildApiHistoryFromConversation(conversation);
|
||||
|
||||
// Thought parts should be preserved in compressed history by default.
|
||||
// The compressedHistory has 2 entries (user, model), and no messages
|
||||
// exist after the compression record, so the result is 2 items.
|
||||
expect(history).toHaveLength(2);
|
||||
expect(history[1].parts).toEqual([
|
||||
{ text: 'deep thinking', thought: true },
|
||||
{ text: 'final answer' },
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -865,7 +865,9 @@ export interface BuildApiHistoryOptions {
|
|||
/**
|
||||
* Whether to strip thought parts from the history.
|
||||
* Thought parts are content parts that have `thought: true`.
|
||||
* @default true
|
||||
* Keeping thoughts ensures `reasoning_content` from reasoning models
|
||||
* (e.g. DeepSeek) is properly passed back in subsequent API calls.
|
||||
* @default false
|
||||
*/
|
||||
stripThoughtsFromHistory?: boolean;
|
||||
}
|
||||
|
|
@ -906,7 +908,7 @@ export function buildApiHistoryFromConversation(
|
|||
conversation: ConversationRecord,
|
||||
options: BuildApiHistoryOptions = {},
|
||||
): Content[] {
|
||||
const { stripThoughtsFromHistory = true } = options;
|
||||
const { stripThoughtsFromHistory = false } = options;
|
||||
const { messages } = conversation;
|
||||
|
||||
let lastCompressionIndex = -1;
|
||||
|
|
|
|||
|
|
@ -402,11 +402,6 @@
|
|||
"description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"thinkingThresholdMinutes": {
|
||||
"description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.",
|
||||
"type": "number",
|
||||
"default": 5
|
||||
},
|
||||
"toolResultsThresholdMinutes": {
|
||||
"description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.",
|
||||
"type": "number",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue