mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-22 03:03:56 +00:00
Merge pull request #2897 from QwenLM/feat/thinking-cross-turn-retention-idle-cleanup
feat(core): thinking block cross-turn retention with idle cleanup
This commit is contained in:
commit
3c23952ef7
9 changed files with 434 additions and 2 deletions
|
|
@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s
|
|||
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
|
||||
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
|
||||
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
|
||||
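| `context.gapThresholdMinutes` | number | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens; only the most recent thinking-bearing turn is kept. Once triggered, cleanup stays active until the chat is reset (e.g. with `/clear`). The default aligns with typical provider prompt-cache TTLs; set it higher if your provider has a longer cache TTL. | `5` |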
|
||||
|
||||
#### Troubleshooting File Search Performance
|
||||
|
||||
|
|
|
|||
|
|
@ -1069,6 +1069,7 @@ export async function loadCliConfig(
|
|||
telemetry: telemetrySettings,
|
||||
usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
|
||||
fileFiltering: settings.context?.fileFiltering,
|
||||
thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
|
||||
checkpointing:
|
||||
argv.checkpointing || settings.general?.checkpointing?.enabled,
|
||||
proxy:
|
||||
|
|
|
|||
|
|
@ -924,6 +924,16 @@ const SETTINGS_SCHEMA = {
|
|||
},
|
||||
},
|
||||
},
|
||||
gapThresholdMinutes: {
|
||||
type: 'number',
|
||||
label: 'Thinking Block Idle Threshold (minutes)',
|
||||
category: 'Context',
|
||||
requiresRestart: false,
|
||||
default: 5,
|
||||
description:
|
||||
'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
|
||||
showInDialog: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
|
|
|||
|
|
@ -371,6 +371,8 @@ export interface ConfigParameters {
|
|||
model?: string;
|
||||
outputLanguageFilePath?: string;
|
||||
maxSessionTurns?: number;
|
||||
/** Minutes of inactivity before clearing retained thinking blocks. */
|
||||
thinkingIdleThresholdMinutes?: number;
|
||||
sessionTokenLimit?: number;
|
||||
experimentalZedIntegration?: boolean;
|
||||
cronEnabled?: boolean;
|
||||
|
|
@ -559,6 +561,7 @@ export class Config {
|
|||
private ideMode: boolean;
|
||||
|
||||
private readonly maxSessionTurns: number;
|
||||
private readonly thinkingIdleThresholdMs: number;
|
||||
private readonly sessionTokenLimit: number;
|
||||
private readonly listExtensions: boolean;
|
||||
private readonly overrideExtensions?: string[];
|
||||
|
|
@ -685,6 +688,8 @@ export class Config {
|
|||
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
|
||||
this.bugCommand = params.bugCommand;
|
||||
this.maxSessionTurns = params.maxSessionTurns ?? -1;
|
||||
this.thinkingIdleThresholdMs =
|
||||
(params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
|
||||
this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
|
||||
this.experimentalZedIntegration =
|
||||
params.experimentalZedIntegration ?? false;
|
||||
|
|
@ -1331,6 +1336,10 @@ export class Config {
|
|||
return this.maxSessionTurns;
|
||||
}
|
||||
|
||||
/**
 * Returns the idle threshold, in milliseconds, after which retained
 * thinking blocks become eligible for cleanup. The value is fixed at
 * construction time from `thinkingIdleThresholdMinutes` (5 minutes when
 * the parameter is not provided), converted to milliseconds.
 */
getThinkingIdleThresholdMs(): number {
|
||||
return this.thinkingIdleThresholdMs;
|
||||
}
|
||||
|
||||
getSessionTokenLimit(): number {
|
||||
return this.sessionTokenLimit;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
|
|||
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
|
||||
getFileService: vi.fn().mockReturnValue(fileService),
|
||||
getMaxSessionTurns: vi.fn().mockReturnValue(0),
|
||||
getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
|
||||
getSessionTokenLimit: vi.fn().mockReturnValue(32000),
|
||||
getNoBrowser: vi.fn().mockReturnValue(false),
|
||||
getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
|
||||
|
|
@ -427,6 +428,119 @@ describe('Gemini Client (client.ts)', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('thinking block idle cleanup and latch', () => {
|
||||
let mockChat: Partial<GeminiChat>;
|
||||
|
||||
beforeEach(() => {
|
||||
const mockStream = (async function* () {
|
||||
yield {
|
||||
type: GeminiEventType.Content,
|
||||
value: 'response',
|
||||
};
|
||||
})();
|
||||
mockTurnRunFn.mockReturnValue(mockStream);
|
||||
|
||||
mockChat = {
|
||||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
});
|
||||
|
||||
it('should not strip thoughts on active session (< 5min idle)', async () => {
|
||||
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = false;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-1',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(
|
||||
mockChat.stripThoughtsFromHistoryKeepRecent,
|
||||
).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should latch and strip thoughts after > 5min idle', async () => {
|
||||
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = false;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-2',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(true);
|
||||
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
|
||||
1,
|
||||
);
|
||||
});
|
||||
|
||||
it('should keep stripping once latched even if idle < 5min', async () => {
|
||||
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
|
||||
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
|
||||
client['thinkingClearLatched'] = true;
|
||||
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-3',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(true);
|
||||
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
|
||||
1,
|
||||
);
|
||||
});
|
||||
|
||||
it('should update lastApiCompletionTimestamp after API call', async () => {
|
||||
client['lastApiCompletionTimestamp'] = null;
|
||||
|
||||
const before = Date.now();
|
||||
const gen = client.sendMessageStream(
|
||||
[{ text: 'Hello' }],
|
||||
new AbortController().signal,
|
||||
'prompt-4',
|
||||
{ type: SendMessageType.UserQuery },
|
||||
);
|
||||
for await (const _ of gen) {
|
||||
/* drain */
|
||||
}
|
||||
|
||||
expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual(
|
||||
before,
|
||||
);
|
||||
});
|
||||
|
||||
it('should reset latch and timestamp on resetChat', async () => {
|
||||
client['lastApiCompletionTimestamp'] = Date.now();
|
||||
client['thinkingClearLatched'] = true;
|
||||
|
||||
await client.resetChat();
|
||||
|
||||
expect(client['thinkingClearLatched']).toBe(false);
|
||||
expect(client['lastApiCompletionTimestamp']).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('tryCompressChat', () => {
|
||||
const mockGetHistory = vi.fn();
|
||||
|
||||
|
|
@ -436,6 +550,7 @@ describe('Gemini Client (client.ts)', () => {
|
|||
addHistory: vi.fn(),
|
||||
setHistory: vi.fn(),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
} as unknown as GeminiChat;
|
||||
});
|
||||
|
||||
|
|
@ -457,6 +572,7 @@ describe('Gemini Client (client.ts)', () => {
|
|||
getHistory: vi.fn((_curated?: boolean) => chatHistory),
|
||||
setHistory: vi.fn(),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockOriginalChat as GeminiChat;
|
||||
|
||||
|
|
@ -1149,6 +1265,7 @@ describe('Gemini Client (client.ts)', () => {
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
} as unknown as GeminiChat;
|
||||
client['chat'] = mockChat;
|
||||
|
||||
|
|
@ -1204,6 +1321,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1260,6 +1378,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1326,6 +1445,7 @@ hello
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1365,6 +1485,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1410,6 +1531,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1498,6 +1620,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1555,6 +1678,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -1636,6 +1760,7 @@ Other open files:
|
|||
{ role: 'user', parts: [{ text: 'previous message' }] },
|
||||
]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
});
|
||||
|
|
@ -1889,6 +2014,7 @@ Other open files:
|
|||
getHistory: vi.fn().mockReturnValue([]), // Default empty history
|
||||
setHistory: vi.fn(),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -2228,6 +2354,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -2265,6 +2392,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -2305,6 +2433,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
||||
|
|
@ -2329,6 +2458,7 @@ Other open files:
|
|||
getHistory: vi.fn().mockReturnValue([]),
|
||||
setHistory: vi.fn(),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
stripOrphanedUserEntriesFromHistory: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
|
@ -2361,6 +2491,7 @@ Other open files:
|
|||
getHistory: vi.fn().mockReturnValue([]),
|
||||
setHistory: vi.fn(),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
stripOrphanedUserEntriesFromHistory: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
|
|
@ -2405,6 +2536,7 @@ Other open files:
|
|||
addHistory: vi.fn(),
|
||||
getHistory: vi.fn().mockReturnValue([]),
|
||||
stripThoughtsFromHistory: vi.fn(),
|
||||
stripThoughtsFromHistoryKeepRecent: vi.fn(),
|
||||
};
|
||||
client['chat'] = mockChat as GeminiChat;
|
||||
});
|
||||
|
|
|
|||
|
|
@ -126,6 +126,25 @@ export class GeminiClient {
|
|||
*/
|
||||
private hasFailedCompressionAttempt = false;
|
||||
|
||||
/**
|
||||
* Timestamp (epoch ms) of the last completed API call.
|
||||
* Used to detect idle periods for thinking block cleanup.
|
||||
* Starts as null — on the first query there is no prior thinking to clean,
|
||||
* so the idle check is skipped until the first API call completes.
|
||||
*/
|
||||
private lastApiCompletionTimestamp: number | null = null;
|
||||
|
||||
/**
|
||||
* Sticky-on latch for clearing thinking blocks from prior turns.
|
||||
* Triggered when idle exceeds the configured threshold (default 5 min,
|
||||
* aligned with provider prompt-cache TTL). Once latched, stays true to
|
||||
* prevent oscillation: without it, thinking would accumulate → get
|
||||
* stripped → accumulate again, causing the message prefix to change
|
||||
* repeatedly (bad for provider-side prompt caching and wastes context).
|
||||
* Reset on /clear (resetChat).
|
||||
*/
|
||||
private thinkingClearLatched = false;
|
||||
|
||||
constructor(private readonly config: Config) {
|
||||
this.loopDetector = new LoopDetectionService(config);
|
||||
}
|
||||
|
|
@ -199,6 +218,9 @@ export class GeminiClient {
|
|||
}
|
||||
|
||||
/**
 * Discards the idle-cleanup bookkeeping and starts a new chat.
 *
 * Both pieces of thinking-retention state are returned to their initial
 * values before the new chat is started: the completion timestamp goes
 * back to `null` (so the idle check is skipped until an API call
 * completes again) and the sticky clear latch is released.
 */
async resetChat(): Promise<void> {
  // A fresh chat has no earlier thinking blocks, so nothing needs cleanup.
  this.lastApiCompletionTimestamp = null;
  this.thinkingClearLatched = false;

  await this.startChat();
}
|
||||
|
||||
|
|
@ -537,8 +559,27 @@ export class GeminiClient {
|
|||
// record user message for session management
|
||||
this.config.getChatRecordingService()?.recordUserMessage(request);
|
||||
|
||||
// strip thoughts from history before sending the message
|
||||
this.stripThoughtsFromHistory();
|
||||
// Thinking block cross-turn retention with idle cleanup:
|
||||
// - Active session (< threshold idle): keep thinking blocks for reasoning coherence
|
||||
// - Idle > threshold: clear old thinking, keep only last 1 turn to free context
|
||||
// - Latch: once triggered, never revert — prevents oscillation
|
||||
if (
|
||||
!this.thinkingClearLatched &&
|
||||
this.lastApiCompletionTimestamp !== null
|
||||
) {
|
||||
const thresholdMs = this.config.getThinkingIdleThresholdMs();
|
||||
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
|
||||
if (idleMs > thresholdMs) {
|
||||
this.thinkingClearLatched = true;
|
||||
debugLogger.debug(
|
||||
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (this.thinkingClearLatched) {
|
||||
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
|
||||
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
|
||||
}
|
||||
}
|
||||
if (messageType !== SendMessageType.Retry) {
|
||||
this.sessionTurnCount++;
|
||||
|
|
@ -680,6 +721,7 @@ export class GeminiClient {
|
|||
if (arenaAgentClient) {
|
||||
await arenaAgentClient.reportError('Loop detected');
|
||||
}
|
||||
this.lastApiCompletionTimestamp = Date.now();
|
||||
return turn;
|
||||
}
|
||||
}
|
||||
|
|
@ -698,9 +740,14 @@ export class GeminiClient {
|
|||
: 'Unknown error';
|
||||
await arenaAgentClient.reportError(errorMsg);
|
||||
}
|
||||
this.lastApiCompletionTimestamp = Date.now();
|
||||
return turn;
|
||||
}
|
||||
}
|
||||
|
||||
// Track API completion time for thinking block idle cleanup
|
||||
this.lastApiCompletionTimestamp = Date.now();
|
||||
|
||||
// Fire Stop hook through MessageBus (only if hooks are enabled and registered)
|
||||
// This must be done before any early returns to ensure hooks are always triggered
|
||||
if (
|
||||
|
|
|
|||
|
|
@ -1923,6 +1923,150 @@ describe('GeminiChat', async () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('stripThoughtsFromHistoryKeepRecent', () => {
|
||||
it('should keep the most recent N model turns with thoughts', () => {
|
||||
chat.setHistory([
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'old thinking', thought: true },
|
||||
{ text: 'response1' },
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'msg2' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'mid thinking', thought: true },
|
||||
{ text: 'response2' },
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'msg3' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'recent thinking', thought: true },
|
||||
{ text: 'response3' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
chat.stripThoughtsFromHistoryKeepRecent(1);
|
||||
|
||||
const history = chat.getHistory();
|
||||
// First two model turns should have thoughts stripped
|
||||
expect(history[1]!.parts).toEqual([{ text: 'response1' }]);
|
||||
expect(history[3]!.parts).toEqual([{ text: 'response2' }]);
|
||||
// Last model turn should keep thoughts
|
||||
expect(history[5]!.parts).toEqual([
|
||||
{ text: 'recent thinking', thought: true },
|
||||
{ text: 'response3' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should not strip anything when keepTurns >= model turns with thoughts', () => {
|
||||
chat.setHistory([
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
|
||||
},
|
||||
]);
|
||||
|
||||
chat.stripThoughtsFromHistoryKeepRecent(1);
|
||||
|
||||
const history = chat.getHistory();
|
||||
expect(history[1]!.parts).toEqual([
|
||||
{ text: 'thinking', thought: true },
|
||||
{ text: 'response' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should remove model content objects that become empty after stripping', () => {
|
||||
chat.setHistory([
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ text: 'only thinking', thought: true }],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'msg2' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'recent thinking', thought: true },
|
||||
{ text: 'response' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
chat.stripThoughtsFromHistoryKeepRecent(1);
|
||||
|
||||
const history = chat.getHistory();
|
||||
// The first model turn (only thoughts) should be removed entirely
|
||||
expect(history).toHaveLength(3);
|
||||
expect(history[0]!.parts).toEqual([{ text: 'msg1' }]);
|
||||
expect(history[1]!.parts).toEqual([{ text: 'msg2' }]);
|
||||
expect(history[2]!.parts).toEqual([
|
||||
{ text: 'recent thinking', thought: true },
|
||||
{ text: 'response' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should also strip thoughtSignature from stripped turns', () => {
|
||||
chat.setHistory([
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'old thinking', thought: true },
|
||||
{
|
||||
text: 'with sig',
|
||||
thoughtSignature: 'sig1',
|
||||
} as unknown as { text: string; thoughtSignature: string },
|
||||
{ text: 'response1' },
|
||||
],
|
||||
},
|
||||
{ role: 'user', parts: [{ text: 'msg2' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [
|
||||
{ text: 'recent thinking', thought: true },
|
||||
{ text: 'response2' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
chat.stripThoughtsFromHistoryKeepRecent(1);
|
||||
|
||||
const history = chat.getHistory();
|
||||
// First model turn: thought stripped, thoughtSignature stripped
|
||||
expect(history[1]!.parts).toEqual([
|
||||
{ text: 'with sig' },
|
||||
{ text: 'response1' },
|
||||
]);
|
||||
expect(
|
||||
(history[1]!.parts![0] as { thoughtSignature?: string })
|
||||
.thoughtSignature,
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle keepTurns=0 by stripping all thoughts', () => {
|
||||
chat.setHistory([
|
||||
{ role: 'user', parts: [{ text: 'msg1' }] },
|
||||
{
|
||||
role: 'model',
|
||||
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
|
||||
},
|
||||
]);
|
||||
|
||||
chat.stripThoughtsFromHistoryKeepRecent(0);
|
||||
|
||||
const history = chat.getHistory();
|
||||
expect(history[1]!.parts).toEqual([{ text: 'response' }]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('stripOrphanedUserEntriesFromHistory', () => {
|
||||
it('should pop a single trailing user entry', () => {
|
||||
chat.setHistory([
|
||||
|
|
|
|||
|
|
@ -625,6 +625,89 @@ export class GeminiChat {
|
|||
.filter((content) => content.parts && content.parts.length > 0);
|
||||
}
|
||||
|
||||
/**
 * Removes thinking content from older history entries while leaving the
 * newest `keepTurns` thought-bearing model turns untouched.
 *
 * Only model turns that actually contain a thought part are counted, so
 * the latest reasoning chain survives even when later model turns carry
 * no thinking at all. For every older thought-bearing turn, thought parts
 * are dropped and any `thoughtSignature` property is removed from the
 * parts that remain. After rewriting, history entries left without any
 * parts are removed from the history.
 *
 * Intended for idle cleanup: once the configured idle threshold has been
 * exceeded, old thinking no longer helps reasoning coherence but still
 * occupies context tokens.
 *
 * @param keepTurns Number of most recent thought-bearing model turns to
 *   preserve. Fractions are floored, negatives are clamped to 0, and
 *   non-finite values are treated as 0 (strip everything).
 */
stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
  const retainCount = Number.isFinite(keepTurns)
    ? Math.max(0, Math.floor(keepTurns))
    : 0;

  // True when `part` is an object whose `thought` property is truthy.
  const carriesThought = (part: unknown): boolean =>
    typeof part === 'object' &&
    part !== null &&
    'thought' in part &&
    Boolean((part as { thought?: unknown }).thought);

  // Positions of model entries holding at least one thought part,
  // in chronological order.
  const thoughtTurnPositions: number[] = [];
  this.history.forEach((entry, position) => {
    if (entry.role === 'model' && entry.parts?.some(carriesThought)) {
      thoughtTurnPositions.push(position);
    }
  });

  // Everything except the trailing `retainCount` positions gets stripped.
  const stripCount = Math.max(0, thoughtTurnPositions.length - retainCount);
  if (stripCount === 0) {
    return;
  }
  const positionsToStrip = new Set(thoughtTurnPositions.slice(0, stripCount));

  this.history = this.history
    .map((entry, position) => {
      if (!entry.parts || !positionsToStrip.has(position)) {
        return entry;
      }

      const remainingParts = entry.parts
        .filter((part) => !carriesThought(part))
        .map((part) => {
          const hasSignature =
            part && typeof part === 'object' && 'thoughtSignature' in part;
          if (!hasSignature) {
            return part;
          }
          // Copy before deleting so the original part object is not mutated.
          const unsigned = { ...part };
          delete (unsigned as { thoughtSignature?: string }).thoughtSignature;
          return unsigned;
        });

      return { ...entry, parts: remainingParts };
    })
    // Drop entries that ended up with no parts at all.
    .filter((entry) => entry.parts && entry.parts.length > 0);
}
|
||||
|
||||
/**
|
||||
* Pop all orphaned trailing user entries from chat history.
|
||||
* In a valid conversation the last entry is always a model response;
|
||||
|
|
|
|||
|
|
@ -388,6 +388,11 @@
|
|||
"default": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gapThresholdMinutes": {
|
||||
"description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
|
||||
"type": "number",
|
||||
"default": 5
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue