Merge remote-tracking branch 'origin/main' into feat/review-skill-improvements

This commit is contained in:
wenshao 2026-04-08 23:09:01 +08:00
commit 3364cf880f
72 changed files with 2442 additions and 432 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-base",
"version": "0.14.1",
"version": "0.14.2",
"description": "Base channel infrastructure for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-dingtalk",
"version": "0.14.1",
"version": "0.14.2",
"description": "DingTalk channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-plugin-example",
"version": "0.14.1",
"version": "0.14.2",
"private": true,
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-telegram",
"version": "0.14.1",
"version": "0.14.2",
"description": "Telegram channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-weixin",
"version": "0.14.1",
"version": "0.14.2",
"description": "WeChat (Weixin) channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.14.1",
"version": "0.14.2",
"description": "Qwen Code",
"repository": {
"type": "git",
@ -33,7 +33,7 @@
"dist"
],
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.14.1"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.14.2"
},
"dependencies": {
"@agentclientprotocol/sdk": "^0.14.1",

View file

@ -1069,6 +1069,7 @@ export async function loadCliConfig(
telemetry: telemetrySettings,
usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
fileFiltering: settings.context?.fileFiltering,
thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
checkpointing:
argv.checkpointing || settings.general?.checkpointing?.enabled,
proxy:

View file

@ -518,7 +518,7 @@ const SETTINGS_SCHEMA = {
label: 'Enable Follow-up Suggestions',
category: 'UI',
requiresRestart: false,
default: true,
default: false,
description:
'Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.',
showInDialog: true,
@ -924,6 +924,16 @@ const SETTINGS_SCHEMA = {
},
},
},
gapThresholdMinutes: {
type: 'number',
label: 'Thinking Block Idle Threshold (minutes)',
category: 'Context',
requiresRestart: false,
default: 5,
description:
'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
showInDialog: false,
},
},
},
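Since showInDialog is false, this setting is only reachable through the settings file. A minimal sketch (assuming the usual settings.json layout, with the key under context exactly as loadCliConfig reads it above):

{
  "context": {
    "gapThresholdMinutes": 10
  }
}

loadCliConfig passes this through as thinkingIdleThresholdMinutes, and Config converts it to milliseconds.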

View file

@ -52,6 +52,18 @@ export function generateCodingPlanTemplate(
// China region uses legacy fields to maintain backward compatibility
// This ensures existing users don't get prompted for unnecessary updates
return [
{
id: 'qwen3.6-plus',
name: '[ModelStudio Coding Plan] qwen3.6-plus',
baseUrl: 'https://coding.dashscope.aliyuncs.com/v1',
envKey: CODING_PLAN_ENV_KEY,
generationConfig: {
extra_body: {
enable_thinking: true,
},
contextWindowSize: 1000000,
},
},
{
id: 'qwen3.5-plus',
name: '[ModelStudio Coding Plan] qwen3.5-plus',
@ -147,6 +159,18 @@ export function generateCodingPlanTemplate(
// Global region uses ModelStudio Coding Plan branding for Global/Intl
return [
{
id: 'qwen3.6-plus',
name: '[ModelStudio Coding Plan for Global/Intl] qwen3.6-plus',
baseUrl: 'https://coding-intl.dashscope.aliyuncs.com/v1',
envKey: CODING_PLAN_ENV_KEY,
generationConfig: {
extra_body: {
enable_thinking: true,
},
contextWindowSize: 1000000,
},
},
{
id: 'qwen3.5-plus',
name: '[ModelStudio Coding Plan for Global/Intl] qwen3.5-plus',

View file

@ -1973,4 +1973,15 @@ export default {
'Vollständige Tool-Ausgabe und Denkprozess im ausführlichen Modus anzeigen (mit Strg+O umschalten).',
'Press Ctrl+O to show full tool output':
'Strg+O für vollständige Tool-Ausgabe drücken',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -2013,4 +2013,15 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Press Ctrl+O to show full tool output',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1464,4 +1464,15 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).':
'詳細モードで完全なツール出力と思考を表示します(Ctrl+O で切り替え)。',
'Press Ctrl+O to show full tool output': 'Ctrl+O で完全なツール出力を表示',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1963,4 +1963,15 @@ export default {
'Mostrar saída completa da ferramenta e raciocínio no modo detalhado (alternar com Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Pressione Ctrl+O para exibir a saída completa da ferramenta',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1970,4 +1970,15 @@ export default {
'Показывать полный вывод инструментов и процесс рассуждений в подробном режиме (переключить с помощью Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Нажмите Ctrl+O для показа полного вывода инструментов',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1817,4 +1817,14 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).':
'详细模式下显示完整工具输出和思考过程(Ctrl+O 切换)。',
'Press Ctrl+O to show full tool output': '按 Ctrl+O 查看详细工具调用结果',
'Switch to plan mode or exit plan mode': '切换到计划模式或退出计划模式',
'Exited plan mode. Previous approval mode restored.':
'已退出计划模式,已恢复之前的审批模式。',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'启用计划模式。智能体将只分析和规划,而不执行工具。',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'已处于计划模式。使用 "/plan exit" 退出计划模式。',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'未处于计划模式。请先使用 "/plan" 进入计划模式。',
};

View file

@ -32,6 +32,7 @@ import { languageCommand } from '../ui/commands/languageCommand.js';
import { mcpCommand } from '../ui/commands/mcpCommand.js';
import { memoryCommand } from '../ui/commands/memoryCommand.js';
import { modelCommand } from '../ui/commands/modelCommand.js';
import { planCommand } from '../ui/commands/planCommand.js';
import { permissionsCommand } from '../ui/commands/permissionsCommand.js';
import { trustCommand } from '../ui/commands/trustCommand.js';
import { quitCommand } from '../ui/commands/quitCommand.js';
@ -103,6 +104,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
mcpCommand,
memoryCommand,
modelCommand,
planCommand,
permissionsCommand,
...(this.config?.getFolderTrust() ? [trustCommand] : []),
quitCommand,

View file

@ -1113,7 +1113,7 @@ export const AppContainer = (props: AppContainerProps) => {
// Generate prompt suggestions when streaming completes
const followupSuggestionsEnabled =
settings.merged.ui?.enableFollowupSuggestions !== false;
settings.merged.ui?.enableFollowupSuggestions === true;
useEffect(() => {
// Clear suggestion when feature is disabled at runtime
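With the schema default flipped to false and the check tightened to === true, follow-up suggestions are now opt-in. Re-enabling them takes a settings entry like this sketch (key path per settings.merged.ui above):

{
  "ui": {
    "enableFollowupSuggestions": true
  }
}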

View file

@ -0,0 +1,159 @@
/**
* @license
* Copyright 2026 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi, type Mock } from 'vitest';
import { planCommand } from './planCommand.js';
import { type CommandContext } from './types.js';
import { createMockCommandContext } from '../../test-utils/mockCommandContext.js';
import { ApprovalMode } from '@qwen-code/qwen-code-core';
describe('planCommand', () => {
let mockContext: CommandContext;
beforeEach(() => {
mockContext = createMockCommandContext({
services: {
config: {
getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT),
getPrePlanMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT),
setApprovalMode: vi.fn(),
} as unknown as import('@qwen-code/qwen-code-core').Config,
},
});
});
it('should switch to plan mode if not in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
const result = await planCommand.action(mockContext, '');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.PLAN,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content:
'Enabled plan mode. The agent will analyze and plan without executing tools.',
});
});
it('should return submit prompt if arguments are provided when switching to plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
const result = await planCommand.action(mockContext, 'refactor the code');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.PLAN,
);
expect(result).toEqual({
type: 'submit_prompt',
content: [{ text: 'refactor the code' }],
});
});
it('should return already in plan mode if mode is already plan', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, '');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Already in plan mode. Use "/plan exit" to exit plan mode.',
});
});
it('should return submit prompt if arguments are provided and already in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, 'keep planning');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'submit_prompt',
content: [{ text: 'keep planning' }],
});
});
it('should exit plan mode when exit argument is passed', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.DEFAULT,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Exited plan mode. Previous approval mode restored.',
});
});
it('should restore pre-plan mode when executing from plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
(mockContext.services.config?.getPrePlanMode as Mock).mockReturnValue(
ApprovalMode.AUTO_EDIT,
);
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.AUTO_EDIT,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Exited plan mode. Previous approval mode restored.',
});
});
it('should return error when execute is used but not in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
// Default mock returns ApprovalMode.DEFAULT (not PLAN)
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'message',
messageType: 'error',
content: 'Not in plan mode. Use "/plan" to enter plan mode first.',
});
});
});

View file

@ -0,0 +1,104 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import {
type CommandContext,
CommandKind,
type SlashCommand,
type MessageActionReturn,
type SubmitPromptActionReturn,
} from './types.js';
import { t } from '../../i18n/index.js';
import { ApprovalMode } from '@qwen-code/qwen-code-core';
export const planCommand: SlashCommand = {
name: 'plan',
get description() {
return t('Switch to plan mode or exit plan mode');
},
kind: CommandKind.BUILT_IN,
action: async (
context: CommandContext,
args: string,
): Promise<MessageActionReturn | SubmitPromptActionReturn> => {
const { config } = context.services;
if (!config) {
return {
type: 'message',
messageType: 'error',
content: t('Configuration is not available.'),
};
}
const trimmedArgs = args.trim();
const currentMode = config.getApprovalMode();
if (trimmedArgs === 'exit') {
if (currentMode !== ApprovalMode.PLAN) {
return {
type: 'message',
messageType: 'error',
content: t('Not in plan mode. Use "/plan" to enter plan mode first.'),
};
}
try {
config.setApprovalMode(config.getPrePlanMode());
} catch (e) {
return {
type: 'message',
messageType: 'error',
content: (e as Error).message,
};
}
return {
type: 'message',
messageType: 'info',
content: t('Exited plan mode. Previous approval mode restored.'),
};
}
if (currentMode !== ApprovalMode.PLAN) {
try {
config.setApprovalMode(ApprovalMode.PLAN);
} catch (e) {
return {
type: 'message',
messageType: 'error',
content: (e as Error).message,
};
}
if (trimmedArgs) {
return {
type: 'submit_prompt',
content: [{ text: trimmedArgs }],
};
}
return {
type: 'message',
messageType: 'info',
content: t(
'Enabled plan mode. The agent will analyze and plan without executing tools.',
),
};
}
// Already in plan mode
if (trimmedArgs) {
return {
type: 'submit_prompt',
content: [{ text: trimmedArgs }],
};
}
return {
type: 'message',
messageType: 'info',
content: t('Already in plan mode. Use "/plan exit" to exit plan mode.'),
};
},
};
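Taken together, the branches above give the command three invocation shapes (a usage sketch; the prompt text is illustrative):

/plan                      Enter plan mode; the agent analyzes and plans without executing tools.
/plan refactor the parser  Enter plan mode if needed, then submit the prompt.
/plan exit                 Leave plan mode and restore the pre-plan approval mode.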

View file

@ -849,7 +849,6 @@ describe('InputPrompt', () => {
// Verify useCompletion was called with correct signature
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -878,7 +877,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -907,7 +905,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -936,7 +933,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -965,7 +961,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -995,7 +990,6 @@ describe('InputPrompt', () => {
// Verify useCompletion was called with the buffer
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1024,7 +1018,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1054,7 +1047,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1084,7 +1076,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1114,7 +1105,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1144,7 +1134,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1176,7 +1165,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1206,7 +1194,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1238,7 +1225,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,

View file

@ -168,15 +168,6 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
}
}, []);
const [dirs, setDirs] = useState<readonly string[]>(
config.getWorkspaceContext().getDirectories(),
);
const dirsChanged = config.getWorkspaceContext().getDirectories();
useEffect(() => {
if (dirs.length !== dirsChanged.length) {
setDirs(dirsChanged);
}
}, [dirs.length, dirsChanged]);
const [reverseSearchActive, setReverseSearchActive] = useState(false);
const [commandSearchActive, setCommandSearchActive] = useState(false);
const [textBeforeReverseSearch, setTextBeforeReverseSearch] = useState('');
@ -190,7 +181,6 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
const completion = useCommandCompletion(
buffer,
dirs,
config.getTargetDir(),
slashCommands,
commandContext,

View file

@ -189,13 +189,35 @@ export function KeypressProvider({
clearKittyTimeout();
kittySequenceTimeout = setTimeout(() => {
if (kittySequenceBufferRef.current) {
if (debugKeystrokeLogging) {
debugLogger.debug(
'[DEBUG] Kitty buffer timeout, clearing:',
kittySequenceBufferRef.current,
);
// Before discarding, try to salvage any parseable sequences
// that may have been missed (e.g., due to chunked input).
while (kittySequenceBufferRef.current) {
const parsed = parseKittyPrefix(kittySequenceBufferRef.current);
if (parsed) {
kittySequenceBufferRef.current =
kittySequenceBufferRef.current.slice(parsed.length);
broadcast(parsed.key);
continue;
}
const plain = parsePlainTextPrefix(kittySequenceBufferRef.current);
if (plain) {
kittySequenceBufferRef.current =
kittySequenceBufferRef.current.slice(plain.length);
broadcast(plain.key);
continue;
}
break;
}
// Clear any remaining unparseable content
if (kittySequenceBufferRef.current) {
if (debugKeystrokeLogging) {
debugLogger.debug(
'[DEBUG] Kitty buffer timeout, clearing:',
kittySequenceBufferRef.current,
);
}
kittySequenceBufferRef.current = '';
}
kittySequenceBufferRef.current = '';
}
}, KITTY_SEQUENCE_TIMEOUT_MS);
};
@ -331,14 +353,19 @@ export function KeypressProvider({
};
}
// 3) CSI-u form: ESC [ <code> ; <mods> (u|~)
// 3) CSI-u and tilde-coded functional keys: ESC [ <code> ; <mods> (u|~)
// 3) CSI-u form: ESC [ <code>[:<shifted>][:<base>] ; <mods>[:<event>] [; <text>] (u|~)
// 3) CSI-u and tilde-coded functional keys with optional kitty extensions:
// Full kitty format: ESC [ code:shifted:base ; mods:event ; text u
// 'u' terminator: Kitty CSI-u; '~' terminator: tilde-coded function keys.
const csiUPrefix = new RegExp(`^${ESC}\\[(\\d+)(;(\\d+))?([u~])`);
// The colon-separated fields (shifted key, base key, event type, text)
// are optional extensions that some terminals send.
const csiUPrefix = new RegExp(
`^${ESC}\\[(\\d+)(?::\\d+)*(?:;(\\d+)(?::\\d+)*)?(?:;\\d+)?([u~])`,
);
m = buffer.match(csiUPrefix);
if (m) {
const keyCode = parseInt(m[1], 10);
let modifiers = m[3] ? parseInt(m[3], 10) : KITTY_MODIFIER_BASE;
let modifiers = m[2] ? parseInt(m[2], 10) : KITTY_MODIFIER_BASE;
if (modifiers >= KITTY_MODIFIER_EVENT_TYPES_OFFSET) {
modifiers -= KITTY_MODIFIER_EVENT_TYPES_OFFSET;
}
@ -347,7 +374,7 @@ export function KeypressProvider({
(modifierBits & MODIFIER_SHIFT_BIT) === MODIFIER_SHIFT_BIT;
const alt = (modifierBits & MODIFIER_ALT_BIT) === MODIFIER_ALT_BIT;
const ctrl = (modifierBits & MODIFIER_CTRL_BIT) === MODIFIER_CTRL_BIT;
const terminator = m[4];
const terminator = m[3];
// Tilde-coded functional keys (Delete, Insert, PageUp/Down, Home/End)
if (terminator === '~') {
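A standalone sketch of what the widened pattern accepts, assuming ESC is the escape character \x1b (note the group indices shift: modifiers move from m[3] to m[2], the terminator from m[4] to m[3]):

const ESC = '\\x1b';
const csiUPrefix = new RegExp(
  `^${ESC}\\[(\\d+)(?::\\d+)*(?:;(\\d+)(?::\\d+)*)?(?:;\\d+)?([u~])`,
);
// Plain CSI-u (old form): Ctrl+a
console.log('\x1b[97;5u'.match(csiUPrefix)?.slice(1)); // ['97', '5', 'u']
// Full kitty form: code:shifted:base ; mods:event ; text
console.log('\x1b[97:65:97;2:1;97u'.match(csiUPrefix)?.slice(1)); // ['97', '2', 'u']
// Tilde-coded functional key (Delete), no modifiers
console.log('\x1b[3~'.match(csiUPrefix)?.slice(1)); // ['3', undefined, '~']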

View file

@ -391,9 +391,9 @@ describe('useCodingPlanUpdates', () => {
>;
// Should have new China configs + custom config only (global config removed since regions are mutually exclusive)
// The China template has 8 models, so we expect 8 (from template) + 1 (custom) = 9
// The China template has 9 models, so we expect 9 (from template) + 1 (custom) = 10
// Note: description field has been removed, only name field contains the branding
expect(updatedConfigs.length).toBe(9);
expect(updatedConfigs.length).toBe(10);
// Should NOT contain the Global config (mutually exclusive)
expect(

View file

@ -84,7 +84,6 @@ const setupMocks = ({
describe('useCommandCompletion', () => {
const mockCommandContext = {} as CommandContext;
const mockConfig = {} as Config;
const testDirs: string[] = [];
const testRootDir = '/';
// Helper to create real TextBuffer objects within renderHook
@ -114,7 +113,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest(''),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -139,7 +137,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('@file');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -172,7 +169,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('@files'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -200,7 +196,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -226,7 +221,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text, cursorOffset),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -265,7 +259,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -286,7 +279,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -306,7 +298,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -332,7 +323,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -361,7 +351,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -398,7 +387,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -427,7 +415,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -455,7 +442,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -484,7 +470,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest(text);
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -517,7 +502,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('/mem');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -547,7 +531,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('@src/fi');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -580,7 +563,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest(text, cursorOffset);
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,

View file

@ -39,7 +39,6 @@ export interface UseCommandCompletionReturn {
export function useCommandCompletion(
buffer: TextBuffer,
dirs: readonly string[],
cwd: string,
slashCommands: readonly SlashCommand[],
commandContext: CommandContext,

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-core",
"version": "0.14.1",
"version": "0.14.2",
"description": "Qwen Code Core",
"repository": {
"type": "git",

View file

@ -8,6 +8,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import type { Mock } from 'vitest';
import type { ConfigParameters, SandboxConfig } from './config.js';
import { Config, ApprovalMode } from './config.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { setGeminiMdFilename as mockSetGeminiMdFilename } from '../tools/memoryTool.js';
import {
@ -57,6 +58,9 @@ vi.mock('node:fs', async (importOriginal) => {
isDirectory: vi.fn().mockReturnValue(true),
}),
realpathSync: vi.fn((path) => path),
mkdirSync: vi.fn(),
writeFileSync: vi.fn(),
readFileSync: vi.fn(),
};
return {
...mocked,
@ -1203,6 +1207,103 @@ describe('setApprovalMode with folder trust', () => {
expect(() => config.setApprovalMode(ApprovalMode.PLAN)).not.toThrow();
});
describe('prePlanMode tracking', () => {
it('should save pre-plan mode when entering plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);
expect(config.getPrePlanMode()).toBe(ApprovalMode.AUTO_EDIT);
});
it('should clear pre-plan mode when leaving plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);
config.setApprovalMode(ApprovalMode.DEFAULT);
expect(config.getPrePlanMode()).toBe(ApprovalMode.DEFAULT);
});
it('should default to DEFAULT when no pre-plan mode was recorded', () => {
const config = new Config(baseParams);
expect(config.getPrePlanMode()).toBe(ApprovalMode.DEFAULT);
});
it('should not update pre-plan mode when already in plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.YOLO);
config.setApprovalMode(ApprovalMode.PLAN);
// Setting PLAN again should not overwrite prePlanMode
config.setApprovalMode(ApprovalMode.PLAN);
expect(config.getPrePlanMode()).toBe(ApprovalMode.YOLO);
});
});
describe('plan file persistence', () => {
it('should save plan to disk', () => {
const config = new Config(baseParams);
config.savePlan('# My Plan\n1. Step one\n2. Step two');
expect(fs.mkdirSync).toHaveBeenCalledWith(
expect.stringContaining('plans'),
{ recursive: true },
);
expect(fs.writeFileSync).toHaveBeenCalledWith(
expect.stringContaining('.md'),
'# My Plan\n1. Step one\n2. Step two',
'utf-8',
);
});
it('should load plan from disk', () => {
const config = new Config(baseParams);
(fs.readFileSync as Mock).mockReturnValue('# Saved Plan');
const plan = config.loadPlan();
expect(plan).toBe('# Saved Plan');
});
it('should return undefined when no plan file exists', () => {
const config = new Config(baseParams);
const enoentError = new Error('ENOENT') as NodeJS.ErrnoException;
enoentError.code = 'ENOENT';
(fs.readFileSync as Mock).mockImplementation(() => {
throw enoentError;
});
const plan = config.loadPlan();
expect(plan).toBeUndefined();
});
it('should rethrow non-ENOENT errors from loadPlan', () => {
const config = new Config(baseParams);
const permError = new Error('EACCES') as NodeJS.ErrnoException;
permError.code = 'EACCES';
(fs.readFileSync as Mock).mockImplementation(() => {
throw permError;
});
expect(() => config.loadPlan()).toThrow('EACCES');
});
it('should use session ID in plan file path', () => {
const config = new Config({
...baseParams,
sessionId: 'test-session-123',
});
const filePath = config.getPlanFilePath();
expect(filePath).toContain('test-session-123');
expect(filePath).toMatch(/\.md$/);
});
});
describe('registerCoreTools', () => {
beforeEach(() => {
vi.clearAllMocks();

View file

@ -6,6 +6,7 @@
// Node built-ins
import type { EventEmitter } from 'node:events';
import * as fs from 'node:fs';
import * as path from 'node:path';
import process from 'node:process';
@ -370,6 +371,8 @@ export interface ConfigParameters {
model?: string;
outputLanguageFilePath?: string;
maxSessionTurns?: number;
/** Minutes of inactivity before clearing retained thinking blocks. */
thinkingIdleThresholdMinutes?: number;
sessionTokenLimit?: number;
experimentalZedIntegration?: boolean;
cronEnabled?: boolean;
@ -529,6 +532,7 @@ export class Config {
private sdkMode: boolean;
private geminiMdFileCount: number;
private approvalMode: ApprovalMode;
private prePlanMode?: ApprovalMode;
private readonly accessibility: AccessibilitySettings;
private readonly telemetrySettings: TelemetrySettings;
private readonly gitCoAuthor: GitCoAuthorSettings;
@ -557,6 +561,7 @@ export class Config {
private ideMode: boolean;
private readonly maxSessionTurns: number;
private readonly thinkingIdleThresholdMs: number;
private readonly sessionTokenLimit: number;
private readonly listExtensions: boolean;
private readonly overrideExtensions?: string[];
@ -683,6 +688,8 @@ export class Config {
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
this.bugCommand = params.bugCommand;
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.thinkingIdleThresholdMs =
(params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
this.experimentalZedIntegration =
params.experimentalZedIntegration ?? false;
@ -1329,6 +1336,10 @@ export class Config {
return this.maxSessionTurns;
}
getThinkingIdleThresholdMs(): number {
return this.thinkingIdleThresholdMs;
}
getSessionTokenLimit(): number {
return this.sessionTokenLimit;
}
@ -1634,6 +1645,14 @@ export class Config {
return this.approvalMode;
}
/**
* Returns the approval mode that was active before entering plan mode.
* Falls back to DEFAULT if no pre-plan mode was recorded.
*/
getPrePlanMode(): ApprovalMode {
return this.prePlanMode ?? ApprovalMode.DEFAULT;
}
setApprovalMode(mode: ApprovalMode): void {
if (
!this.isTrustedFolder() &&
@ -1644,9 +1663,55 @@ export class Config {
'Cannot enable privileged approval modes in an untrusted folder.',
);
}
// Track the mode before entering plan mode so it can be restored later
if (mode === ApprovalMode.PLAN && this.approvalMode !== ApprovalMode.PLAN) {
this.prePlanMode = this.approvalMode;
} else if (
mode !== ApprovalMode.PLAN &&
this.approvalMode === ApprovalMode.PLAN
) {
this.prePlanMode = undefined;
}
this.approvalMode = mode;
}
/**
* Returns the file path for this session's plan file.
*/
getPlanFilePath(): string {
return Storage.getPlanFilePath(this.sessionId);
}
/**
* Saves a plan to disk for the current session.
*/
savePlan(plan: string): void {
const filePath = this.getPlanFilePath();
const dir = path.dirname(filePath);
fs.mkdirSync(dir, { recursive: true });
fs.writeFileSync(filePath, plan, 'utf-8');
}
/**
* Loads the plan for the current session, or returns undefined if none exists.
*/
loadPlan(): string | undefined {
const filePath = this.getPlanFilePath();
try {
return fs.readFileSync(filePath, 'utf-8');
} catch (error: unknown) {
if (
typeof error === 'object' &&
error !== null &&
'code' in error &&
(error as NodeJS.ErrnoException).code === 'ENOENT'
) {
return undefined;
}
throw error;
}
}
getInputFormat(): 'text' | 'stream-json' {
return this.inputFormat;
}
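A usage sketch of the new approval-mode and plan-persistence surface (baseParams and the session id are hypothetical; assumes a trusted folder so privileged modes are allowed):

const config = new Config({ ...baseParams, sessionId: 'demo-session' });

config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);       // records AUTO_EDIT as prePlanMode
config.setApprovalMode(config.getPrePlanMode()); // restores AUTO_EDIT, clears prePlanMode

config.savePlan('# Plan\n1. Step one');          // writes <global qwen dir>/plans/demo-session.md
config.loadPlan();                               // '# Plan\n1. Step one' (undefined if no file)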

View file

@ -7,4 +7,4 @@
export const DEFAULT_QWEN_MODEL = 'coder-model';
export const DEFAULT_QWEN_FLASH_MODEL = 'coder-model';
export const DEFAULT_QWEN_EMBEDDING_MODEL = 'text-embedding-v4';
export const MAINLINE_CODER_MODEL = 'qwen3.5-plus';
export const MAINLINE_CODER_MODEL = 'qwen3.6-plus';

View file

@ -18,6 +18,7 @@ const TMP_DIR_NAME = 'tmp';
const BIN_DIR_NAME = 'bin';
const PROJECT_DIR_NAME = 'projects';
const IDE_DIR_NAME = 'ide';
const PLANS_DIR_NAME = 'plans';
const DEBUG_DIR_NAME = 'debug';
const ARENA_DIR_NAME = 'arena';
@ -165,6 +166,14 @@ export class Storage {
return path.join(Storage.getRuntimeBaseDir(), IDE_DIR_NAME);
}
static getPlansDir(): string {
return path.join(Storage.getGlobalQwenDir(), PLANS_DIR_NAME);
}
static getPlanFilePath(sessionId: string): string {
return path.join(Storage.getPlansDir(), `${sessionId}.md`);
}
static getGlobalBinDir(): string {
return path.join(Storage.getGlobalQwenDir(), BIN_DIR_NAME);
}

View file

@ -423,7 +423,7 @@ describe('AnthropicContentGenerator', () => {
const [anthropicRequest] =
anthropicState.lastCreateArgs as AnthropicCreateArgs;
expect(anthropicRequest).toEqual(
expect.objectContaining({ max_tokens: 32000 }),
expect.objectContaining({ max_tokens: 8000 }),
);
});
@ -488,7 +488,7 @@ describe('AnthropicContentGenerator', () => {
const [anthropicRequest] =
anthropicState.lastCreateArgs as AnthropicCreateArgs;
expect(anthropicRequest).toEqual(
expect.objectContaining({ max_tokens: 32000 }),
expect.objectContaining({ max_tokens: 8000 }),
);
});
});

View file

@ -33,7 +33,7 @@ import { DEFAULT_TIMEOUT } from '../openaiContentGenerator/constants.js';
import { createDebugLogger } from '../../utils/debugLogger.js';
import {
tokenLimit,
DEFAULT_OUTPUT_TOKEN_LIMIT,
CAPPED_DEFAULT_MAX_TOKENS,
hasExplicitOutputLimit,
} from '../tokenLimits.js';
@ -234,12 +234,23 @@ export class AnthropicContentGenerator implements ContentGenerator {
const modelLimit = tokenLimit(modelId, 'output');
const isKnownModel = hasExplicitOutputLimit(modelId);
const maxTokens =
userMaxTokens !== undefined && userMaxTokens !== null
? isKnownModel
? Math.min(userMaxTokens, modelLimit)
: userMaxTokens
: Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
let maxTokens: number;
if (userMaxTokens !== undefined && userMaxTokens !== null) {
maxTokens = isKnownModel
? Math.min(userMaxTokens, modelLimit)
: userMaxTokens;
} else {
// No explicit user config — check env var, then use capped default.
const envVal = process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
maxTokens = isKnownModel
? Math.min(envMaxTokens, modelLimit)
: envMaxTokens;
} else {
maxTokens = Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}
}
return {
max_tokens: maxTokens,
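Condensed, the precedence above is user config, then the environment variable, then the capped default (a sketch; 8000 is CAPPED_DEFAULT_MAX_TOKENS per the updated tests):

function resolveMaxTokens(
  userMaxTokens: number | null | undefined,
  envVal: string | undefined, // process.env['QWEN_CODE_MAX_OUTPUT_TOKENS']
  modelLimit: number,
  isKnownModel: boolean,
): number {
  const clamp = (n: number) => (isKnownModel ? Math.min(n, modelLimit) : n);
  if (userMaxTokens !== undefined && userMaxTokens !== null) {
    return clamp(userMaxTokens);
  }
  const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
  if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
    return clamp(envMaxTokens);
  }
  return Math.min(modelLimit, 8000); // CAPPED_DEFAULT_MAX_TOKENS
}

resolveMaxTokens(undefined, undefined, 32000, true); // 8000 (capped default)
resolveMaxTokens(100000, undefined, 16384, false);   // 100000 (unknown model, user wins)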

View file

@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
getFileService: vi.fn().mockReturnValue(fileService),
getMaxSessionTurns: vi.fn().mockReturnValue(0),
getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
getSessionTokenLimit: vi.fn().mockReturnValue(32000),
getNoBrowser: vi.fn().mockReturnValue(false),
getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@ -427,6 +428,119 @@ describe('Gemini Client (client.ts)', () => {
});
});
describe('thinking block idle cleanup and latch', () => {
let mockChat: Partial<GeminiChat>;
beforeEach(() => {
const mockStream = (async function* () {
yield {
type: GeminiEventType.Content,
value: 'response',
};
})();
mockTurnRunFn.mockReturnValue(mockStream);
mockChat = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
it('should not strip thoughts on active session (< 5min idle)', async () => {
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-1',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(
mockChat.stripThoughtsFromHistoryKeepRecent,
).not.toHaveBeenCalled();
});
it('should latch and strip thoughts after > 5min idle', async () => {
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-2',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
it('should keep stripping once latched even if idle < 5min', async () => {
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = true;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-3',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
it('should update lastApiCompletionTimestamp after API call', async () => {
client['lastApiCompletionTimestamp'] = null;
const before = Date.now();
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-4',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual(
before,
);
});
it('should reset latch and timestamp on resetChat', async () => {
client['lastApiCompletionTimestamp'] = Date.now();
client['thinkingClearLatched'] = true;
await client.resetChat();
expect(client['thinkingClearLatched']).toBe(false);
expect(client['lastApiCompletionTimestamp']).toBeNull();
});
});
describe('tryCompressChat', () => {
const mockGetHistory = vi.fn();
@ -436,6 +550,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
});
@ -457,6 +572,7 @@ describe('Gemini Client (client.ts)', () => {
getHistory: vi.fn((_curated?: boolean) => chatHistory),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockOriginalChat as GeminiChat;
@ -1149,6 +1265,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
client['chat'] = mockChat;
@ -1204,6 +1321,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1260,6 +1378,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1326,6 +1445,7 @@ hello
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1365,6 +1485,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1410,6 +1531,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1498,6 +1620,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1555,6 +1678,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1636,6 +1760,7 @@ Other open files:
{ role: 'user', parts: [{ text: 'previous message' }] },
]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
@ -1889,6 +2014,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]), // Default empty history
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2228,6 +2354,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2265,6 +2392,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2305,6 +2433,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2329,6 +2458,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2361,6 +2491,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2405,6 +2536,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});

View file

@ -126,6 +126,25 @@ export class GeminiClient {
*/
private hasFailedCompressionAttempt = false;
/**
* Timestamp (epoch ms) of the last completed API call.
* Used to detect idle periods for thinking block cleanup.
* Starts as null on the first query; there is no prior thinking to clean,
* so the idle check is skipped until the first API call completes.
*/
private lastApiCompletionTimestamp: number | null = null;
/**
* Sticky-on latch for clearing thinking blocks from prior turns.
* Triggered when idle exceeds the configured threshold (default 5 min,
* aligned with provider prompt-cache TTL). Once latched, stays true to
* prevent oscillation: without it, thinking would accumulate, get
* stripped, then accumulate again, causing the message prefix to change
* repeatedly (bad for provider-side prompt caching and wastes context).
* Reset on /clear (resetChat).
*/
private thinkingClearLatched = false;
constructor(private readonly config: Config) {
this.loopDetector = new LoopDetectionService(config);
}
@ -199,6 +218,9 @@ export class GeminiClient {
}
async resetChat(): Promise<void> {
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
this.thinkingClearLatched = false;
this.lastApiCompletionTimestamp = null;
await this.startChat();
}
@ -537,8 +559,27 @@ export class GeminiClient {
// record user message for session management
this.config.getChatRecordingService()?.recordUserMessage(request);
// strip thoughts from history before sending the message
this.stripThoughtsFromHistory();
// Thinking block cross-turn retention with idle cleanup:
// - Active session (< threshold idle): keep thinking blocks for reasoning coherence
// - Idle > threshold: clear old thinking, keep only last 1 turn to free context
// - Latch: once triggered, never revert — prevents oscillation
if (
!this.thinkingClearLatched &&
this.lastApiCompletionTimestamp !== null
) {
const thresholdMs = this.config.getThinkingIdleThresholdMs();
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
if (idleMs > thresholdMs) {
this.thinkingClearLatched = true;
debugLogger.debug(
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
);
}
}
if (this.thinkingClearLatched) {
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
}
}
if (messageType !== SendMessageType.Retry) {
this.sessionTurnCount++;
@ -680,6 +721,7 @@ export class GeminiClient {
if (arenaAgentClient) {
await arenaAgentClient.reportError('Loop detected');
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
@ -698,9 +740,14 @@ export class GeminiClient {
: 'Unknown error';
await arenaAgentClient.reportError(errorMsg);
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
// Track API completion time for thinking block idle cleanup
this.lastApiCompletionTimestamp = Date.now();
// Fire Stop hook through MessageBus (only if hooks are enabled and registered)
// This must be done before any early returns to ensure hooks are always triggered
if (
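The latch reduces to a small sticky predicate; a sketch of the same logic outside the class:

function shouldStripThoughts(state: {
  latched: boolean;
  lastApiCompletionTimestamp: number | null;
  thresholdMs: number;
}): boolean {
  if (
    !state.latched &&
    state.lastApiCompletionTimestamp !== null &&
    Date.now() - state.lastApiCompletionTimestamp > state.thresholdMs
  ) {
    state.latched = true; // sticky: only resetChat() clears it
  }
  return state.latched;
}

// 10 minutes idle against the default 5-minute threshold: latches, so strip.
shouldStripThoughts({
  latched: false,
  lastApiCompletionTimestamp: Date.now() - 10 * 60 * 1000,
  thresholdMs: 5 * 60 * 1000,
}); // true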

View file

@ -1923,6 +1923,150 @@ describe('GeminiChat', async () => {
});
});
describe('stripThoughtsFromHistoryKeepRecent', () => {
it('should keep the most recent N model turns with thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'mid thinking', thought: true },
{ text: 'response2' },
],
},
{ role: 'user', parts: [{ text: 'msg3' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First two model turns should have thoughts stripped
expect(history[1]!.parts).toEqual([{ text: 'response1' }]);
expect(history[3]!.parts).toEqual([{ text: 'response2' }]);
// Last model turn should keep thoughts
expect(history[5]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
]);
});
it('should not strip anything when keepTurns >= model turns with thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([
{ text: 'thinking', thought: true },
{ text: 'response' },
]);
});
it('should remove model content objects that become empty after stripping', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'only thinking', thought: true }],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// The first model turn (only thoughts) should be removed entirely
expect(history).toHaveLength(3);
expect(history[0]!.parts).toEqual([{ text: 'msg1' }]);
expect(history[1]!.parts).toEqual([{ text: 'msg2' }]);
expect(history[2]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response' },
]);
});
it('should also strip thoughtSignature from stripped turns', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{
text: 'with sig',
thoughtSignature: 'sig1',
} as unknown as { text: string; thoughtSignature: string },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response2' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First model turn: thought stripped, thoughtSignature stripped
expect(history[1]!.parts).toEqual([
{ text: 'with sig' },
{ text: 'response1' },
]);
expect(
(history[1]!.parts![0] as { thoughtSignature?: string })
.thoughtSignature,
).toBeUndefined();
});
it('should handle keepTurns=0 by stripping all thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(0);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([{ text: 'response' }]);
});
});
describe('stripOrphanedUserEntriesFromHistory', () => {
it('should pop a single trailing user entry', () => {
chat.setHistory([

View file

@ -16,13 +16,14 @@ import type {
Tool,
GenerateContentResponseUsageMetadata,
} from '@google/genai';
import { createUserContent } from '@google/genai';
import { createUserContent, FinishReason } from '@google/genai';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorStatus } from '../utils/errors.js';
import { createDebugLogger } from '../utils/debugLogger.js';
import { parseAndFormatApiError } from '../utils/errorParsing.js';
import { isRateLimitError, type RetryInfo } from '../utils/rateLimit.js';
import type { Config } from '../config/config.js';
import { ESCALATED_MAX_TOKENS } from './tokenLimits.js';
import { hasCycleInSchema } from '../tools/tools.js';
import type { StructuredError } from './turn.js';
import {
@ -355,6 +356,17 @@ export class GeminiChat {
cgConfig?.maxRetries ?? RATE_LIMIT_RETRY_OPTIONS.maxRetries;
const extraRetryErrorCodes = cgConfig?.retryErrorCodes;
// Max output tokens escalation: when no user/env override is set,
// the capped default (8K) is used. If the model hits MAX_TOKENS,
// retry once with escalated limit (64K).
let maxTokensEscalated = false;
const hasUserMaxTokensOverride =
(cgConfig?.samplingParams?.max_tokens !== undefined &&
cgConfig?.samplingParams?.max_tokens !== null) ||
!!process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
let lastFinishReason: string | undefined;
for (
let attempt = 0;
attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
@ -376,7 +388,10 @@ export class GeminiChat {
prompt_id,
);
lastFinishReason = undefined;
for await (const chunk of stream) {
const fr = chunk.candidates?.[0]?.finishReason;
if (fr) lastFinishReason = fr;
yield { type: StreamEventType.CHUNK, value: chunk };
}
@ -481,6 +496,49 @@ export class GeminiChat {
}
}
// Max output tokens escalation: if the retry loop succeeded with
// the capped default (8K) but hit MAX_TOKENS, retry once at 64K.
// Placed outside the retry loop so that any errors from the
// escalated stream propagate directly (not caught by retry logic).
if (
lastError === null &&
lastFinishReason === FinishReason.MAX_TOKENS &&
!maxTokensEscalated &&
!hasUserMaxTokensOverride
) {
maxTokensEscalated = true;
debugLogger.info(
`Output truncated at capped default. Escalating to ${ESCALATED_MAX_TOKENS} tokens.`,
);
// Remove partial model response from history
// (processStreamResponse already pushed it)
if (
self.history.length > 0 &&
self.history[self.history.length - 1].role === 'model'
) {
self.history.pop();
}
// Signal UI to discard partial output
yield { type: StreamEventType.RETRY };
// Retry with escalated max_tokens
const escalatedParams: SendMessageParameters = {
...params,
config: {
...params.config,
maxOutputTokens: ESCALATED_MAX_TOKENS,
},
};
const escalatedStream = await self.makeApiCallAndProcessStream(
model,
requestContents,
escalatedParams,
prompt_id,
);
for await (const chunk of escalatedStream) {
yield { type: StreamEventType.CHUNK, value: chunk };
}
}
if (lastError) {
if (lastError instanceof InvalidStreamError) {
const totalAttempts = invalidStreamRetryCount + 1;
@ -625,6 +683,89 @@ export class GeminiChat {
.filter((content) => content.parts && content.parts.length > 0);
}
/**
* Strip thought parts from history, keeping the most recent `keepTurns`
* model turns that contain thinking blocks intact.
*
* Selection is based on thought-containing turns specifically (not all
* model turns) so the most recent reasoning chain is always preserved
* even if later model turns happen to have no thinking.
*
* Used for idle cleanup: once the configured idle threshold is exceeded,
* the old thinking blocks are no longer useful for reasoning coherence
* but still consume context tokens.
*/
stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
keepTurns = Number.isFinite(keepTurns)
? Math.max(0, Math.floor(keepTurns))
: 0;
// Find indices of model turns that contain thought parts
const modelTurnIndices: number[] = [];
for (let i = 0; i < this.history.length; i++) {
const content = this.history[i];
if (
content.role === 'model' &&
content.parts?.some(
(part) =>
part &&
typeof part === 'object' &&
'thought' in part &&
part.thought,
)
) {
modelTurnIndices.push(i);
}
}
// Determine which model turns to keep (the most recent `keepTurns`)
const turnsToStrip = new Set(
modelTurnIndices.slice(
0,
Math.max(0, modelTurnIndices.length - keepTurns),
),
);
if (turnsToStrip.size === 0) return;
this.history = this.history
.map((content, index) => {
if (!turnsToStrip.has(index) || !content.parts) return content;
// Strip thought parts from this turn
const filteredParts = content.parts
.filter(
(part) =>
!(
part &&
typeof part === 'object' &&
'thought' in part &&
part.thought
),
)
.map((part) => {
if (
part &&
typeof part === 'object' &&
'thoughtSignature' in part
) {
const newPart = { ...part };
delete (newPart as { thoughtSignature?: string })
.thoughtSignature;
return newPart;
}
return part;
});
return {
...content,
parts: filteredParts,
};
})
// Remove Content objects that have no parts left after filtering
.filter((content) => content.parts && content.parts.length > 0);
}
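A minimal usage sketch of the selection rule above, using a toy history. The Content/Part shapes follow @google/genai; `chat` stands for the surrounding GeminiChat instance and `setHistory` is assumed to replace its history.
chat.setHistory([
  { role: 'user', parts: [{ text: 'q1' }] },
  { role: 'model', parts: [{ thought: true, text: 'plan A' }, { text: 'a1' }] },
  { role: 'user', parts: [{ text: 'q2' }] },
  { role: 'model', parts: [{ thought: true, text: 'plan B' }, { text: 'a2' }] },
]);
// Two model turns carry thoughts; with keepTurns = 1 only the most recent
// thought-bearing turn keeps its thinking parts.
chat.stripThoughtsFromHistoryKeepRecent(1);
// First model turn is reduced to [{ text: 'a1' }]; the second keeps 'plan B'.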
/**
* Pop all orphaned trailing user entries from chat history.
* In a valid conversation the last entry is always a model response;

View file

@@ -40,8 +40,9 @@ const MODALITY_PATTERNS: Array<[RegExp, InputModalities]> = [
// -------------------
// Alibaba / Qwen
// -------------------
// Qwen3.5-Plus: image support
// Qwen3.5-Plus, Qwen3.6-Plus: image + video support
[/^qwen3\.5-plus/, { image: true, video: true }],
[/^qwen3\.6-plus/, { image: true, video: true }],
[/^coder-model$/, { image: true, video: true }],
// Qwen VL (vision-language) models: image + video

View file

@@ -786,9 +786,9 @@ describe('DashScopeOpenAICompatibleProvider', () => {
const result = provider.buildRequest(request, 'test-prompt-id');
// Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// qwen3-max has 32K output limit, so min(32K, 32K) = 32K
expect(result.max_tokens).toBe(32000);
// Should set capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS)
// qwen3-max has 32K output limit, so min(32K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should set conservative max_tokens when null is provided', () => {
@@ -800,8 +800,8 @@
const result = provider.buildRequest(request, 'test-prompt-id');
// null is treated as not configured, so set conservative default
expect(result.max_tokens).toBe(32000);
// null is treated as not configured, so set capped default: min(32K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should respect user max_tokens for unknown models', () => {

View file

@@ -110,8 +110,8 @@ export class DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatiblePr
}
// Apply output token limits using parent class logic
// Uses conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// to preserve input quota when user hasn't explicitly configured max_tokens
// Uses capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS=8K)
// Requests hitting the cap get one clean retry at 64K (geminiChat.ts)
const requestWithTokenLimits = this.applyOutputTokenLimit(request);
const extraBody = this.contentGeneratorConfig.extra_body;

View file

@@ -204,9 +204,9 @@ describe('DefaultOpenAICompatibleProvider', () => {
'prompt-id',
);
// Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// GPT-4 has 16K output limit, so min(16K, 32K) = 16K
expect(result.max_tokens).toBe(16384);
// Should set capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS)
// GPT-4 has 16K output limit, so min(16K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should respect user max_tokens for unknown models (deployment aliases, self-hosted)', () => {
@@ -223,8 +223,8 @@
expect(result.max_tokens).toBe(100000);
});
it('should use conservative default for unknown models when max_tokens not configured', () => {
// Unknown models without user config: use DEFAULT_OUTPUT_TOKEN_LIMIT
it('should use capped default for unknown models when max_tokens not configured', () => {
// Unknown models without user config: use CAPPED_DEFAULT_MAX_TOKENS
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'custom-deployment-alias',
messages: [{ role: 'user', content: 'Hello' }],
@@ -232,8 +232,8 @@
const result = provider.buildRequest(request, 'prompt-id');
// Uses conservative default (32K)
expect(result.max_tokens).toBe(32000);
// Uses capped default (8K)
expect(result.max_tokens).toBe(8000);
});
it('should cap max_tokens for known models to avoid API errors', () => {
@@ -259,8 +259,8 @@
const result = provider.buildRequest(request, 'prompt-id');
// GPT-4 has 16K output limit, so conservative default is still 16K
expect(result.max_tokens).toBe(16384);
// GPT-4 has 16K output limit, capped default is 8K: min(16K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should preserve all sampling parameters', () => {
@@ -303,7 +303,7 @@
// Should set conservative max_tokens default
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(minimalRequest.messages);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
});
it('should handle streaming requests', () => {
@@ -319,7 +319,7 @@
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(streamingRequest.messages);
expect(result.stream).toBe(true);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
});
it('should not modify the original request object', () => {
@@ -363,7 +363,7 @@
expect(result).toEqual({
...originalRequest,
max_tokens: 16384, // GPT-4 has 16K limit, min(16K, 32K) = 16K
max_tokens: 8000, // GPT-4 has 16K limit, min(16K, 8K) = 8K
custom_param: 'custom_value',
nested: { key: 'value' },
});
@@ -382,7 +382,7 @@
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(originalRequest.messages);
expect(result.temperature).toBe(0.7);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
expect(result).not.toHaveProperty('custom_param');
});
});

View file

@@ -7,7 +7,7 @@ import type { OpenAICompatibleProvider } from './types.js';
import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js';
import {
tokenLimit,
DEFAULT_OUTPUT_TOKEN_LIMIT,
CAPPED_DEFAULT_MAX_TOKENS,
hasExplicitOutputLimit,
} from '../../tokenLimits.js';
@@ -101,18 +101,19 @@ export class DefaultOpenAICompatibleProvider
* - For unknown models (deployment aliases, self-hosted): respect user's
* configured value entirely (backend may support larger limits)
* 2. If user didn't configure max_tokens:
* - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT)
* - This provides a conservative default (32K) that avoids truncating output
* while preserving input quota (not occupying too much context window)
* - Check QWEN_CODE_MAX_OUTPUT_TOKENS env var first
* - Otherwise use min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS=8K)
* - Requests hitting the 8K cap get one clean retry at 64K (geminiChat.ts)
* 3. If model has no specific limit (tokenLimit returns default):
* - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard
* - Still apply CAPPED_DEFAULT_MAX_TOKENS as safeguard
*
* Examples:
* - User sets 4K, known model limit 64K uses 4K (respects user preference)
* - User sets 100K, known model limit 64K uses 64K (capped to avoid API error)
* - User sets 100K, unknown model uses 100K (respects user, backend may support it)
* - User not set, model limit 64K uses 32K (conservative default)
* - User not set, model limit 8K uses 8K (model limit is lower)
* - User not set, model limit 64K uses 8K (capped default for slot optimization)
* - User not set, model limit 4K uses 4K (model limit is lower)
* - User not set, env QWEN_CODE_MAX_OUTPUT_TOKENS=16000 -> uses 16K
*
* @param request - The chat completion request parameters
* @returns The request with max_tokens adjusted according to the logic
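The resolution order above, condensed into a standalone sketch (the actual implementation follows in the next hunk; the function name is illustrative and `CAPPED_DEFAULT_MAX_TOKENS` is imported as in this diff):
function resolveMaxTokens(
  userMaxTokens: number | undefined,
  modelLimit: number,
  isKnownModel: boolean,
): number {
  // 1. An explicit user value wins; cap to the model limit only for known
  //    models (unknown backends may genuinely support more).
  if (userMaxTokens !== undefined) {
    return isKnownModel ? Math.min(userMaxTokens, modelLimit) : userMaxTokens;
  }
  // 2. Env var override, same capping rule.
  const envMaxTokens = parseInt(
    process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'] ?? '',
    10,
  );
  if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
    return isKnownModel ? Math.min(envMaxTokens, modelLimit) : envMaxTokens;
  }
  // 3. Capped 8K default; escalation to 64K happens later in geminiChat.ts.
  return Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}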
@@ -140,9 +141,18 @@
effectiveMaxTokens = userMaxTokens;
}
} else {
// User didn't configure, use conservative default:
// min(model-specific limit, DEFAULT_OUTPUT_TOKEN_LIMIT)
effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
// No explicit user config — check env var, then use capped default.
// Capped default (8K) reduces GPU slot over-reservation by ~4×.
// Requests hitting the cap get one clean retry at 64K (geminiChat.ts).
const envVal = process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
effectiveMaxTokens = isKnownModel
? Math.min(envMaxTokens, modelLimit)
: envMaxTokens;
} else {
effectiveMaxTokens = Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}
}
return {

View file

@@ -11,6 +11,13 @@ export type TokenLimitType = 'input' | 'output';
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000; // 32K tokens
// Capped default for slot-reservation optimization. 99% of outputs are under 5K
// tokens, so 32K defaults over-reserve 4-6× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64K
// (see geminiChat.ts max_output_tokens escalation).
export const CAPPED_DEFAULT_MAX_TOKENS: TokenCount = 8_000;
export const ESCALATED_MAX_TOKENS: TokenCount = 64_000;
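Back-of-envelope arithmetic behind the cap (illustrative numbers taken from the comment above, not a measurement):
// A serving slot reserves roughly max_tokens of output capacity per request.
// Against a ~5K typical output, the old 32K default reserves 32/5 ≈ 6.4× the
// needed capacity; the 8K cap reserves 8/5 = 1.6×, roughly a 4× reduction,
// consistent with the "4-6×" figure above.
const typicalOutput = 5_000;
console.log(32_000 / typicalOutput); // 6.4
console.log(CAPPED_DEFAULT_MAX_TOKENS / typicalOutput); // 1.6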
/**
* Accurate numeric limits:
* - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.)

View file

@@ -280,8 +280,13 @@ export class Turn {
return;
}
// Handle the new RETRY event
// Handle the new RETRY event: clear accumulated state from the
// previous attempt to avoid duplicate tool calls and stale metadata.
if (streamEvent.type === 'retry') {
this.pendingToolCalls.length = 0;
this.pendingCitations.clear();
this.debugResponses = [];
this.finishReason = undefined;
yield {
type: GeminiEventType.Retry,
retryInfo: streamEvent.retryInfo,

View file

@@ -420,6 +420,7 @@ export class HookRunner {
}
}
const killedBySignal = exitCode === null;
resolve({
hookConfig,
eventName,
@@ -427,8 +428,11 @@
output,
stdout,
stderr,
exitCode: exitCode || EXIT_CODE_SUCCESS,
exitCode: exitCode ?? -1,
duration,
...(killedBySignal && {
error: new Error('Hook killed by signal'),
}),
});
});
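The `exitCode === null` branch leans on Node's child_process contract: the exit callback receives a null code when the child was terminated by a signal. A self-contained sketch of that behavior (the spawned command is illustrative):
import { spawn } from 'node:child_process';

const child = spawn('sleep', ['60']);
child.on('exit', (code, signal) => {
  // Killed by a signal: code is null and signal names it (e.g. 'SIGTERM').
  // Mapping that case to exitCode: -1 plus an Error mirrors the change above.
  const killedBySignal = code === null;
  console.log({ exitCode: code ?? -1, killedBySignal, signal });
});
child.kill('SIGTERM');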

View file

@@ -18,9 +18,11 @@ describe('ExitPlanModeTool', () => {
approvalMode = ApprovalMode.PLAN;
mockConfig = {
getApprovalMode: vi.fn(() => approvalMode),
getPrePlanMode: vi.fn(() => ApprovalMode.DEFAULT),
setApprovalMode: vi.fn((mode: ApprovalMode) => {
approvalMode = mode;
}),
savePlan: vi.fn(),
} as unknown as Config;
tool = new ExitPlanModeTool(mockConfig);
@@ -147,6 +149,9 @@
ApprovalMode.DEFAULT,
);
expect(approvalMode).toBe(ApprovalMode.DEFAULT);
// Plan should be saved to disk
expect(mockConfig.savePlan).toHaveBeenCalledWith(params.plan);
});
it('should request confirmation with plan details', async () => {
@@ -173,6 +178,29 @@
expect(approvalMode).toBe(ApprovalMode.AUTO_EDIT);
});
it('should set DEFAULT mode on ProceedOnce regardless of pre-plan mode', async () => {
// Even if pre-plan mode was AUTO_EDIT, ProceedOnce ("manually approve
// edits") should always set DEFAULT to match the option label semantics.
(mockConfig.getPrePlanMode as ReturnType<typeof vi.fn>).mockReturnValue(
ApprovalMode.AUTO_EDIT,
);
const params: ExitPlanModeParams = { plan: 'Restore test' };
const signal = new AbortController().signal;
const invocation = tool.build(params);
const confirmation = await invocation.getConfirmationDetails(signal);
if (confirmation) {
await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce);
}
expect(mockConfig.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.DEFAULT,
);
expect(approvalMode).toBe(ApprovalMode.DEFAULT);
});
it('should remain in plan mode when confirmation is rejected', async () => {
const params: ExitPlanModeParams = {
plan: 'Remain in planning',
@@ -199,6 +227,9 @@
ApprovalMode.PLAN,
);
expect(approvalMode).toBe(ApprovalMode.PLAN);
// Plan should NOT be saved when rejected
expect(mockConfig.savePlan).not.toHaveBeenCalled();
});
it('should have correct description', () => {

View file

@@ -147,6 +147,15 @@ class ExitPlanModeToolInvocation extends BaseToolInvocation<
};
}
// Persist the approved plan to disk
try {
this.config.savePlan(plan);
} catch (error) {
debugLogger.warn(
`[ExitPlanModeTool] Failed to save plan to disk: ${error instanceof Error ? error.message : String(error)}`,
);
}
const llmMessage = `User has approved your plan. You can now start coding. Start with updating your todo list if applicable.`;
const displayMessage = 'User approved the plan.';

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.14.1",
"version": "0.14.2",
"private": true,
"main": "src/index.ts",
"license": "Apache-2.0",

View file

@@ -179,7 +179,12 @@ async function main() {
// Since @qwen-code/webui marks it as external in its own Vite build, the
// browser bundle must also mark it external to avoid bundling Node.js-only
// modules (undici, @grpc/grpc-js, fs, stream, etc.) into the webview.
external: ['@qwen-code/qwen-code-core'],
// The wildcard ensures deep sub-path imports (e.g.
// '@qwen-code/qwen-code-core/src/core/tokenLimits.js') are also excluded;
// without it esbuild only matches the bare package name and attempts to
// bundle the sub-path, which triggers "Dynamic require is not supported"
// at runtime in the browser.
external: ['@qwen-code/qwen-code-core', '@qwen-code/qwen-code-core/*'],
logLevel: 'silent',
plugins: [reactDedupPlugin, cssInjectPlugin, esbuildProblemMatcherPlugin],
jsx: 'automatic', // Use new JSX transform (React 17+)
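esbuild matches `external` entries exactly unless the pattern contains a `*` wildcard, which is why the bare package name alone was not enough. A reduced sketch of the distinction (entry point and outfile are assumptions):
import * as esbuild from 'esbuild';

// The bare name externalizes only `import ... from '@qwen-code/qwen-code-core'`;
// the '/*' pattern also externalizes deep imports such as
// '@qwen-code/qwen-code-core/src/core/tokenLimits.js'.
await esbuild.build({
  entryPoints: ['src/webview.tsx'],
  bundle: true,
  platform: 'browser',
  external: ['@qwen-code/qwen-code-core', '@qwen-code/qwen-code-core/*'],
  outfile: 'dist/webview.js',
});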

View file

@@ -2,7 +2,7 @@
"name": "qwen-code-vscode-ide-companion",
"displayName": "Qwen Code Companion",
"description": "Enable Qwen Code with direct access to your VS Code workspace.",
"version": "0.14.1",
"version": "0.14.2",
"publisher": "qwenlm",
"icon": "assets/icon.png",
"repository": {

View file

@@ -183,7 +183,7 @@
"enableFollowupSuggestions": {
"description": "Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.",
"type": "boolean",
"default": true
"default": false
},
"enableCacheSharing": {
"description": "Use cache-aware forked queries for suggestion generation. Reduces cost on providers that support prefix caching (experimental).",
@@ -388,6 +388,11 @@
"default": true
}
}
},
"gapThresholdMinutes": {
"description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
"type": "number",
"default": 5
}
}
},

View file

@@ -4,8 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { isSupportedImageMimeType } from '@qwen-code/qwen-code-core/src/utils/request-tokenizer/supportedImageFormats.js';
// ---------- Types ----------
export interface ImageAttachment {
@@ -61,6 +59,31 @@ export function unescapePath(filePath: string): string {
);
}
// ---------- Supported image MIME types ----------
// Inlined from @qwen-code/qwen-code-core to avoid pulling Node.js-only modules
// into the browser webview bundle (esbuild marks core as external, but deep
// sub-path imports like core/src/utils/... bypass the external filter and cause
// "Dynamic require is not supported" at runtime).
const SUPPORTED_IMAGE_MIME_TYPES: readonly string[] = [
'image/bmp',
'image/jpeg',
'image/jpg',
'image/png',
'image/tiff',
'image/webp',
'image/heic',
];
/**
* Check whether a MIME type is supported for pasted-image processing.
* @param mimeType - The MIME type string to validate
* @returns `true` when the type is in the supported list
*/
function isSupportedImageMimeType(mimeType: string): boolean {
return SUPPORTED_IMAGE_MIME_TYPES.includes(mimeType);
}
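Behavior is unchanged from the core helper; for example:
isSupportedImageMimeType('image/png'); // true
isSupportedImageMimeType('image/gif'); // false (GIF is not in the inlined list)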
// ---------- Image format detection ----------
const PASTED_IMAGE_MIME_TO_EXTENSION: Record<string, string> = {

View file

@@ -0,0 +1,196 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Browser-safe subset of @qwen-code/qwen-code-core tokenLimits.
*
* The webview bundle (IIFE, platform: browser) cannot `require` Node.js
* packages. This module replicates the constants and logic the webview
* actually uses so that the core package never needs to be pulled into the
* browser bundle.
*
* Keep this file in sync with:
* packages/core/src/core/tokenLimits.ts
*/
type TokenCount = number;
// ---------------------------------------------------------------------------
// Public constants
// ---------------------------------------------------------------------------
/** Default input context window size: 128K tokens (power-of-two). */
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072;
// ---------------------------------------------------------------------------
// Token limit types
// ---------------------------------------------------------------------------
export type TokenLimitType = 'input' | 'output';
// ---------------------------------------------------------------------------
// Internal constants
// ---------------------------------------------------------------------------
const LIMITS = {
'32k': 32_768,
'64k': 65_536,
'128k': 131_072,
'192k': 196_608,
'200k': 200_000,
'256k': 262_144,
'272k': 272_000,
'400k': 400_000,
'512k': 524_288,
'1m': 1_000_000,
'4k': 4_096,
'8k': 8_192,
'16k': 16_384,
} as const;
const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000;
// ---------------------------------------------------------------------------
// Model name normaliser
// ---------------------------------------------------------------------------
/**
* Robust normaliser: strips provider prefixes, pipes/colons, date/version
* suffixes, quantisation markers, etc.
* @param model - Raw model identifier string
* @returns Normalised lowercase model name
*/
function normalize(model: string): string {
let s = (model ?? '').toLowerCase().trim();
s = s.replace(/^.*\//, '');
s = s.split('|').pop() ?? s;
s = s.split(':').pop() ?? s;
s = s.replace(/\s+/g, '-');
s = s.replace(/-preview/g, '');
if (
!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/) &&
!s.match(/^kimi-k2-\d{4}$/)
) {
s = s.replace(
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
'',
);
}
s = s.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, '');
return s;
}
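A few traced examples of the normaliser (derived from the rules above, not an exhaustive specification):
// normalize('Anthropic/claude-sonnet-4-20250514') -> 'claude-sonnet-4'
//   (provider prefix stripped, lowercased, 4+ digit date suffix removed)
// normalize('gpt-5-preview')                      -> 'gpt-5'
// normalize('qwen-plus-latest')                   -> 'qwen-plus-latest'
//   (protected by the explicit allowlist, so '-latest' survives)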
// ---------------------------------------------------------------------------
// Input context-window patterns (most specific → most general)
// ---------------------------------------------------------------------------
const INPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
// Google Gemini
[/^gemini-3/, LIMITS['1m']],
[/^gemini-/, LIMITS['1m']],
// OpenAI
[/^gpt-5/, LIMITS['272k']],
[/^gpt-/, LIMITS['128k']],
[/^o\d/, LIMITS['200k']],
// Anthropic Claude
[/^claude-/, LIMITS['200k']],
// Alibaba / Qwen
[/^qwen3-coder-plus/, LIMITS['1m']],
[/^qwen3-coder-flash/, LIMITS['1m']],
[/^qwen3\.\d/, LIMITS['1m']],
[/^qwen-plus-latest$/, LIMITS['1m']],
[/^qwen-flash-latest$/, LIMITS['1m']],
[/^coder-model$/, LIMITS['1m']],
[/^qwen3-max/, LIMITS['256k']],
[/^qwen3-coder-/, LIMITS['256k']],
[/^qwen/, LIMITS['256k']],
// DeepSeek
[/^deepseek/, LIMITS['128k']],
// Zhipu GLM
[/^glm-5/, 202_752 as TokenCount],
[/^glm-/, 202_752 as TokenCount],
// MiniMax
[/^minimax-m2\.5/i, LIMITS['192k']],
[/^minimax-/i, LIMITS['200k']],
// Moonshot / Kimi
[/^kimi-/, LIMITS['256k']],
// ByteDance Seed-OSS
[/^seed-oss/, LIMITS['512k']],
];
// ---------------------------------------------------------------------------
// Output token-limit patterns
// ---------------------------------------------------------------------------
const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
[/^gemini-3/, LIMITS['64k']],
[/^gemini-/, LIMITS['8k']],
[/^gpt-5/, LIMITS['128k']],
[/^gpt-/, LIMITS['16k']],
[/^o\d/, LIMITS['128k']],
[/^claude-opus-4-6/, LIMITS['128k']],
[/^claude-sonnet-4-6/, LIMITS['64k']],
[/^claude-/, LIMITS['64k']],
[/^qwen3\.\d/, LIMITS['64k']],
[/^coder-model$/, LIMITS['64k']],
[/^qwen/, LIMITS['32k']],
[/^deepseek-reasoner/, LIMITS['64k']],
[/^deepseek-r1/, LIMITS['64k']],
[/^deepseek-chat/, LIMITS['8k']],
[/^glm-5/, LIMITS['16k']],
[/^glm-4\.7/, LIMITS['16k']],
[/^minimax-m2\.5/i, LIMITS['64k']],
[/^kimi-k2\.5/, LIMITS['32k']],
];
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
* Return the token limit for a given model name.
*
* This is a browser-safe mirror of `tokenLimit()` in
* `@qwen-code/qwen-code-core`. The webview only calls this as a fallback
* when `modelInfo._meta.contextLimit` is unavailable.
*
* @param model - The model identifier string
* @param type - 'input' for context window, 'output' for generation limit
* @returns Maximum token count for the model and type
*/
export function tokenLimit(
model: string,
type: TokenLimitType = 'input',
): TokenCount {
const norm = normalize(model);
const patterns = type === 'output' ? OUTPUT_PATTERNS : INPUT_PATTERNS;
for (const [regex, limit] of patterns) {
if (regex.test(norm)) {
return limit;
}
}
return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
}
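Example lookups against the tables above:
// tokenLimit('qwen3-max')                    -> 262_144 (input, 256K pattern)
// tokenLimit('gpt-4o', 'output')             -> 16_384  (matches /^gpt-/)
// tokenLimit('some-unknown-model')           -> 131_072 (DEFAULT_TOKEN_LIMIT)
// tokenLimit('some-unknown-model', 'output') -> 32_000  (DEFAULT_OUTPUT_TOKEN_LIMIT)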

View file

@@ -52,10 +52,7 @@ import type { ApprovalModeValue } from '../types/approvalModeValueTypes.js';
import type { PlanEntry, UsageStatsPayload } from '../types/chatTypes.js';
import type { ModelInfo, AvailableCommand } from '@agentclientprotocol/sdk';
import type { Question } from '../types/acpTypes.js';
import {
DEFAULT_TOKEN_LIMIT,
tokenLimit,
} from '@qwen-code/qwen-code-core/src/core/tokenLimits.js';
import { DEFAULT_TOKEN_LIMIT, tokenLimit } from '../utils/tokenLimits.js';
import { useImagePaste, type WebViewImageMessage } from './hooks/useImage.js';
export const App: React.FC = () => {

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/web-templates",
"version": "0.14.1",
"version": "0.14.2",
"description": "Web templates bundled as embeddable JS/CSS strings",
"repository": {
"type": "git",

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/webui",
"version": "0.14.1",
"version": "0.14.2",
"description": "Shared UI components for Qwen Code packages",
"type": "module",
"main": "./dist/index.cjs",

View file

@@ -112,7 +112,6 @@ function createFollowupController(
suggestion_length: text.length,
});
} catch (e: unknown) {
console.error('[followup] onOutcome callback threw:', e);
}
@@ -122,7 +121,6 @@
try {
getOnAccept?.()?.(text);
} catch (error: unknown) {
console.error('[followup] onAccept callback threw:', error);
} finally {
if (acceptTimeoutId) {
@@ -154,7 +152,6 @@
suggestion_length: currentState.suggestion.length,
});
} catch (e: unknown) {
console.error('[followup] onOutcome callback threw:', e);
}
}