Merge remote-tracking branch 'origin/main' into feat/review-skill-improvements

This commit is contained in:
wenshao 2026-04-08 23:09:01 +08:00
commit 3364cf880f
72 changed files with 2442 additions and 432 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-base",
"version": "0.14.1",
"version": "0.14.2",
"description": "Base channel infrastructure for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-dingtalk",
"version": "0.14.1",
"version": "0.14.2",
"description": "DingTalk channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-plugin-example",
"version": "0.14.1",
"version": "0.14.2",
"private": true,
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-telegram",
"version": "0.14.1",
"version": "0.14.2",
"description": "Telegram channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/channel-weixin",
"version": "0.14.1",
"version": "0.14.2",
"description": "WeChat (Weixin) channel adapter for Qwen Code",
"type": "module",
"main": "dist/index.js",

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.14.1",
"version": "0.14.2",
"description": "Qwen Code",
"repository": {
"type": "git",
@ -33,7 +33,7 @@
"dist"
],
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.14.1"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.14.2"
},
"dependencies": {
"@agentclientprotocol/sdk": "^0.14.1",

View file

@ -1069,6 +1069,7 @@ export async function loadCliConfig(
telemetry: telemetrySettings,
usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
fileFiltering: settings.context?.fileFiltering,
thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
checkpointing:
argv.checkpointing || settings.general?.checkpointing?.enabled,
proxy:

View file

@ -518,7 +518,7 @@ const SETTINGS_SCHEMA = {
label: 'Enable Follow-up Suggestions',
category: 'UI',
requiresRestart: false,
default: true,
default: false,
description:
'Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.',
showInDialog: true,
@ -924,6 +924,16 @@ const SETTINGS_SCHEMA = {
},
},
},
gapThresholdMinutes: {
type: 'number',
label: 'Thinking Block Idle Threshold (minutes)',
category: 'Context',
requiresRestart: false,
default: 5,
description:
'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
showInDialog: false,
},
},
},
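Since showInDialog is false, this setting is only reachable through the settings file. A minimal sketch (assuming the usual settings.json layout, with the key under context exactly as loadCliConfig reads it above):

{
  "context": {
    "gapThresholdMinutes": 10
  }
}

loadCliConfig passes this through as thinkingIdleThresholdMinutes, and Config converts it to milliseconds.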

View file

@ -52,6 +52,18 @@ export function generateCodingPlanTemplate(
// China region uses legacy fields to maintain backward compatibility
// This ensures existing users don't get prompted for unnecessary updates
return [
{
id: 'qwen3.6-plus',
name: '[ModelStudio Coding Plan] qwen3.6-plus',
baseUrl: 'https://coding.dashscope.aliyuncs.com/v1',
envKey: CODING_PLAN_ENV_KEY,
generationConfig: {
extra_body: {
enable_thinking: true,
},
contextWindowSize: 1000000,
},
},
{
id: 'qwen3.5-plus',
name: '[ModelStudio Coding Plan] qwen3.5-plus',
@ -147,6 +159,18 @@ export function generateCodingPlanTemplate(
// Global region uses ModelStudio Coding Plan branding for Global/Intl
return [
{
id: 'qwen3.6-plus',
name: '[ModelStudio Coding Plan for Global/Intl] qwen3.6-plus',
baseUrl: 'https://coding-intl.dashscope.aliyuncs.com/v1',
envKey: CODING_PLAN_ENV_KEY,
generationConfig: {
extra_body: {
enable_thinking: true,
},
contextWindowSize: 1000000,
},
},
{
id: 'qwen3.5-plus',
name: '[ModelStudio Coding Plan for Global/Intl] qwen3.5-plus',

View file

@ -1973,4 +1973,15 @@ export default {
'Vollständige Tool-Ausgabe und Denkprozess im ausführlichen Modus anzeigen (mit Strg+O umschalten).',
'Press Ctrl+O to show full tool output':
'Strg+O für vollständige Tool-Ausgabe drücken',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -2013,4 +2013,15 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Press Ctrl+O to show full tool output',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1464,4 +1464,15 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).':
'詳細モードで完全なツール出力と思考を表示します(Ctrl+O で切り替え)。',
'Press Ctrl+O to show full tool output': 'Ctrl+O で完全なツール出力を表示',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1963,4 +1963,15 @@ export default {
'Mostrar saída completa da ferramenta e raciocínio no modo detalhado (alternar com Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Pressione Ctrl+O para exibir a saída completa da ferramenta',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1970,4 +1970,15 @@ export default {
'Показывать полный вывод инструментов и процесс рассуждений в подробном режиме (переключить с помощью Ctrl+O).',
'Press Ctrl+O to show full tool output':
'Нажмите Ctrl+O для показа полного вывода инструментов',
'Switch to plan mode or exit plan mode':
'Switch to plan mode or exit plan mode',
'Exited plan mode. Previous approval mode restored.':
'Exited plan mode. Previous approval mode restored.',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'Enabled plan mode. The agent will analyze and plan without executing tools.',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'Already in plan mode. Use "/plan exit" to exit plan mode.',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'Not in plan mode. Use "/plan" to enter plan mode first.',
};

View file

@ -1817,4 +1817,14 @@ export default {
'Show full tool output and thinking in verbose mode (toggle with Ctrl+O).':
'详细模式下显示完整工具输出和思考过程(Ctrl+O 切换)。',
'Press Ctrl+O to show full tool output': '按 Ctrl+O 查看详细工具调用结果',
'Switch to plan mode or exit plan mode': '切换到计划模式或退出计划模式',
'Exited plan mode. Previous approval mode restored.':
'已退出计划模式,已恢复之前的审批模式。',
'Enabled plan mode. The agent will analyze and plan without executing tools.':
'启用计划模式。智能体将只分析和规划,而不执行工具。',
'Already in plan mode. Use "/plan exit" to exit plan mode.':
'已处于计划模式。使用 "/plan exit" 退出计划模式。',
'Not in plan mode. Use "/plan" to enter plan mode first.':
'未处于计划模式。请先使用 "/plan" 进入计划模式。',
};

View file

@ -32,6 +32,7 @@ import { languageCommand } from '../ui/commands/languageCommand.js';
import { mcpCommand } from '../ui/commands/mcpCommand.js';
import { memoryCommand } from '../ui/commands/memoryCommand.js';
import { modelCommand } from '../ui/commands/modelCommand.js';
import { planCommand } from '../ui/commands/planCommand.js';
import { permissionsCommand } from '../ui/commands/permissionsCommand.js';
import { trustCommand } from '../ui/commands/trustCommand.js';
import { quitCommand } from '../ui/commands/quitCommand.js';
@ -103,6 +104,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
mcpCommand,
memoryCommand,
modelCommand,
planCommand,
permissionsCommand,
...(this.config?.getFolderTrust() ? [trustCommand] : []),
quitCommand,

View file

@ -1113,7 +1113,7 @@ export const AppContainer = (props: AppContainerProps) => {
// Generate prompt suggestions when streaming completes
const followupSuggestionsEnabled =
settings.merged.ui?.enableFollowupSuggestions !== false;
settings.merged.ui?.enableFollowupSuggestions === true;
useEffect(() => {
// Clear suggestion when feature is disabled at runtime
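With the schema default flipped to false and the check tightened to === true, follow-up suggestions are now opt-in. Re-enabling them takes a settings entry like this sketch (key path per settings.merged.ui above):

{
  "ui": {
    "enableFollowupSuggestions": true
  }
}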

View file

@ -0,0 +1,159 @@
/**
* @license
* Copyright 2026 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi, type Mock } from 'vitest';
import { planCommand } from './planCommand.js';
import { type CommandContext } from './types.js';
import { createMockCommandContext } from '../../test-utils/mockCommandContext.js';
import { ApprovalMode } from '@qwen-code/qwen-code-core';
describe('planCommand', () => {
let mockContext: CommandContext;
beforeEach(() => {
mockContext = createMockCommandContext({
services: {
config: {
getApprovalMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT),
getPrePlanMode: vi.fn().mockReturnValue(ApprovalMode.DEFAULT),
setApprovalMode: vi.fn(),
} as unknown as import('@qwen-code/qwen-code-core').Config,
},
});
});
it('should switch to plan mode if not in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
const result = await planCommand.action(mockContext, '');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.PLAN,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content:
'Enabled plan mode. The agent will analyze and plan without executing tools.',
});
});
it('should return submit prompt if arguments are provided when switching to plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
const result = await planCommand.action(mockContext, 'refactor the code');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.PLAN,
);
expect(result).toEqual({
type: 'submit_prompt',
content: [{ text: 'refactor the code' }],
});
});
it('should return already in plan mode if mode is already plan', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, '');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Already in plan mode. Use "/plan exit" to exit plan mode.',
});
});
it('should return submit prompt if arguments are provided and already in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, 'keep planning');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'submit_prompt',
content: [{ text: 'keep planning' }],
});
});
it('should exit plan mode when exit argument is passed', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.DEFAULT,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Exited plan mode. Previous approval mode restored.',
});
});
it('should restore pre-plan mode when executing from plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
(mockContext.services.config?.getApprovalMode as Mock).mockReturnValue(
ApprovalMode.PLAN,
);
(mockContext.services.config?.getPrePlanMode as Mock).mockReturnValue(
ApprovalMode.AUTO_EDIT,
);
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.AUTO_EDIT,
);
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Exited plan mode. Previous approval mode restored.',
});
});
it('should return error when execute is used but not in plan mode', async () => {
if (!planCommand.action) {
throw new Error('The plan command must have an action.');
}
// Default mock returns ApprovalMode.DEFAULT (not PLAN)
const result = await planCommand.action(mockContext, 'exit');
expect(mockContext.services.config?.setApprovalMode).not.toHaveBeenCalled();
expect(result).toEqual({
type: 'message',
messageType: 'error',
content: 'Not in plan mode. Use "/plan" to enter plan mode first.',
});
});
});

View file

@ -0,0 +1,104 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import {
type CommandContext,
CommandKind,
type SlashCommand,
type MessageActionReturn,
type SubmitPromptActionReturn,
} from './types.js';
import { t } from '../../i18n/index.js';
import { ApprovalMode } from '@qwen-code/qwen-code-core';
export const planCommand: SlashCommand = {
name: 'plan',
get description() {
return t('Switch to plan mode or exit plan mode');
},
kind: CommandKind.BUILT_IN,
action: async (
context: CommandContext,
args: string,
): Promise<MessageActionReturn | SubmitPromptActionReturn> => {
const { config } = context.services;
if (!config) {
return {
type: 'message',
messageType: 'error',
content: t('Configuration is not available.'),
};
}
const trimmedArgs = args.trim();
const currentMode = config.getApprovalMode();
if (trimmedArgs === 'exit') {
if (currentMode !== ApprovalMode.PLAN) {
return {
type: 'message',
messageType: 'error',
content: t('Not in plan mode. Use "/plan" to enter plan mode first.'),
};
}
try {
config.setApprovalMode(config.getPrePlanMode());
} catch (e) {
return {
type: 'message',
messageType: 'error',
content: (e as Error).message,
};
}
return {
type: 'message',
messageType: 'info',
content: t('Exited plan mode. Previous approval mode restored.'),
};
}
if (currentMode !== ApprovalMode.PLAN) {
try {
config.setApprovalMode(ApprovalMode.PLAN);
} catch (e) {
return {
type: 'message',
messageType: 'error',
content: (e as Error).message,
};
}
if (trimmedArgs) {
return {
type: 'submit_prompt',
content: [{ text: trimmedArgs }],
};
}
return {
type: 'message',
messageType: 'info',
content: t(
'Enabled plan mode. The agent will analyze and plan without executing tools.',
),
};
}
// Already in plan mode
if (trimmedArgs) {
return {
type: 'submit_prompt',
content: [{ text: trimmedArgs }],
};
}
return {
type: 'message',
messageType: 'info',
content: t('Already in plan mode. Use "/plan exit" to exit plan mode.'),
};
},
};
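Taken together, the branches above give the command three invocation shapes (a usage sketch; the prompt text is illustrative):

/plan                      Enter plan mode; the agent analyzes and plans without executing tools.
/plan refactor the parser  Enter plan mode if needed, then submit the prompt.
/plan exit                 Leave plan mode and restore the pre-plan approval mode.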

View file

@ -849,7 +849,6 @@ describe('InputPrompt', () => {
// Verify useCompletion was called with correct signature
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -878,7 +877,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -907,7 +905,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -936,7 +933,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -965,7 +961,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -995,7 +990,6 @@ describe('InputPrompt', () => {
// Verify useCompletion was called with the buffer
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1024,7 +1018,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1054,7 +1047,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1084,7 +1076,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1114,7 +1105,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1144,7 +1134,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1176,7 +1165,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1206,7 +1194,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,
@ -1238,7 +1225,6 @@ describe('InputPrompt', () => {
expect(mockedUseCommandCompletion).toHaveBeenCalledWith(
mockBuffer,
['/test/project/src'],
path.join('test', 'project', 'src'),
mockSlashCommands,
mockCommandContext,

View file

@ -168,15 +168,6 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
}
}, []);
const [dirs, setDirs] = useState<readonly string[]>(
config.getWorkspaceContext().getDirectories(),
);
const dirsChanged = config.getWorkspaceContext().getDirectories();
useEffect(() => {
if (dirs.length !== dirsChanged.length) {
setDirs(dirsChanged);
}
}, [dirs.length, dirsChanged]);
const [reverseSearchActive, setReverseSearchActive] = useState(false);
const [commandSearchActive, setCommandSearchActive] = useState(false);
const [textBeforeReverseSearch, setTextBeforeReverseSearch] = useState('');
@ -190,7 +181,6 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
const completion = useCommandCompletion(
buffer,
dirs,
config.getTargetDir(),
slashCommands,
commandContext,

View file

@ -189,13 +189,35 @@ export function KeypressProvider({
clearKittyTimeout();
kittySequenceTimeout = setTimeout(() => {
if (kittySequenceBufferRef.current) {
if (debugKeystrokeLogging) {
debugLogger.debug(
'[DEBUG] Kitty buffer timeout, clearing:',
kittySequenceBufferRef.current,
);
// Before discarding, try to salvage any parseable sequences
// that may have been missed (e.g., due to chunked input).
while (kittySequenceBufferRef.current) {
const parsed = parseKittyPrefix(kittySequenceBufferRef.current);
if (parsed) {
kittySequenceBufferRef.current =
kittySequenceBufferRef.current.slice(parsed.length);
broadcast(parsed.key);
continue;
}
const plain = parsePlainTextPrefix(kittySequenceBufferRef.current);
if (plain) {
kittySequenceBufferRef.current =
kittySequenceBufferRef.current.slice(plain.length);
broadcast(plain.key);
continue;
}
break;
}
// Clear any remaining unparseable content
if (kittySequenceBufferRef.current) {
if (debugKeystrokeLogging) {
debugLogger.debug(
'[DEBUG] Kitty buffer timeout, clearing:',
kittySequenceBufferRef.current,
);
}
kittySequenceBufferRef.current = '';
}
kittySequenceBufferRef.current = '';
}
}, KITTY_SEQUENCE_TIMEOUT_MS);
};
@ -331,14 +353,19 @@ export function KeypressProvider({
};
}
// 3) CSI-u form: ESC [ <code> ; <mods> (u|~)
// 3) CSI-u and tilde-coded functional keys: ESC [ <code> ; <mods> (u|~)
// 3) CSI-u form: ESC [ <code>[:<shifted>][:<base>] ; <mods>[:<event>] [; <text>] (u|~)
// 3) CSI-u and tilde-coded functional keys with optional kitty extensions:
// Full kitty format: ESC [ code:shifted:base ; mods:event ; text u
// 'u' terminator: Kitty CSI-u; '~' terminator: tilde-coded function keys.
const csiUPrefix = new RegExp(`^${ESC}\\[(\\d+)(;(\\d+))?([u~])`);
// The colon-separated fields (shifted key, base key, event type, text)
// are optional extensions that some terminals send.
const csiUPrefix = new RegExp(
`^${ESC}\\[(\\d+)(?::\\d+)*(?:;(\\d+)(?::\\d+)*)?(?:;\\d+)?([u~])`,
);
m = buffer.match(csiUPrefix);
if (m) {
const keyCode = parseInt(m[1], 10);
let modifiers = m[3] ? parseInt(m[3], 10) : KITTY_MODIFIER_BASE;
let modifiers = m[2] ? parseInt(m[2], 10) : KITTY_MODIFIER_BASE;
if (modifiers >= KITTY_MODIFIER_EVENT_TYPES_OFFSET) {
modifiers -= KITTY_MODIFIER_EVENT_TYPES_OFFSET;
}
@ -347,7 +374,7 @@ export function KeypressProvider({
(modifierBits & MODIFIER_SHIFT_BIT) === MODIFIER_SHIFT_BIT;
const alt = (modifierBits & MODIFIER_ALT_BIT) === MODIFIER_ALT_BIT;
const ctrl = (modifierBits & MODIFIER_CTRL_BIT) === MODIFIER_CTRL_BIT;
const terminator = m[4];
const terminator = m[3];
// Tilde-coded functional keys (Delete, Insert, PageUp/Down, Home/End)
if (terminator === '~') {
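A standalone sketch of what the widened pattern accepts, assuming ESC is the escape character \x1b (note the group indices shift: modifiers move from m[3] to m[2], the terminator from m[4] to m[3]):

const ESC = '\\x1b';
const csiUPrefix = new RegExp(
  `^${ESC}\\[(\\d+)(?::\\d+)*(?:;(\\d+)(?::\\d+)*)?(?:;\\d+)?([u~])`,
);
// Plain CSI-u (old form): Ctrl+a
console.log('\x1b[97;5u'.match(csiUPrefix)?.slice(1)); // ['97', '5', 'u']
// Full kitty form: code:shifted:base ; mods:event ; text
console.log('\x1b[97:65:97;2:1;97u'.match(csiUPrefix)?.slice(1)); // ['97', '2', 'u']
// Tilde-coded functional key (Delete), no modifiers
console.log('\x1b[3~'.match(csiUPrefix)?.slice(1)); // ['3', undefined, '~']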

View file

@ -391,9 +391,9 @@ describe('useCodingPlanUpdates', () => {
>;
// Should have new China configs + custom config only (global config removed since regions are mutually exclusive)
// The China template has 8 models, so we expect 8 (from template) + 1 (custom) = 9
// The China template has 9 models, so we expect 9 (from template) + 1 (custom) = 10
// Note: description field has been removed, only name field contains the branding
expect(updatedConfigs.length).toBe(9);
expect(updatedConfigs.length).toBe(10);
// Should NOT contain the Global config (mutually exclusive)
expect(

View file

@ -84,7 +84,6 @@ const setupMocks = ({
describe('useCommandCompletion', () => {
const mockCommandContext = {} as CommandContext;
const mockConfig = {} as Config;
const testDirs: string[] = [];
const testRootDir = '/';
// Helper to create real TextBuffer objects within renderHook
@ -114,7 +113,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest(''),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -139,7 +137,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('@file');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -172,7 +169,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('@files'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -200,7 +196,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -226,7 +221,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text, cursorOffset),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -265,7 +259,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -286,7 +279,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -306,7 +298,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -332,7 +323,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -361,7 +351,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -398,7 +387,6 @@ describe('useCommandCompletion', () => {
const { result } = renderHook(() =>
useCommandCompletion(
useTextBufferForTest('/'),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -427,7 +415,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -455,7 +442,6 @@ describe('useCommandCompletion', () => {
renderHook(() =>
useCommandCompletion(
useTextBufferForTest(text),
testDirs,
testRootDir,
[],
mockCommandContext,
@ -484,7 +470,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest(text);
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -517,7 +502,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('/mem');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -547,7 +531,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest('@src/fi');
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,
@ -580,7 +563,6 @@ describe('useCommandCompletion', () => {
const textBuffer = useTextBufferForTest(text, cursorOffset);
const completion = useCommandCompletion(
textBuffer,
testDirs,
testRootDir,
[],
mockCommandContext,

View file

@ -39,7 +39,6 @@ export interface UseCommandCompletionReturn {
export function useCommandCompletion(
buffer: TextBuffer,
dirs: readonly string[],
cwd: string,
slashCommands: readonly SlashCommand[],
commandContext: CommandContext,

View file

@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-core",
"version": "0.14.1",
"version": "0.14.2",
"description": "Qwen Code Core",
"repository": {
"type": "git",

View file

@ -8,6 +8,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import type { Mock } from 'vitest';
import type { ConfigParameters, SandboxConfig } from './config.js';
import { Config, ApprovalMode } from './config.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { setGeminiMdFilename as mockSetGeminiMdFilename } from '../tools/memoryTool.js';
import {
@ -57,6 +58,9 @@ vi.mock('node:fs', async (importOriginal) => {
isDirectory: vi.fn().mockReturnValue(true),
}),
realpathSync: vi.fn((path) => path),
mkdirSync: vi.fn(),
writeFileSync: vi.fn(),
readFileSync: vi.fn(),
};
return {
...mocked,
@ -1203,6 +1207,103 @@ describe('setApprovalMode with folder trust', () => {
expect(() => config.setApprovalMode(ApprovalMode.PLAN)).not.toThrow();
});
describe('prePlanMode tracking', () => {
it('should save pre-plan mode when entering plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);
expect(config.getPrePlanMode()).toBe(ApprovalMode.AUTO_EDIT);
});
it('should clear pre-plan mode when leaving plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);
config.setApprovalMode(ApprovalMode.DEFAULT);
expect(config.getPrePlanMode()).toBe(ApprovalMode.DEFAULT);
});
it('should default to DEFAULT when no pre-plan mode was recorded', () => {
const config = new Config(baseParams);
expect(config.getPrePlanMode()).toBe(ApprovalMode.DEFAULT);
});
it('should not update pre-plan mode when already in plan mode', () => {
const config = new Config(baseParams);
vi.spyOn(config, 'isTrustedFolder').mockReturnValue(true);
config.setApprovalMode(ApprovalMode.YOLO);
config.setApprovalMode(ApprovalMode.PLAN);
// Setting PLAN again should not overwrite prePlanMode
config.setApprovalMode(ApprovalMode.PLAN);
expect(config.getPrePlanMode()).toBe(ApprovalMode.YOLO);
});
});
describe('plan file persistence', () => {
it('should save plan to disk', () => {
const config = new Config(baseParams);
config.savePlan('# My Plan\n1. Step one\n2. Step two');
expect(fs.mkdirSync).toHaveBeenCalledWith(
expect.stringContaining('plans'),
{ recursive: true },
);
expect(fs.writeFileSync).toHaveBeenCalledWith(
expect.stringContaining('.md'),
'# My Plan\n1. Step one\n2. Step two',
'utf-8',
);
});
it('should load plan from disk', () => {
const config = new Config(baseParams);
(fs.readFileSync as Mock).mockReturnValue('# Saved Plan');
const plan = config.loadPlan();
expect(plan).toBe('# Saved Plan');
});
it('should return undefined when no plan file exists', () => {
const config = new Config(baseParams);
const enoentError = new Error('ENOENT') as NodeJS.ErrnoException;
enoentError.code = 'ENOENT';
(fs.readFileSync as Mock).mockImplementation(() => {
throw enoentError;
});
const plan = config.loadPlan();
expect(plan).toBeUndefined();
});
it('should rethrow non-ENOENT errors from loadPlan', () => {
const config = new Config(baseParams);
const permError = new Error('EACCES') as NodeJS.ErrnoException;
permError.code = 'EACCES';
(fs.readFileSync as Mock).mockImplementation(() => {
throw permError;
});
expect(() => config.loadPlan()).toThrow('EACCES');
});
it('should use session ID in plan file path', () => {
const config = new Config({
...baseParams,
sessionId: 'test-session-123',
});
const filePath = config.getPlanFilePath();
expect(filePath).toContain('test-session-123');
expect(filePath).toMatch(/\.md$/);
});
});
describe('registerCoreTools', () => {
beforeEach(() => {
vi.clearAllMocks();

View file

@ -6,6 +6,7 @@
// Node built-ins
import type { EventEmitter } from 'node:events';
import * as fs from 'node:fs';
import * as path from 'node:path';
import process from 'node:process';
@ -370,6 +371,8 @@ export interface ConfigParameters {
model?: string;
outputLanguageFilePath?: string;
maxSessionTurns?: number;
/** Minutes of inactivity before clearing retained thinking blocks. */
thinkingIdleThresholdMinutes?: number;
sessionTokenLimit?: number;
experimentalZedIntegration?: boolean;
cronEnabled?: boolean;
@ -529,6 +532,7 @@ export class Config {
private sdkMode: boolean;
private geminiMdFileCount: number;
private approvalMode: ApprovalMode;
private prePlanMode?: ApprovalMode;
private readonly accessibility: AccessibilitySettings;
private readonly telemetrySettings: TelemetrySettings;
private readonly gitCoAuthor: GitCoAuthorSettings;
@ -557,6 +561,7 @@ export class Config {
private ideMode: boolean;
private readonly maxSessionTurns: number;
private readonly thinkingIdleThresholdMs: number;
private readonly sessionTokenLimit: number;
private readonly listExtensions: boolean;
private readonly overrideExtensions?: string[];
@ -683,6 +688,8 @@ export class Config {
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
this.bugCommand = params.bugCommand;
this.maxSessionTurns = params.maxSessionTurns ?? -1;
this.thinkingIdleThresholdMs =
(params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
this.experimentalZedIntegration =
params.experimentalZedIntegration ?? false;
@ -1329,6 +1336,10 @@ export class Config {
return this.maxSessionTurns;
}
getThinkingIdleThresholdMs(): number {
return this.thinkingIdleThresholdMs;
}
getSessionTokenLimit(): number {
return this.sessionTokenLimit;
}
@ -1634,6 +1645,14 @@ export class Config {
return this.approvalMode;
}
/**
* Returns the approval mode that was active before entering plan mode.
* Falls back to DEFAULT if no pre-plan mode was recorded.
*/
getPrePlanMode(): ApprovalMode {
return this.prePlanMode ?? ApprovalMode.DEFAULT;
}
setApprovalMode(mode: ApprovalMode): void {
if (
!this.isTrustedFolder() &&
@ -1644,9 +1663,55 @@ export class Config {
'Cannot enable privileged approval modes in an untrusted folder.',
);
}
// Track the mode before entering plan mode so it can be restored later
if (mode === ApprovalMode.PLAN && this.approvalMode !== ApprovalMode.PLAN) {
this.prePlanMode = this.approvalMode;
} else if (
mode !== ApprovalMode.PLAN &&
this.approvalMode === ApprovalMode.PLAN
) {
this.prePlanMode = undefined;
}
this.approvalMode = mode;
}
/**
* Returns the file path for this session's plan file.
*/
getPlanFilePath(): string {
return Storage.getPlanFilePath(this.sessionId);
}
/**
* Saves a plan to disk for the current session.
*/
savePlan(plan: string): void {
const filePath = this.getPlanFilePath();
const dir = path.dirname(filePath);
fs.mkdirSync(dir, { recursive: true });
fs.writeFileSync(filePath, plan, 'utf-8');
}
/**
* Loads the plan for the current session, or returns undefined if none exists.
*/
loadPlan(): string | undefined {
const filePath = this.getPlanFilePath();
try {
return fs.readFileSync(filePath, 'utf-8');
} catch (error: unknown) {
if (
typeof error === 'object' &&
error !== null &&
'code' in error &&
(error as NodeJS.ErrnoException).code === 'ENOENT'
) {
return undefined;
}
throw error;
}
}
getInputFormat(): 'text' | 'stream-json' {
return this.inputFormat;
}
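A usage sketch of the new approval-mode and plan-persistence surface (baseParams and the session id are hypothetical; assumes a trusted folder so privileged modes are allowed):

const config = new Config({ ...baseParams, sessionId: 'demo-session' });

config.setApprovalMode(ApprovalMode.AUTO_EDIT);
config.setApprovalMode(ApprovalMode.PLAN);       // records AUTO_EDIT as prePlanMode
config.setApprovalMode(config.getPrePlanMode()); // restores AUTO_EDIT, clears prePlanMode

config.savePlan('# Plan\n1. Step one');          // writes <global qwen dir>/plans/demo-session.md
config.loadPlan();                               // '# Plan\n1. Step one' (undefined if no file)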

View file

@ -7,4 +7,4 @@
export const DEFAULT_QWEN_MODEL = 'coder-model';
export const DEFAULT_QWEN_FLASH_MODEL = 'coder-model';
export const DEFAULT_QWEN_EMBEDDING_MODEL = 'text-embedding-v4';
export const MAINLINE_CODER_MODEL = 'qwen3.5-plus';
export const MAINLINE_CODER_MODEL = 'qwen3.6-plus';

View file

@ -18,6 +18,7 @@ const TMP_DIR_NAME = 'tmp';
const BIN_DIR_NAME = 'bin';
const PROJECT_DIR_NAME = 'projects';
const IDE_DIR_NAME = 'ide';
const PLANS_DIR_NAME = 'plans';
const DEBUG_DIR_NAME = 'debug';
const ARENA_DIR_NAME = 'arena';
@ -165,6 +166,14 @@ export class Storage {
return path.join(Storage.getRuntimeBaseDir(), IDE_DIR_NAME);
}
static getPlansDir(): string {
return path.join(Storage.getGlobalQwenDir(), PLANS_DIR_NAME);
}
static getPlanFilePath(sessionId: string): string {
return path.join(Storage.getPlansDir(), `${sessionId}.md`);
}
static getGlobalBinDir(): string {
return path.join(Storage.getGlobalQwenDir(), BIN_DIR_NAME);
}

View file

@ -423,7 +423,7 @@ describe('AnthropicContentGenerator', () => {
const [anthropicRequest] =
anthropicState.lastCreateArgs as AnthropicCreateArgs;
expect(anthropicRequest).toEqual(
expect.objectContaining({ max_tokens: 32000 }),
expect.objectContaining({ max_tokens: 8000 }),
);
});
@ -488,7 +488,7 @@ describe('AnthropicContentGenerator', () => {
const [anthropicRequest] =
anthropicState.lastCreateArgs as AnthropicCreateArgs;
expect(anthropicRequest).toEqual(
expect.objectContaining({ max_tokens: 32000 }),
expect.objectContaining({ max_tokens: 8000 }),
);
});
});

View file

@ -33,7 +33,7 @@ import { DEFAULT_TIMEOUT } from '../openaiContentGenerator/constants.js';
import { createDebugLogger } from '../../utils/debugLogger.js';
import {
tokenLimit,
DEFAULT_OUTPUT_TOKEN_LIMIT,
CAPPED_DEFAULT_MAX_TOKENS,
hasExplicitOutputLimit,
} from '../tokenLimits.js';
@ -234,12 +234,23 @@ export class AnthropicContentGenerator implements ContentGenerator {
const modelLimit = tokenLimit(modelId, 'output');
const isKnownModel = hasExplicitOutputLimit(modelId);
const maxTokens =
userMaxTokens !== undefined && userMaxTokens !== null
? isKnownModel
? Math.min(userMaxTokens, modelLimit)
: userMaxTokens
: Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
let maxTokens: number;
if (userMaxTokens !== undefined && userMaxTokens !== null) {
maxTokens = isKnownModel
? Math.min(userMaxTokens, modelLimit)
: userMaxTokens;
} else {
// No explicit user config — check env var, then use capped default.
const envVal = process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
maxTokens = isKnownModel
? Math.min(envMaxTokens, modelLimit)
: envMaxTokens;
} else {
maxTokens = Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}
}
return {
max_tokens: maxTokens,
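Condensed, the precedence above is user config, then the environment variable, then the capped default (a sketch; 8000 is CAPPED_DEFAULT_MAX_TOKENS per the updated tests):

function resolveMaxTokens(
  userMaxTokens: number | null | undefined,
  envVal: string | undefined, // process.env['QWEN_CODE_MAX_OUTPUT_TOKENS']
  modelLimit: number,
  isKnownModel: boolean,
): number {
  const clamp = (n: number) => (isKnownModel ? Math.min(n, modelLimit) : n);
  if (userMaxTokens !== undefined && userMaxTokens !== null) {
    return clamp(userMaxTokens);
  }
  const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
  if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
    return clamp(envMaxTokens);
  }
  return Math.min(modelLimit, 8000); // CAPPED_DEFAULT_MAX_TOKENS
}

resolveMaxTokens(undefined, undefined, 32000, true); // 8000 (capped default)
resolveMaxTokens(100000, undefined, 16384, false);   // 100000 (unknown model, user wins)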

View file

@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
getFileService: vi.fn().mockReturnValue(fileService),
getMaxSessionTurns: vi.fn().mockReturnValue(0),
getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
getSessionTokenLimit: vi.fn().mockReturnValue(32000),
getNoBrowser: vi.fn().mockReturnValue(false),
getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@ -427,6 +428,119 @@ describe('Gemini Client (client.ts)', () => {
});
});
describe('thinking block idle cleanup and latch', () => {
let mockChat: Partial<GeminiChat>;
beforeEach(() => {
const mockStream = (async function* () {
yield {
type: GeminiEventType.Content,
value: 'response',
};
})();
mockTurnRunFn.mockReturnValue(mockStream);
mockChat = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
it('should not strip thoughts on active session (< 5min idle)', async () => {
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-1',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(
mockChat.stripThoughtsFromHistoryKeepRecent,
).not.toHaveBeenCalled();
});
it('should latch and strip thoughts after > 5min idle', async () => {
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
client['thinkingClearLatched'] = false;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-2',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
it('should keep stripping once latched even if idle < 5min', async () => {
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
client['thinkingClearLatched'] = true;
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-3',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['thinkingClearLatched']).toBe(true);
expect(mockChat.stripThoughtsFromHistoryKeepRecent).toHaveBeenCalledWith(
1,
);
});
it('should update lastApiCompletionTimestamp after API call', async () => {
client['lastApiCompletionTimestamp'] = null;
const before = Date.now();
const gen = client.sendMessageStream(
[{ text: 'Hello' }],
new AbortController().signal,
'prompt-4',
{ type: SendMessageType.UserQuery },
);
for await (const _ of gen) {
/* drain */
}
expect(client['lastApiCompletionTimestamp']).toBeGreaterThanOrEqual(
before,
);
});
it('should reset latch and timestamp on resetChat', async () => {
client['lastApiCompletionTimestamp'] = Date.now();
client['thinkingClearLatched'] = true;
await client.resetChat();
expect(client['thinkingClearLatched']).toBe(false);
expect(client['lastApiCompletionTimestamp']).toBeNull();
});
});
describe('tryCompressChat', () => {
const mockGetHistory = vi.fn();
@ -436,6 +550,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
});
@ -457,6 +572,7 @@ describe('Gemini Client (client.ts)', () => {
getHistory: vi.fn((_curated?: boolean) => chatHistory),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockOriginalChat as GeminiChat;
@ -1149,6 +1265,7 @@ describe('Gemini Client (client.ts)', () => {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
} as unknown as GeminiChat;
client['chat'] = mockChat;
@ -1204,6 +1321,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1260,6 +1378,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1326,6 +1445,7 @@ hello
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1365,6 +1485,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1410,6 +1531,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1498,6 +1620,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1555,6 +1678,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -1636,6 +1760,7 @@ Other open files:
{ role: 'user', parts: [{ text: 'previous message' }] },
]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
@ -1889,6 +2014,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]), // Default empty history
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2228,6 +2354,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2265,6 +2392,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2305,6 +2433,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2329,6 +2458,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2361,6 +2491,7 @@ Other open files:
getHistory: vi.fn().mockReturnValue([]),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
stripOrphanedUserEntriesFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@ -2405,6 +2536,7 @@ Other open files:
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
stripThoughtsFromHistoryKeepRecent: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});

View file

@ -126,6 +126,25 @@ export class GeminiClient {
*/
private hasFailedCompressionAttempt = false;
/**
* Timestamp (epoch ms) of the last completed API call.
* Used to detect idle periods for thinking block cleanup.
* Starts as null on the first query; there is no prior thinking to clean,
* so the idle check is skipped until the first API call completes.
*/
private lastApiCompletionTimestamp: number | null = null;
/**
* Sticky-on latch for clearing thinking blocks from prior turns.
* Triggered when idle exceeds the configured threshold (default 5 min,
* aligned with provider prompt-cache TTL). Once latched, stays true to
* prevent oscillation: without it, thinking would accumulate, get
* stripped, then accumulate again, causing the message prefix to change
* repeatedly (bad for provider-side prompt caching and wastes context).
* Reset on /clear (resetChat).
*/
private thinkingClearLatched = false;
constructor(private readonly config: Config) {
this.loopDetector = new LoopDetectionService(config);
}
@ -199,6 +218,9 @@ export class GeminiClient {
}
async resetChat(): Promise<void> {
// Reset thinking clear latch — fresh chat, no prior thinking to clean up
this.thinkingClearLatched = false;
this.lastApiCompletionTimestamp = null;
await this.startChat();
}
@ -537,8 +559,27 @@ export class GeminiClient {
// record user message for session management
this.config.getChatRecordingService()?.recordUserMessage(request);
// strip thoughts from history before sending the message
this.stripThoughtsFromHistory();
// Thinking block cross-turn retention with idle cleanup:
// - Active session (< threshold idle): keep thinking blocks for reasoning coherence
// - Idle > threshold: clear old thinking, keep only last 1 turn to free context
// - Latch: once triggered, never revert — prevents oscillation
if (
!this.thinkingClearLatched &&
this.lastApiCompletionTimestamp !== null
) {
const thresholdMs = this.config.getThinkingIdleThresholdMs();
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
if (idleMs > thresholdMs) {
this.thinkingClearLatched = true;
debugLogger.debug(
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
);
}
}
if (this.thinkingClearLatched) {
this.getChat().stripThoughtsFromHistoryKeepRecent(1);
debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)');
}
}
if (messageType !== SendMessageType.Retry) {
this.sessionTurnCount++;
@ -680,6 +721,7 @@ export class GeminiClient {
if (arenaAgentClient) {
await arenaAgentClient.reportError('Loop detected');
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
@ -698,9 +740,14 @@ export class GeminiClient {
: 'Unknown error';
await arenaAgentClient.reportError(errorMsg);
}
this.lastApiCompletionTimestamp = Date.now();
return turn;
}
}
// Track API completion time for thinking block idle cleanup
this.lastApiCompletionTimestamp = Date.now();
// Fire Stop hook through MessageBus (only if hooks are enabled and registered)
// This must be done before any early returns to ensure hooks are always triggered
if (
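The latch reduces to a small sticky predicate; a sketch of the same logic outside the class:

function shouldStripThoughts(state: {
  latched: boolean;
  lastApiCompletionTimestamp: number | null;
  thresholdMs: number;
}): boolean {
  if (
    !state.latched &&
    state.lastApiCompletionTimestamp !== null &&
    Date.now() - state.lastApiCompletionTimestamp > state.thresholdMs
  ) {
    state.latched = true; // sticky: only resetChat() clears it
  }
  return state.latched;
}

// 10 minutes idle against the default 5-minute threshold: latches, so strip.
shouldStripThoughts({
  latched: false,
  lastApiCompletionTimestamp: Date.now() - 10 * 60 * 1000,
  thresholdMs: 5 * 60 * 1000,
}); // true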

View file

@ -1923,6 +1923,150 @@ describe('GeminiChat', async () => {
});
});
describe('stripThoughtsFromHistoryKeepRecent', () => {
it('should keep the most recent N model turns with thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'mid thinking', thought: true },
{ text: 'response2' },
],
},
{ role: 'user', parts: [{ text: 'msg3' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First two model turns should have thoughts stripped
expect(history[1]!.parts).toEqual([{ text: 'response1' }]);
expect(history[3]!.parts).toEqual([{ text: 'response2' }]);
// Last model turn should keep thoughts
expect(history[5]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response3' },
]);
});
it('should not strip anything when keepTurns >= model turns with thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([
{ text: 'thinking', thought: true },
{ text: 'response' },
]);
});
it('should remove model content objects that become empty after stripping', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'only thinking', thought: true }],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// The first model turn (only thoughts) should be removed entirely
expect(history).toHaveLength(3);
expect(history[0]!.parts).toEqual([{ text: 'msg1' }]);
expect(history[1]!.parts).toEqual([{ text: 'msg2' }]);
expect(history[2]!.parts).toEqual([
{ text: 'recent thinking', thought: true },
{ text: 'response' },
]);
});
it('should also strip thoughtSignature from stripped turns', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [
{ text: 'old thinking', thought: true },
{
text: 'with sig',
thoughtSignature: 'sig1',
} as unknown as { text: string; thoughtSignature: string },
{ text: 'response1' },
],
},
{ role: 'user', parts: [{ text: 'msg2' }] },
{
role: 'model',
parts: [
{ text: 'recent thinking', thought: true },
{ text: 'response2' },
],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(1);
const history = chat.getHistory();
// First model turn: thought stripped, thoughtSignature stripped
expect(history[1]!.parts).toEqual([
{ text: 'with sig' },
{ text: 'response1' },
]);
expect(
(history[1]!.parts![0] as { thoughtSignature?: string })
.thoughtSignature,
).toBeUndefined();
});
it('should handle keepTurns=0 by stripping all thoughts', () => {
chat.setHistory([
{ role: 'user', parts: [{ text: 'msg1' }] },
{
role: 'model',
parts: [{ text: 'thinking', thought: true }, { text: 'response' }],
},
]);
chat.stripThoughtsFromHistoryKeepRecent(0);
const history = chat.getHistory();
expect(history[1]!.parts).toEqual([{ text: 'response' }]);
});
});
describe('stripOrphanedUserEntriesFromHistory', () => {
it('should pop a single trailing user entry', () => {
chat.setHistory([

View file

@ -16,13 +16,14 @@ import type {
Tool,
GenerateContentResponseUsageMetadata,
} from '@google/genai';
import { createUserContent } from '@google/genai';
import { createUserContent, FinishReason } from '@google/genai';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorStatus } from '../utils/errors.js';
import { createDebugLogger } from '../utils/debugLogger.js';
import { parseAndFormatApiError } from '../utils/errorParsing.js';
import { isRateLimitError, type RetryInfo } from '../utils/rateLimit.js';
import type { Config } from '../config/config.js';
import { ESCALATED_MAX_TOKENS } from './tokenLimits.js';
import { hasCycleInSchema } from '../tools/tools.js';
import type { StructuredError } from './turn.js';
import {
@ -355,6 +356,17 @@ export class GeminiChat {
cgConfig?.maxRetries ?? RATE_LIMIT_RETRY_OPTIONS.maxRetries;
const extraRetryErrorCodes = cgConfig?.retryErrorCodes;
// Max output tokens escalation: when no user/env override is set,
// the capped default (8K) is used. If the model hits MAX_TOKENS,
// retry once with escalated limit (64K).
let maxTokensEscalated = false;
const hasUserMaxTokensOverride =
(cgConfig?.samplingParams?.max_tokens !== undefined &&
cgConfig?.samplingParams?.max_tokens !== null) ||
!!process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
let lastFinishReason: string | undefined;
for (
let attempt = 0;
attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
@ -376,7 +388,10 @@ export class GeminiChat {
prompt_id,
);
lastFinishReason = undefined;
for await (const chunk of stream) {
const fr = chunk.candidates?.[0]?.finishReason;
if (fr) lastFinishReason = fr;
yield { type: StreamEventType.CHUNK, value: chunk };
}
@ -481,6 +496,49 @@ export class GeminiChat {
}
}
// Max output tokens escalation: if the retry loop succeeded with
// the capped default (8K) but hit MAX_TOKENS, retry once at 64K.
// Placed outside the retry loop so that any errors from the
// escalated stream propagate directly (not caught by retry logic).
if (
lastError === null &&
lastFinishReason === FinishReason.MAX_TOKENS &&
!maxTokensEscalated &&
!hasUserMaxTokensOverride
) {
maxTokensEscalated = true;
debugLogger.info(
`Output truncated at capped default. Escalating to ${ESCALATED_MAX_TOKENS} tokens.`,
);
// Remove partial model response from history
// (processStreamResponse already pushed it)
if (
self.history.length > 0 &&
self.history[self.history.length - 1].role === 'model'
) {
self.history.pop();
}
// Signal UI to discard partial output
yield { type: StreamEventType.RETRY };
// Retry with escalated max_tokens
const escalatedParams: SendMessageParameters = {
...params,
config: {
...params.config,
maxOutputTokens: ESCALATED_MAX_TOKENS,
},
};
const escalatedStream = await self.makeApiCallAndProcessStream(
model,
requestContents,
escalatedParams,
prompt_id,
);
for await (const chunk of escalatedStream) {
yield { type: StreamEventType.CHUNK, value: chunk };
}
}
if (lastError) {
if (lastError instanceof InvalidStreamError) {
const totalAttempts = invalidStreamRetryCount + 1;
@ -625,6 +683,89 @@ export class GeminiChat {
.filter((content) => content.parts && content.parts.length > 0);
}
/**
* Strip thought parts from history, keeping the most recent `keepTurns`
* model turns that contain thinking blocks intact.
*
* Selection is based on thought-containing turns specifically (not all
* model turns) so the most recent reasoning chain is always preserved
* even if later model turns happen to have no thinking.
*
* Used for idle cleanup: once the configured idle threshold is exceeded,
* the old thinking blocks are no longer useful for reasoning coherence
* but still consume context tokens.
*/
stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
keepTurns = Number.isFinite(keepTurns)
? Math.max(0, Math.floor(keepTurns))
: 0;
// Find indices of model turns that contain thought parts
const modelTurnIndices: number[] = [];
for (let i = 0; i < this.history.length; i++) {
const content = this.history[i];
if (
content.role === 'model' &&
content.parts?.some(
(part) =>
part &&
typeof part === 'object' &&
'thought' in part &&
part.thought,
)
) {
modelTurnIndices.push(i);
}
}
// Determine which model turns to keep (the most recent `keepTurns`)
const turnsToStrip = new Set(
modelTurnIndices.slice(
0,
Math.max(0, modelTurnIndices.length - keepTurns),
),
);
if (turnsToStrip.size === 0) return;
this.history = this.history
.map((content, index) => {
if (!turnsToStrip.has(index) || !content.parts) return content;
// Strip thought parts from this turn
const filteredParts = content.parts
.filter(
(part) =>
!(
part &&
typeof part === 'object' &&
'thought' in part &&
part.thought
),
)
.map((part) => {
if (
part &&
typeof part === 'object' &&
'thoughtSignature' in part
) {
const newPart = { ...part };
delete (newPart as { thoughtSignature?: string })
.thoughtSignature;
return newPart;
}
return part;
});
return {
...content,
parts: filteredParts,
};
})
// Remove Content objects that have no parts left after filtering
.filter((content) => content.parts && content.parts.length > 0);
}
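A minimal usage sketch of the selection rule above, using a toy history. The Content/Part shapes follow @google/genai; `chat` stands for the surrounding GeminiChat instance and `setHistory` is assumed to replace its history.
chat.setHistory([
  { role: 'user', parts: [{ text: 'q1' }] },
  { role: 'model', parts: [{ thought: true, text: 'plan A' }, { text: 'a1' }] },
  { role: 'user', parts: [{ text: 'q2' }] },
  { role: 'model', parts: [{ thought: true, text: 'plan B' }, { text: 'a2' }] },
]);
// Two model turns carry thoughts; with keepTurns = 1 only the most recent
// thought-bearing turn keeps its thinking parts.
chat.stripThoughtsFromHistoryKeepRecent(1);
// First model turn is reduced to [{ text: 'a1' }]; the second keeps 'plan B'.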
/**
* Pop all orphaned trailing user entries from chat history.
* In a valid conversation the last entry is always a model response;

View file

@@ -40,8 +40,9 @@ const MODALITY_PATTERNS: Array<[RegExp, InputModalities]> = [
// -------------------
// Alibaba / Qwen
// -------------------
// Qwen3.5-Plus: image support
// Qwen3.5-Plus, Qwen3.6-Plus: image + video support
[/^qwen3\.5-plus/, { image: true, video: true }],
[/^qwen3\.6-plus/, { image: true, video: true }],
[/^coder-model$/, { image: true, video: true }],
// Qwen VL (vision-language) models: image + video

View file

@@ -786,9 +786,9 @@ describe('DashScopeOpenAICompatibleProvider', () => {
const result = provider.buildRequest(request, 'test-prompt-id');
// Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// qwen3-max has 32K output limit, so min(32K, 32K) = 32K
expect(result.max_tokens).toBe(32000);
// Should set capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS)
// qwen3-max has 32K output limit, so min(32K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should set conservative max_tokens when null is provided', () => {
@@ -800,8 +800,8 @@
const result = provider.buildRequest(request, 'test-prompt-id');
// null is treated as not configured, so set conservative default
expect(result.max_tokens).toBe(32000);
// null is treated as not configured, so set capped default: min(32K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should respect user max_tokens for unknown models', () => {

View file

@@ -110,8 +110,8 @@ export class DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatiblePr
}
// Apply output token limits using parent class logic
// Uses conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// to preserve input quota when user hasn't explicitly configured max_tokens
// Uses capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS=8K)
// Requests hitting the cap get one clean retry at 64K (geminiChat.ts)
const requestWithTokenLimits = this.applyOutputTokenLimit(request);
const extraBody = this.contentGeneratorConfig.extra_body;

View file

@@ -204,9 +204,9 @@ describe('DefaultOpenAICompatibleProvider', () => {
'prompt-id',
);
// Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
// GPT-4 has 16K output limit, so min(16K, 32K) = 16K
expect(result.max_tokens).toBe(16384);
// Should set capped default (min of model limit and CAPPED_DEFAULT_MAX_TOKENS)
// GPT-4 has 16K output limit, so min(16K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should respect user max_tokens for unknown models (deployment aliases, self-hosted)', () => {
@@ -223,8 +223,8 @@
expect(result.max_tokens).toBe(100000);
});
it('should use conservative default for unknown models when max_tokens not configured', () => {
// Unknown models without user config: use DEFAULT_OUTPUT_TOKEN_LIMIT
it('should use capped default for unknown models when max_tokens not configured', () => {
// Unknown models without user config: use CAPPED_DEFAULT_MAX_TOKENS
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'custom-deployment-alias',
messages: [{ role: 'user', content: 'Hello' }],
@@ -232,8 +232,8 @@
const result = provider.buildRequest(request, 'prompt-id');
// Uses conservative default (32K)
expect(result.max_tokens).toBe(32000);
// Uses capped default (8K)
expect(result.max_tokens).toBe(8000);
});
it('should cap max_tokens for known models to avoid API errors', () => {
@@ -259,8 +259,8 @@
const result = provider.buildRequest(request, 'prompt-id');
// GPT-4 has 16K output limit, so conservative default is still 16K
expect(result.max_tokens).toBe(16384);
// GPT-4 has 16K output limit, capped default is 8K: min(16K, 8K) = 8K
expect(result.max_tokens).toBe(8000);
});
it('should preserve all sampling parameters', () => {
@@ -303,7 +303,7 @@
// Should set conservative max_tokens default
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(minimalRequest.messages);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
});
it('should handle streaming requests', () => {
@@ -319,7 +319,7 @@
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(streamingRequest.messages);
expect(result.stream).toBe(true);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
});
it('should not modify the original request object', () => {
@@ -363,7 +363,7 @@
expect(result).toEqual({
...originalRequest,
max_tokens: 16384, // GPT-4 has 16K limit, min(16K, 32K) = 16K
max_tokens: 8000, // GPT-4 has 16K limit, min(16K, 8K) = 8K
custom_param: 'custom_value',
nested: { key: 'value' },
});
@@ -382,7 +382,7 @@
expect(result.model).toBe('gpt-4');
expect(result.messages).toEqual(originalRequest.messages);
expect(result.temperature).toBe(0.7);
expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
expect(result.max_tokens).toBe(8000); // GPT-4 has 16K limit, min(16K, 8K) = 8K
expect(result).not.toHaveProperty('custom_param');
});
});

View file

@@ -7,7 +7,7 @@ import type { OpenAICompatibleProvider } from './types.js';
import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js';
import {
tokenLimit,
DEFAULT_OUTPUT_TOKEN_LIMIT,
CAPPED_DEFAULT_MAX_TOKENS,
hasExplicitOutputLimit,
} from '../../tokenLimits.js';
@@ -101,18 +101,19 @@ export class DefaultOpenAICompatibleProvider
* - For unknown models (deployment aliases, self-hosted): respect user's
* configured value entirely (backend may support larger limits)
* 2. If user didn't configure max_tokens:
* - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT)
* - This provides a conservative default (32K) that avoids truncating output
* while preserving input quota (not occupying too much context window)
* - Check QWEN_CODE_MAX_OUTPUT_TOKENS env var first
* - Otherwise use min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS=8K)
* - Requests hitting the 8K cap get one clean retry at 64K (geminiChat.ts)
* 3. If model has no specific limit (tokenLimit returns default):
* - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard
* - Still apply CAPPED_DEFAULT_MAX_TOKENS as safeguard
*
* Examples:
* - User sets 4K, known model limit 64K uses 4K (respects user preference)
* - User sets 100K, known model limit 64K uses 64K (capped to avoid API error)
* - User sets 100K, unknown model uses 100K (respects user, backend may support it)
* - User not set, model limit 64K uses 32K (conservative default)
* - User not set, model limit 8K uses 8K (model limit is lower)
* - User not set, model limit 64K uses 8K (capped default for slot optimization)
* - User not set, model limit 4K uses 4K (model limit is lower)
* - User not set, env QWEN_CODE_MAX_OUTPUT_TOKENS=16000 -> uses 16K
*
* @param request - The chat completion request parameters
* @returns The request with max_tokens adjusted according to the logic
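The resolution order above, condensed into a standalone sketch (the actual implementation follows in the next hunk; the function name is illustrative and `CAPPED_DEFAULT_MAX_TOKENS` is imported as in this diff):
function resolveMaxTokens(
  userMaxTokens: number | undefined,
  modelLimit: number,
  isKnownModel: boolean,
): number {
  // 1. An explicit user value wins; cap to the model limit only for known
  //    models (unknown backends may genuinely support more).
  if (userMaxTokens !== undefined) {
    return isKnownModel ? Math.min(userMaxTokens, modelLimit) : userMaxTokens;
  }
  // 2. Env var override, same capping rule.
  const envMaxTokens = parseInt(
    process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'] ?? '',
    10,
  );
  if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
    return isKnownModel ? Math.min(envMaxTokens, modelLimit) : envMaxTokens;
  }
  // 3. Capped 8K default; escalation to 64K happens later in geminiChat.ts.
  return Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}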
@@ -140,9 +141,18 @@
effectiveMaxTokens = userMaxTokens;
}
} else {
// User didn't configure, use conservative default:
// min(model-specific limit, DEFAULT_OUTPUT_TOKEN_LIMIT)
effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
// No explicit user config — check env var, then use capped default.
// Capped default (8K) reduces GPU slot over-reservation by ~4×.
// Requests hitting the cap get one clean retry at 64K (geminiChat.ts).
const envVal = process.env['QWEN_CODE_MAX_OUTPUT_TOKENS'];
const envMaxTokens = envVal ? parseInt(envVal, 10) : NaN;
if (!isNaN(envMaxTokens) && envMaxTokens > 0) {
effectiveMaxTokens = isKnownModel
? Math.min(envMaxTokens, modelLimit)
: envMaxTokens;
} else {
effectiveMaxTokens = Math.min(modelLimit, CAPPED_DEFAULT_MAX_TOKENS);
}
}
return {

View file

@@ -11,6 +11,13 @@ export type TokenLimitType = 'input' | 'output';
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000; // 32K tokens
// Capped default for slot-reservation optimization. 99% of outputs are under 5K
// tokens, so 32K defaults over-reserve 4-6× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64K
// (see geminiChat.ts max_output_tokens escalation).
export const CAPPED_DEFAULT_MAX_TOKENS: TokenCount = 8_000;
export const ESCALATED_MAX_TOKENS: TokenCount = 64_000;
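Back-of-envelope arithmetic behind the cap (illustrative numbers taken from the comment above, not a measurement):
// A serving slot reserves roughly max_tokens of output capacity per request.
// Against a ~5K typical output, the old 32K default reserves 32/5 ≈ 6.4× the
// needed capacity; the 8K cap reserves 8/5 = 1.6×, roughly a 4× reduction,
// consistent with the "4-6×" figure above.
const typicalOutput = 5_000;
console.log(32_000 / typicalOutput); // 6.4
console.log(CAPPED_DEFAULT_MAX_TOKENS / typicalOutput); // 1.6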
/**
* Accurate numeric limits:
* - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.)

View file

@@ -280,8 +280,13 @@ export class Turn {
return;
}
// Handle the new RETRY event
// Handle the new RETRY event: clear accumulated state from the
// previous attempt to avoid duplicate tool calls and stale metadata.
if (streamEvent.type === 'retry') {
this.pendingToolCalls.length = 0;
this.pendingCitations.clear();
this.debugResponses = [];
this.finishReason = undefined;
yield {
type: GeminiEventType.Retry,
retryInfo: streamEvent.retryInfo,

View file

@@ -420,6 +420,7 @@ export class HookRunner {
}
}
const killedBySignal = exitCode === null;
resolve({
hookConfig,
eventName,
@@ -427,8 +428,11 @@
output,
stdout,
stderr,
exitCode: exitCode || EXIT_CODE_SUCCESS,
exitCode: exitCode ?? -1,
duration,
...(killedBySignal && {
error: new Error('Hook killed by signal'),
}),
});
});
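The `exitCode === null` branch leans on Node's child_process contract: the exit callback receives a null code when the child was terminated by a signal. A self-contained sketch of that behavior (the spawned command is illustrative):
import { spawn } from 'node:child_process';

const child = spawn('sleep', ['60']);
child.on('exit', (code, signal) => {
  // Killed by a signal: code is null and signal names it (e.g. 'SIGTERM').
  // Mapping that case to exitCode: -1 plus an Error mirrors the change above.
  const killedBySignal = code === null;
  console.log({ exitCode: code ?? -1, killedBySignal, signal });
});
child.kill('SIGTERM');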

View file

@@ -18,9 +18,11 @@ describe('ExitPlanModeTool', () => {
approvalMode = ApprovalMode.PLAN;
mockConfig = {
getApprovalMode: vi.fn(() => approvalMode),
getPrePlanMode: vi.fn(() => ApprovalMode.DEFAULT),
setApprovalMode: vi.fn((mode: ApprovalMode) => {
approvalMode = mode;
}),
savePlan: vi.fn(),
} as unknown as Config;
tool = new ExitPlanModeTool(mockConfig);
@@ -147,6 +149,9 @@
ApprovalMode.DEFAULT,
);
expect(approvalMode).toBe(ApprovalMode.DEFAULT);
// Plan should be saved to disk
expect(mockConfig.savePlan).toHaveBeenCalledWith(params.plan);
});
it('should request confirmation with plan details', async () => {
@@ -173,6 +178,29 @@
expect(approvalMode).toBe(ApprovalMode.AUTO_EDIT);
});
it('should set DEFAULT mode on ProceedOnce regardless of pre-plan mode', async () => {
// Even if pre-plan mode was AUTO_EDIT, ProceedOnce ("manually approve
// edits") should always set DEFAULT to match the option label semantics.
(mockConfig.getPrePlanMode as ReturnType<typeof vi.fn>).mockReturnValue(
ApprovalMode.AUTO_EDIT,
);
const params: ExitPlanModeParams = { plan: 'Restore test' };
const signal = new AbortController().signal;
const invocation = tool.build(params);
const confirmation = await invocation.getConfirmationDetails(signal);
if (confirmation) {
await confirmation.onConfirm(ToolConfirmationOutcome.ProceedOnce);
}
expect(mockConfig.setApprovalMode).toHaveBeenCalledWith(
ApprovalMode.DEFAULT,
);
expect(approvalMode).toBe(ApprovalMode.DEFAULT);
});
it('should remain in plan mode when confirmation is rejected', async () => {
const params: ExitPlanModeParams = {
plan: 'Remain in planning',
@@ -199,6 +227,9 @@
ApprovalMode.PLAN,
);
expect(approvalMode).toBe(ApprovalMode.PLAN);
// Plan should NOT be saved when rejected
expect(mockConfig.savePlan).not.toHaveBeenCalled();
});
it('should have correct description', () => {

View file

@@ -147,6 +147,15 @@ class ExitPlanModeToolInvocation extends BaseToolInvocation<
};
}
// Persist the approved plan to disk
try {
this.config.savePlan(plan);
} catch (error) {
debugLogger.warn(
`[ExitPlanModeTool] Failed to save plan to disk: ${error instanceof Error ? error.message : String(error)}`,
);
}
const llmMessage = `User has approved your plan. You can now start coding. Start with updating your todo list if applicable.`;
const displayMessage = 'User approved the plan.';

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.14.1",
"version": "0.14.2",
"private": true,
"main": "src/index.ts",
"license": "Apache-2.0",

View file

@@ -179,7 +179,12 @@ async function main() {
// Since @qwen-code/webui marks it as external in its own Vite build, the
// browser bundle must also mark it external to avoid bundling Node.js-only
// modules (undici, @grpc/grpc-js, fs, stream, etc.) into the webview.
external: ['@qwen-code/qwen-code-core'],
// The wildcard ensures deep sub-path imports (e.g.
// '@qwen-code/qwen-code-core/src/core/tokenLimits.js') are also excluded;
// without it esbuild only matches the bare package name and attempts to
// bundle the sub-path, which triggers "Dynamic require is not supported"
// at runtime in the browser.
external: ['@qwen-code/qwen-code-core', '@qwen-code/qwen-code-core/*'],
logLevel: 'silent',
plugins: [reactDedupPlugin, cssInjectPlugin, esbuildProblemMatcherPlugin],
jsx: 'automatic', // Use new JSX transform (React 17+)
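esbuild matches `external` entries exactly unless the pattern contains a `*` wildcard, which is why the bare package name alone was not enough. A reduced sketch of the distinction (entry point and outfile are assumptions):
import * as esbuild from 'esbuild';

// The bare name externalizes only `import ... from '@qwen-code/qwen-code-core'`;
// the '/*' pattern also externalizes deep imports such as
// '@qwen-code/qwen-code-core/src/core/tokenLimits.js'.
await esbuild.build({
  entryPoints: ['src/webview.tsx'],
  bundle: true,
  platform: 'browser',
  external: ['@qwen-code/qwen-code-core', '@qwen-code/qwen-code-core/*'],
  outfile: 'dist/webview.js',
});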

View file

@@ -2,7 +2,7 @@
"name": "qwen-code-vscode-ide-companion",
"displayName": "Qwen Code Companion",
"description": "Enable Qwen Code with direct access to your VS Code workspace.",
"version": "0.14.1",
"version": "0.14.2",
"publisher": "qwenlm",
"icon": "assets/icon.png",
"repository": {

View file

@@ -183,7 +183,7 @@
"enableFollowupSuggestions": {
"description": "Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.",
"type": "boolean",
"default": true
"default": false
},
"enableCacheSharing": {
"description": "Use cache-aware forked queries for suggestion generation. Reduces cost on providers that support prefix caching (experimental).",
@@ -388,6 +388,11 @@
"default": true
}
}
},
"gapThresholdMinutes": {
"description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
"type": "number",
"default": 5
}
}
},

View file

@@ -4,8 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { isSupportedImageMimeType } from '@qwen-code/qwen-code-core/src/utils/request-tokenizer/supportedImageFormats.js';
// ---------- Types ----------
export interface ImageAttachment {
@@ -61,6 +59,31 @@ export function unescapePath(filePath: string): string {
);
}
// ---------- Supported image MIME types ----------
// Inlined from @qwen-code/qwen-code-core to avoid pulling Node.js-only modules
// into the browser webview bundle (esbuild marks core as external, but deep
// sub-path imports like core/src/utils/... bypass the external filter and cause
// "Dynamic require is not supported" at runtime).
const SUPPORTED_IMAGE_MIME_TYPES: readonly string[] = [
'image/bmp',
'image/jpeg',
'image/jpg',
'image/png',
'image/tiff',
'image/webp',
'image/heic',
];
/**
* Check whether a MIME type is supported for pasted-image processing.
* @param mimeType - The MIME type string to validate
* @returns `true` when the type is in the supported list
*/
function isSupportedImageMimeType(mimeType: string): boolean {
return SUPPORTED_IMAGE_MIME_TYPES.includes(mimeType);
}
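Behavior is unchanged from the core helper; for example:
isSupportedImageMimeType('image/png'); // true
isSupportedImageMimeType('image/gif'); // false (GIF is not in the inlined list)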
// ---------- Image format detection ----------
const PASTED_IMAGE_MIME_TO_EXTENSION: Record<string, string> = {

View file

@@ -0,0 +1,196 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Browser-safe subset of @qwen-code/qwen-code-core tokenLimits.
*
* The webview bundle (IIFE, platform: browser) cannot `require` Node.js
* packages. This module replicates the constants and logic the webview
* actually uses so that the core package never needs to be pulled into the
* browser bundle.
*
* Keep this file in sync with:
* packages/core/src/core/tokenLimits.ts
*/
type TokenCount = number;
// ---------------------------------------------------------------------------
// Public constants
// ---------------------------------------------------------------------------
/** Default input context window size: 128K tokens (power-of-two). */
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072;
// ---------------------------------------------------------------------------
// Token limit types
// ---------------------------------------------------------------------------
export type TokenLimitType = 'input' | 'output';
// ---------------------------------------------------------------------------
// Internal constants
// ---------------------------------------------------------------------------
const LIMITS = {
'32k': 32_768,
'64k': 65_536,
'128k': 131_072,
'192k': 196_608,
'200k': 200_000,
'256k': 262_144,
'272k': 272_000,
'400k': 400_000,
'512k': 524_288,
'1m': 1_000_000,
'4k': 4_096,
'8k': 8_192,
'16k': 16_384,
} as const;
const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000;
// ---------------------------------------------------------------------------
// Model name normaliser
// ---------------------------------------------------------------------------
/**
* Robust normaliser: strips provider prefixes, pipes/colons, date/version
* suffixes, quantisation markers, etc.
* @param model - Raw model identifier string
* @returns Normalised lowercase model name
*/
function normalize(model: string): string {
let s = (model ?? '').toLowerCase().trim();
s = s.replace(/^.*\//, '');
s = s.split('|').pop() ?? s;
s = s.split(':').pop() ?? s;
s = s.replace(/\s+/g, '-');
s = s.replace(/-preview/g, '');
if (
!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/) &&
!s.match(/^kimi-k2-\d{4}$/)
) {
s = s.replace(
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
'',
);
}
s = s.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, '');
return s;
}
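A few traced examples of the normaliser (derived from the rules above, not an exhaustive specification):
// normalize('Anthropic/claude-sonnet-4-20250514') -> 'claude-sonnet-4'
//   (provider prefix stripped, lowercased, 4+ digit date suffix removed)
// normalize('gpt-5-preview')                      -> 'gpt-5'
// normalize('qwen-plus-latest')                   -> 'qwen-plus-latest'
//   (protected by the explicit allowlist, so '-latest' survives)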
// ---------------------------------------------------------------------------
// Input context-window patterns (most specific → most general)
// ---------------------------------------------------------------------------
const INPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
// Google Gemini
[/^gemini-3/, LIMITS['1m']],
[/^gemini-/, LIMITS['1m']],
// OpenAI
[/^gpt-5/, LIMITS['272k']],
[/^gpt-/, LIMITS['128k']],
[/^o\d/, LIMITS['200k']],
// Anthropic Claude
[/^claude-/, LIMITS['200k']],
// Alibaba / Qwen
[/^qwen3-coder-plus/, LIMITS['1m']],
[/^qwen3-coder-flash/, LIMITS['1m']],
[/^qwen3\.\d/, LIMITS['1m']],
[/^qwen-plus-latest$/, LIMITS['1m']],
[/^qwen-flash-latest$/, LIMITS['1m']],
[/^coder-model$/, LIMITS['1m']],
[/^qwen3-max/, LIMITS['256k']],
[/^qwen3-coder-/, LIMITS['256k']],
[/^qwen/, LIMITS['256k']],
// DeepSeek
[/^deepseek/, LIMITS['128k']],
// Zhipu GLM
[/^glm-5/, 202_752 as TokenCount],
[/^glm-/, 202_752 as TokenCount],
// MiniMax
[/^minimax-m2\.5/i, LIMITS['192k']],
[/^minimax-/i, LIMITS['200k']],
// Moonshot / Kimi
[/^kimi-/, LIMITS['256k']],
// ByteDance Seed-OSS
[/^seed-oss/, LIMITS['512k']],
];
// ---------------------------------------------------------------------------
// Output token-limit patterns
// ---------------------------------------------------------------------------
const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
[/^gemini-3/, LIMITS['64k']],
[/^gemini-/, LIMITS['8k']],
[/^gpt-5/, LIMITS['128k']],
[/^gpt-/, LIMITS['16k']],
[/^o\d/, LIMITS['128k']],
[/^claude-opus-4-6/, LIMITS['128k']],
[/^claude-sonnet-4-6/, LIMITS['64k']],
[/^claude-/, LIMITS['64k']],
[/^qwen3\.\d/, LIMITS['64k']],
[/^coder-model$/, LIMITS['64k']],
[/^qwen/, LIMITS['32k']],
[/^deepseek-reasoner/, LIMITS['64k']],
[/^deepseek-r1/, LIMITS['64k']],
[/^deepseek-chat/, LIMITS['8k']],
[/^glm-5/, LIMITS['16k']],
[/^glm-4\.7/, LIMITS['16k']],
[/^minimax-m2\.5/i, LIMITS['64k']],
[/^kimi-k2\.5/, LIMITS['32k']],
];
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
* Return the token limit for a given model name.
*
* This is a browser-safe mirror of `tokenLimit()` in
* `@qwen-code/qwen-code-core`. The webview only calls this as a fallback
* when `modelInfo._meta.contextLimit` is unavailable.
*
* @param model - The model identifier string
* @param type - 'input' for context window, 'output' for generation limit
* @returns Maximum token count for the model and type
*/
export function tokenLimit(
model: string,
type: TokenLimitType = 'input',
): TokenCount {
const norm = normalize(model);
const patterns = type === 'output' ? OUTPUT_PATTERNS : INPUT_PATTERNS;
for (const [regex, limit] of patterns) {
if (regex.test(norm)) {
return limit;
}
}
return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
}
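Example lookups against the tables above:
// tokenLimit('qwen3-max')                    -> 262_144 (input, 256K pattern)
// tokenLimit('gpt-4o', 'output')             -> 16_384  (matches /^gpt-/)
// tokenLimit('some-unknown-model')           -> 131_072 (DEFAULT_TOKEN_LIMIT)
// tokenLimit('some-unknown-model', 'output') -> 32_000  (DEFAULT_OUTPUT_TOKEN_LIMIT)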

View file

@@ -52,10 +52,7 @@ import type { ApprovalModeValue } from '../types/approvalModeValueTypes.js';
import type { PlanEntry, UsageStatsPayload } from '../types/chatTypes.js';
import type { ModelInfo, AvailableCommand } from '@agentclientprotocol/sdk';
import type { Question } from '../types/acpTypes.js';
import {
DEFAULT_TOKEN_LIMIT,
tokenLimit,
} from '@qwen-code/qwen-code-core/src/core/tokenLimits.js';
import { DEFAULT_TOKEN_LIMIT, tokenLimit } from '../utils/tokenLimits.js';
import { useImagePaste, type WebViewImageMessage } from './hooks/useImage.js';
export const App: React.FC = () => {

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/web-templates",
"version": "0.14.1",
"version": "0.14.2",
"description": "Web templates bundled as embeddable JS/CSS strings",
"repository": {
"type": "git",

View file

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/webui",
"version": "0.14.1",
"version": "0.14.2",
"description": "Shared UI components for Qwen Code packages",
"type": "module",
"main": "./dist/index.cjs",

View file

@@ -112,7 +112,6 @@ function createFollowupController(
suggestion_length: text.length,
});
} catch (e: unknown) {
console.error('[followup] onOutcome callback threw:', e);
}
@@ -122,7 +121,6 @@
try {
getOnAccept?.()?.(text);
} catch (error: unknown) {
console.error('[followup] onAccept callback threw:', error);
} finally {
if (acceptTimeoutId) {
@@ -154,7 +152,6 @@
suggestion_length: currentState.suggestion.length,
});
} catch (e: unknown) {
console.error('[followup] onOutcome callback threw:', e);
}
}