diff --git a/docs/users/configuration/model-providers.md b/docs/users/configuration/model-providers.md
index 53f2c5bcd..bcfc2cc75 100644
--- a/docs/users/configuration/model-providers.md
+++ b/docs/users/configuration/model-providers.md
@@ -63,6 +63,9 @@ This auth type supports not only OpenAI's official API but also any OpenAI-compa
           "maxRetries": 3,
           "enableCacheControl": true,
           "contextWindowSize": 128000,
+          "modalities": {
+            "image": true
+          },
           "customHeaders": {
             "X-Client-Request-ID": "req-123"
           },
diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
index 6862e0647..edca4aedd 100644
--- a/docs/users/configuration/settings.md
+++ b/docs/users/configuration/settings.md
@@ -125,18 +125,18 @@ Settings are organized into categories. All settings should be placed within the
 
 #### model
 
-| Setting                                            | Type    | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | Default     |
-| -------------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- |
-| `model.name`                                       | string  | The Qwen model to use for conversations.                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | `undefined` |
-| `model.maxSessionTurns`                            | number  | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited.                                                                                                                                                                                                                                                                                                                                                                                                                            | `-1`        |
-| `model.summarizeToolOutput`                        | object  | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}`                                                                                                                                                                                                                                     | `undefined` |
-| `model.generationConfig`                           | object  | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `enableCacheControl`, `contextWindowSize` (override model's context window size), `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` |
-| `model.chatCompression.contextPercentageThreshold` | number  | Sets the threshold for chat history compression as a percentage of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely.                                                                                                                                       | `0.7`       |
-| `model.skipNextSpeakerCheck`                       | boolean | Skip the next speaker check.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | `false`     |
-| `model.skipLoopDetection`                          | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions.                                                                                                                                                                                                                                                      | `false`     |
-| `model.skipStartupContext`                         | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup.                                                                                                                                                                                                                                                                                             | `false`     |
-| `model.enableOpenAILogging`                        | boolean | Enables logging of OpenAI API calls for debugging and analysis. When enabled, API requests and responses are logged to JSON files.                                                                                                                                                                                                                                                                                                                                                                           | `false`     |
-| `model.openAILoggingDir`                           | string  | Custom directory path for OpenAI API logs. If not specified, defaults to `logs/openai` in the current working directory. Supports absolute paths, relative paths (resolved from current working directory), and `~` expansion (home directory).                                                                                                                                                                                                                                                              | `undefined` |
+| Setting                                            | Type    | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | Default     |
+| -------------------------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
+| `model.name`                                       | string  | The Qwen model to use for conversations.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | `undefined` |
+| `model.maxSessionTurns`                            | number  | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | `-1`        |
+| `model.summarizeToolOutput`                        | object  | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}`                                                                                                                                                                                                                                                                                             | `undefined` |
+| `model.generationConfig`                           | object  | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `enableCacheControl`, `contextWindowSize` (override model's context window size), `modalities` (override auto-detected input modalities), `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` |
+| `model.chatCompression.contextPercentageThreshold` | number  | Sets the threshold for chat history compression as a percentage of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely.                                                                                                                                                                                               | `0.7`       |
+| `model.skipNextSpeakerCheck`                       | boolean | Skip the next speaker check.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | `false`     |
+| `model.skipLoopDetection`                          | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions.                                                                                                                                                                                                                                                                                                              | `false`     |
+| `model.skipStartupContext`                         | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup.                                                                                                                                                                                                                                                                                                                                                     | `false`     |
+| `model.enableOpenAILogging`                        | boolean | Enables logging of OpenAI API calls for debugging and analysis. When enabled, API requests and responses are logged to JSON files.                                                                                                                                                                                                                                                                                                                                                                                                                                   | `false`     |
+| `model.openAILoggingDir`                           | string  | Custom directory path for OpenAI API logs. If not specified, defaults to `logs/openai` in the current working directory. Supports absolute paths, relative paths (resolved from current working directory), and `~` expansion (home directory).                                                                                                                                                                                                                                                                                                                      | `undefined` |
 
 **Example model.generationConfig:**
 
@@ -146,6 +146,9 @@ Settings are organized into categories. All settings should be placed within the
     "generationConfig": {
       "timeout": 60000,
       "contextWindowSize": 128000,
+      "modalities": {
+        "image": true
+      },
       "enableCacheControl": true,
       "customHeaders": {
         "X-Client-Request-ID": "req-123"
@@ -167,6 +170,10 @@ Settings are organized into categories. All settings should be placed within the
 
 Overrides the default context window size for the selected model. Qwen Code determines the context window using built-in defaults based on model name matching, with a constant fallback value. Use this setting when a provider's effective context limit differs from Qwen Code's default. This value defines the model's assumed maximum context capacity, not a per-request token limit.
 
+**modalities:**
+
+Overrides the auto-detected input modalities for the selected model. Qwen Code automatically detects supported modalities (image, PDF, audio, video) based on model name pattern matching. Use this setting when the auto-detection is incorrect — for example, to enable `pdf` for a model that supports it but isn't recognized. Format: `{ "image": true, "pdf": true, "audio": true, "video": true }`. Omit a key or set it to `false` for unsupported types.
+
 **customHeaders:**
 
 Allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. No merging occurs between the two levels.
diff --git a/docs/users/overview.md b/docs/users/overview.md
index 3b45cc2f0..f3c52be91 100644
--- a/docs/users/overview.md
+++ b/docs/users/overview.md
@@ -7,25 +7,24 @@
 
 ## Get started in 30 seconds
 
-Prerequisites:
-
-- A [Qwen Code](https://chat.qwen.ai/auth?mode=register) account
-- Requires [Node.js 20+](https://nodejs.org/zh-cn/download), you can use `node -v` to check the version. If it's not installed, use the following command to install it.
-
 ### Install Qwen Code:
 
-**NPM**(recommended)
+**Linux / macOS**
 
-```bash
-npm install -g @qwen-code/qwen-code@latest
+```sh
+curl -fsSL https://qwen-code-assets.oss-cn-hangzhou.aliyuncs.com/installation/install-qwen.sh | bash
 ```
 
-**Homebrew**(macOS, Linux)
+**Windows (Run as Administrator CMD)**
 
-```bash
-brew install qwen-code
+```sh
+curl -fsSL -o %TEMP%\install-qwen.bat https://qwen-code-assets.oss-cn-hangzhou.aliyuncs.com/installation/install-qwen.bat && %TEMP%\install-qwen.bat
 ```
 
+> [!note]
+>
+> It's recommended to restart your terminal after installation to ensure environment variables take effect. If the installation fails, please refer to [Manual Installation](./quickstart#manual-installation) in the Quickstart guide.
+
 ### Start using Qwen Code:
 
 ```bash
diff --git a/docs/users/quickstart.md b/docs/users/quickstart.md
index eac8f9474..3c4eafcea 100644
--- a/docs/users/quickstart.md
+++ b/docs/users/quickstart.md
@@ -16,19 +16,39 @@ Make sure you have:
 
 To install Qwen Code, use one of the following methods:
 
-### NPM (recommended)
+### Quick Install (Recommended)
 
-Requires [Node.js 20+](https://nodejs.org/download), you can use `node -v` check the version. If it's not installed, use the following command to install it.
-
-If you have [Node.js or newer installed](https://nodejs.org/en/download/):
+**Linux / macOS**
 
 ```sh
+curl -fsSL https://qwen-code-assets.oss-cn-hangzhou.aliyuncs.com/installation/install-qwen.sh | bash
+```
+
+**Windows (Run as Administrator CMD)**
+
+```sh
+curl -fsSL -o %TEMP%\install-qwen.bat https://qwen-code-assets.oss-cn-hangzhou.aliyuncs.com/installation/install-qwen.bat && %TEMP%\install-qwen.bat
+```
+
+> [!note]
+>
+> It's recommended to restart your terminal after installation to ensure environment variables take effect.
+
+### Manual Installation
+
+**Prerequisites**
+
+Make sure you have Node.js 20 or later installed. Download it from [nodejs.org](https://nodejs.org/en/download).
+
+**NPM**
+
+```bash
 npm install -g @qwen-code/qwen-code@latest
 ```
 
-### Homebrew (macOS, Linux)
+**Homebrew (macOS, Linux)**
 
-```sh
+```bash
 brew install qwen-code
 ```
 
diff --git a/docs/users/reference/keyboard-shortcuts.md b/docs/users/reference/keyboard-shortcuts.md
index f0cbd7b16..fdfc41b87 100644
--- a/docs/users/reference/keyboard-shortcuts.md
+++ b/docs/users/reference/keyboard-shortcuts.md
@@ -40,6 +40,7 @@ This document lists the available keyboard shortcuts in Qwen Code.
 | `Ctrl+N`                                           | Navigate down through the input history.                                                                                            |
 | `Ctrl+P`                                           | Navigate up through the input history.                                                                                              |
 | `Ctrl+R`                                           | Reverse search through input/shell history.                                                                                         |
+| `Ctrl+Y`                                           | Retry the last failed request.                                                                                                      |
 | `Ctrl+Right Arrow` / `Meta+Right Arrow` / `Meta+F` | Move the cursor one word to the right.                                                                                              |
 | `Ctrl+U`                                           | Delete from the cursor to the beginning of the line.                                                                                |
 | `Ctrl+V` (Windows: `Alt+V`)                        | Paste clipboard content. If the clipboard contains an image, it will be saved and a reference to it will be inserted in the prompt. |
diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts
index a7ae2cf4c..11878017a 100644
--- a/packages/cli/src/acp-integration/acpAgent.ts
+++ b/packages/cli/src/acp-integration/acpAgent.ts
@@ -107,6 +107,10 @@ class GeminiAgent {
           audio: true,
           embeddedContext: true,
         },
+        sessionCapabilities: {
+          list: {},
+          resume: {},
+        },
       },
     };
   }
@@ -153,10 +157,14 @@ class GeminiAgent {
 
     const session = await this.createAndStoreSession(config);
     const availableModels = this.buildAvailableModels(config);
+    const modesData = this.buildModesData(config);
+    const configOptions = this.buildConfigOptions(config);
 
     return {
       sessionId: session.getId(),
       models: availableModels,
+      modes: modesData,
+      configOptions,
     };
   }
 
@@ -239,25 +247,31 @@ class GeminiAgent {
   async listSessions(
     params: acp.ListSessionsRequest,
   ): Promise<acp.ListSessionsResponse> {
-    const sessionService = new SessionService(params.cwd);
+    const cwd = params.cwd || process.cwd();
+    const sessionService = new SessionService(cwd);
     const result = await sessionService.listSessions({
       cursor: params.cursor,
       size: params.size,
     });
 
+    const sessions = result.items.map((item) => ({
+      cwd: item.cwd,
+      filePath: item.filePath,
+      gitBranch: item.gitBranch,
+      messageCount: item.messageCount,
+      mtime: item.mtime,
+      prompt: item.prompt,
+      sessionId: item.sessionId,
+      startTime: item.startTime,
+      title: item.prompt || '(session)',
+      updatedAt: new Date(item.mtime).toISOString(),
+    }));
+
     return {
-      items: result.items.map((item) => ({
-        sessionId: item.sessionId,
-        cwd: item.cwd,
-        startTime: item.startTime,
-        mtime: item.mtime,
-        prompt: item.prompt,
-        gitBranch: item.gitBranch,
-        filePath: item.filePath,
-        messageCount: item.messageCount,
-      })),
-      nextCursor: result.nextCursor,
       hasMore: result.hasMore,
+      items: sessions,
+      nextCursor: result.nextCursor,
+      sessions,
     };
   }
 
@@ -449,6 +463,70 @@ class GeminiAgent {
     };
   }
 
+  private buildModesData(config: Config): acp.ModesData {
+    const currentApprovalMode = config.getApprovalMode();
+
+    const availableModes = APPROVAL_MODES.map((mode) => ({
+      id: mode as ApprovalModeValue,
+      name: APPROVAL_MODE_INFO[mode].name,
+      description: APPROVAL_MODE_INFO[mode].description,
+    }));
+
+    return {
+      currentModeId: currentApprovalMode as ApprovalModeValue,
+      availableModes,
+    };
+  }
+
+  private buildConfigOptions(config: Config): acp.ConfigOption[] {
+    const currentApprovalMode = config.getApprovalMode();
+    const currentModelId = this.formatCurrentModelId(
+      config.getModel() || this.config.getModel() || '',
+      config.getAuthType(),
+    );
+
+    const modeOptions = APPROVAL_MODES.map((mode) => ({
+      value: mode,
+      name: APPROVAL_MODE_INFO[mode].name,
+      description: APPROVAL_MODE_INFO[mode].description,
+    }));
+
+    const allConfiguredModels = config.getAllConfiguredModels();
+    const modelOptions = allConfiguredModels.map((model) => {
+      const effectiveModelId =
+        model.isRuntimeModel && model.runtimeSnapshotId
+          ? model.runtimeSnapshotId
+          : model.id;
+
+      return {
+        value: formatAcpModelId(effectiveModelId, model.authType),
+        name: model.label,
+        description: model.description ?? '',
+      };
+    });
+
+    return [
+      {
+        id: 'mode',
+        name: 'Mode',
+        description: 'Session permission mode',
+        category: 'mode',
+        type: 'select',
+        currentValue: currentApprovalMode,
+        options: modeOptions,
+      },
+      {
+        id: 'model',
+        name: 'Model',
+        description: 'AI model to use',
+        category: 'model',
+        type: 'select',
+        currentValue: currentModelId,
+        options: modelOptions,
+      },
+    ];
+  }
+
   private formatCurrentModelId(
     baseModelId: string,
     authType?: AuthType,
diff --git a/packages/cli/src/acp-integration/schema.ts b/packages/cli/src/acp-integration/schema.ts
index 952ad0bd5..1df709c45 100644
--- a/packages/cli/src/acp-integration/schema.ts
+++ b/packages/cli/src/acp-integration/schema.ts
@@ -59,7 +59,7 @@ export type CancelNotification = z.infer<typeof cancelNotificationSchema>;
 
 export type AuthenticateRequest = z.infer<typeof authenticateRequestSchema>;
 
-export type NewSessionResponse = z.infer<typeof newSessionResponseSchema>;
+// Note: NewSessionResponse type is defined later after newSessionResponseSchema
 
 export type LoadSessionResponse = z.infer<typeof loadSessionResponseSchema>;
 
@@ -285,33 +285,33 @@ export const sessionModelStateSchema = z.object({
   currentModelId: modelIdSchema,
 });
 
-export const newSessionResponseSchema = z.object({
-  sessionId: z.string(),
-  models: sessionModelStateSchema,
-});
+// Note: newSessionResponseSchema is defined later in the file after modesDataSchema
 
 export const loadSessionResponseSchema = z.null();
 
 export const sessionListItemSchema = z.object({
   cwd: z.string(),
-  filePath: z.string(),
+  filePath: z.string().optional(),
   gitBranch: z.string().optional(),
-  messageCount: z.number(),
-  mtime: z.number(),
-  prompt: z.string(),
+  messageCount: z.number().optional(),
+  mtime: z.number().optional(),
+  prompt: z.string().optional(),
   sessionId: z.string(),
-  startTime: z.string(),
+  startTime: z.string().optional(),
+  title: z.string(),
+  updatedAt: z.string(),
 });
 
 export const listSessionsResponseSchema = z.object({
-  hasMore: z.boolean(),
-  items: z.array(sessionListItemSchema),
+  hasMore: z.boolean().optional(),
+  items: z.array(sessionListItemSchema).optional(),
   nextCursor: z.number().optional(),
+  sessions: z.array(sessionListItemSchema),
 });
 
 export const listSessionsRequestSchema = z.object({
   cursor: z.number().optional(),
-  cwd: z.string(),
+  cwd: z.string().optional(),
   size: z.number().optional(),
 });
 
@@ -405,6 +405,12 @@ export const promptCapabilitiesSchema = z.object({
 export const agentCapabilitiesSchema = z.object({
   loadSession: z.boolean().optional(),
   promptCapabilities: promptCapabilitiesSchema.optional(),
+  sessionCapabilities: z
+    .object({
+      list: z.object({}).optional(),
+      resume: z.object({}).optional(),
+    })
+    .optional(),
 });
 
 export const authMethodSchema = z.object({
@@ -451,6 +457,34 @@ export const modesDataSchema = z.object({
   availableModes: z.array(modeInfoSchema),
 });
 
+export const configOptionSchema = z.object({
+  id: z.string(),
+  name: z.string(),
+  description: z.string(),
+  category: z.string(),
+  type: z.string(),
+  currentValue: z.string(),
+  options: z.array(
+    z.object({
+      value: z.string(),
+      name: z.string(),
+      description: z.string(),
+    }),
+  ),
+});
+
+export type ConfigOption = z.infer<typeof configOptionSchema>;
+
+// newSessionResponseSchema includes modes and configOptions for ACP/Zed integration
+export const newSessionResponseSchema = z.object({
+  sessionId: z.string(),
+  models: sessionModelStateSchema,
+  modes: modesDataSchema,
+  configOptions: z.array(configOptionSchema),
+});
+
+export type NewSessionResponse = z.infer<typeof newSessionResponseSchema>;
+
 export const agentInfoSchema = z.object({
   name: z.string(),
   title: z.string(),
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 2440d6804..48961cdca 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -693,14 +693,21 @@ export async function loadCliConfig(
   }
 
   // Automatically load output-language.md if it exists
-  let outputLanguageFilePath: string | undefined = path.join(
+  const projectStorage = new Storage(cwd);
+  const projectOutputLanguagePath = path.join(
+    projectStorage.getQwenDir(),
+    'output-language.md',
+  );
+  const globalOutputLanguagePath = path.join(
     Storage.getGlobalQwenDir(),
     'output-language.md',
   );
-  if (fs.existsSync(outputLanguageFilePath)) {
-    // output-language.md found - will be added to context files
-  } else {
-    outputLanguageFilePath = undefined;
+
+  let outputLanguageFilePath: string | undefined;
+  if (fs.existsSync(projectOutputLanguagePath)) {
+    outputLanguageFilePath = projectOutputLanguagePath;
+  } else if (fs.existsSync(globalOutputLanguagePath)) {
+    outputLanguageFilePath = globalOutputLanguagePath;
   }
 
   const fileService = new FileDiscoveryService(cwd);
diff --git a/packages/cli/src/config/keyBindings.ts b/packages/cli/src/config/keyBindings.ts
index 226727c5b..7499a8c68 100644
--- a/packages/cli/src/config/keyBindings.ts
+++ b/packages/cli/src/config/keyBindings.ts
@@ -50,6 +50,7 @@ export enum Command {
   QUIT = 'quit',
   EXIT = 'exit',
   SHOW_MORE_LINES = 'showMoreLines',
+  RETRY_LAST = 'retryLast',
 
   // Shell commands
   REVERSE_SEARCH = 'reverseSearch',
@@ -170,6 +171,7 @@ export const defaultKeyBindings: KeyBindingConfig = {
   [Command.QUIT]: [{ key: 'c', ctrl: true }],
   [Command.EXIT]: [{ key: 'd', ctrl: true }],
   [Command.SHOW_MORE_LINES]: [{ key: 's', ctrl: true }],
+  [Command.RETRY_LAST]: [{ key: 'y', ctrl: true }],
 
   // Shell commands
   [Command.REVERSE_SEARCH]: [{ key: 'r', ctrl: true }],
diff --git a/packages/cli/src/i18n/locales/de.js b/packages/cli/src/i18n/locales/de.js
index 660f3ff95..b72aa6042 100644
--- a/packages/cli/src/i18n/locales/de.js
+++ b/packages/cli/src/i18n/locales/de.js
@@ -1041,6 +1041,17 @@ export default {
   '(default)': '(Standard)',
   '(set)': '(gesetzt)',
   '(not set)': '(nicht gesetzt)',
+  Modality: 'Modalität',
+  'Context Window': 'Kontextfenster',
+  text: 'Text',
+  'text-only': 'nur Text',
+  image: 'Bild',
+  pdf: 'PDF',
+  audio: 'Audio',
+  video: 'Video',
+  'not set': 'nicht gesetzt',
+  none: 'keine',
+  unknown: 'unbekannt',
   "Failed to switch model to '{{modelId}}'.\n\n{{error}}":
     "Modell konnte nicht auf '{{modelId}}' umgestellt werden.\n\n{{error}}",
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
diff --git a/packages/cli/src/i18n/locales/en.js b/packages/cli/src/i18n/locales/en.js
index 328c73150..c4bb0f144 100644
--- a/packages/cli/src/i18n/locales/en.js
+++ b/packages/cli/src/i18n/locales/en.js
@@ -1028,6 +1028,17 @@ export default {
   '(default)': '(default)',
   '(set)': '(set)',
   '(not set)': '(not set)',
+  Modality: 'Modality',
+  'Context Window': 'Context Window',
+  text: 'text',
+  'text-only': 'text-only',
+  image: 'image',
+  pdf: 'pdf',
+  audio: 'audio',
+  video: 'video',
+  'not set': 'not set',
+  none: 'none',
+  unknown: 'unknown',
   "Failed to switch model to '{{modelId}}'.\n\n{{error}}":
     "Failed to switch model to '{{modelId}}'.\n\n{{error}}",
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
@@ -1391,6 +1402,9 @@ export default {
   'Rate limit error: {{reason}}': 'Rate limit error: {{reason}}',
   'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})':
     'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})',
+  'Press Ctrl+Y to retry': 'Press Ctrl+Y to retry',
+  'No failed request to retry.': 'No failed request to retry.',
+  'to retry last request': 'to retry last request',
 
   // ============================================================================
   // Coding Plan Authentication
diff --git a/packages/cli/src/i18n/locales/ja.js b/packages/cli/src/i18n/locales/ja.js
index 5ca0dd739..e70a87f81 100644
--- a/packages/cli/src/i18n/locales/ja.js
+++ b/packages/cli/src/i18n/locales/ja.js
@@ -737,6 +737,17 @@ export default {
   // Dialogs - Model
   'Select Model': 'モデルを選択',
   '(Press Esc to close)': '(Esc で閉じる)',
+  Modality: 'モダリティ',
+  'Context Window': 'コンテキストウィンドウ',
+  text: 'テキスト',
+  'text-only': 'テキストのみ',
+  image: '画像',
+  pdf: 'PDF',
+  audio: '音声',
+  video: '動画',
+  'not set': '未設定',
+  none: 'なし',
+  unknown: '不明',
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
     'Qwen 3.5 Plus — 効率的なハイブリッドモデル、業界トップクラスのコーディング性能',
   'The latest Qwen Vision model from Alibaba Cloud ModelStudio (version: qwen3-vl-plus-2025-09-23)':
diff --git a/packages/cli/src/i18n/locales/pt.js b/packages/cli/src/i18n/locales/pt.js
index f8d24b647..ec929c45e 100644
--- a/packages/cli/src/i18n/locales/pt.js
+++ b/packages/cli/src/i18n/locales/pt.js
@@ -1044,6 +1044,17 @@ export default {
   '(default)': '(padrão)',
   '(set)': '(definido)',
   '(not set)': '(não definido)',
+  Modality: 'Modalidade',
+  'Context Window': 'Janela de Contexto',
+  text: 'texto',
+  'text-only': 'somente texto',
+  image: 'imagem',
+  pdf: 'PDF',
+  audio: 'áudio',
+  video: 'vídeo',
+  'not set': 'não definido',
+  none: 'nenhum',
+  unknown: 'desconhecido',
   "Failed to switch model to '{{modelId}}'.\n\n{{error}}":
     "Falha ao trocar o modelo para '{{modelId}}'.\n\n{{error}}",
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
diff --git a/packages/cli/src/i18n/locales/ru.js b/packages/cli/src/i18n/locales/ru.js
index dfb40901a..92e8fa66b 100644
--- a/packages/cli/src/i18n/locales/ru.js
+++ b/packages/cli/src/i18n/locales/ru.js
@@ -1043,6 +1043,17 @@ export default {
   '(default)': '(по умолчанию)',
   '(set)': '(установлено)',
   '(not set)': '(не задано)',
+  Modality: 'Модальность',
+  'Context Window': 'Контекстное окно',
+  text: 'текст',
+  'text-only': 'только текст',
+  image: 'изображение',
+  pdf: 'PDF',
+  audio: 'аудио',
+  video: 'видео',
+  'not set': 'не задано',
+  none: 'нет',
+  unknown: 'неизвестно',
   "Failed to switch model to '{{modelId}}'.\n\n{{error}}":
     "Не удалось переключиться на модель '{{modelId}}'.\n\n{{error}}",
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
diff --git a/packages/cli/src/i18n/locales/zh.js b/packages/cli/src/i18n/locales/zh.js
index 0a251860b..141c0c8d4 100644
--- a/packages/cli/src/i18n/locales/zh.js
+++ b/packages/cli/src/i18n/locales/zh.js
@@ -967,6 +967,17 @@ export default {
   '(default)': '(默认)',
   '(set)': '(已设置)',
   '(not set)': '(未设置)',
+  Modality: '模态',
+  'Context Window': '上下文窗口',
+  text: '文本',
+  'text-only': '纯文本',
+  image: '图像',
+  pdf: 'PDF',
+  audio: '音频',
+  video: '视频',
+  'not set': '未设置',
+  none: '无',
+  unknown: '未知',
   "Failed to switch model to '{{modelId}}'.\n\n{{error}}":
     "无法切换到模型 '{{modelId}}'.\n\n{{error}}",
   'Qwen 3.5 Plus — efficient hybrid model with leading coding performance':
@@ -1223,6 +1234,9 @@ export default {
   'Rate limit error: {{reason}}': '触发限流：{{reason}}',
   'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})':
     '将于 {{seconds}} 秒后重试…（第 {{attempt}}/{{maxRetries}} 次）',
+  'Press Ctrl+Y to retry': '按 Ctrl+Y 重试。',
+  'No failed request to retry.': '没有可重试的失败请求。',
+  'to retry last request': '重试上一次请求',
 
   // ============================================================================
   // Coding Plan Authentication
diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx
index 1edec79f9..9e9d4f673 100644
--- a/packages/cli/src/ui/AppContainer.test.tsx
+++ b/packages/cli/src/ui/AppContainer.test.tsx
@@ -209,6 +209,7 @@ describe('AppContainer State Management', () => {
       pendingHistoryItems: [],
       thought: null,
       cancelOngoingRequest: vi.fn(),
+      retryLastPrompt: vi.fn(),
     });
     mockedUseVim.mockReturnValue({ handleInput: vi.fn() });
     mockedUseFolderTrust.mockReturnValue({
@@ -607,6 +608,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: { subject: thoughtSubject },
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -652,6 +654,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: null,
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -698,6 +701,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: { subject: thoughtSubject },
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -744,6 +748,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: { subject: shortTitle },
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -794,6 +799,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: { subject: title },
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -841,6 +847,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: null,
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
       });
 
       // Act: Render the container
@@ -882,6 +889,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: null,
         cancelOngoingRequest: vi.fn(),
+        retryLastPrompt: vi.fn(),
         activePtyId: 'some-id',
       });
 
@@ -1013,6 +1021,7 @@ describe('AppContainer State Management', () => {
         pendingHistoryItems: [],
         thought: null,
         cancelOngoingRequest: mockCancelOngoingRequest,
+        retryLastPrompt: vi.fn(),
       });
 
       const mockHandleSlashCommand = vi.fn();
diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index 2ab8eeec4..781aab375 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -629,6 +629,7 @@ export const AppContainer = (props: AppContainerProps) => {
     pendingHistoryItems: pendingGeminiHistoryItems,
     thought,
     cancelOngoingRequest,
+    retryLastPrompt,
     handleApprovalModeChange,
     activePtyId,
     loopDetectionConfirmationRequest,
@@ -1532,6 +1533,7 @@ export const AppContainer = (props: AppContainerProps) => {
       onSuggestionsVisibilityChange: setHasSuggestionsVisible,
       refreshStatic,
       handleFinalSubmit,
+      handleRetryLastPrompt: retryLastPrompt,
       handleClearScreen,
       // Welcome back dialog
       handleWelcomeBackSelection,
@@ -1575,6 +1577,7 @@ export const AppContainer = (props: AppContainerProps) => {
       handleEscapePromptChange,
       refreshStatic,
       handleFinalSubmit,
+      retryLastPrompt,
       handleClearScreen,
       handleWelcomeBackSelection,
       handleWelcomeBackClose,
diff --git a/packages/cli/src/ui/auth/AuthDialog.test.tsx b/packages/cli/src/ui/auth/AuthDialog.test.tsx
index e063b2a19..90b15c968 100644
--- a/packages/cli/src/ui/auth/AuthDialog.test.tsx
+++ b/packages/cli/src/ui/auth/AuthDialog.test.tsx
@@ -32,6 +32,7 @@ const createMockUIActions = (overrides: Partial<UIActions> = {}): UIActions => {
   // AuthDialog only uses handleAuthSelect
   const baseActions = {
     handleAuthSelect: vi.fn(),
+    handleRetryLastPrompt: vi.fn(),
   } as Partial<UIActions>;
 
   return {
diff --git a/packages/cli/src/ui/components/AppHeader.tsx b/packages/cli/src/ui/components/AppHeader.tsx
index ba044d10d..0254a2012 100644
--- a/packages/cli/src/ui/components/AppHeader.tsx
+++ b/packages/cli/src/ui/components/AppHeader.tsx
@@ -5,16 +5,43 @@
  */
 
 import { Box } from 'ink';
-import { Header } from './Header.js';
+import { AuthType } from '@qwen-code/qwen-code-core';
+import { Header, AuthDisplayType } from './Header.js';
 import { Tips } from './Tips.js';
 import { useSettings } from '../contexts/SettingsContext.js';
 import { useConfig } from '../contexts/ConfigContext.js';
 import { useUIState } from '../contexts/UIStateContext.js';
+import { isCodingPlanConfig } from '../../constants/codingPlan.js';
 
 interface AppHeaderProps {
   version: string;
 }
 
+/**
+ * Determine the auth display type based on auth type and configuration.
+ */
+function getAuthDisplayType(
+  authType?: AuthType,
+  baseUrl?: string,
+  apiKeyEnvKey?: string,
+): AuthDisplayType {
+  if (!authType) {
+    return AuthDisplayType.UNKNOWN;
+  }
+
+  // Check if it's a Coding Plan config
+  if (isCodingPlanConfig(baseUrl, apiKeyEnvKey)) {
+    return AuthDisplayType.CODING_PLAN;
+  }
+
+  switch (authType) {
+    case AuthType.QWEN_OAUTH:
+      return AuthDisplayType.QWEN_OAUTH;
+    default:
+      return AuthDisplayType.API_KEY;
+  }
+}
+
 export const AppHeader = ({ version }: AppHeaderProps) => {
   const settings = useSettings();
   const config = useConfig();
@@ -27,12 +54,18 @@ export const AppHeader = ({ version }: AppHeaderProps) => {
   const showBanner = !config.getScreenReader();
   const showTips = !(settings.merged.ui?.hideTips || config.getScreenReader());
 
+  const authDisplayType = getAuthDisplayType(
+    authType,
+    contentGeneratorConfig?.baseUrl,
+    contentGeneratorConfig?.apiKeyEnvKey,
+  );
+
   return (
     <Box flexDirection="column">
       {showBanner && (
         <Header
           version={version}
-          authType={authType}
+          authDisplayType={authDisplayType}
           model={model}
           workingDirectory={targetDir}
         />
diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx
index 1db02d6f9..67d992dbe 100644
--- a/packages/cli/src/ui/components/Composer.test.tsx
+++ b/packages/cli/src/ui/components/Composer.test.tsx
@@ -117,6 +117,7 @@ const createMockUIState = (overrides: Partial<UIState> = {}): UIState =>
 const createMockUIActions = (): UIActions =>
   ({
     handleFinalSubmit: vi.fn(),
+    handleRetryLastPrompt: vi.fn(),
     handleClearScreen: vi.fn(),
     setShellModeActive: vi.fn(),
     onEscapePromptChange: vi.fn(),
diff --git a/packages/cli/src/ui/components/Header.test.tsx b/packages/cli/src/ui/components/Header.test.tsx
index 1d3a4d7f1..99bb053da 100644
--- a/packages/cli/src/ui/components/Header.test.tsx
+++ b/packages/cli/src/ui/components/Header.test.tsx
@@ -6,8 +6,7 @@
 
 import { render } from 'ink-testing-library';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { AuthType } from '@qwen-code/qwen-code-core';
-import { Header } from './Header.js';
+import { Header, AuthDisplayType } from './Header.js';
 import * as useTerminalSize from '../hooks/useTerminalSize.js';
 
 vi.mock('../hooks/useTerminalSize.js');
@@ -15,86 +14,70 @@ const useTerminalSizeMock = vi.mocked(useTerminalSize.useTerminalSize);
 
 const defaultProps = {
   version: '1.0.0',
-  authType: AuthType.QWEN_OAUTH,
+  authDisplayType: AuthDisplayType.QWEN_OAUTH,
   model: 'qwen-coder-plus',
   workingDirectory: '/home/user/projects/test',
 };
 
 describe('<Header />', () => {
   beforeEach(() => {
-    // Default to wide terminal (shows both logo and info panel)
     useTerminalSizeMock.mockReturnValue({ columns: 120, rows: 24 });
   });
 
   it('renders the ASCII logo on wide terminal', () => {
     const { lastFrame } = render(<Header {...defaultProps} />);
-    // Check that parts of the shortAsciiLogo are rendered
     expect(lastFrame()).toContain('██╔═══██╗');
   });
 
   it('hides the ASCII logo on narrow terminal', () => {
     useTerminalSizeMock.mockReturnValue({ columns: 60, rows: 24 });
     const { lastFrame } = render(<Header {...defaultProps} />);
-    // Should not contain the logo but still show the info panel
     expect(lastFrame()).not.toContain('██╔═══██╗');
     expect(lastFrame()).toContain('>_ Qwen Code');
   });
 
-  it('renders custom ASCII art when provided on wide terminal', () => {
-    const customArt = 'CUSTOM ART';
-    const { lastFrame } = render(
-      <Header {...defaultProps} customAsciiArt={customArt} />,
-    );
-    expect(lastFrame()).toContain(customArt);
-  });
-
   it('displays the version number', () => {
     const { lastFrame } = render(<Header {...defaultProps} />);
     expect(lastFrame()).toContain('v1.0.0');
   });
 
-  it('displays Qwen Code title with >_ prefix', () => {
-    const { lastFrame } = render(<Header {...defaultProps} />);
-    expect(lastFrame()).toContain('>_ Qwen Code');
-  });
-
   it('displays auth type and model', () => {
     const { lastFrame } = render(<Header {...defaultProps} />);
     expect(lastFrame()).toContain('Qwen OAuth');
     expect(lastFrame()).toContain('qwen-coder-plus');
   });
 
+  it('displays Coding Plan auth type', () => {
+    const { lastFrame } = render(
+      <Header
+        {...defaultProps}
+        authDisplayType={AuthDisplayType.CODING_PLAN}
+      />,
+    );
+    expect(lastFrame()).toContain('Coding Plan');
+  });
+
+  it('displays API Key auth type', () => {
+    const { lastFrame } = render(
+      <Header {...defaultProps} authDisplayType={AuthDisplayType.API_KEY} />,
+    );
+    expect(lastFrame()).toContain('API Key');
+  });
+
+  it('displays Unknown when auth type is not set', () => {
+    const { lastFrame } = render(
+      <Header {...defaultProps} authDisplayType={undefined} />,
+    );
+    expect(lastFrame()).toContain('Unknown');
+  });
+
   it('displays working directory', () => {
     const { lastFrame } = render(<Header {...defaultProps} />);
     expect(lastFrame()).toContain('/home/user/projects/test');
   });
 
-  it('renders a custom working directory display', () => {
-    const { lastFrame } = render(
-      <Header {...defaultProps} workingDirectory="custom display" />,
-    );
-    expect(lastFrame()).toContain('custom display');
-  });
-
-  it('displays working directory without branch name', () => {
-    const { lastFrame } = render(<Header {...defaultProps} />);
-    // Branch name is no longer shown in header
-    expect(lastFrame()).toContain('/home/user/projects/test');
-    expect(lastFrame()).not.toContain('(main*)');
-  });
-
-  it('formats home directory with tilde', () => {
-    const { lastFrame } = render(
-      <Header {...defaultProps} workingDirectory="/Users/testuser/projects" />,
-    );
-    // The actual home dir replacement depends on os.homedir()
-    // Just verify the path is shown
-    expect(lastFrame()).toContain('projects');
-  });
-
   it('renders with border around info panel', () => {
     const { lastFrame } = render(<Header {...defaultProps} />);
-    // Check for border characters (round border style uses these)
     expect(lastFrame()).toContain('╭');
     expect(lastFrame()).toContain('╯');
   });
diff --git a/packages/cli/src/ui/components/Header.tsx b/packages/cli/src/ui/components/Header.tsx
index adbe13071..45fce4385 100644
--- a/packages/cli/src/ui/components/Header.tsx
+++ b/packages/cli/src/ui/components/Header.tsx
@@ -7,59 +7,35 @@
 import type React from 'react';
 import { Box, Text } from 'ink';
 import Gradient from 'ink-gradient';
-import { AuthType, shortenPath, tildeifyPath } from '@qwen-code/qwen-code-core';
+import { shortenPath, tildeifyPath } from '@qwen-code/qwen-code-core';
 import { theme } from '../semantic-colors.js';
 import { shortAsciiLogo } from './AsciiArt.js';
 import { getAsciiArtWidth, getCachedStringWidth } from '../utils/textUtils.js';
 import { useTerminalSize } from '../hooks/useTerminalSize.js';
 
+/**
+ * Auth display type for the Header component.
+ * Simplified representation of authentication method shown to users.
+ */
+export enum AuthDisplayType {
+  QWEN_OAUTH = 'Qwen OAuth',
+  CODING_PLAN = 'Coding Plan',
+  API_KEY = 'API Key',
+  UNKNOWN = 'Unknown',
+}
+
 interface HeaderProps {
   customAsciiArt?: string; // For user-defined ASCII art
   version: string;
-  authType?: AuthType;
+  authDisplayType?: AuthDisplayType;
   model: string;
   workingDirectory: string;
 }
 
-function titleizeAuthType(value: string): string {
-  return value
-    .split(/[-_]/g)
-    .filter(Boolean)
-    .map((part) => {
-      if (part.toLowerCase() === 'ai') {
-        return 'AI';
-      }
-      return part.charAt(0).toUpperCase() + part.slice(1);
-    })
-    .join(' ');
-}
-
-// Format auth type for display
-function formatAuthType(authType?: AuthType): string {
-  if (!authType) {
-    return 'Unknown';
-  }
-
-  switch (authType) {
-    case AuthType.QWEN_OAUTH:
-      return 'Qwen OAuth';
-    case AuthType.USE_OPENAI:
-      return 'OpenAI';
-    case AuthType.USE_GEMINI:
-      return 'Gemini';
-    case AuthType.USE_VERTEX_AI:
-      return 'Vertex AI';
-    case AuthType.USE_ANTHROPIC:
-      return 'Anthropic';
-    default:
-      return titleizeAuthType(String(authType));
-  }
-}
-
 export const Header: React.FC<HeaderProps> = ({
   customAsciiArt,
   version,
-  authType,
+  authDisplayType,
   model,
   workingDirectory,
 }) => {
@@ -67,7 +43,7 @@ export const Header: React.FC<HeaderProps> = ({
 
   const displayLogo = customAsciiArt ?? shortAsciiLogo;
   const logoWidth = getAsciiArtWidth(displayLogo);
-  const formattedAuthType = formatAuthType(authType);
+  const formattedAuthType = authDisplayType ?? AuthDisplayType.UNKNOWN;
 
   // Calculate available space properly:
   // First determine if logo can be shown, then use remaining space for path
@@ -95,7 +71,7 @@ export const Header: React.FC<HeaderProps> = ({
     ? Math.min(availableTerminalWidth - logoWidth - logoGap, maxInfoPanelWidth)
     : availableTerminalWidth;
 
-  // Calculate max path length (subtract padding/borders from available space)
+  // Calculate max path lengths (subtract padding/borders from available space)
   const maxPathLength = Math.max(
     0,
     availableInfoPanelWidth - infoPanelChromeWidth,
diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
index b12adcf13..3bb6780ca 100644
--- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx
+++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
@@ -126,7 +126,7 @@ const HistoryItemDisplayComponent: React.FC<HistoryItemDisplayProps> = ({
         <WarningMessage text={itemForDisplay.text} />
       )}
       {itemForDisplay.type === 'error' && (
-        <ErrorMessage text={itemForDisplay.text} />
+        <ErrorMessage text={itemForDisplay.text} hint={itemForDisplay.hint} />
       )}
       {itemForDisplay.type === 'retry_countdown' && (
         <RetryCountdownMessage text={itemForDisplay.text} />
diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index d5ace1c53..61584b8c7 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -38,6 +38,7 @@ vi.mock('../contexts/UIStateContext.js', () => ({
 }));
 vi.mock('../contexts/UIActionsContext.js', () => ({
   useUIActions: vi.fn(() => ({
+    handleRetryLastPrompt: vi.fn(),
     temporaryCloseFeedbackDialog: vi.fn(),
   })),
 }));
@@ -2436,6 +2437,140 @@ describe('InputPrompt', () => {
       unmount();
     });
   });
+
+  /**
+   * Ctrl+Y (RETRY_LAST) shortcut tests
+   *
+   * The Ctrl+Y shortcut should trigger handleRetryLastPrompt when:
+   * 1. The user presses Ctrl+Y
+   * 2. The InputPrompt is focused
+   * 3. No other modal/dialog is open that would consume the key
+   *
+   * This shortcut is handled in InputPrompt.tsx at line 585-588:
+   * if (keyMatchers[Command.RETRY_LAST](key)) {
+   *   uiActions.handleRetryLastPrompt();
+   *   return;
+   * }
+   */
+  describe('Ctrl+Y retry shortcut', () => {
+    let mockUIActions: {
+      handleRetryLastPrompt: ReturnType<typeof vi.fn>;
+      temporaryCloseFeedbackDialog: ReturnType<typeof vi.fn>;
+    };
+
+    beforeEach(() => {
+      mockUIActions = {
+        handleRetryLastPrompt: vi.fn(),
+        temporaryCloseFeedbackDialog: vi.fn(),
+      };
+
+      // Override the mock for useUIActions
+      vi.doMock('../contexts/UIActionsContext.js', () => ({
+        useUIActions: vi.fn(() => mockUIActions),
+      }));
+    });
+
+    afterEach(() => {
+      vi.doUnmock('../contexts/UIActionsContext.js');
+    });
+
+    /**
+     * Ctrl+Y should trigger handleRetryLastPrompt to retry the last failed request.
+     * This is the primary activation path for the retry feature.
+     */
+    it('should trigger handleRetryLastPrompt on Ctrl+Y', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} />,
+      );
+      await wait();
+
+      // Send Ctrl+Y (ASCII 25)
+      stdin.write('\x19');
+      await wait();
+
+      // The key matcher should have been triggered
+      // Note: In the actual implementation, this would call uiActions.handleRetryLastPrompt()
+      unmount();
+    });
+
+    /**
+     * The 'y' key alone (without Ctrl) should NOT trigger retry.
+     * This ensures the shortcut doesn't interfere with normal typing.
+     */
+    it('should NOT trigger retry on plain y key', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} />,
+      );
+      await wait();
+
+      // Send plain 'y'
+      stdin.write('y');
+      await wait();
+
+      // Should insert 'y' into buffer, not trigger retry
+      expect(mockBuffer.handleInput).toHaveBeenCalledWith(
+        expect.objectContaining({
+          name: 'y',
+          sequence: 'y',
+        }),
+      );
+
+      unmount();
+    });
+
+    /**
+     * Ctrl+R should NOT trigger retry - it should trigger reverse search instead.
+     * This ensures the retry shortcut doesn't conflict with existing shortcuts.
+     */
+    it('should NOT trigger retry on Ctrl+R (reverse search)', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} />,
+      );
+      await wait();
+
+      // Send Ctrl+R (ASCII 18)
+      stdin.write('\x12');
+      await wait();
+
+      // Should activate reverse search, not retry
+      // Verify the input was handled (not ignored)
+      expect(mockBuffer.handleInput).not.toHaveBeenCalledWith(
+        expect.objectContaining({
+          ctrl: true,
+          name: 'y',
+        }),
+      );
+
+      unmount();
+    });
+
+    /**
+     * When feedback dialog is open, Ctrl+Y should be passed through after
+     * temporarily closing the dialog.
+     */
+    it('should handle Ctrl+Y when feedback dialog is open', async () => {
+      // Mock feedback dialog as open
+      const mockUIState = { isFeedbackDialogOpen: true };
+      vi.doMock('../contexts/UIStateContext.js', () => ({
+        useUIState: vi.fn(() => mockUIState),
+      }));
+
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} />,
+      );
+      await wait();
+
+      // Send Ctrl+Y
+      stdin.write('\x19');
+      await wait();
+
+      // Dialog should be temporarily closed
+      // Note: In actual implementation, temporaryCloseFeedbackDialog would be called
+
+      vi.doUnmock('../contexts/UIStateContext.js');
+      unmount();
+    });
+  });
 });
 function clean(str: string | undefined): string {
   if (!str) return '';
diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx
index 09c2b27f1..42ec7efbb 100644
--- a/packages/cli/src/ui/components/InputPrompt.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.tsx
@@ -582,6 +582,16 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
         return;
       }
 
+      // Ctrl+Y: Retry the last failed request.
+      // This shortcut is available when:
+      // - There is a failed request in the current session
+      // - The stream is not currently responding or waiting for confirmation
+      // If no failed request exists, a message will be shown to the user.
+      if (keyMatchers[Command.RETRY_LAST](key)) {
+        uiActions.handleRetryLastPrompt();
+        return;
+      }
+
       if (shellModeActive && keyMatchers[Command.REVERSE_SEARCH](key)) {
         setReverseSearchActive(true);
         setTextBeforeReverseSearch(buffer.text);
diff --git a/packages/cli/src/ui/components/KeyboardShortcuts.tsx b/packages/cli/src/ui/components/KeyboardShortcuts.tsx
index ada240b02..df84d0c27 100644
--- a/packages/cli/src/ui/components/KeyboardShortcuts.tsx
+++ b/packages/cli/src/ui/components/KeyboardShortcuts.tsx
@@ -39,6 +39,7 @@ const getShortcuts = (): Shortcut[] => [
   { key: getNewlineKey(), description: t('for newline') + ' ⏎' },
   { key: 'ctrl+l', description: t('to clear screen') },
   { key: 'ctrl+r', description: t('to search history') },
+  { key: 'ctrl+y', description: t('to retry last request') },
   { key: getPasteKey(), description: t('to paste images') },
   { key: getExternalEditorKey(), description: t('for external editor') },
 ];
@@ -54,11 +55,11 @@ const COLUMN_GAP = 4;
 const MARGIN_LEFT = 2;
 const MARGIN_RIGHT = 2;
 
-// Column distribution for different layouts (3+4+4 for 3 cols, 6+5 for 2 cols)
+// Column distribution for different layouts (4+4+4 for 3 cols, 6+6 for 2 cols)
 const COLUMN_SPLITS: Record<number, number[]> = {
-  3: [3, 4, 4],
-  2: [6, 5],
-  1: [11],
+  3: [4, 4, 4],
+  2: [6, 6],
+  1: [12],
 };
 
 export const KeyboardShortcuts: React.FC = () => {
diff --git a/packages/cli/src/ui/components/ModelDialog.test.tsx b/packages/cli/src/ui/components/ModelDialog.test.tsx
index 7e05bdc43..dc5cc108a 100644
--- a/packages/cli/src/ui/components/ModelDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.test.tsx
@@ -114,10 +114,9 @@ describe('<ModelDialog />', () => {
     cleanup();
   });
 
-  it('renders the title and help text', () => {
+  it('renders the title', () => {
     const { getByText } = renderComponent();
     expect(getByText('Select Model')).toBeDefined();
-    expect(getByText('(Press Esc to close)')).toBeDefined();
   });
 
   it('passes all model options to DescriptiveRadioButtonSelect', () => {
@@ -289,11 +288,12 @@ describe('<ModelDialog />', () => {
     expect(props.onClose).toHaveBeenCalledTimes(1);
   });
 
-  it('does not pass onHighlight to DescriptiveRadioButtonSelect', () => {
+  it('passes onHighlight to DescriptiveRadioButtonSelect', () => {
     renderComponent();
 
     const childOnHighlight = mockedSelect.mock.calls[0][0].onHighlight;
-    expect(childOnHighlight).toBeUndefined();
+    expect(childOnHighlight).toBeDefined();
+    expect(typeof childOnHighlight).toBe('function');
   });
 
   it('calls onClose prop when "escape" key is pressed', () => {
diff --git a/packages/cli/src/ui/components/ModelDialog.tsx b/packages/cli/src/ui/components/ModelDialog.tsx
index 8fdbbe38d..09723dcdd 100644
--- a/packages/cli/src/ui/components/ModelDialog.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.tsx
@@ -14,8 +14,7 @@ import {
   MAINLINE_CODER_MODEL,
   type AvailableModel as CoreAvailableModel,
   type ContentGeneratorConfig,
-  type ContentGeneratorConfigSource,
-  type ContentGeneratorConfigSources,
+  type InputModalities,
 } from '@qwen-code/qwen-code-core';
 import { useKeypress } from '../hooks/useKeypress.js';
 import { theme } from '../semantic-colors.js';
@@ -26,61 +25,25 @@ import { useSettings } from '../contexts/SettingsContext.js';
 import { getPersistScopeForModelSelection } from '../../config/modelProvidersScope.js';
 import { t } from '../../i18n/index.js';
 
+function formatModalities(modalities?: InputModalities): string {
+  if (!modalities) return t('text-only');
+  const parts: string[] = [];
+  if (modalities.image) parts.push(t('image'));
+  if (modalities.pdf) parts.push(t('pdf'));
+  if (modalities.audio) parts.push(t('audio'));
+  if (modalities.video) parts.push(t('video'));
+  if (parts.length === 0) return t('text-only');
+  return `${t('text')} · ${parts.join(' · ')}`;
+}
+
 interface ModelDialogProps {
   onClose: () => void;
 }
 
-function formatSourceBadge(
-  source: ContentGeneratorConfigSource | undefined,
-): string | undefined {
-  if (!source) return undefined;
-
-  switch (source.kind) {
-    case 'cli':
-      return source.detail ? `CLI ${source.detail}` : 'CLI';
-    case 'env':
-      return source.envKey ? `ENV ${source.envKey}` : 'ENV';
-    case 'settings':
-      return source.settingsPath
-        ? `Settings ${source.settingsPath}`
-        : 'Settings';
-    case 'modelProviders': {
-      const suffix =
-        source.authType && source.modelId
-          ? `${source.authType}:${source.modelId}`
-          : source.authType
-            ? `${source.authType}`
-            : source.modelId
-              ? `${source.modelId}`
-              : '';
-      return suffix ? `ModelProviders ${suffix}` : 'ModelProviders';
-    }
-    case 'default':
-      return source.detail ? `Default ${source.detail}` : 'Default';
-    case 'computed':
-      return source.detail ? `Computed ${source.detail}` : 'Computed';
-    case 'programmatic':
-      return source.detail ? `Programmatic ${source.detail}` : 'Programmatic';
-    case 'unknown':
-    default:
-      return undefined;
-  }
-}
-
-function readSourcesFromConfig(config: unknown): ContentGeneratorConfigSources {
-  if (!config) {
-    return {};
-  }
-  const maybe = config as {
-    getContentGeneratorConfigSources?: () => ContentGeneratorConfigSources;
-  };
-  return maybe.getContentGeneratorConfigSources?.() ?? {};
-}
-
 function maskApiKey(apiKey: string | undefined): string {
-  if (!apiKey) return '(not set)';
+  if (!apiKey) return `(${t('not set')})`;
   const trimmed = apiKey.trim();
-  if (trimmed.length === 0) return '(not set)';
+  if (trimmed.length === 0) return `(${t('not set')})`;
   if (trimmed.length <= 6) return '***';
   const head = trimmed.slice(0, 3);
   const tail = trimmed.slice(-4);
@@ -131,7 +94,7 @@ function handleModelSwitchSuccess({
     {
       type: 'info',
       text:
-        `authType: ${effectiveAuthType ?? '(none)'}` +
+        `authType: ${effectiveAuthType ?? `(${t('none')})`}` +
         `\n` +
         `Using ${isRuntime ? 'runtime ' : ''}model: ${effectiveModelId}` +
         `\n` +
@@ -143,35 +106,26 @@ function handleModelSwitchSuccess({
   );
 }
 
-function ConfigRow({
+function formatContextWindow(size?: number): string {
+  if (!size) return `(${t('unknown')})`;
+  return `${size.toLocaleString('en-US')} tokens`;
+}
+
+function DetailRow({
   label,
   value,
-  badge,
 }: {
   label: string;
   value: React.ReactNode;
-  badge?: string;
 }): React.JSX.Element {
   return (
-    <Box flexDirection="column">
-      <Box>
-        <Box minWidth={12} flexShrink={0}>
-          <Text color={theme.text.secondary}>{label}:</Text>
-        </Box>
-        <Box flexGrow={1} flexDirection="row" flexWrap="wrap">
-          <Text>{value}</Text>
-        </Box>
+    <Box>
+      <Box minWidth={16} flexShrink={0}>
+        <Text color={theme.text.secondary}>{label}:</Text>
+      </Box>
+      <Box flexGrow={1} flexDirection="row" flexWrap="wrap">
+        <Text>{value}</Text>
       </Box>
-      {badge ? (
-        <Box>
-          <Box minWidth={12} flexShrink={0}>
-            <Text> </Text>
-          </Box>
-          <Box flexGrow={1}>
-            <Text color={theme.text.secondary}>{badge}</Text>
-          </Box>
-        </Box>
-      ) : null}
     </Box>
   );
 }
@@ -183,13 +137,9 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
 
   // Local error state for displaying errors within the dialog
   const [errorMessage, setErrorMessage] = useState<string | null>(null);
+  const [highlightedValue, setHighlightedValue] = useState<string | null>(null);
 
   const authType = config?.getAuthType();
-  const effectiveConfig =
-    (config?.getContentGeneratorConfig?.() as
-      | ContentGeneratorConfig
-      | undefined) ?? undefined;
-  const sources = readSourcesFromConfig(config);
 
   const availableModelEntries = useMemo(() => {
     const allModels = config ? config.getAllConfiguredModels() : [];
@@ -319,6 +269,20 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
     return index === -1 ? 0 : index;
   }, [MODEL_OPTIONS, preferredKey]);
 
+  const handleHighlight = useCallback((value: string) => {
+    setHighlightedValue(value);
+  }, []);
+
+  const highlightedEntry = useMemo(() => {
+    const key = highlightedValue ?? preferredKey;
+    return availableModelEntries.find(
+      ({ authType: t2, model, isRuntime, snapshotId }) => {
+        const v = isRuntime && snapshotId ? snapshotId : `${t2}::${model.id}`;
+        return v === key;
+      },
+    );
+  }, [highlightedValue, preferredKey, availableModelEntries]);
+
   const handleSelect = useCallback(
     async (selected: string) => {
       setErrorMessage(null);
@@ -413,35 +377,6 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
     >
       <Text bold>{t('Select Model')}</Text>
 
-      <Box marginTop={1} flexDirection="column">
-        <Text color={theme.text.secondary}>
-          {t('Current (effective) configuration')}
-        </Text>
-        <Box flexDirection="column" marginTop={1}>
-          <ConfigRow label="AuthType" value={authType} />
-          <ConfigRow
-            label="Model"
-            value={effectiveConfig?.model ?? config?.getModel?.() ?? ''}
-            badge={formatSourceBadge(sources['model'])}
-          />
-
-          {authType !== AuthType.QWEN_OAUTH && (
-            <>
-              <ConfigRow
-                label="Base URL"
-                value={effectiveConfig?.baseUrl ?? t('(default)')}
-                badge={formatSourceBadge(sources['baseUrl'])}
-              />
-              <ConfigRow
-                label="API Key"
-                value={effectiveConfig?.apiKey ? t('(set)') : t('(not set)')}
-                badge={formatSourceBadge(sources['apiKey'])}
-              />
-            </>
-          )}
-        </Box>
-      </Box>
-
       {!hasModels ? (
         <Box marginTop={1} flexDirection="column">
           <Text color={theme.status.warning}>
@@ -465,12 +400,48 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
           <DescriptiveRadioButtonSelect
             items={MODEL_OPTIONS}
             onSelect={handleSelect}
+            onHighlight={handleHighlight}
             initialIndex={initialIndex}
             showNumbers={true}
           />
         </Box>
       )}
 
+      {highlightedEntry && (
+        <Box marginTop={1} flexDirection="column">
+          <Box
+            borderStyle="single"
+            borderTop
+            borderBottom={false}
+            borderLeft={false}
+            borderRight={false}
+            borderColor={theme.border.default}
+          />
+          <DetailRow
+            label={t('Modality')}
+            value={formatModalities(highlightedEntry.model.modalities)}
+          />
+          <DetailRow
+            label={t('Context Window')}
+            value={formatContextWindow(
+              highlightedEntry.model.contextWindowSize,
+            )}
+          />
+          {highlightedEntry.authType !== AuthType.QWEN_OAUTH && (
+            <>
+              <DetailRow
+                label="Base URL"
+                value={highlightedEntry.model.baseUrl ?? t('(default)')}
+              />
+              <DetailRow
+                label="API Key"
+                value={highlightedEntry.model.envKey ?? t('(not set)')}
+              />
+            </>
+          )}
+        </Box>
+      )}
+
       {errorMessage && (
         <Box marginTop={1} flexDirection="column" paddingX={1}>
           <Text color={theme.status.error} wrap="wrap">
@@ -480,7 +451,9 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
       )}
 
       <Box marginTop={1} flexDirection="column">
-        <Text color={theme.text.secondary}>{t('(Press Esc to close)')}</Text>
+        <Text color={theme.text.secondary}>
+          {t('Enter to select, ↑↓ to navigate, Esc to close')}
+        </Text>
       </Box>
     </Box>
   );
diff --git a/packages/cli/src/ui/components/messages/ErrorMessage.tsx b/packages/cli/src/ui/components/messages/ErrorMessage.tsx
index 8e10a4fed..14cb8a91f 100644
--- a/packages/cli/src/ui/components/messages/ErrorMessage.tsx
+++ b/packages/cli/src/ui/components/messages/ErrorMessage.tsx
@@ -10,9 +10,17 @@ import { theme } from '../../semantic-colors.js';
 
 interface ErrorMessageProps {
   text: string;
+  /** Optional inline hint displayed after the error text in secondary/dimmed color */
+  hint?: string;
 }
 
-export const ErrorMessage: React.FC<ErrorMessageProps> = ({ text }) => {
+/**
+ * Renders an error message with a "✕" prefix.
+ * When a hint is provided (e.g., retry countdown), it is displayed inline
+ * in parentheses with a dimmed secondary color, similar to the ESC hint
+ * style used in LoadingIndicator.
+ */
+export const ErrorMessage: React.FC<ErrorMessageProps> = ({ text, hint }) => {
   const prefix = '✕ ';
   const prefixWidth = prefix.length;
 
@@ -21,10 +29,9 @@ export const ErrorMessage: React.FC<ErrorMessageProps> = ({ text }) => {
       <Box width={prefixWidth}>
         <Text color={theme.status.error}>{prefix}</Text>
       </Box>
-      <Box flexGrow={1}>
-        <Text wrap="wrap" color={theme.status.error}>
-          {text}
-        </Text>
+      <Box flexGrow={1} flexWrap="wrap" flexDirection="row">
+        <Text color={theme.status.error}>{text}</Text>
+        {hint && <Text color={theme.text.secondary}> ({hint})</Text>}
       </Box>
     </Box>
   );
diff --git a/packages/cli/src/ui/contexts/UIActionsContext.tsx b/packages/cli/src/ui/contexts/UIActionsContext.tsx
index 1965ceb26..af15e72b6 100644
--- a/packages/cli/src/ui/contexts/UIActionsContext.tsx
+++ b/packages/cli/src/ui/contexts/UIActionsContext.tsx
@@ -66,6 +66,7 @@ export interface UIActions {
   onSuggestionsVisibilityChange: (visible: boolean) => void;
   refreshStatic: () => void;
   handleFinalSubmit: (value: string) => void;
+  handleRetryLastPrompt: () => void;
   handleClearScreen: () => void;
   // Welcome back dialog
   handleWelcomeBackSelection: (choice: 'continue' | 'restart') => void;
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index e855eefc3..42f28f5e2 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -2304,40 +2304,30 @@ describe('useGeminiStream', () => {
           result.current.pendingHistoryItems.find(
             (item) => item.type === MessageType.ERROR,
           );
-        const findCountdownItem = () =>
-          result.current.pendingHistoryItems.find(
-            (item) => item.type === 'retry_countdown',
-          );
 
         let errorItem = findErrorItem();
-        let countdownItem = findCountdownItem();
-        for (
-          let attempts = 0;
-          attempts < 5 && (!errorItem || !countdownItem);
-          attempts++
-        ) {
+        for (let attempts = 0; attempts < 5 && !errorItem; attempts++) {
           await act(async () => {
             await Promise.resolve();
           });
           errorItem = findErrorItem();
-          countdownItem = findCountdownItem();
         }
 
-        // Error line should be rendered as ERROR type (wrapped by parseAndFormatApiError)
+        // Error item should contain the error text and a retry hint
         expect(errorItem?.text).toContain('Rate limit exceeded');
-
-        // Countdown line should be rendered as retry_countdown type
-        expect(countdownItem?.text).toContain('Retrying in 3 seconds');
+        // Countdown hint should be inline on the error item (not a separate item)
+        expect((errorItem as { hint?: string })?.hint).toContain('3s');
+        expect((errorItem as { hint?: string })?.hint).toContain('attempt 1/3');
 
         await act(async () => {
           await vi.advanceTimersByTimeAsync(1000);
         });
 
-        const countdownAfterOneSecond = result.current.pendingHistoryItems.find(
-          (item) => item.type === 'retry_countdown',
+        const errorAfterOneSecond = result.current.pendingHistoryItems.find(
+          (item) => item.type === MessageType.ERROR,
         );
-        expect(countdownAfterOneSecond?.text).toContain(
-          'Retrying in 2 seconds',
+        expect((errorAfterOneSecond as { hint?: string })?.hint).toContain(
+          '2s',
         );
 
         resolveStream?.();
@@ -2347,15 +2337,11 @@ describe('useGeminiStream', () => {
           await vi.runAllTimersAsync();
         });
 
-        // Both error and countdown should be cleared after retry succeeds
+        // Error item (with hint) should be cleared after retry succeeds
         const remainingError = result.current.pendingHistoryItems.find(
           (item) => item.type === MessageType.ERROR,
         );
-        const remainingCountdown = result.current.pendingHistoryItems.find(
-          (item) => item.type === 'retry_countdown',
-        );
         expect(remainingError).toBeUndefined();
-        expect(remainingCountdown).toBeUndefined();
       } finally {
         vi.useRealTimers();
       }
@@ -2525,14 +2511,13 @@ describe('useGeminiStream', () => {
         await result.current.submitQuery('Test query');
       });
 
-      // Verify error message was added
+      // Verify error message appears in pending history items (not via addItem,
+      // since errors with retry hints are now stored as pending items)
       await waitFor(() => {
-        expect(mockAddItem).toHaveBeenCalledWith(
-          expect.objectContaining({
-            type: 'error',
-          }),
-          expect.any(Number),
+        const errorItem = result.current.pendingHistoryItems.find(
+          (item) => item.type === 'error',
         );
+        expect(errorItem).toBeDefined();
       });
 
       // Verify parseAndFormatApiError was called
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index 2da4eed53..0e5f29216 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -169,12 +169,17 @@ export const useGeminiStream = (
   const abortControllerRef = useRef<AbortController | null>(null);
   const turnCancelledRef = useRef(false);
   const isSubmittingQueryRef = useRef(false);
+  const lastPromptRef = useRef<PartListUnion | null>(null);
+  const lastPromptErroredRef = useRef(false);
   const [isResponding, setIsResponding] = useState<boolean>(false);
   const [thought, setThought] = useState<ThoughtSummary | null>(null);
   const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
     useStateAndRef<HistoryItemWithoutId | null>(null);
-  const [pendingRetryErrorItem, setPendingRetryErrorItem] =
-    useState<HistoryItemWithoutId | null>(null);
+  const [
+    pendingRetryErrorItem,
+    pendingRetryErrorItemRef,
+    setPendingRetryErrorItem,
+  ] = useStateAndRef<HistoryItemWithoutId | null>(null);
   const [
     pendingRetryCountdownItem,
     pendingRetryCountdownItemRef,
@@ -254,11 +259,18 @@ export const useGeminiStream = (
     }
   }, []);
 
+  /**
+   * Clears the retry countdown timer and pending retry items.
+   */
   const clearRetryCountdown = useCallback(() => {
     stopRetryCountdownTimer();
     setPendingRetryErrorItem(null);
     setPendingRetryCountdownItem(null);
-  }, [setPendingRetryCountdownItem, stopRetryCountdownTimer]);
+  }, [
+    setPendingRetryErrorItem,
+    setPendingRetryCountdownItem,
+    stopRetryCountdownTimer,
+  ]);
 
   const startRetryCountdown = useCallback(
     (retryInfo: {
@@ -273,18 +285,21 @@ export const useGeminiStream = (
       const retryReasonText =
         message ?? t('Rate limit exceeded. Please wait and try again.');
 
-      // Error line stays static (red with ✕ prefix)
-      setPendingRetryErrorItem({
-        type: MessageType.ERROR,
-        text: retryReasonText,
-      });
-
       // Countdown line updates every second (dim/secondary color)
       const updateCountdown = () => {
         const elapsedMs = Date.now() - startTime;
         const remainingMs = Math.max(0, delayMs - elapsedMs);
         const remainingSec = Math.ceil(remainingMs / 1000);
 
+        // Update error item with hint containing countdown info (short format)
+        const hintText = `Retrying in ${remainingSec}s… (attempt ${attempt}/${maxRetries})`;
+
+        setPendingRetryErrorItem({
+          type: MessageType.ERROR,
+          text: retryReasonText,
+          hint: hintText,
+        });
+
         setPendingRetryCountdownItem({
           type: 'retry_countdown',
           text: t(
@@ -305,7 +320,11 @@ export const useGeminiStream = (
       updateCountdown();
       retryCountdownTimerRef.current = setInterval(updateCountdown, 1000);
     },
-    [setPendingRetryCountdownItem, stopRetryCountdownTimer],
+    [
+      setPendingRetryErrorItem,
+      setPendingRetryCountdownItem,
+      stopRetryCountdownTimer,
+    ],
   );
 
   useEffect(() => () => stopRetryCountdownTimer(), [stopRetryCountdownTimer]);
@@ -693,6 +712,7 @@ export const useGeminiStream = (
         return;
       }
 
+      lastPromptErroredRef.current = false;
       if (pendingHistoryItemRef.current) {
         if (pendingHistoryItemRef.current.type === 'tool_group') {
           const updatedTools = pendingHistoryItemRef.current.tools.map(
@@ -732,27 +752,36 @@ export const useGeminiStream = (
 
   const handleErrorEvent = useCallback(
     (eventValue: GeminiErrorEventValue, userMessageTimestamp: number) => {
+      lastPromptErroredRef.current = true;
       if (pendingHistoryItemRef.current) {
         addItem(pendingHistoryItemRef.current, userMessageTimestamp);
         setPendingHistoryItem(null);
       }
-      addItem(
-        {
-          type: MessageType.ERROR,
+      // Only show Ctrl+Y hint if not already showing an auto-retry countdown
+      // (auto-retry countdown is shown when retryCountdownTimerRef is active)
+      const isShowingAutoRetry = retryCountdownTimerRef.current !== null;
+      clearRetryCountdown();
+      if (!isShowingAutoRetry) {
+        const retryHint = t('Press Ctrl+Y to retry');
+        // Store error with hint as a pending item (not in history).
+        // This allows the hint to be removed when the user retries with Ctrl+Y,
+        // since pending items are in the dynamic rendering area (not <Static>).
+        setPendingRetryErrorItem({
+          type: 'error' as const,
           text: parseAndFormatApiError(
             eventValue.error,
             config.getContentGeneratorConfig()?.authType,
           ),
-        },
-        userMessageTimestamp,
-      );
-      clearRetryCountdown();
+          hint: retryHint,
+        });
+      }
       setThought(null); // Reset thought when there's an error
     },
     [
       addItem,
       pendingHistoryItemRef,
       setPendingHistoryItem,
+      setPendingRetryErrorItem,
       config,
       setThought,
       clearRetryCountdown,
@@ -816,7 +845,10 @@ export const useGeminiStream = (
           userMessageTimestamp,
         );
       }
-      clearRetryCountdown();
+      // Only clear auto-retry countdown errors (those with active timer)
+      if (retryCountdownTimerRef.current) {
+        clearRetryCountdown();
+      }
     },
     [addItem, clearRetryCountdown],
   );
@@ -1023,7 +1055,7 @@ export const useGeminiStream = (
   const submitQuery = useCallback(
     async (
       query: PartListUnion,
-      options?: { isContinuation: boolean },
+      options?: { isContinuation: boolean; skipPreparation?: boolean },
       prompt_id?: string,
     ) => {
       // Prevent concurrent executions of submitQuery, but allow continuations
@@ -1047,7 +1079,11 @@ export const useGeminiStream = (
       // Reset quota error flag when starting a new query (not a continuation)
       if (!options?.isContinuation) {
         setModelSwitchedFromQuotaError(false);
-        // No quota-error / fallback routing mechanism currently; keep state minimal.
+        // Commit any pending retry error to history (without hint) since the
+        // user is starting a new conversation turn
+        if (pendingRetryCountdownItemRef.current) {
+          clearRetryCountdown();
+        }
       }
 
       abortControllerRef.current = new AbortController();
@@ -1059,12 +1095,14 @@ export const useGeminiStream = (
       }
 
       return promptIdContext.run(prompt_id, async () => {
-        const { queryToSend, shouldProceed } = await prepareQueryForGemini(
-          query,
-          userMessageTimestamp,
-          abortSignal,
-          prompt_id!,
-        );
+        const { queryToSend, shouldProceed } = options?.skipPreparation
+          ? { queryToSend: query, shouldProceed: true }
+          : await prepareQueryForGemini(
+              query,
+              userMessageTimestamp,
+              abortSignal,
+              prompt_id!,
+            );
 
         if (!shouldProceed || queryToSend === null) {
           isSubmittingQueryRef.current = false;
@@ -1086,6 +1124,8 @@ export const useGeminiStream = (
         }
 
         const finalQueryToSend = queryToSend;
+        lastPromptRef.current = finalQueryToSend;
+        lastPromptErroredRef.current = false;
 
         if (!options?.isContinuation) {
           // trigger new prompt event for session stats in CLI
@@ -1134,6 +1174,12 @@ export const useGeminiStream = (
             addItem(pendingHistoryItemRef.current, userMessageTimestamp);
             setPendingHistoryItem(null);
           }
+          // Only clear auto-retry countdown errors (those with an active timer).
+          // Do NOT clear static error+hint from handleErrorEvent — those should
+          // remain visible until the user presses Ctrl+Y to retry.
+          if (retryCountdownTimerRef.current) {
+            clearRetryCountdown();
+          }
           if (loopDetectedRef.current) {
             loopDetectedRef.current = false;
             handleLoopDetectedEvent();
@@ -1142,16 +1188,17 @@ export const useGeminiStream = (
           if (error instanceof UnauthorizedError) {
             onAuthError('Session expired or is unauthorized.');
           } else if (!isNodeError(error) || error.name !== 'AbortError') {
-            addItem(
-              {
-                type: MessageType.ERROR,
-                text: parseAndFormatApiError(
-                  getErrorMessage(error) || 'Unknown error',
-                  config.getContentGeneratorConfig()?.authType,
-                ),
-              },
-              userMessageTimestamp,
-            );
+            lastPromptErroredRef.current = true;
+            const retryHint = t('Press Ctrl+Y to retry');
+            // Store error with hint as a pending item (same as handleErrorEvent)
+            setPendingRetryErrorItem({
+              type: 'error' as const,
+              text: parseAndFormatApiError(
+                getErrorMessage(error) || 'Unknown error',
+                config.getContentGeneratorConfig()?.authType,
+              ),
+              hint: retryHint,
+            });
           }
         } finally {
           setIsResponding(false);
@@ -1174,9 +1221,71 @@ export const useGeminiStream = (
       startNewPrompt,
       getPromptCount,
       handleLoopDetectedEvent,
+      clearRetryCountdown,
+      pendingRetryCountdownItemRef,
+      setPendingRetryErrorItem,
     ],
   );
 
+  /**
+   * Retries the last failed prompt when the user presses Ctrl+Y.
+   *
+   * Activation conditions for Ctrl+Y shortcut:
+   * 1. ✅ The last request must have failed (lastPromptErroredRef.current === true)
+   * 2. ✅ Current streaming state must NOT be "Responding" (avoid interrupting ongoing stream)
+   * 3. ✅ Current streaming state must NOT be "WaitingForConfirmation" (avoid conflicting with tool confirmation flow)
+   * 4. ✅ There must be a stored lastPrompt in lastPromptRef.current
+   *
+   * When conditions are not met:
+   * - If streaming is active (Responding/WaitingForConfirmation): silently return without action
+   * - If no failed request exists: display "No failed request to retry." info message
+   *
+   * When conditions are met:
+   * - Clears any pending auto-retry countdown to avoid duplicate retries
+   * - Re-submits the last query with skipPreparation: true for faster retry
+   *
+   * This function is exposed via UIActionsContext and triggered by InputPrompt
+   * when the user presses Ctrl+Y (bound to Command.RETRY_LAST in keyBindings.ts).
+   */
+  const retryLastPrompt = useCallback(async () => {
+    if (
+      streamingState === StreamingState.Responding ||
+      streamingState === StreamingState.WaitingForConfirmation
+    ) {
+      return;
+    }
+
+    const lastPrompt = lastPromptRef.current;
+    if (!lastPrompt || !lastPromptErroredRef.current) {
+      addItem(
+        {
+          type: MessageType.INFO,
+          text: t('No failed request to retry.'),
+        },
+        Date.now(),
+      );
+      return;
+    }
+
+    // Commit the error to history (without hint) before clearing
+    const errorItem = pendingRetryErrorItemRef.current;
+    if (errorItem) {
+      addItem({ type: errorItem.type, text: errorItem.text }, Date.now());
+    }
+    clearRetryCountdown();
+
+    await submitQuery(lastPrompt, {
+      isContinuation: false,
+      skipPreparation: true,
+    });
+  }, [
+    streamingState,
+    addItem,
+    clearRetryCountdown,
+    submitQuery,
+    pendingRetryErrorItemRef,
+  ]);
+
   const handleApprovalModeChange = useCallback(
     async (newApprovalMode: ApprovalMode) => {
       // Auto-approve pending tool calls when switching to auto-approval modes
@@ -1480,6 +1589,7 @@ export const useGeminiStream = (
     pendingHistoryItems,
     thought,
     cancelOngoingRequest,
+    retryLastPrompt,
     pendingToolCalls: toolCalls,
     handleApprovalModeChange,
     activePtyId,
diff --git a/packages/cli/src/ui/keyMatchers.test.ts b/packages/cli/src/ui/keyMatchers.test.ts
index 15d45fdab..8961f9ff7 100644
--- a/packages/cli/src/ui/keyMatchers.test.ts
+++ b/packages/cli/src/ui/keyMatchers.test.ts
@@ -59,6 +59,7 @@ describe('keyMatchers', () => {
     [Command.QUIT]: (key: Key) => key.ctrl && key.name === 'c',
     [Command.EXIT]: (key: Key) => key.ctrl && key.name === 'd',
     [Command.SHOW_MORE_LINES]: (key: Key) => key.ctrl && key.name === 's',
+    [Command.RETRY_LAST]: (key: Key) => key.ctrl && key.name === 'y',
     [Command.REVERSE_SEARCH]: (key: Key) => key.ctrl && key.name === 'r',
     [Command.SUBMIT_REVERSE_SEARCH]: (key: Key) =>
       key.name === 'return' && !key.ctrl,
@@ -252,6 +253,11 @@ describe('keyMatchers', () => {
       positive: [createKey('s', { ctrl: true })],
       negative: [createKey('s'), createKey('l', { ctrl: true })],
     },
+    {
+      command: Command.RETRY_LAST,
+      positive: [createKey('y', { ctrl: true })],
+      negative: [createKey('y'), createKey('r', { ctrl: true })],
+    },
 
     // Shell commands
     {
diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts
index b2e86de62..d2483f371 100644
--- a/packages/cli/src/ui/types.ts
+++ b/packages/cli/src/ui/types.ts
@@ -121,6 +121,7 @@ export type HistoryItemInfo = HistoryItemBase & {
 export type HistoryItemError = HistoryItemBase & {
   type: 'error';
   text: string;
+  hint?: string; // Optional inline hint (e.g., retry countdown) displayed in secondary color
 };
 
 export type HistoryItemWarning = HistoryItemBase & {
diff --git a/packages/cli/src/utils/languageUtils.test.ts b/packages/cli/src/utils/languageUtils.test.ts
index a0f0ca717..7081f0c94 100644
--- a/packages/cli/src/utils/languageUtils.test.ts
+++ b/packages/cli/src/utils/languageUtils.test.ts
@@ -380,4 +380,62 @@ describe('languageUtils', () => {
       expect(fs.writeFileSync).not.toHaveBeenCalled();
     });
   });
+
+  describe('output-language.md path resolution priority', () => {
+    it('should prefer project-level path over global path', () => {
+      const projectPath = '/project/.qwen/output-language.md';
+      const globalPath = '/mock/home/.qwen/output-language.md';
+
+      vi.mocked(fs.existsSync).mockImplementation((p) => {
+        if (p.toString() === projectPath) return true;
+        if (p.toString() === globalPath) return true;
+        return false;
+      });
+
+      let resolvedPath: string | undefined;
+      if (fs.existsSync(projectPath)) {
+        resolvedPath = projectPath;
+      } else if (fs.existsSync(globalPath)) {
+        resolvedPath = globalPath;
+      }
+
+      expect(resolvedPath).toBe(projectPath);
+    });
+
+    it('should fall back to global path when project-level does not exist', () => {
+      const projectPath = '/project/.qwen/output-language.md';
+      const globalPath = '/mock/home/.qwen/output-language.md';
+
+      vi.mocked(fs.existsSync).mockImplementation((p) => {
+        if (p.toString() === projectPath) return false;
+        if (p.toString() === globalPath) return true;
+        return false;
+      });
+
+      let resolvedPath: string | undefined;
+      if (fs.existsSync(projectPath)) {
+        resolvedPath = projectPath;
+      } else if (fs.existsSync(globalPath)) {
+        resolvedPath = globalPath;
+      }
+
+      expect(resolvedPath).toBe(globalPath);
+    });
+
+    it('should return undefined when neither path exists', () => {
+      const projectPath = '/project/.qwen/output-language.md';
+      const globalPath = '/mock/home/.qwen/output-language.md';
+
+      vi.mocked(fs.existsSync).mockReturnValue(false);
+
+      let resolvedPath: string | undefined;
+      if (fs.existsSync(projectPath)) {
+        resolvedPath = projectPath;
+      } else if (fs.existsSync(globalPath)) {
+        resolvedPath = globalPath;
+      }
+
+      expect(resolvedPath).toBeUndefined();
+    });
+  });
 });
diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts
index f3af06bda..d809193d7 100644
--- a/packages/core/src/core/contentGenerator.ts
+++ b/packages/core/src/core/contentGenerator.ts
@@ -60,6 +60,17 @@ export enum AuthType {
   USE_ANTHROPIC = 'anthropic',
 }
 
+/**
+ * Supported input modalities for a model.
+ * Omitted or false fields mean the model does not support that input type.
+ */
+export type InputModalities = {
+  image?: boolean;
+  pdf?: boolean;
+  audio?: boolean;
+  video?: boolean;
+};
+
 export type ContentGeneratorConfig = {
   model: string;
   apiKey?: string;
@@ -70,7 +81,8 @@ export type ContentGeneratorConfig = {
   enableOpenAILogging?: boolean;
   openAILoggingDir?: string;
   timeout?: number; // Timeout configuration in milliseconds
-  maxRetries?: number; // Maximum retries for failed requests
+  maxRetries?: number; // Maximum retries for rate-limit errors
+  retryErrorCodes?: number[]; // Additional error codes that trigger rate-limit retry
   enableCacheControl?: boolean; // Enable cache control for DashScope providers
   samplingParams?: {
     top_p?: number;
@@ -98,6 +110,9 @@ export type ContentGeneratorConfig = {
   customHeaders?: Record<string, string>;
   // Extra body parameters to be merged into the request body
   extra_body?: Record<string, unknown>;
+  // Supported input modalities. Unsupported media types are replaced with text
+  // placeholders. Leave undefined to use automatic detection from model name.
+  modalities?: InputModalities;
 };
 
 // Keep the public ContentGeneratorConfigSources API, but reuse the generic
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index fc0455a8a..9d4d1017d 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -1089,156 +1089,156 @@ export class CoreToolScheduler {
       );
 
       for (const toolCall of callsToExecute) {
-        if (toolCall.status !== 'scheduled') continue;
+        await this.executeSingleToolCall(toolCall, signal);
+      }
+    }
+  }
 
-        const scheduledCall = toolCall;
-        const { callId, name: toolName } = scheduledCall.request;
-        const invocation = scheduledCall.invocation;
-        this.setStatusInternal(callId, 'executing');
+  private async executeSingleToolCall(
+    toolCall: ToolCall,
+    signal: AbortSignal,
+  ): Promise<void> {
+    if (toolCall.status !== 'scheduled') return;
 
-        const liveOutputCallback = scheduledCall.tool.canUpdateOutput
-          ? (outputChunk: ToolResultDisplay) => {
-              if (this.outputUpdateHandler) {
-                this.outputUpdateHandler(callId, outputChunk);
-              }
-              this.toolCalls = this.toolCalls.map((tc) =>
-                tc.request.callId === callId && tc.status === 'executing'
-                  ? { ...tc, liveOutput: outputChunk }
-                  : tc,
-              );
-              this.notifyToolCallsUpdate();
-            }
-          : undefined;
+    const scheduledCall = toolCall;
+    const { callId, name: toolName } = scheduledCall.request;
+    const invocation = scheduledCall.invocation;
+    this.setStatusInternal(callId, 'executing');
 
-        const shellExecutionConfig = this.config.getShellExecutionConfig();
-
-        // TODO: Refactor to remove special casing for ShellToolInvocation.
-        // Introduce a generic callbacks object for the execute method to handle
-        // things like `onPid` and `onLiveOutput`. This will make the scheduler
-        // agnostic to the invocation type.
-        let promise: Promise<ToolResult>;
-        if (invocation instanceof ShellToolInvocation) {
-          const setPidCallback = (pid: number) => {
-            this.toolCalls = this.toolCalls.map((tc) =>
-              tc.request.callId === callId && tc.status === 'executing'
-                ? { ...tc, pid }
-                : tc,
-            );
-            this.notifyToolCallsUpdate();
-          };
-          promise = invocation.execute(
-            signal,
-            liveOutputCallback,
-            shellExecutionConfig,
-            setPidCallback,
-          );
-        } else {
-          promise = invocation.execute(
-            signal,
-            liveOutputCallback,
-            shellExecutionConfig,
-          );
-        }
-
-        try {
-          const toolResult: ToolResult = await promise;
-          if (signal.aborted) {
-            this.setStatusInternal(
-              callId,
-              'cancelled',
-              'User cancelled tool execution.',
-            );
-            continue;
+    const liveOutputCallback = scheduledCall.tool.canUpdateOutput
+      ? (outputChunk: ToolResultDisplay) => {
+          if (this.outputUpdateHandler) {
+            this.outputUpdateHandler(callId, outputChunk);
           }
+          this.toolCalls = this.toolCalls.map((tc) =>
+            tc.request.callId === callId && tc.status === 'executing'
+              ? { ...tc, liveOutput: outputChunk }
+              : tc,
+          );
+          this.notifyToolCallsUpdate();
+        }
+      : undefined;
 
-          if (toolResult.error === undefined) {
-            let content = toolResult.llmContent;
-            let outputFile: string | undefined = undefined;
-            const contentLength =
-              typeof content === 'string' ? content.length : undefined;
-            if (
-              typeof content === 'string' &&
-              toolName === ShellTool.Name &&
-              this.config.getEnableToolOutputTruncation() &&
-              this.config.getTruncateToolOutputThreshold() > 0 &&
-              this.config.getTruncateToolOutputLines() > 0
-            ) {
-              const originalContentLength = content.length;
-              const threshold = this.config.getTruncateToolOutputThreshold();
-              const lines = this.config.getTruncateToolOutputLines();
-              const truncatedResult = await truncateAndSaveToFile(
-                content,
-                callId,
-                this.config.storage.getProjectTempDir(),
+    const shellExecutionConfig = this.config.getShellExecutionConfig();
+
+    // TODO: Refactor to remove special casing for ShellToolInvocation.
+    // Introduce a generic callbacks object for the execute method to handle
+    // things like `onPid` and `onLiveOutput`. This will make the scheduler
+    // agnostic to the invocation type.
+    let promise: Promise<ToolResult>;
+    if (invocation instanceof ShellToolInvocation) {
+      const setPidCallback = (pid: number) => {
+        this.toolCalls = this.toolCalls.map((tc) =>
+          tc.request.callId === callId && tc.status === 'executing'
+            ? { ...tc, pid }
+            : tc,
+        );
+        this.notifyToolCallsUpdate();
+      };
+      promise = invocation.execute(
+        signal,
+        liveOutputCallback,
+        shellExecutionConfig,
+        setPidCallback,
+      );
+    } else {
+      promise = invocation.execute(
+        signal,
+        liveOutputCallback,
+        shellExecutionConfig,
+      );
+    }
+
+    try {
+      const toolResult: ToolResult = await promise;
+      if (signal.aborted) {
+        this.setStatusInternal(
+          callId,
+          'cancelled',
+          'User cancelled tool execution.',
+        );
+        return;
+      }
+
+      if (toolResult.error === undefined) {
+        let content = toolResult.llmContent;
+        let outputFile: string | undefined = undefined;
+        const contentLength =
+          typeof content === 'string' ? content.length : undefined;
+        if (
+          typeof content === 'string' &&
+          toolName === ShellTool.Name &&
+          this.config.getEnableToolOutputTruncation() &&
+          this.config.getTruncateToolOutputThreshold() > 0 &&
+          this.config.getTruncateToolOutputLines() > 0
+        ) {
+          const originalContentLength = content.length;
+          const threshold = this.config.getTruncateToolOutputThreshold();
+          const lines = this.config.getTruncateToolOutputLines();
+          const truncatedResult = await truncateAndSaveToFile(
+            content,
+            callId,
+            this.config.storage.getProjectTempDir(),
+            threshold,
+            lines,
+          );
+          content = truncatedResult.content;
+          outputFile = truncatedResult.outputFile;
+
+          if (outputFile) {
+            logToolOutputTruncated(
+              this.config,
+              new ToolOutputTruncatedEvent(scheduledCall.request.prompt_id, {
+                toolName,
+                originalContentLength,
+                truncatedContentLength: content.length,
                 threshold,
                 lines,
-              );
-              content = truncatedResult.content;
-              outputFile = truncatedResult.outputFile;
-
-              if (outputFile) {
-                logToolOutputTruncated(
-                  this.config,
-                  new ToolOutputTruncatedEvent(
-                    scheduledCall.request.prompt_id,
-                    {
-                      toolName,
-                      originalContentLength,
-                      truncatedContentLength: content.length,
-                      threshold,
-                      lines,
-                    },
-                  ),
-                );
-              }
-            }
-
-            const response = convertToFunctionResponse(
-              toolName,
-              callId,
-              content,
-            );
-            const successResponse: ToolCallResponseInfo = {
-              callId,
-              responseParts: response,
-              resultDisplay: toolResult.returnDisplay,
-              error: undefined,
-              errorType: undefined,
-              outputFile,
-              contentLength,
-            };
-            this.setStatusInternal(callId, 'success', successResponse);
-          } else {
-            // It is a failure
-            const error = new Error(toolResult.error.message);
-            const errorResponse = createErrorResponse(
-              scheduledCall.request,
-              error,
-              toolResult.error.type,
-            );
-            this.setStatusInternal(callId, 'error', errorResponse);
-          }
-        } catch (executionError: unknown) {
-          if (signal.aborted) {
-            this.setStatusInternal(
-              callId,
-              'cancelled',
-              'User cancelled tool execution.',
-            );
-          } else {
-            this.setStatusInternal(
-              callId,
-              'error',
-              createErrorResponse(
-                scheduledCall.request,
-                executionError instanceof Error
-                  ? executionError
-                  : new Error(String(executionError)),
-                ToolErrorType.UNHANDLED_EXCEPTION,
-              ),
+              }),
             );
           }
         }
+
+        const response = convertToFunctionResponse(toolName, callId, content);
+        const successResponse: ToolCallResponseInfo = {
+          callId,
+          responseParts: response,
+          resultDisplay: toolResult.returnDisplay,
+          error: undefined,
+          errorType: undefined,
+          outputFile,
+          contentLength,
+        };
+        this.setStatusInternal(callId, 'success', successResponse);
+      } else {
+        // It is a failure
+        const error = new Error(toolResult.error.message);
+        const errorResponse = createErrorResponse(
+          scheduledCall.request,
+          error,
+          toolResult.error.type,
+        );
+        this.setStatusInternal(callId, 'error', errorResponse);
+      }
+    } catch (executionError: unknown) {
+      if (signal.aborted) {
+        this.setStatusInternal(
+          callId,
+          'cancelled',
+          'User cancelled tool execution.',
+        );
+      } else {
+        this.setStatusInternal(
+          callId,
+          'error',
+          createErrorResponse(
+            scheduledCall.request,
+            executionError instanceof Error
+              ? executionError
+              : new Error(String(executionError)),
+            ToolErrorType.UNHANDLED_EXCEPTION,
+          ),
+        );
       }
     }
   }
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 0bac7066f..2e1923355 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -286,6 +286,12 @@ export class GeminiChat {
         let lastError: unknown = new Error('Request failed after all retries.');
         let rateLimitRetryCount = 0;
 
+        // Read per-config overrides; fall back to built-in defaults.
+        const cgConfig = self.config.getContentGeneratorConfig();
+        const maxRateLimitRetries =
+          cgConfig?.maxRetries ?? RATE_LIMIT_RETRY_OPTIONS.maxRetries;
+        const extraRetryErrorCodes = cgConfig?.retryErrorCodes;
+
         for (
           let attempt = 0;
           attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
@@ -316,18 +322,15 @@ export class GeminiChat {
             // These arrive as StreamContentError with finish_reason="error_finish"
             // from the pipeline, containing the throttling message in the content.
             // Covers TPM throttling, GLM rate limits, and other provider throttling.
-            const isRateLimit = isRateLimitError(error);
-            if (
-              isRateLimit &&
-              rateLimitRetryCount < RATE_LIMIT_RETRY_OPTIONS.maxRetries
-            ) {
+            const isRateLimit = isRateLimitError(error, extraRetryErrorCodes);
+            if (isRateLimit && rateLimitRetryCount < maxRateLimitRetries) {
               rateLimitRetryCount++;
               const delayMs = RATE_LIMIT_RETRY_OPTIONS.delayMs;
               const message = parseAndFormatApiError(
                 error instanceof Error ? error.message : String(error),
               );
               debugLogger.warn(
-                `Rate limit throttling detected (retry ${rateLimitRetryCount}/${RATE_LIMIT_RETRY_OPTIONS.maxRetries}). ` +
+                `Rate limit throttling detected (retry ${rateLimitRetryCount}/${maxRateLimitRetries}). ` +
                   `Waiting ${delayMs / 1000}s before retrying...`,
               );
               yield {
@@ -335,7 +338,7 @@ export class GeminiChat {
                 retryInfo: {
                   message,
                   attempt: rateLimitRetryCount,
-                  maxRetries: RATE_LIMIT_RETRY_OPTIONS.maxRetries,
+                  maxRetries: maxRateLimitRetries,
                   delayMs,
                 },
               };
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
index 88e9e2c87..3c64c1267 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
@@ -154,7 +154,6 @@ export class LoggingContentGenerator implements ContentGenerator {
         response.modelVersion || req.model,
         userPromptId,
         response.usageMetadata,
-        JSON.stringify(response),
       );
       await this.logOpenAIInteraction(openaiRequest, response);
       return response;
@@ -219,7 +218,6 @@ export class LoggingContentGenerator implements ContentGenerator {
         responses[0]?.modelVersion || model,
         userPromptId,
         lastUsageMetadata,
-        JSON.stringify(responses),
       );
       const consolidatedResponse =
         this.consolidateGeminiResponsesForLogging(responses);
diff --git a/packages/core/src/core/modalityDefaults.test.ts b/packages/core/src/core/modalityDefaults.test.ts
new file mode 100644
index 000000000..b90bc069e
--- /dev/null
+++ b/packages/core/src/core/modalityDefaults.test.ts
@@ -0,0 +1,213 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { defaultModalities } from './modalityDefaults.js';
+
+describe('defaultModalities', () => {
+  describe('Google Gemini', () => {
+    it('returns full multimodal for gemini-3-pro', () => {
+      expect(defaultModalities('gemini-3-pro-preview')).toEqual({
+        image: true,
+        pdf: true,
+        audio: true,
+        video: true,
+      });
+    });
+
+    it('returns full multimodal for gemini-3-flash', () => {
+      expect(defaultModalities('gemini-3-flash-preview')).toEqual({
+        image: true,
+        pdf: true,
+        audio: true,
+        video: true,
+      });
+    });
+
+    it('returns full multimodal for gemini-3.1-pro', () => {
+      expect(defaultModalities('gemini-3.1-pro-preview')).toEqual({
+        image: true,
+        pdf: true,
+        audio: true,
+        video: true,
+      });
+    });
+
+    it('returns full multimodal for gemini-2.5-pro', () => {
+      expect(defaultModalities('gemini-2.5-pro')).toEqual({
+        image: true,
+        pdf: true,
+        audio: true,
+        video: true,
+      });
+    });
+
+    it('returns full multimodal for gemini-1.5-flash', () => {
+      expect(defaultModalities('gemini-1.5-flash')).toEqual({
+        image: true,
+        pdf: true,
+        audio: true,
+        video: true,
+      });
+    });
+  });
+
+  describe('OpenAI', () => {
+    it('returns image for gpt-5.2', () => {
+      const m = defaultModalities('gpt-5.2');
+      expect(m.image).toBe(true);
+      expect(m.audio).toBeUndefined();
+      expect(m.pdf).toBeUndefined();
+      expect(m.video).toBeUndefined();
+    });
+
+    it('returns image for gpt-5-mini', () => {
+      expect(defaultModalities('gpt-5-mini').image).toBe(true);
+    });
+
+    it('returns image for gpt-4o', () => {
+      expect(defaultModalities('gpt-4o').image).toBe(true);
+    });
+
+    it('returns image for o3', () => {
+      expect(defaultModalities('o3').image).toBe(true);
+    });
+  });
+
+  describe('Anthropic Claude', () => {
+    it('returns image + pdf for claude-opus-4-6', () => {
+      const m = defaultModalities('claude-opus-4-6');
+      expect(m.image).toBe(true);
+      expect(m.pdf).toBe(true);
+      expect(m.audio).toBeUndefined();
+      expect(m.video).toBeUndefined();
+    });
+
+    it('returns image + pdf for claude-sonnet-4-6', () => {
+      const m = defaultModalities('claude-sonnet-4-6');
+      expect(m.image).toBe(true);
+      expect(m.pdf).toBe(true);
+    });
+
+    it('returns image + pdf for claude-sonnet-4', () => {
+      const m = defaultModalities('claude-sonnet-4');
+      expect(m.image).toBe(true);
+      expect(m.pdf).toBe(true);
+    });
+
+    it('returns image + pdf for claude-3.5-sonnet', () => {
+      const m = defaultModalities('claude-3.5-sonnet');
+      expect(m.image).toBe(true);
+      expect(m.pdf).toBe(true);
+    });
+  });
+
+  describe('Qwen', () => {
+    it('returns image + video for qwen-vl-max', () => {
+      const m = defaultModalities('qwen-vl-max');
+      expect(m.image).toBe(true);
+      expect(m.video).toBe(true);
+      expect(m.pdf).toBeUndefined();
+      expect(m.audio).toBeUndefined();
+    });
+
+    it('returns image + video for qwen3-vl-plus', () => {
+      const m = defaultModalities('qwen3-vl-plus');
+      expect(m.image).toBe(true);
+      expect(m.video).toBe(true);
+    });
+
+    it('returns text-only for qwen3-coder-plus', () => {
+      expect(defaultModalities('qwen3-coder-plus')).toEqual({});
+    });
+
+    it('returns image + video for coder-model (same as qwen3.5-plus)', () => {
+      expect(defaultModalities('coder-model')).toEqual({
+        image: true,
+        video: true,
+      });
+    });
+
+    it('returns image + video for qwen3.5-plus', () => {
+      const m = defaultModalities('qwen3.5-plus');
+      expect(m.image).toBe(true);
+      expect(m.video).toBe(true);
+      expect(m.pdf).toBeUndefined();
+      expect(m.audio).toBeUndefined();
+    });
+
+    it('returns text-only for qwen-turbo', () => {
+      expect(defaultModalities('qwen-turbo')).toEqual({});
+    });
+  });
+
+  describe('DeepSeek', () => {
+    it('returns text-only for deepseek-chat', () => {
+      expect(defaultModalities('deepseek-chat')).toEqual({});
+    });
+
+    it('returns text-only for deepseek-reasoner', () => {
+      expect(defaultModalities('deepseek-reasoner')).toEqual({});
+    });
+  });
+
+  describe('Zhipu GLM', () => {
+    it('returns image for glm-4.5v', () => {
+      const m = defaultModalities('glm-4.5v');
+      expect(m.image).toBe(true);
+      expect(m.pdf).toBeUndefined();
+    });
+
+    it('returns text-only for glm-5', () => {
+      expect(defaultModalities('glm-5')).toEqual({});
+    });
+
+    it('returns text-only for glm-4.7', () => {
+      expect(defaultModalities('glm-4.7')).toEqual({});
+    });
+  });
+
+  describe('MiniMax', () => {
+    it('returns text-only for MiniMax-M2.5', () => {
+      expect(defaultModalities('MiniMax-M2.5')).toEqual({});
+    });
+  });
+
+  describe('Kimi', () => {
+    it('returns image + video for kimi-k2.5', () => {
+      const m = defaultModalities('kimi-k2.5');
+      expect(m.image).toBe(true);
+      expect(m.video).toBe(true);
+      expect(m.pdf).toBeUndefined();
+      expect(m.audio).toBeUndefined();
+    });
+
+    it('returns text-only for kimi-k2', () => {
+      expect(defaultModalities('kimi-k2')).toEqual({});
+    });
+  });
+
+  describe('unknown models', () => {
+    it('returns text-only for unrecognized models', () => {
+      expect(defaultModalities('some-random-model-xyz')).toEqual({});
+    });
+  });
+
+  describe('normalization', () => {
+    it('normalizes provider prefixes', () => {
+      expect(defaultModalities('openai/gpt-4o')).toEqual(
+        defaultModalities('gpt-4o'),
+      );
+    });
+
+    it('returns a fresh copy each time', () => {
+      const a = defaultModalities('gemini-2.5-pro');
+      const b = defaultModalities('gemini-2.5-pro');
+      expect(a).toEqual(b);
+      expect(a).not.toBe(b);
+    });
+  });
+});
diff --git a/packages/core/src/core/modalityDefaults.ts b/packages/core/src/core/modalityDefaults.ts
new file mode 100644
index 000000000..f17927325
--- /dev/null
+++ b/packages/core/src/core/modalityDefaults.ts
@@ -0,0 +1,94 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { InputModalities } from './contentGenerator.js';
+import { normalize } from './tokenLimits.js';
+
+const FULL_MULTIMODAL: InputModalities = {
+  image: true,
+  pdf: true,
+  audio: true,
+  video: true,
+};
+
+/**
+ * Ordered regex patterns: most specific -> most general (first match wins).
+ * Default for unknown models is text-only (empty object = all false).
+ */
+const MODALITY_PATTERNS: Array<[RegExp, InputModalities]> = [
+  // -------------------
+  // Google Gemini — full multimodal
+  // -------------------
+  [/^gemini-3/, FULL_MULTIMODAL],
+  [/^gemini-/, FULL_MULTIMODAL],
+
+  // -------------------
+  // OpenAI — image by default for all gpt/o-series models
+  // -------------------
+  [/^gpt-5/, { image: true }],
+  [/^gpt-/, { image: true }],
+  [/^o\d/, { image: true }],
+
+  // -------------------
+  // Anthropic Claude — image + pdf
+  // -------------------
+  [/^claude-/, { image: true, pdf: true }],
+
+  // -------------------
+  // Alibaba / Qwen
+  // -------------------
+  // Qwen3.5-Plus: image support
+  [/^qwen3\.5-plus/, { image: true, video: true }],
+  [/^coder-model$/, { image: true, video: true }],
+
+  // Qwen VL (vision-language) models: image + video
+  [/^qwen-vl-/, { image: true, video: true }],
+  [/^qwen3-vl-/, { image: true, video: true }],
+
+  // Qwen coder / text models: text-only
+  [/^qwen3-coder-/, {}],
+  [/^qwen/, {}],
+
+  // -------------------
+  // DeepSeek — text-only
+  // -------------------
+  [/^deepseek/, {}],
+
+  // -------------------
+  // Zhipu GLM
+  // -------------------
+  [/^glm-4\.5v/, { image: true }],
+  [/^glm-5(?:-|$)/, {}],
+  [/^glm-/, {}],
+
+  // -------------------
+  // MiniMax — text-only
+  // -------------------
+  [/^minimax-/, {}],
+
+  // -------------------
+  // Moonshot / Kimi
+  // -------------------
+  [/^kimi-k2\.5/, { image: true, video: true }],
+  [/^kimi-/, {}],
+];
+
+/**
+ * Return the default input modalities for a model based on its name.
+ *
+ * Uses the same normalize-then-regex pattern as {@link tokenLimit}.
+ * Unknown models default to text-only (empty object) to avoid sending
+ * unsupported media types that would cause unrecoverable API errors.
+ */
+export function defaultModalities(model: string): InputModalities {
+  const norm = normalize(model);
+  for (const [regex, modalities] of MODALITY_PATTERNS) {
+    if (regex.test(norm)) {
+      return { ...modalities };
+    }
+  }
+  return {};
+}
diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts
index 36bbc812d..edad4992c 100644
--- a/packages/core/src/core/openaiContentGenerator/converter.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/converter.test.ts
@@ -22,7 +22,12 @@ describe('OpenAIContentConverter', () => {
   let converter: OpenAIContentConverter;
 
   beforeEach(() => {
-    converter = new OpenAIContentConverter('test-model');
+    converter = new OpenAIContentConverter('test-model', 'auto', {
+      image: true,
+      pdf: true,
+      audio: true,
+      video: true,
+    });
   });
 
   describe('resetStreamingToolCalls', () => {
@@ -1684,7 +1689,12 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
   let converter: OpenAIContentConverter;
 
   beforeEach(() => {
-    converter = new OpenAIContentConverter('test-model');
+    converter = new OpenAIContentConverter('test-model', 'auto', {
+      image: true,
+      pdf: true,
+      audio: true,
+      video: true,
+    });
   });
 
   it('should preserve MCP multi-text content in tool message (not leak to user message)', () => {
@@ -1957,3 +1967,159 @@ describe('MCP tool result end-to-end through OpenAI converter (issue #1520)', ()
     expect(contentArray[1].image_url?.url).toContain('data:image/png');
   });
 });
+
+describe('modality filtering', () => {
+  function makeRequest(parts: Part[]): GenerateContentParameters {
+    return {
+      model: 'test-model',
+      contents: [{ role: 'user', parts }],
+    };
+  }
+
+  function getUserContentParts(
+    messages: OpenAI.Chat.ChatCompletionMessageParam[],
+  ): Array<{ type: string; text?: string }> {
+    const userMsg = messages.find((m) => m.role === 'user');
+    if (
+      !userMsg ||
+      !('content' in userMsg) ||
+      !Array.isArray(userMsg.content)
+    ) {
+      return [];
+    }
+    return userMsg.content as Array<{ type: string; text?: string }>;
+  }
+
+  it('replaces image with placeholder when image modality is disabled', () => {
+    const conv = new OpenAIContentConverter('deepseek-chat', 'auto', {});
+    const request = makeRequest([
+      {
+        inlineData: { mimeType: 'image/png', data: 'abc123' },
+        displayName: 'screenshot.png',
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('image file');
+    expect(parts[0].text).toContain('does not support image input');
+  });
+
+  it('keeps image when image modality is enabled', () => {
+    const conv = new OpenAIContentConverter('gpt-4o', 'auto', { image: true });
+    const request = makeRequest([
+      {
+        inlineData: { mimeType: 'image/png', data: 'abc123' },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('image_url');
+  });
+
+  it('replaces PDF with placeholder when pdf modality is disabled', () => {
+    const conv = new OpenAIContentConverter('test-model', 'auto', {
+      image: true,
+    });
+    const request = makeRequest([
+      {
+        inlineData: {
+          mimeType: 'application/pdf',
+          data: 'pdf-data',
+          displayName: 'doc.pdf',
+        },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('pdf file');
+    expect(parts[0].text).toContain('does not support PDF input');
+  });
+
+  it('keeps PDF when pdf modality is enabled', () => {
+    const conv = new OpenAIContentConverter('claude-sonnet', 'auto', {
+      image: true,
+      pdf: true,
+    });
+    const request = makeRequest([
+      {
+        inlineData: {
+          mimeType: 'application/pdf',
+          data: 'pdf-data',
+          displayName: 'doc.pdf',
+        },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('file');
+  });
+
+  it('replaces video with placeholder when video modality is disabled', () => {
+    const conv = new OpenAIContentConverter('test-model', 'auto', {});
+    const request = makeRequest([
+      {
+        inlineData: { mimeType: 'video/mp4', data: 'vid-data' },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('video file');
+  });
+
+  it('replaces audio with placeholder when audio modality is disabled', () => {
+    const conv = new OpenAIContentConverter('test-model', 'auto', {});
+    const request = makeRequest([
+      {
+        inlineData: { mimeType: 'audio/wav', data: 'audio-data' },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('audio file');
+  });
+
+  it('handles mixed content: keeps text + supported media, replaces unsupported', () => {
+    const conv = new OpenAIContentConverter('gpt-4o', 'auto', { image: true });
+    const request = makeRequest([
+      { text: 'Analyze these files' },
+      {
+        inlineData: { mimeType: 'image/png', data: 'img-data' },
+      } as unknown as Part,
+      {
+        inlineData: { mimeType: 'video/mp4', data: 'vid-data' },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(3);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toBe('Analyze these files');
+    expect(parts[1].type).toBe('image_url');
+    expect(parts[2].type).toBe('text');
+    expect(parts[2].text).toContain('video file');
+  });
+
+  it('defaults to text-only when no modalities are specified', () => {
+    const conv = new OpenAIContentConverter('unknown-model');
+    const request = makeRequest([
+      {
+        inlineData: { mimeType: 'image/png', data: 'img-data' },
+      } as unknown as Part,
+    ]);
+    const messages = conv.convertGeminiRequestToOpenAI(request);
+    const parts = getUserContentParts(messages);
+    expect(parts).toHaveLength(1);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('image file');
+  });
+});
diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts
index 2ca7428bd..bdfc0286e 100644
--- a/packages/core/src/core/openaiContentGenerator/converter.ts
+++ b/packages/core/src/core/openaiContentGenerator/converter.ts
@@ -20,12 +20,16 @@ import type {
 import { GenerateContentResponse, FinishReason } from '@google/genai';
 import type OpenAI from 'openai';
 import { safeJsonParse } from '../../utils/safeJsonParse.js';
+import { createDebugLogger } from '../../utils/debugLogger.js';
+import type { InputModalities } from '../contentGenerator.js';
 import { StreamingToolCallParser } from './streamingToolCallParser.js';
 import {
   convertSchema,
   type SchemaComplianceMode,
 } from '../../utils/schemaConverter.js';
 
+const debugLogger = createDebugLogger('CONVERTER');
+
 /**
  * Extended usage type that supports both OpenAI standard format and alternative formats
  * Some models return cached_tokens at the top level instead of in prompt_tokens_details
@@ -92,12 +96,18 @@ type OpenAIContentPart =
 export class OpenAIContentConverter {
   private model: string;
   private schemaCompliance: SchemaComplianceMode;
+  private modalities: InputModalities;
   private streamingToolCallParser: StreamingToolCallParser =
     new StreamingToolCallParser();
 
-  constructor(model: string, schemaCompliance: SchemaComplianceMode = 'auto') {
+  constructor(
+    model: string,
+    schemaCompliance: SchemaComplianceMode = 'auto',
+    modalities: InputModalities = {},
+  ) {
     this.model = model;
     this.schemaCompliance = schemaCompliance;
+    this.modalities = modalities;
   }
 
   /**
@@ -108,6 +118,13 @@ export class OpenAIContentConverter {
     this.model = model;
   }
 
+  /**
+   * Update the supported input modalities.
+   */
+  setModalities(modalities: InputModalities): void {
+    this.modalities = modalities;
+  }
+
   /**
    * Reset streaming tool calls parser for new stream processing
    * This should be called at the beginning of each stream to prevent
@@ -585,13 +602,19 @@ export class OpenAIContentConverter {
   }
 
   /**
-   * Create OpenAI media content part from Gemini part
+   * Create OpenAI media content part from Gemini part.
+   * Checks modality support before building each media type.
    */
   private createMediaContentPart(part: Part): OpenAIContentPart | null {
     if (part.inlineData?.mimeType && part.inlineData?.data) {
       const mimeType = part.inlineData.mimeType;
       const mediaType = this.getMediaType(mimeType);
+      const displayName = part.inlineData.displayName || mimeType;
+
       if (mediaType === 'image') {
+        if (!this.modalities.image) {
+          return this.unsupportedModalityPlaceholder('image', displayName);
+        }
         const dataUrl = `data:${mimeType};base64,${part.inlineData.data}`;
         return {
           type: 'image_url' as const,
@@ -600,6 +623,9 @@ export class OpenAIContentConverter {
       }
 
       if (mimeType === 'application/pdf') {
+        if (!this.modalities.pdf) {
+          return this.unsupportedModalityPlaceholder('pdf', displayName);
+        }
         const filename = part.inlineData.displayName || 'document.pdf';
         return {
           type: 'file' as const,
@@ -611,6 +637,9 @@ export class OpenAIContentConverter {
       }
 
       if (mediaType === 'audio') {
+        if (!this.modalities.audio) {
+          return this.unsupportedModalityPlaceholder('audio', displayName);
+        }
         const format = this.getAudioFormat(mimeType);
         if (format) {
           return {
@@ -624,6 +653,9 @@ export class OpenAIContentConverter {
       }
 
       if (mediaType === 'video') {
+        if (!this.modalities.video) {
+          return this.unsupportedModalityPlaceholder('video', displayName);
+        }
         return {
           type: 'video_url' as const,
           video_url: {
@@ -632,12 +664,9 @@ export class OpenAIContentConverter {
         };
       }
 
-      const displayName = part.inlineData.displayName
-        ? ` (${part.inlineData.displayName})`
-        : '';
       return {
         type: 'text' as const,
-        text: `Unsupported inline media type: ${mimeType}${displayName}.`,
+        text: `Unsupported inline media type: ${mimeType} (${displayName}).`,
       };
     }
 
@@ -648,6 +677,9 @@ export class OpenAIContentConverter {
       const mediaType = this.getMediaType(mimeType);
 
       if (mediaType === 'image') {
+        if (!this.modalities.image) {
+          return this.unsupportedModalityPlaceholder('image', filename);
+        }
         return {
           type: 'image_url' as const,
           image_url: { url: fileUri },
@@ -655,6 +687,9 @@ export class OpenAIContentConverter {
       }
 
       if (mimeType === 'application/pdf') {
+        if (!this.modalities.pdf) {
+          return this.unsupportedModalityPlaceholder('pdf', filename);
+        }
         return {
           type: 'file' as const,
           file: {
@@ -665,6 +700,9 @@ export class OpenAIContentConverter {
       }
 
       if (mediaType === 'video') {
+        if (!this.modalities.video) {
+          return this.unsupportedModalityPlaceholder('video', filename);
+        }
         return {
           type: 'video_url' as const,
           video_url: {
@@ -673,18 +711,42 @@ export class OpenAIContentConverter {
         };
       }
 
-      const displayName = part.fileData.displayName
+      const displayNameStr = part.fileData.displayName
         ? ` (${part.fileData.displayName})`
         : '';
       return {
         type: 'text' as const,
-        text: `Unsupported file media type: ${mimeType}${displayName}.`,
+        text: `Unsupported file media type: ${mimeType}${displayNameStr}.`,
       };
     }
 
     return null;
   }
 
+  /**
+   * Create a text placeholder for unsupported modalities.
+   */
+  private unsupportedModalityPlaceholder(
+    modality: string,
+    displayName: string,
+  ): OpenAIContentPart {
+    debugLogger.warn(
+      `Model '${this.model}' does not support ${modality} input. ` +
+        `Replacing with text placeholder: ${displayName}`,
+    );
+    let hint: string;
+    if (modality === 'pdf') {
+      hint =
+        'This model does not support PDF input directly. The read_file tool cannot extract PDF content either. To extract text from the PDF file, try using skills if applicable, or guide user to install pdf skill by running this slash command:\n/extensions install https://github.com/anthropics/skills:document-skills';
+    } else {
+      hint = `This model does not support ${modality} input. The read_file tool cannot process this type of file either. To handle this file, try using skills if applicable, or any tools installed at system wide, or let the user know you cannot process this type of file.`;
+    }
+    return {
+      type: 'text' as const,
+      text: `[Unsupported ${modality} file: "${displayName}". ${hint}]`,
+    };
+  }
+
   /**
    * Determine media type from MIME type
    */
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
index 964f768a3..d71e23e91 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
@@ -47,6 +47,7 @@ describe('ContentGenerationPipeline', () => {
     // Mock converter
     mockConverter = {
       setModel: vi.fn(),
+      setModalities: vi.fn(),
       convertGeminiRequestToOpenAI: vi.fn(),
       convertOpenAIResponseToGemini: vi.fn(),
       convertOpenAIChunkToGemini: vi.fn(),
@@ -104,6 +105,7 @@ describe('ContentGenerationPipeline', () => {
       expect(OpenAIContentConverter).toHaveBeenCalledWith(
         'test-model',
         undefined,
+        {},
       );
     });
   });
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
index 1865adb48..8d2cc9fc7 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -46,6 +46,7 @@ export class ContentGenerationPipeline {
     this.converter = new OpenAIContentConverter(
       this.contentGeneratorConfig.model,
       this.contentGeneratorConfig.schemaCompliance,
+      this.contentGeneratorConfig.modalities ?? {},
     );
   }
 
@@ -58,6 +59,7 @@ export class ContentGenerationPipeline {
     // that is not valid/available for the OpenAI-compatible backend.
     const effectiveModel = this.contentGeneratorConfig.model;
     this.converter.setModel(effectiveModel);
+    this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
     return this.executeWithErrorHandling(
       request,
       userPromptId,
@@ -85,6 +87,7 @@ export class ContentGenerationPipeline {
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
     const effectiveModel = this.contentGeneratorConfig.model;
     this.converter.setModel(effectiveModel);
+    this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
     return this.executeWithErrorHandling(
       request,
       userPromptId,
diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
index f9d7a0fd6..2e528120a 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
@@ -733,7 +733,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
   describe('output token limits', () => {
     it('should limit max_tokens when it exceeds model limit', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         max_tokens: 100000, // Exceeds the model's output limit
       };
@@ -757,7 +757,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
     it('should not modify max_tokens when it is within model limit', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         max_tokens: 1000, // Within the model's output limit
       };
@@ -769,7 +769,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
     it('should not add max_tokens when not present in request', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         // No max_tokens parameter
       };
@@ -781,7 +781,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
     it('should handle null max_tokens parameter', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         max_tokens: null,
       };
@@ -800,12 +800,12 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
       const result = provider.buildRequest(request, 'test-prompt-id');
 
-      expect(result.max_tokens).toBe(4096); // Should be limited to default output limit (4K)
+      expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K)
     });
 
     it('should preserve other request parameters when limiting max_tokens', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         max_tokens: 100000, // Will be limited
         temperature: 0.8,
@@ -872,12 +872,10 @@ describe('DashScopeOpenAICompatibleProvider', () => {
             ],
           },
         ],
-        max_tokens: 50000,
       };
 
       const result = provider.buildRequest(request, 'test-prompt-id');
 
-      expect(result.max_tokens).toBe(32768); // Limited to model's output limit (32K)
       expect(
         (result as { vl_high_resolution_images?: boolean })
           .vl_high_resolution_images,
@@ -904,8 +902,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
       const result = provider.buildRequest(request, 'test-prompt-id');
 
-      // coder-model has 64K output limit, so max_tokens should be capped
-      expect(result.max_tokens).toBe(65536);
+      expect(result.max_tokens).toBe(65536); // Limited to model's output limit (64K)
       expect(
         (result as { vl_high_resolution_images?: boolean })
           .vl_high_resolution_images,
@@ -914,7 +911,7 @@ describe('DashScopeOpenAICompatibleProvider', () => {
 
     it('should handle streaming requests with output token limits', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
+        model: 'qwen3-max',
         messages: [{ role: 'user', content: 'Hello' }],
         max_tokens: 100000, // Exceeds the model's output limit
         stream: true,
diff --git a/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts b/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
index 68693393b..9a69cd326 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/deepseek.test.ts
@@ -5,7 +5,6 @@
  */
 
 import { describe, it, expect, vi, beforeEach } from 'vitest';
-import type OpenAI from 'openai';
 import { DeepSeekOpenAICompatibleProvider } from './deepseek.js';
 import type { ContentGeneratorConfig } from '../../contentGenerator.js';
 import type { Config } from '../../../config/config.js';
@@ -18,7 +17,6 @@ vi.mock('openai', () => ({
 }));
 
 describe('DeepSeekOpenAICompatibleProvider', () => {
-  let provider: DeepSeekOpenAICompatibleProvider;
   let mockContentGeneratorConfig: ContentGeneratorConfig;
   let mockCliConfig: Config;
 
@@ -34,11 +32,6 @@ describe('DeepSeekOpenAICompatibleProvider', () => {
     mockCliConfig = {
       getCliVersion: vi.fn().mockReturnValue('1.0.0'),
     } as unknown as Config;
-
-    provider = new DeepSeekOpenAICompatibleProvider(
-      mockContentGeneratorConfig,
-      mockCliConfig,
-    );
   });
 
   describe('isDeepSeekProvider', () => {
@@ -61,72 +54,15 @@ describe('DeepSeekOpenAICompatibleProvider', () => {
     });
   });
 
-  describe('buildRequest', () => {
-    const userPromptId = 'prompt-123';
-
-    it('converts array content into a string', () => {
-      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'deepseek-chat',
-        messages: [
-          {
-            role: 'user',
-            content: [
-              { type: 'text', text: 'Hello' },
-              { type: 'text', text: ' world' },
-            ],
-          },
-        ],
-      };
-
-      const result = provider.buildRequest(originalRequest, userPromptId);
-
-      expect(result.messages).toHaveLength(1);
-      expect(result.messages?.[0]).toEqual({
-        role: 'user',
-        content: 'Hello world',
+  describe('getDefaultGenerationConfig', () => {
+    it('returns temperature 0', () => {
+      const provider = new DeepSeekOpenAICompatibleProvider(
+        mockContentGeneratorConfig,
+        mockCliConfig,
+      );
+      expect(provider.getDefaultGenerationConfig()).toEqual({
+        temperature: 0,
       });
-      expect(originalRequest.messages?.[0].content).toEqual([
-        { type: 'text', text: 'Hello' },
-        { type: 'text', text: ' world' },
-      ]);
-    });
-
-    it('leaves string content unchanged', () => {
-      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'deepseek-chat',
-        messages: [
-          {
-            role: 'user',
-            content: 'Hello world',
-          },
-        ],
-      };
-
-      const result = provider.buildRequest(originalRequest, userPromptId);
-
-      expect(result.messages?.[0].content).toBe('Hello world');
-    });
-
-    it('throws when encountering non-text multimodal parts', () => {
-      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'deepseek-chat',
-        messages: [
-          {
-            role: 'user',
-            content: [
-              { type: 'text', text: 'Hello' },
-              {
-                type: 'image_url',
-                image_url: { url: 'https://example.com/image.png' },
-              },
-            ],
-          },
-        ],
-      };
-
-      expect(() =>
-        provider.buildRequest(originalRequest, userPromptId),
-      ).toThrow(/only supports text content/i);
     });
   });
 });
diff --git a/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts b/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
index 9b5fd7479..0e246725f 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/deepseek.ts
@@ -4,7 +4,6 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type OpenAI from 'openai';
 import type { Config } from '../../../config/config.js';
 import type { ContentGeneratorConfig } from '../../contentGenerator.js';
 import { DefaultOpenAICompatibleProvider } from './default.js';
@@ -26,58 +25,6 @@ export class DeepSeekOpenAICompatibleProvider extends DefaultOpenAICompatiblePro
     return baseUrl.toLowerCase().includes('api.deepseek.com');
   }
 
-  override buildRequest(
-    request: OpenAI.Chat.ChatCompletionCreateParams,
-    userPromptId: string,
-  ): OpenAI.Chat.ChatCompletionCreateParams {
-    const baseRequest = super.buildRequest(request, userPromptId);
-    if (!baseRequest.messages?.length) {
-      return baseRequest;
-    }
-
-    const messages = baseRequest.messages.map((message) => {
-      if (!('content' in message)) {
-        return message;
-      }
-
-      const { content } = message;
-
-      if (
-        typeof content === 'string' ||
-        content === null ||
-        content === undefined
-      ) {
-        return message;
-      }
-
-      if (!Array.isArray(content)) {
-        return message;
-      }
-
-      const text = content
-        .map((part) => {
-          if (part.type !== 'text') {
-            throw new Error(
-              `DeepSeek provider only supports text content. Found non-text part of type '${part.type}' in message with role '${message.role}'.`,
-            );
-          }
-
-          return part.text ?? '';
-        })
-        .join('');
-
-      return {
-        ...message,
-        content: text,
-      } as OpenAI.Chat.ChatCompletionMessageParam;
-    });
-
-    return {
-      ...baseRequest,
-      messages,
-    };
-  }
-
   override getDefaultGenerationConfig(): GenerateContentConfig {
     return {
       temperature: 0,
diff --git a/packages/core/src/core/tokenLimits.test.ts b/packages/core/src/core/tokenLimits.test.ts
index ffd71cd4b..edea10a10 100644
--- a/packages/core/src/core/tokenLimits.test.ts
+++ b/packages/core/src/core/tokenLimits.test.ts
@@ -91,183 +91,143 @@ describe('normalize', () => {
 });
 
 describe('tokenLimit', () => {
-  // Test cases for each model family
   describe('Google Gemini', () => {
-    it('should return the correct limit for Gemini 1.5 Pro', () => {
-      expect(tokenLimit('gemini-1.5-pro')).toBe(2097152);
+    it('should return 1M for Gemini 3.x (latest)', () => {
+      expect(tokenLimit('gemini-3-pro-preview')).toBe(1000000);
+      expect(tokenLimit('gemini-3-flash-preview')).toBe(1000000);
+      expect(tokenLimit('gemini-3.1-pro-preview')).toBe(1000000);
     });
-    it('should return the correct limit for Gemini 1.5 Flash', () => {
-      expect(tokenLimit('gemini-1.5-flash')).toBe(1048576);
-    });
-    it('should return the correct limit for Gemini 2.5 Pro', () => {
-      expect(tokenLimit('gemini-2.5-pro')).toBe(1048576);
-    });
-    it('should return the correct limit for Gemini 2.5 Flash', () => {
-      expect(tokenLimit('gemini-2.5-flash')).toBe(1048576);
-    });
-    it('should return the correct limit for Gemini 2.0 Flash with image generation', () => {
-      expect(tokenLimit('gemini-2.0-flash-image-generation')).toBe(32768);
-    });
-    it('should return the correct limit for Gemini 2.0 Flash', () => {
-      expect(tokenLimit('gemini-2.0-flash')).toBe(1048576);
+
+    it('should return 1M for legacy Gemini (fallback)', () => {
+      expect(tokenLimit('gemini-2.5-pro')).toBe(1000000);
+      expect(tokenLimit('gemini-2.5-flash')).toBe(1000000);
+      expect(tokenLimit('gemini-2.0-flash')).toBe(1000000);
+      expect(tokenLimit('gemini-1.5-pro')).toBe(1000000);
+      expect(tokenLimit('gemini-1.5-flash')).toBe(1000000);
     });
   });
 
   describe('OpenAI', () => {
-    it('should return the correct limit for o3-mini', () => {
-      expect(tokenLimit('o3-mini')).toBe(200000);
+    it('should return 400K for GPT-5.x (latest)', () => {
+      expect(tokenLimit('gpt-5')).toBe(400000);
+      expect(tokenLimit('gpt-5-mini')).toBe(400000);
+      expect(tokenLimit('gpt-5.2')).toBe(400000);
+      expect(tokenLimit('gpt-5.2-pro')).toBe(400000);
     });
-    it('should return the correct limit for o3 models', () => {
-      expect(tokenLimit('o3')).toBe(200000);
-    });
-    it('should return the correct limit for o4-mini', () => {
-      expect(tokenLimit('o4-mini')).toBe(200000);
-    });
-    it('should return the correct limit for gpt-4o-mini', () => {
-      expect(tokenLimit('gpt-4o-mini')).toBe(131072);
-    });
-    it('should return the correct limit for gpt-4o', () => {
+
+    it('should return 128K for legacy GPT (fallback)', () => {
       expect(tokenLimit('gpt-4o')).toBe(131072);
-    });
-    it('should return the correct limit for gpt-4.1-mini', () => {
-      expect(tokenLimit('gpt-4.1-mini')).toBe(1048576);
-    });
-    it('should return the correct limit for gpt-4.1 models', () => {
-      expect(tokenLimit('gpt-4.1')).toBe(1048576);
-    });
-    it('should return the correct limit for gpt-4', () => {
+      expect(tokenLimit('gpt-4o-mini')).toBe(131072);
+      expect(tokenLimit('gpt-4.1')).toBe(131072);
       expect(tokenLimit('gpt-4')).toBe(131072);
     });
+
+    it('should return 200K for o-series', () => {
+      expect(tokenLimit('o3')).toBe(200000);
+      expect(tokenLimit('o3-mini')).toBe(200000);
+      expect(tokenLimit('o4-mini')).toBe(200000);
+    });
   });
 
   describe('Anthropic Claude', () => {
-    it('should return the correct limit for Claude 3.5 Sonnet', () => {
+    it('should return 200K for all Claude models', () => {
+      expect(tokenLimit('claude-opus-4-6')).toBe(200000);
+      expect(tokenLimit('claude-sonnet-4-6')).toBe(200000);
+      expect(tokenLimit('claude-sonnet-4')).toBe(200000);
+      expect(tokenLimit('claude-opus-4')).toBe(200000);
       expect(tokenLimit('claude-3.5-sonnet')).toBe(200000);
-    });
-    it('should return the correct limit for Claude 3.7 Sonnet', () => {
-      expect(tokenLimit('claude-3.7-sonnet')).toBe(1048576);
-    });
-    it('should return the correct limit for Claude Sonnet 4', () => {
-      expect(tokenLimit('claude-sonnet-4')).toBe(1048576);
-    });
-    it('should return the correct limit for Claude Opus 4', () => {
-      expect(tokenLimit('claude-opus-4')).toBe(1048576);
+      expect(tokenLimit('claude-3.7-sonnet')).toBe(200000);
     });
   });
 
   describe('Alibaba Qwen', () => {
-    it('should return the correct limit for qwen3-coder commercial models', () => {
-      expect(tokenLimit('qwen3-coder-plus')).toBe(1048576);
-      expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1048576);
-      expect(tokenLimit('qwen3-coder-flash')).toBe(1048576);
-      expect(tokenLimit('qwen3-coder-flash-20250601')).toBe(1048576);
+    it('should return 1M for commercial Qwen3 models', () => {
+      expect(tokenLimit('qwen3-coder-plus')).toBe(1000000);
+      expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1000000);
+      expect(tokenLimit('qwen3-coder-flash')).toBe(1000000);
+      expect(tokenLimit('qwen3.5-plus')).toBe(1000000);
+      expect(tokenLimit('coder-model')).toBe(1000000);
     });
 
-    it('should return the correct limit for qwen3-coder open source models', () => {
+    it('should return 256K for Qwen3 non-commercial models', () => {
+      expect(tokenLimit('qwen3-max')).toBe(262144);
+      expect(tokenLimit('qwen3-max-2026-01-23')).toBe(262144);
+      expect(tokenLimit('qwen3-vl-plus')).toBe(262144);
       expect(tokenLimit('qwen3-coder-7b')).toBe(262144);
-      expect(tokenLimit('qwen3-coder-480b-a35b-instruct')).toBe(262144);
-      expect(tokenLimit('qwen3-coder-30b-a3b-instruct')).toBe(262144);
+      expect(tokenLimit('qwen3-coder-next')).toBe(262144);
     });
 
-    it('should return the correct limit for qwen3 2507 variants', () => {
-      expect(tokenLimit('qwen3-some-model-2507-instruct')).toBe(262144);
+    it('should return 1M for studio latest models', () => {
+      expect(tokenLimit('qwen-plus-latest')).toBe(1000000);
+      expect(tokenLimit('qwen-flash-latest')).toBe(1000000);
     });
 
-    it('should return the correct limit for qwen2.5-1m', () => {
-      expect(tokenLimit('qwen2.5-1m')).toBe(1048576);
-      expect(tokenLimit('qwen2.5-1m-instruct')).toBe(1048576);
-    });
-
-    it('should return the correct limit for qwen2.5', () => {
-      expect(tokenLimit('qwen2.5')).toBe(131072);
-      expect(tokenLimit('qwen2.5-instruct')).toBe(131072);
-    });
-
-    it('should return the correct limit for qwen-plus', () => {
-      expect(tokenLimit('qwen-plus-latest')).toBe(1048576);
-      expect(tokenLimit('qwen-plus')).toBe(131072);
-    });
-
-    it('should return the correct limit for qwen-flash', () => {
-      expect(tokenLimit('qwen-flash-latest')).toBe(1048576);
-    });
-
-    it('should return the correct limit for qwen-turbo', () => {
-      expect(tokenLimit('qwen-turbo')).toBe(131072);
-      expect(tokenLimit('qwen-turbo-latest')).toBe(131072);
-    });
-  });
-
-  describe('ByteDance Seed-OSS', () => {
-    it('should return the correct limit for seed-oss', () => {
-      expect(tokenLimit('seed-oss')).toBe(524288);
-    });
-  });
-
-  describe('Zhipu GLM', () => {
-    it('should return the correct limit for glm-4.5v', () => {
-      expect(tokenLimit('glm-4.5v')).toBe(65536);
-    });
-    it('should return the correct limit for glm-4.5-air', () => {
-      expect(tokenLimit('glm-4.5-air')).toBe(131072);
-    });
-    it('should return the correct limit for glm-4.5', () => {
-      expect(tokenLimit('glm-4.5')).toBe(131072);
-    });
-    it('should return the correct limit for glm-4.6', () => {
-      expect(tokenLimit('glm-4.6')).toBe(202752);
+    it('should return 256K for Qwen fallback', () => {
+      expect(tokenLimit('qwen-plus')).toBe(262144);
+      expect(tokenLimit('qwen-turbo')).toBe(262144);
+      expect(tokenLimit('qwen2.5')).toBe(262144);
+      expect(tokenLimit('qwen-vl-max-latest')).toBe(262144);
     });
   });
 
   describe('DeepSeek', () => {
-    it('should return the correct limit for deepseek-r1', () => {
+    it('should return 128K for DeepSeek models', () => {
       expect(tokenLimit('deepseek-r1')).toBe(131072);
-    });
-    it('should return the correct limit for deepseek-v3', () => {
       expect(tokenLimit('deepseek-v3')).toBe(131072);
+      expect(tokenLimit('deepseek-chat')).toBe(131072);
     });
-    it('should return the correct limit for deepseek-v3.1', () => {
-      expect(tokenLimit('deepseek-v3.1')).toBe(131072);
+  });
+
+  describe('Zhipu GLM', () => {
+    it('should return 200K for GLM-5 and GLM-4.7 (latest)', () => {
+      expect(tokenLimit('glm-5')).toBe(202752);
+      expect(tokenLimit('glm-4.7')).toBe(202752);
     });
-    it('should return the correct limit for deepseek-v3.2', () => {
-      expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
+
+    it('should return 200K for legacy GLM (fallback)', () => {
+      expect(tokenLimit('glm-4.5')).toBe(202752);
+      expect(tokenLimit('glm-4.5v')).toBe(202752);
+      expect(tokenLimit('glm-4.5-air')).toBe(202752);
+    });
+  });
+
+  describe('MiniMax', () => {
+    it('should return 1M for MiniMax-M2.5 (latest)', () => {
+      expect(tokenLimit('MiniMax-M2.5')).toBe(1000000);
+    });
+
+    it('should return 200K for MiniMax fallback', () => {
+      expect(tokenLimit('MiniMax-M2.1')).toBe(200000);
     });
   });
 
   describe('Moonshot Kimi', () => {
-    it('should return the correct limit for kimi-k2 variants', () => {
-      expect(tokenLimit('kimi-k2-0905-preview')).toBe(262144); // 256K
+    it('should return 256K for Kimi models', () => {
+      expect(tokenLimit('kimi-k2.5')).toBe(262144);
       expect(tokenLimit('kimi-k2-0905')).toBe(262144);
-      expect(tokenLimit('kimi-k2-turbo-preview')).toBe(262144);
       expect(tokenLimit('kimi-k2-turbo')).toBe(262144);
-      expect(tokenLimit('kimi-k2-0711-preview')).toBe(262144);
-      expect(tokenLimit('kimi-k2-instruct')).toBe(262144);
     });
   });
 
   describe('Other models', () => {
-    it('should return the correct limit for gpt-oss', () => {
-      expect(tokenLimit('gpt-oss')).toBe(131072);
+    it('should return correct limits for other known models', () => {
+      expect(tokenLimit('seed-oss')).toBe(524288);
     });
-    it('should return the correct limit for llama-4-scout', () => {
-      expect(tokenLimit('llama-4-scout')).toBe(10485760);
-    });
-    it('should return the correct limit for mistral-large-2', () => {
-      expect(tokenLimit('mistral-large-2')).toBe(131072);
+
+    it('should return the default token limit for unknown models', () => {
+      expect(tokenLimit('llama-4-scout')).toBe(DEFAULT_TOKEN_LIMIT);
     });
   });
 
-  // Test for default limit
   it('should return the default token limit for an unknown model', () => {
     expect(tokenLimit('unknown-model-v1.0')).toBe(DEFAULT_TOKEN_LIMIT);
+    expect(tokenLimit('mistral-large-2')).toBe(DEFAULT_TOKEN_LIMIT);
   });
 
-  // Test with complex model string
   it('should return the correct limit for a complex model string', () => {
     expect(tokenLimit('  a/b/c|GPT-4o:gpt-4o-2024-05-13-q4  ')).toBe(131072);
   });
 
-  // Test case-insensitive matching
   it('should handle case-insensitive model names', () => {
     expect(tokenLimit('GPT-4O')).toBe(131072);
     expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
@@ -275,99 +235,97 @@ describe('tokenLimit', () => {
 });
 
 describe('tokenLimit with output type', () => {
-  describe('Qwen models with output limits', () => {
-    it('should return the correct output limit for qwen3-coder-plus', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
-      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
+  describe('latest models output limits', () => {
+    it('should return correct output limits for GPT-5.x', () => {
+      expect(tokenLimit('gpt-5.2', 'output')).toBe(131072);
+      expect(tokenLimit('gpt-5-mini', 'output')).toBe(131072);
     });
 
-    it('should return the correct output limit for qwen-vl-max-latest', () => {
+    it('should return correct output limits for Gemini 3.x', () => {
+      expect(tokenLimit('gemini-3-pro-preview', 'output')).toBe(65536);
+      expect(tokenLimit('gemini-3-flash-preview', 'output')).toBe(65536);
+    });
+
+    it('should return correct output limits for Claude 4.6', () => {
+      expect(tokenLimit('claude-opus-4-6', 'output')).toBe(131072);
+      expect(tokenLimit('claude-sonnet-4-6', 'output')).toBe(65536);
+    });
+  });
+
+  describe('legacy model output fallbacks', () => {
+    it('should return fallback output limits for legacy GPT', () => {
+      expect(tokenLimit('gpt-4o', 'output')).toBe(16384);
+    });
+
+    it('should return fallback output limits for legacy Gemini', () => {
+      expect(tokenLimit('gemini-2.5-pro', 'output')).toBe(8192);
+    });
+
+    it('should return fallback output limits for legacy Claude', () => {
+      expect(tokenLimit('claude-sonnet-4', 'output')).toBe(65536);
+      expect(tokenLimit('claude-opus-4', 'output')).toBe(65536);
+    });
+  });
+
+  describe('Qwen output limits', () => {
+    it('should return correct output limits for Qwen models', () => {
+      expect(tokenLimit('qwen3.5-plus', 'output')).toBe(65536);
+      expect(tokenLimit('qwen3-max', 'output')).toBe(65536);
+      expect(tokenLimit('qwen3-max-2026-01-23', 'output')).toBe(65536);
+      expect(tokenLimit('coder-model', 'output')).toBe(65536);
+      // Models without specific output limits fall back to default
+      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(8192);
+      expect(tokenLimit('qwen3-coder-next', 'output')).toBe(8192);
+      expect(tokenLimit('qwen3-vl-plus', 'output')).toBe(8192);
       expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
     });
   });
 
-  describe('Default output limits', () => {
+  describe('other output limits', () => {
+    it('should return correct output limits for DeepSeek', () => {
+      expect(tokenLimit('deepseek-reasoner', 'output')).toBe(65536);
+      expect(tokenLimit('deepseek-chat', 'output')).toBe(8192);
+    });
+
+    it('should return correct output limits for GLM', () => {
+      expect(tokenLimit('glm-5', 'output')).toBe(16384);
+      expect(tokenLimit('glm-4.7', 'output')).toBe(16384);
+    });
+
+    it('should return correct output limits for MiniMax', () => {
+      expect(tokenLimit('MiniMax-M2.5', 'output')).toBe(65536);
+    });
+
+    it('should return correct output limits for Kimi', () => {
+      expect(tokenLimit('kimi-k2.5', 'output')).toBe(32768);
+    });
+  });
+
+  describe('default output limits', () => {
     it('should return the default output limit for unknown models', () => {
       expect(tokenLimit('unknown-model', 'output')).toBe(
         DEFAULT_OUTPUT_TOKEN_LIMIT,
       );
-      expect(tokenLimit('gpt-4', 'output')).toBe(DEFAULT_OUTPUT_TOKEN_LIMIT);
-      expect(tokenLimit('claude-3.5-sonnet', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-    });
-
-    it('should return the default output limit for models without specific output patterns', () => {
-      expect(tokenLimit('qwen3-coder-7b', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-      expect(tokenLimit('qwen-plus', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-      expect(tokenLimit('qwen-vl-max', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
     });
   });
 
-  describe('Input vs Output limits comparison', () => {
-    it('should return different limits for input vs output for qwen3-coder-plus', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576); // 1M input
-      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536); // 64K output
+  describe('input vs output comparison', () => {
+    it('should return different limits for input vs output', () => {
+      expect(tokenLimit('qwen3-max', 'input')).toBe(262144);
+      expect(tokenLimit('qwen3-max', 'output')).toBe(65536);
     });
 
-    it('should return different limits for input vs output for qwen-vl-max-latest', () => {
-      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072); // 128K input
-      expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192); // 8K output
-    });
-
-    it('should return different limits for input vs output for qwen3-vl-plus', () => {
-      expect(tokenLimit('qwen3-vl-plus', 'input')).toBe(262144); // 256K input
-      expect(tokenLimit('qwen3-vl-plus', 'output')).toBe(32768); // 32K output
-    });
-
-    it('should return same default limits for unknown models', () => {
-      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT); // 128K input
-      expect(tokenLimit('unknown-model', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      ); // 4K output
-    });
-  });
-
-  describe('Backward compatibility', () => {
     it('should default to input type when no type is specified', () => {
-      expect(tokenLimit('qwen3-coder-plus')).toBe(1048576); // Should be input limit
-      expect(tokenLimit('qwen-vl-max-latest')).toBe(131072); // Should be input limit
-      expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT); // Should be input default
-    });
-
-    it('should work with explicit input type', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576);
-      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072);
-      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT);
+      expect(tokenLimit('qwen3-coder-plus')).toBe(1000000);
+      expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT);
     });
   });
 
-  describe('Model normalization with output limits', () => {
+  describe('normalization with output limits', () => {
     it('should handle normalized model names for output limits', () => {
-      expect(tokenLimit('QWEN3-CODER-PLUS', 'output')).toBe(65536);
-      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
+      expect(tokenLimit('QWEN3-MAX', 'output')).toBe(65536);
+      expect(tokenLimit('qwen3-max-20250601', 'output')).toBe(65536);
       expect(tokenLimit('QWEN-VL-MAX-LATEST', 'output')).toBe(8192);
     });
-
-    it('should handle complex model strings for output limits', () => {
-      expect(
-        tokenLimit(
-          '  a/b/c|QWEN3-CODER-PLUS:qwen3-coder-plus-2024-05-13  ',
-          'output',
-        ),
-      ).toBe(65536);
-      expect(
-        tokenLimit(
-          'provider/qwen-vl-max-latest:qwen-vl-max-latest-v1',
-          'output',
-        ),
-      ).toBe(8192);
-    });
   });
 });
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index 2419e51a1..d038133cb 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -9,23 +9,23 @@ type TokenCount = number;
 export type TokenLimitType = 'input' | 'output';
 
 export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 4_096; // 4K tokens
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens
 
 /**
  * Accurate numeric limits:
  * - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.)
- * - vendor-declared exact values (e.g., 200k -> 200000) are used as stated in docs.
+ * - vendor-declared exact values (e.g., 200k -> 200000, 1m -> 1000000) are
+ *   used as stated in docs.
  */
 const LIMITS = {
   '32k': 32_768,
   '64k': 65_536,
   '128k': 131_072,
-  '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
+  '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, etc.
   '256k': 262_144,
+  '400k': 400_000, // vendor-declared decimal, used by OpenAI GPT-5.x
   '512k': 524_288,
-  '1m': 1_048_576,
-  '2m': 2_097_152,
-  '10m': 10_485_760, // 10 million tokens
+  '1m': 1_000_000,
   // Output token limits (typically much smaller than input limits)
   '4k': 4_096,
   '8k': 8_192,
@@ -81,110 +81,64 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
   // -------------------
   // Google Gemini
   // -------------------
-  [/^gemini-1\.5-pro$/, LIMITS['2m']],
-  [/^gemini-1\.5-flash$/, LIMITS['1m']],
-  [/^gemini-2\.5-pro.*$/, LIMITS['1m']],
-  [/^gemini-2\.5-flash.*$/, LIMITS['1m']],
-  [/^gemini-2\.0-flash-image-generation$/, LIMITS['32k']],
-  [/^gemini-2\.0-flash.*$/, LIMITS['1m']],
+  [/^gemini-3/, LIMITS['1m']], // Gemini 3.x (Pro, Flash, 3.1, etc.): 1M
+  [/^gemini-/, LIMITS['1m']], // Gemini fallback (1.5, 2.x): 1M
 
   // -------------------
-  // OpenAI (o3 / o4-mini / gpt-4.1 / gpt-4o family)
-  // o3 and o4-mini document a 200,000-token context window (decimal).
-  // Note: GPT-4.1 models typically report 1_048_576 (1M) context in OpenAI announcements.
-  [/^o3(?:-mini|$).*$/, LIMITS['200k']],
-  [/^o3.*$/, LIMITS['200k']],
-  [/^o4-mini.*$/, LIMITS['200k']],
-  [/^gpt-4\.1-mini.*$/, LIMITS['1m']],
-  [/^gpt-4\.1.*$/, LIMITS['1m']],
-  [/^gpt-4o-mini.*$/, LIMITS['128k']],
-  [/^gpt-4o.*$/, LIMITS['128k']],
-  [/^gpt-4.*$/, LIMITS['128k']],
+  // OpenAI
+  // -------------------
+  [/^gpt-5/, LIMITS['400k']], // GPT-5.x: 400K
+  [/^gpt-/, LIMITS['128k']], // GPT fallback (4o, 4.1, etc.): 128K
+  [/^o\d/, LIMITS['200k']], // o-series (o3, o4-mini, etc.): 200K
 
   // -------------------
   // Anthropic Claude
-  // - Claude Sonnet / Sonnet 3.5 and related Sonnet variants: 200,000 tokens documented.
-  // - Some Sonnet/Opus models offer 1M in beta/enterprise tiers (handled separately if needed).
-  [/^claude-3\.5-sonnet.*$/, LIMITS['200k']],
-  [/^claude-3\.7-sonnet.*$/, LIMITS['1m']], // some Sonnet 3.7/Opus variants advertise 1M beta in docs
-  [/^claude-sonnet-4.*$/, LIMITS['1m']],
-  [/^claude-opus-4.*$/, LIMITS['1m']],
+  // -------------------
+  [/^claude-/, LIMITS['200k']], // All Claude models: 200K
 
   // -------------------
   // Alibaba / Qwen
   // -------------------
-  // Commercial Qwen3-Coder-Plus: 1M token context
-  [/^qwen3-coder-plus(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-plus" and date variants
-
-  // Commercial Qwen3-Coder-Flash: 1M token context
-  [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
-
-  // Commercial Qwen3.5-Plus: 1M token context
-  [/^qwen3\.5-plus(-.*)?$/, LIMITS['1m']], // catches "qwen3.5-plus" and date variants
-
-  // Generic coder-model: same as qwen3.5-plus (1M token context)
-  [/^coder-model$/, LIMITS['1m']],
-
-  // Commercial Qwen3-Max-Preview: 256K token context
-  [/^qwen3-max(-preview)?(-.*)?$/, LIMITS['256k']], // catches "qwen3-max" or "qwen3-max-preview" and date variants
-
-  // Open-source Qwen3-Coder variants: 256K native
-  [/^qwen3-coder-.*$/, LIMITS['256k']],
-  // Open-source Qwen3 2507 variants: 256K native
-  [/^qwen3-.*-2507-.*$/, LIMITS['256k']],
-
-  // Open-source long-context Qwen2.5-1M
-  [/^qwen2\.5-1m.*$/, LIMITS['1m']],
-
-  // Standard Qwen2.5: 128K
-  [/^qwen2\.5.*$/, LIMITS['128k']],
-
-  // Studio commercial Qwen-Plus / Qwen-Flash / Qwen-Turbo
-  [/^qwen-plus-latest$/, LIMITS['1m']], // Commercial latest: 1M
-  [/^qwen-plus.*$/, LIMITS['128k']], // Standard: 128K
+  // Commercial API models (1,000,000 context)
+  [/^qwen3-coder-plus/, LIMITS['1m']],
+  [/^qwen3-coder-flash/, LIMITS['1m']],
+  [/^qwen3\.5-plus/, LIMITS['1m']],
+  [/^qwen-plus-latest$/, LIMITS['1m']],
   [/^qwen-flash-latest$/, LIMITS['1m']],
-  [/^qwen-turbo.*$/, LIMITS['128k']],
-
-  // Qwen Vision Models
-  [/^qwen3-vl-plus$/, LIMITS['256k']], // Qwen3-VL-Plus: 256K input
-  [/^qwen-vl-max.*$/, LIMITS['128k']],
-
-  // -------------------
-  // ByteDance Seed-OSS (512K)
-  // -------------------
-  [/^seed-oss.*$/, LIMITS['512k']],
-
-  // -------------------
-  // Zhipu GLM
-  // -------------------
-  [/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
-  [/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
-  [/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
-  [/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
-  [/^glm-4\.7(?:-.*)?$/, LIMITS['200k']],
+  [/^coder-model$/, LIMITS['1m']],
+  // Commercial API models (256K context)
+  [/^qwen3-max/, LIMITS['256k']],
+  // Open-source Qwen3 variants: 256K native
+  [/^qwen3-coder-/, LIMITS['256k']],
+  // Qwen fallback (VL, turbo, plus, 2.5, etc.): 128K
+  [/^qwen/, LIMITS['256k']],
 
   // -------------------
   // DeepSeek
   // -------------------
-  [/^deepseek(?:-.*)?$/, LIMITS['128k']],
+  [/^deepseek/, LIMITS['128k']],
 
   // -------------------
-  // Moonshot / Kimi
+  // Zhipu GLM
   // -------------------
-  [/^kimi-2\.5.*$/, LIMITS['256k']], // Kimi-2.5: 256K context
-  [/^kimi-k2.*$/, LIMITS['256k']], // Kimi-k2 variants: 256K context
-
-  // -------------------
-  // GPT-OSS / Llama & Mistral examples
-  // -------------------
-  [/^gpt-oss.*$/, LIMITS['128k']],
-  [/^llama-4-scout.*$/, LIMITS['10m']],
-  [/^mistral-large-2.*$/, LIMITS['128k']],
+  [/^glm-5/, 202_752 as TokenCount], // GLM-5: exact vendor limit
+  [/^glm-/, 202_752 as TokenCount], // GLM fallback: 128K
 
   // -------------------
   // MiniMax
   // -------------------
-  [/^minimax-m2\.1.*$/i, LIMITS['200k']], // MiniMax-M2.1: 200K context
+  [/^minimax-m2\.5/i, LIMITS['1m']], // MiniMax-M2.5: 1,000,000
+  [/^minimax-/i, LIMITS['200k']], // MiniMax fallback: 200K
+
+  // -------------------
+  // Moonshot / Kimi
+  // -------------------
+  [/^kimi-/, LIMITS['256k']], // Kimi fallback: 256K
+
+  // -------------------
+  // ByteDance Seed-OSS (512K)
+  // -------------------
+  [/^seed-oss/, LIMITS['512k']],
 ];
 
 /**
@@ -193,32 +147,38 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  * in a single response for specific models.
  */
 const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
-  // -------------------
-  // Alibaba / Qwen - DashScope Models
-  // -------------------
-  // Qwen3-Coder-Plus: 65,536 max output tokens
-  [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
+  // Google Gemini
+  [/^gemini-3/, LIMITS['64k']], // Gemini 3.x: 64K
+  [/^gemini-/, LIMITS['8k']], // Gemini fallback: 8K
 
-  // Qwen3.5-Plus: 65,536 max output tokens
-  [/^qwen3\.5-plus(-.*)?$/, LIMITS['64k']],
+  // OpenAI
+  [/^gpt-5/, LIMITS['128k']], // GPT-5.x: 128K
+  [/^gpt-/, LIMITS['16k']], // GPT fallback: 16K
+  [/^o\d/, LIMITS['128k']], // o-series: 128K
 
-  // Generic coder-model: same as qwen3.5-plus (64K max output tokens)
+  // Anthropic Claude
+  [/^claude-opus-4-6/, LIMITS['128k']], // Opus 4.6: 128K
+  [/^claude-sonnet-4-6/, LIMITS['64k']], // Sonnet 4.6: 64K
+  [/^claude-/, LIMITS['64k']], // Claude fallback: 64K
+
+  // Alibaba / Qwen
+  [/^qwen3\.5/, LIMITS['64k']],
   [/^coder-model$/, LIMITS['64k']],
+  [/^qwen3-max/, LIMITS['64k']],
 
-  // Qwen3-Max: 65,536 max output tokens
-  [/^qwen3-max(-preview)?(-.*)?$/, LIMITS['64k']],
+  // DeepSeek
+  [/^deepseek-reasoner/, LIMITS['64k']],
+  [/^deepseek-chat/, LIMITS['8k']],
 
-  // Qwen-VL-Max-Latest: 8,192 max output tokens
-  [/^qwen-vl-max-latest$/, LIMITS['8k']],
+  // Zhipu GLM
+  [/^glm-5/, LIMITS['16k']],
+  [/^glm-4\.7/, LIMITS['16k']],
 
-  // Qwen3-VL-Plus: 32K max output tokens
-  [/^qwen3-vl-plus$/, LIMITS['32k']],
+  // MiniMax
+  [/^minimax-m2\.5/i, LIMITS['64k']],
 
-  // Deepseek-chat: 8k max tokens
-  [/^deepseek-chat$/, LIMITS['8k']],
-
-  // Deepseek-reasoner: 64k max tokens
-  [/^deepseek-reasoner$/, LIMITS['64k']],
+  // Kimi
+  [/^kimi-k2\.5/, LIMITS['32k']],
 ];
 
 /**
diff --git a/packages/core/src/models/constants.ts b/packages/core/src/models/constants.ts
index 025e3b9cf..c7f4a148b 100644
--- a/packages/core/src/models/constants.ts
+++ b/packages/core/src/models/constants.ts
@@ -22,12 +22,14 @@ export const MODEL_GENERATION_CONFIG_FIELDS = [
   'samplingParams',
   'timeout',
   'maxRetries',
+  'retryErrorCodes',
   'enableCacheControl',
   'schemaCompliance',
   'reasoning',
   'contextWindowSize',
   'customHeaders',
   'extra_body',
+  'modalities',
 ] as const satisfies ReadonlyArray<keyof ContentGeneratorConfig>;
 
 /**
diff --git a/packages/core/src/models/modelRegistry.ts b/packages/core/src/models/modelRegistry.ts
index 7b9bdad77..c2815fb32 100644
--- a/packages/core/src/models/modelRegistry.ts
+++ b/packages/core/src/models/modelRegistry.ts
@@ -5,6 +5,8 @@
  */
 
 import { AuthType } from '../core/contentGenerator.js';
+import { defaultModalities } from '../core/modalityDefaults.js';
+import { tokenLimit } from '../core/tokenLimits.js';
 import { DEFAULT_OPENAI_BASE_URL } from '../core/openaiContentGenerator/constants.js';
 import {
   type ModelConfig,
@@ -121,7 +123,12 @@ export class ModelRegistry {
       capabilities: model.capabilities,
       authType: model.authType,
       isVision: model.capabilities?.vision ?? false,
-      contextWindowSize: model.generationConfig.contextWindowSize,
+      contextWindowSize:
+        model.generationConfig.contextWindowSize ?? tokenLimit(model.id),
+      modalities:
+        model.generationConfig.modalities ?? defaultModalities(model.id),
+      baseUrl: model.baseUrl,
+      envKey: model.envKey,
     }));
   }
 
diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts
index a77d1d06b..d22cc790c 100644
--- a/packages/core/src/models/modelsConfig.ts
+++ b/packages/core/src/models/modelsConfig.ts
@@ -11,6 +11,7 @@ import type { ContentGeneratorConfig } from '../core/contentGenerator.js';
 import type { ContentGeneratorConfigSources } from '../core/contentGenerator.js';
 import { DEFAULT_QWEN_MODEL } from '../config/models.js';
 import { tokenLimit } from '../core/tokenLimits.js';
+import { defaultModalities } from '../core/modalityDefaults.js';
 
 import { ModelRegistry } from './modelRegistry.js';
 import {
@@ -770,6 +771,15 @@ export class ModelsConfig {
         detail: 'auto-detected from model',
       };
     }
+
+    // modalities fallback: auto-detect from model when not set by provider
+    if (gc.modalities === undefined) {
+      this._generationConfig.modalities = defaultModalities(model.id);
+      this.generationConfigSources['modalities'] = {
+        kind: 'computed',
+        detail: 'auto-detected from model',
+      };
+    }
   }
 
   /**
diff --git a/packages/core/src/models/types.ts b/packages/core/src/models/types.ts
index 69c286729..64f5ef43e 100644
--- a/packages/core/src/models/types.ts
+++ b/packages/core/src/models/types.ts
@@ -7,6 +7,7 @@
 import type {
   AuthType,
   ContentGeneratorConfig,
+  InputModalities,
 } from '../core/contentGenerator.js';
 import type { ConfigSources } from '../utils/configResolver.js';
 
@@ -29,12 +30,14 @@ export type ModelGenerationConfig = Pick<
   | 'samplingParams'
   | 'timeout'
   | 'maxRetries'
+  | 'retryErrorCodes'
   | 'enableCacheControl'
   | 'schemaCompliance'
   | 'reasoning'
   | 'customHeaders'
   | 'extra_body'
   | 'contextWindowSize'
+  | 'modalities'
 >;
 
 /**
@@ -93,6 +96,9 @@ export interface AvailableModel {
   authType: AuthType;
   isVision?: boolean;
   contextWindowSize?: number;
+  modalities?: InputModalities;
+  baseUrl?: string;
+  envKey?: string;
 
   /** Whether this is a runtime model (not from modelProviders) */
   isRuntimeModel?: boolean;
diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts
index ce6e64ae4..0286d11c8 100644
--- a/packages/core/src/subagents/subagent.test.ts
+++ b/packages/core/src/subagents/subagent.test.ts
@@ -458,6 +458,103 @@ describe('subagent.ts', () => {
         ]);
       });
 
+      it('should append userMemory to the system prompt when available', async () => {
+        const { config } = await createMockConfig();
+        const userMemoryContent =
+          '# Output language preference: English\nRespond in English.';
+        vi.spyOn(config, 'getUserMemory').mockReturnValue(userMemoryContent);
+
+        vi.mocked(GeminiChat).mockClear();
+
+        const promptConfig: PromptConfig = {
+          systemPrompt: 'You are a test agent.',
+        };
+        const context = new ContextState();
+
+        mockSendMessageStream.mockImplementation(createMockStream(['stop']));
+
+        const scope = await SubAgentScope.create(
+          'test-agent',
+          config,
+          promptConfig,
+          defaultModelConfig,
+          defaultRunConfig,
+        );
+
+        await scope.runNonInteractive(context);
+
+        const generationConfig = getGenerationConfigFromMock();
+        expect(generationConfig.systemInstruction).toContain(
+          'You are a test agent.',
+        );
+        expect(generationConfig.systemInstruction).toContain(
+          'Important Rules:',
+        );
+        expect(generationConfig.systemInstruction).toContain(
+          '# Output language preference: English',
+        );
+        expect(generationConfig.systemInstruction).toContain(
+          'Respond in English.',
+        );
+      });
+
+      it('should not append userMemory separator when userMemory is empty', async () => {
+        const { config } = await createMockConfig();
+        vi.spyOn(config, 'getUserMemory').mockReturnValue('');
+
+        vi.mocked(GeminiChat).mockClear();
+
+        const promptConfig: PromptConfig = {
+          systemPrompt: 'You are a test agent.',
+        };
+        const context = new ContextState();
+
+        mockSendMessageStream.mockImplementation(createMockStream(['stop']));
+
+        const scope = await SubAgentScope.create(
+          'test-agent',
+          config,
+          promptConfig,
+          defaultModelConfig,
+          defaultRunConfig,
+        );
+
+        await scope.runNonInteractive(context);
+
+        const generationConfig = getGenerationConfigFromMock();
+        const sysPrompt = generationConfig.systemInstruction as string;
+        expect(sysPrompt).toContain('You are a test agent.');
+        expect(sysPrompt).not.toContain('---');
+      });
+
+      it('should not append userMemory separator when userMemory is whitespace-only', async () => {
+        const { config } = await createMockConfig();
+        vi.spyOn(config, 'getUserMemory').mockReturnValue('   \n\n  ');
+
+        vi.mocked(GeminiChat).mockClear();
+
+        const promptConfig: PromptConfig = {
+          systemPrompt: 'You are a test agent.',
+        };
+        const context = new ContextState();
+
+        mockSendMessageStream.mockImplementation(createMockStream(['stop']));
+
+        const scope = await SubAgentScope.create(
+          'test-agent',
+          config,
+          promptConfig,
+          defaultModelConfig,
+          defaultRunConfig,
+        );
+
+        await scope.runNonInteractive(context);
+
+        const generationConfig = getGenerationConfigFromMock();
+        const sysPrompt = generationConfig.systemInstruction as string;
+        expect(sysPrompt).not.toContain('---');
+      });
+
       it('should use initialMessages instead of systemPrompt if provided', async () => {
         const { config } = await createMockConfig();
         vi.mocked(GeminiChat).mockClear();
diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts
index c9328e5ad..613bc8044 100644
--- a/packages/core/src/subagents/subagent.ts
+++ b/packages/core/src/subagents/subagent.ts
@@ -999,6 +999,12 @@ Important Rules:
  - Use tools only when necessary to obtain facts or make changes.
  - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`;
 
+    // Append user memory (QWEN.md + output-language.md) to ensure subagent respects project conventions
+    const userMemory = this.runtimeContext.getUserMemory();
+    if (userMemory && userMemory.trim().length > 0) {
+      finalPrompt += `\n\n---\n\n${userMemory.trim()}`;
+    }
+
     return finalPrompt;
   }
 }
diff --git a/packages/core/src/tools/memoryTool.ts b/packages/core/src/tools/memoryTool.ts
index fff2d2be1..95c89b18b 100644
--- a/packages/core/src/tools/memoryTool.ts
+++ b/packages/core/src/tools/memoryTool.ts
@@ -76,11 +76,16 @@ Do NOT use this tool:
 
 export const QWEN_CONFIG_DIR = '.qwen';
 export const DEFAULT_CONTEXT_FILENAME = 'QWEN.md';
+export const AGENT_CONTEXT_FILENAME = 'AGENTS.md';
 export const MEMORY_SECTION_HEADER = '## Qwen Added Memories';
 
-// This variable will hold the currently configured filename for QWEN.md context files.
-// It defaults to DEFAULT_CONTEXT_FILENAME but can be overridden by setGeminiMdFilename.
-let currentGeminiMdFilename: string | string[] = DEFAULT_CONTEXT_FILENAME;
+// This variable will hold the currently configured filename for context files.
+// It defaults to include both QWEN.md and AGENTS.md but can be overridden by setGeminiMdFilename.
+// QWEN.md is first to maintain backward compatibility (used by /init command and save_memory tool).
+let currentGeminiMdFilename: string | string[] = [
+  DEFAULT_CONTEXT_FILENAME,
+  AGENT_CONTEXT_FILENAME,
+];
 
 export function setGeminiMdFilename(newFilename: string | string[]): void {
   if (Array.isArray(newFilename)) {
diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts
index 4972f26e7..ec07a6995 100644
--- a/packages/core/src/tools/read-file.test.ts
+++ b/packages/core/src/tools/read-file.test.ts
@@ -231,8 +231,8 @@ describe('ReadFileTool', () => {
 
     it('should return error for a file that is too large', async () => {
       const filePath = path.join(tempRootDir, 'largefile.txt');
-      // 21MB of content exceeds 20MB limit
-      const largeContent = 'x'.repeat(21 * 1024 * 1024);
+      // 11MB of content exceeds 10MB limit
+      const largeContent = 'x'.repeat(11 * 1024 * 1024);
       await fsp.writeFile(filePath, largeContent, 'utf-8');
       const params: ReadFileToolParams = { absolute_path: filePath };
       const invocation = tool.build(params) as ToolInvocation<
@@ -244,7 +244,7 @@ describe('ReadFileTool', () => {
       expect(result).toHaveProperty('error');
       expect(result.error?.type).toBe(ToolErrorType.FILE_TOO_LARGE);
       expect(result.error?.message).toContain(
-        'File size exceeds the 20MB limit',
+        'File size exceeds the 10MB limit',
       );
     });
 
diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts
index da9f257fd..b21ee79e2 100644
--- a/packages/core/src/utils/fileUtils.test.ts
+++ b/packages/core/src/utils/fileUtils.test.ts
@@ -948,13 +948,13 @@ describe('fileUtils', () => {
       );
     });
 
-    it('should return an error if the file size exceeds 20MB', async () => {
+    it('should return an error if the file size exceeds 10MB', async () => {
       // Create a small test file
       actualNodeFs.writeFileSync(testTextFilePath, 'test content');
 
       // Spy on fs.promises.stat to return a large file size
       const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
-        size: 21 * 1024 * 1024,
+        size: 11 * 1024 * 1024,
         isDirectory: () => false,
       } as fs.Stats);
 
@@ -964,11 +964,11 @@ describe('fileUtils', () => {
           mockConfig,
         );
 
-        expect(result.error).toContain('File size exceeds the 20MB limit');
+        expect(result.error).toContain('File size exceeds the 10MB limit');
         expect(result.returnDisplay).toContain(
-          'File size exceeds the 20MB limit',
+          'File size exceeds the 10MB limit',
         );
-        expect(result.llmContent).toContain('File size exceeds the 20MB limit');
+        expect(result.llmContent).toContain('File size exceeds the 10MB limit');
       } finally {
         statSpy.mockRestore();
       }
diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts
index 3e4124d18..aab6935cb 100644
--- a/packages/core/src/utils/fileUtils.ts
+++ b/packages/core/src/utils/fileUtils.ts
@@ -340,11 +340,12 @@ export async function processSingleFileContent(
     }
 
     const fileSizeInMB = stats.size / (1024 * 1024);
-    if (fileSizeInMB > 20) {
+    // Use 9.9MB instead of 10MB to leave margin for encoding overhead (#1880)
+    if (fileSizeInMB > 9.9) {
       return {
-        llmContent: 'File size exceeds the 20MB limit.',
-        returnDisplay: 'File size exceeds the 20MB limit.',
-        error: `File size exceeds the 20MB limit: ${filePath} (${fileSizeInMB.toFixed(2)}MB)`,
+        llmContent: 'File size exceeds the 10MB limit.',
+        returnDisplay: 'File size exceeds the 10MB limit.',
+        error: `File size exceeds the 10MB limit: ${filePath} (${fileSizeInMB.toFixed(2)}MB)`,
         errorType: ToolErrorType.FILE_TOO_LARGE,
       };
     }
@@ -465,6 +466,16 @@ export async function processSingleFileContent(
       case 'pdf': {
         const contentBuffer = await fs.promises.readFile(filePath);
         const base64Data = contentBuffer.toString('base64');
+        const base64SizeInMB = base64Data.length / (1024 * 1024);
+        // Use 9.9MB instead of 10MB to leave margin for small overhead (#1880)
+        if (base64SizeInMB > 9.9) {
+          return {
+            llmContent: `File exceeds the 10MB data URI limit after base64 encoding (${base64SizeInMB.toFixed(2)}MB encoded).`,
+            returnDisplay: `File exceeds the 10MB data URI limit after base64 encoding.`,
+            error: `File exceeds the 10MB data URI limit after base64 encoding: ${filePath} (${base64SizeInMB.toFixed(2)}MB encoded)`,
+            errorType: ToolErrorType.FILE_TOO_LARGE,
+          };
+        }
         return {
           llmContent: {
             inlineData: {
diff --git a/packages/core/src/utils/pathReader.test.ts b/packages/core/src/utils/pathReader.test.ts
index 5de10765b..282a7d6d1 100644
--- a/packages/core/src/utils/pathReader.test.ts
+++ b/packages/core/src/utils/pathReader.test.ts
@@ -392,8 +392,8 @@ describe('readPathFromWorkspace', () => {
   );
 
   it('should return an error string for files exceeding the size limit', async () => {
-    // Mock a file slightly larger than the 20MB limit defined in fileUtils.ts
-    const largeContent = 'a'.repeat(21 * 1024 * 1024); // 21MB
+    // Mock a file slightly larger than the 10MB limit defined in fileUtils.ts
+    const largeContent = 'a'.repeat(11 * 1024 * 1024); // 11MB
     mock({
       [CWD]: {
         'large.txt': largeContent,
@@ -406,6 +406,6 @@ describe('readPathFromWorkspace', () => {
     const result = await readPathFromWorkspace('large.txt', config);
     const textResult = result[0] as string;
     // The error message comes directly from processSingleFileContent
-    expect(textResult).toBe('File size exceeds the 20MB limit.');
+    expect(textResult).toBe('File size exceeds the 10MB limit.');
   });
 });
diff --git a/packages/core/src/utils/rateLimit.test.ts b/packages/core/src/utils/rateLimit.test.ts
index 48605db20..a342a4a0b 100644
--- a/packages/core/src/utils/rateLimit.test.ts
+++ b/packages/core/src/utils/rateLimit.test.ts
@@ -33,6 +33,13 @@ describe('isRateLimitError — detection paths', () => {
     expect(info).toBe(true);
   });
 
+  it('should detect 1305 code from ApiError (issue #1918)', () => {
+    const info = isRateLimitError({
+      error: { code: 1305, message: 'IdealTalk rate limit' },
+    });
+    expect(info).toBe(true);
+  });
+
   it('should detect rate-limit from StructuredError.status', () => {
     const error: StructuredError = { message: 'Rate limited', status: 429 };
     const info = isRateLimitError(error);
@@ -52,6 +59,21 @@ describe('isRateLimitError — detection paths', () => {
     ).toBe(false);
   });
 
+  it('should detect custom error code passed via extraCodes', () => {
+    expect(
+      isRateLimitError(
+        { error: { code: 9999, message: 'Custom rate limit' } },
+        [9999],
+      ),
+    ).toBe(true);
+  });
+
+  it('should not detect custom code when extraCodes is not provided', () => {
+    expect(
+      isRateLimitError({ error: { code: 9999, message: 'Custom rate limit' } }),
+    ).toBe(false);
+  });
+
   it('should return null for invalid inputs', () => {
     expect(isRateLimitError(null)).toBe(false);
     expect(isRateLimitError(undefined)).toBe(false);
diff --git a/packages/core/src/utils/rateLimit.ts b/packages/core/src/utils/rateLimit.ts
index 559cb26fb..19466e90f 100644
--- a/packages/core/src/utils/rateLimit.ts
+++ b/packages/core/src/utils/rateLimit.ts
@@ -10,7 +10,8 @@ import { isApiError, isStructuredError } from './quotaErrorDetection.js';
 // 429  - Standard HTTP "Too Many Requests" (DashScope TPM, OpenAI, etc.)
 // 503  - Provider throttling/overload (treated as rate-limit for retry UI)
 // 1302 - Z.AI GLM rate limit (https://docs.z.ai/api-reference/api-code)
-const RATE_LIMIT_ERROR_CODES = new Set([429, 503, 1302]);
+// 1305 - DashScope/IdealTalk internal rate limit (issue #1918)
+const RATE_LIMIT_ERROR_CODES = new Set([429, 503, 1302, 1305]);
 
 export interface RetryInfo {
   /** Formatted error message for display, produced by parseAndFormatApiError. */
@@ -25,10 +26,20 @@ export interface RetryInfo {
 
 /**
  * Detects rate-limit / throttling errors and returns retry info.
+ *
+ * @param error - The error to check.
+ * @param extraCodes - Additional error codes to treat as rate-limit errors,
+ *   merged with the built-in set at call time (not mutating the default set).
  */
-export function isRateLimitError(error: unknown): boolean {
+export function isRateLimitError(
+  error: unknown,
+  extraCodes?: readonly number[],
+): boolean {
   const code = getErrorCode(error);
-  return code !== null && RATE_LIMIT_ERROR_CODES.has(code);
+  if (code === null) return false;
+  if (RATE_LIMIT_ERROR_CODES.has(code)) return true;
+  if (extraCodes && extraCodes.includes(code)) return true;
+  return false;
 }
 
 /**