diff --git a/docs/developers/sdk-typescript.md b/docs/developers/sdk-typescript.md index 46625e840..4c705f068 100644 --- a/docs/developers/sdk-typescript.md +++ b/docs/developers/sdk-typescript.md @@ -63,6 +63,7 @@ Creates a new query session with the Qwen Code. | `permissionMode` | `'default' \| 'plan' \| 'auto-edit' \| 'yolo'` | `'default'` | Permission mode controlling tool execution approval. See [Permission Modes](#permission-modes) for details. | | `canUseTool` | `CanUseTool` | - | Custom permission handler for tool execution approval. Invoked when a tool requires confirmation. Must respond within 60 seconds or the request will be auto-denied. See [Custom Permission Handler](#custom-permission-handler). | | `env` | `Record` | - | Environment variables to pass to the Qwen Code process. Merged with the current process environment. | +| `systemPrompt` | `string \| QuerySystemPromptPreset` | - | System prompt configuration for the main session. Use a string to fully override the built-in Qwen Code system prompt, or a preset object to keep the built-in prompt and append extra instructions. | | `mcpServers` | `Record` | - | MCP (Model Context Protocol) servers to connect. Supports external servers (stdio/SSE/HTTP) and SDK-embedded servers. External servers are configured with transport options like `command`, `args`, `url`, `httpUrl`, etc. SDK servers use `{ type: 'sdk', name: string, instance: Server }`. | | `abortController` | `AbortController` | - | Controller to cancel the query session. Call `abortController.abort()` to terminate the session and cleanup resources. | | `debug` | `boolean` | `false` | Enable debug mode for verbose logging from the CLI process. | @@ -248,6 +249,36 @@ const result = query({ }); ``` +### Override the System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Say hello in one sentence.', + options: { + systemPrompt: 'You are a terse assistant. 
Answer in exactly one sentence.', + }, +}); +``` + +### Append to the Built-in System Prompt + +```typescript +import { query } from '@qwen-code/sdk'; + +const result = query({ + prompt: 'Review the current directory.', + options: { + systemPrompt: { + type: 'preset', + preset: 'qwen_code', + append: 'Be terse and focus on concrete findings.', + }, + }, +}); +``` + ### With SDK-Embedded MCP Servers The SDK provides `tool` and `createSdkMcpServer` to create MCP servers that run in the same process as your SDK application. This is useful when you want to expose custom tools to the AI without running a separate server process. diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index bc56a437e..308d09e63 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -412,6 +412,8 @@ Arguments passed directly when running the CLI can override other configurations | `--model` | `-m` | Specifies the Qwen model to use for this session. | Model name | Example: `npm start -- --model qwen3-coder-plus` | | `--prompt` | `-p` | Used to pass a prompt directly to the command. This invokes Qwen Code in a non-interactive mode. | Your prompt text | For scripting examples, use the `--output-format json` flag to get structured output. | | `--prompt-interactive` | `-i` | Starts an interactive session with the provided prompt as the initial input. | Your prompt text | The prompt is processed within the interactive session, not before it. Cannot be used when piping input from stdin. Example: `qwen -i "explain this code"` | +| `--system-prompt` | | Overrides the built-in main session system prompt for this run. | Your prompt text | Loaded context files such as `QWEN.md` are still appended after this override. Can be combined with `--append-system-prompt`. | +| `--append-system-prompt` | | Appends extra instructions to the main session system prompt for this run. 
| Your prompt text | Applied after the built-in prompt and loaded context files. Can be combined with `--system-prompt`. See [Headless Mode](../features/headless) for examples. | | `--output-format` | `-o` | Specifies the format of the CLI output for non-interactive mode. | `text`, `json`, `stream-json` | `text`: (Default) The standard human-readable output. `json`: A machine-readable JSON output emitted at the end of execution. `stream-json`: Streaming JSON messages emitted as they occur during execution. For structured output and scripting, use the `--output-format json` or `--output-format stream-json` flag. See [Headless Mode](../features/headless) for detailed information. | | `--input-format` | | Specifies the format consumed from standard input. | `text`, `stream-json` | `text`: (Default) Standard text input from stdin or command-line arguments. `stream-json`: JSON message protocol via stdin for bidirectional communication. Requirement: `--input-format stream-json` requires `--output-format stream-json` to be set. When using `stream-json`, stdin is reserved for protocol messages. See [Headless Mode](../features/headless) for detailed information. | | `--include-partial-messages` | | Include partial assistant messages when using `stream-json` output format. When enabled, emits stream events (message_start, content_block_delta, etc.) as they occur during streaming. | | Default: `false`. Requirement: Requires `--output-format stream-json` to be set. See [Headless Mode](../features/headless) for detailed information about stream events. | diff --git a/docs/users/features/headless.md b/docs/users/features/headless.md index 203e08a2d..12172f121 100644 --- a/docs/users/features/headless.md +++ b/docs/users/features/headless.md @@ -58,6 +58,40 @@ qwen --resume 123e4567-e89b-12d3-a456-426614174000 -p "Apply the follow-up refac > - Session data is project-scoped JSONL under `~/.qwen/projects//chats`. 
> - Restores conversation history, tool outputs, and chat-compression checkpoints before sending the new prompt. +## Customize the Main Session Prompt + +You can change the main session system prompt for a single CLI run without editing shared memory files. + +### Override the Built-in System Prompt + +Use `--system-prompt` to replace Qwen Code's built-in main-session prompt for the current run: + +```bash +qwen -p "Review this patch" --system-prompt "You are a terse release reviewer. Report only blocking issues." +``` + +### Append Extra Instructions + +Use `--append-system-prompt` to keep the built-in prompt and add extra instructions for this run: + +```bash +qwen -p "Review this patch" --append-system-prompt "Be terse and focus on concrete findings." +``` + +You can combine both flags when you want a custom base prompt plus an extra run-specific instruction: + +```bash +qwen -p "Summarize this repository" \ + --system-prompt "You are a migration planner." \ + --append-system-prompt "Return exactly three bullets." +``` + +> [!note] +> +> - `--system-prompt` applies only to the current run's main session. +> - Loaded memory and context files such as `QWEN.md` are still appended after `--system-prompt`. +> - `--append-system-prompt` is applied after the built-in prompt and loaded memory, and can be used together with `--system-prompt`. 
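+### Combine with Structured Output
+
+Both flags compose with `--output-format json` for scripted runs. The following is an illustrative sketch; the prompt and instruction strings are placeholders, not fixed values:
+
+```bash
+# Run a one-off review with a custom base prompt and machine-readable output
+qwen -p "Review the staged changes" \
+  --system-prompt "You are a release reviewer. Report only blocking issues." \
+  --output-format json
+```
+
+The JSON result can then be parsed by CI tooling, as shown in the [Output Formats](#output-formats) section below.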
+ ## Output Formats Qwen Code supports multiple output formats for different use cases: @@ -189,19 +223,21 @@ qwen -p "Write code" --output-format stream-json --include-partial-messages | jq Key command-line options for headless usage: -| Option | Description | Example | -| ---------------------------- | --------------------------------------------------- | ------------------------------------------------------------------------ | -| `--prompt`, `-p` | Run in headless mode | `qwen -p "query"` | -| `--output-format`, `-o` | Specify output format (text, json, stream-json) | `qwen -p "query" --output-format json` | -| `--input-format` | Specify input format (text, stream-json) | `qwen --input-format text --output-format stream-json` | -| `--include-partial-messages` | Include partial messages in stream-json output | `qwen -p "query" --output-format stream-json --include-partial-messages` | -| `--debug`, `-d` | Enable debug mode | `qwen -p "query" --debug` | -| `--all-files`, `-a` | Include all files in context | `qwen -p "query" --all-files` | -| `--include-directories` | Include additional directories | `qwen -p "query" --include-directories src,docs` | -| `--yolo`, `-y` | Auto-approve all actions | `qwen -p "query" --yolo` | -| `--approval-mode` | Set approval mode | `qwen -p "query" --approval-mode auto_edit` | -| `--continue` | Resume the most recent session for this project | `qwen --continue -p "Pick up where we left off"` | -| `--resume [sessionId]` | Resume a specific session (or choose interactively) | `qwen --resume 123e... 
-p "Finish the refactor"` | +| Option | Description | Example | +| ---------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | +| `--prompt`, `-p` | Run in headless mode | `qwen -p "query"` | +| `--output-format`, `-o` | Specify output format (text, json, stream-json) | `qwen -p "query" --output-format json` | +| `--input-format` | Specify input format (text, stream-json) | `qwen --input-format text --output-format stream-json` | +| `--include-partial-messages` | Include partial messages in stream-json output | `qwen -p "query" --output-format stream-json --include-partial-messages` | +| `--system-prompt` | Override the main session system prompt for this run | `qwen -p "query" --system-prompt "You are a terse reviewer."` | +| `--append-system-prompt` | Append extra instructions to the main session system prompt for this run | `qwen -p "query" --append-system-prompt "Focus on concrete findings."` | +| `--debug`, `-d` | Enable debug mode | `qwen -p "query" --debug` | +| `--all-files`, `-a` | Include all files in context | `qwen -p "query" --all-files` | +| `--include-directories` | Include additional directories | `qwen -p "query" --include-directories src,docs` | +| `--yolo`, `-y` | Auto-approve all actions | `qwen -p "query" --yolo` | +| `--approval-mode` | Set approval mode | `qwen -p "query" --approval-mode auto_edit` | +| `--continue` | Resume the most recent session for this project | `qwen --continue -p "Pick up where we left off"` | +| `--resume [sessionId]` | Resume a specific session (or choose interactively) | `qwen --resume 123e... -p "Finish the refactor"` | For complete details on all available configuration options, settings files, and environment variables, see the [Configuration Guide](../configuration/settings). 
diff --git a/integration-tests/sdk-typescript/message-event-pairing.test.ts b/integration-tests/sdk-typescript/message-event-pairing.test.ts new file mode 100644 index 000000000..b439ec276 --- /dev/null +++ b/integration-tests/sdk-typescript/message-event-pairing.test.ts @@ -0,0 +1,870 @@ +/** + * E2E tests for message_start and message_stop event pairing + * Ensures that message_start and message_stop events are always paired correctly + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { + query, + isSDKPartialAssistantMessage, + isSDKAssistantMessage, + type SDKPartialAssistantMessage, + type TextBlock, +} from '@qwen-code/sdk'; +import { SDKTestHelper, createSharedTestOptions } from './test-helper.js'; + +const SHARED_TEST_OPTIONS = createSharedTestOptions(); + +describe('Message Start/Stop Event Pairing (E2E)', () => { + let helper: SDKTestHelper; + let testDir: string; + + beforeEach(async () => { + helper = new SDKTestHelper(); + testDir = await helper.setup('message-event-pairing'); + }); + + afterEach(async () => { + await helper.cleanup(); + }); + + describe('Basic Message Event Pairing', () => { + it('should emit paired message_start and message_stop for single turn', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start and message_stop are paired + expect(messageStartEvents.length).toBeGreaterThan(0); + 
expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + + it('should emit message_start before message_stop', async () => { + const events: Array<{ type: string; timestamp: number }> = []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if ( + message.event.type === 'message_start' || + message.event.type === 'message_stop' + ) { + events.push({ + type: message.event.type, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start comes before message_stop + expect(events.length).toBeGreaterThanOrEqual(2); + expect(events[0].type).toBe('message_start'); + expect(events[events.length - 1].type).toBe('message_stop'); + }); + + it('should have matching session_id for paired events', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify session_id matches between paired events + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + expect(messageStartEvents[0].session_id).toBe( + messageStopEvents[0].session_id, + ); + }); + }); + + describe('Multi-turn Message Event Pairing', () => { + it('should emit paired events for each turn in multi-turn conversation', async () 
=> { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + const assistantMessages: string[] = []; + + const sessionId = crypto.randomUUID(); + + const q = query({ + prompt: (async function* () { + // First turn + yield { + type: 'user', + session_id: sessionId, + message: { + role: 'user', + content: 'Say "first"', + }, + parent_tool_use_id: null, + }; + + // Wait a bit for processing + await new Promise((resolve) => setTimeout(resolve, 500)); + + // Second turn + yield { + type: 'user', + session_id: sessionId, + message: { + role: 'user', + content: 'Say "second"', + }, + parent_tool_use_id: null, + }; + })(), + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } else if (isSDKAssistantMessage(message)) { + const text = message.message.content + .filter((block): block is TextBlock => block.type === 'text') + .map((block) => block.text) + .join(''); + assistantMessages.push(text); + } + } + } finally { + await q.close(); + } + + // Verify we have paired events for each assistant message + expect(messageStartEvents.length).toBeGreaterThanOrEqual(1); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + }); + + describe('Message Event Pairing with Tool Calls', () => { + it('should emit paired events when tool is used', async () => { + await helper.createFile('test.txt', 'Hello World'); + + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Read the content of test.txt', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + 
cwd: testDir, + coreTools: ['read_file'], + permissionMode: 'default', + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start and message_stop are paired even with tool usage + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + + it('should maintain event pairing through multiple tool calls', async () => { + await helper.createFile('file1.txt', 'Content 1'); + await helper.createFile('file2.txt', 'Content 2'); + + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Read file1.txt and file2.txt and summarize their contents', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + coreTools: ['read_file'], + permissionMode: 'default', + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify events are paired + expect(messageStartEvents.length).toBeGreaterThan(0); + expect(messageStopEvents.length).toBe(messageStartEvents.length); + }); + }); + + describe('Message Event Structure Validation', () => { + it('should have correct message_start event structure', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: 
testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'message_start' + ) { + messageStartEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(messageStartEvents.length).toBeGreaterThan(0); + const startEvent = messageStartEvents[0].event; + expect(startEvent.type).toBe('message_start'); + if (startEvent.type === 'message_start') { + expect(startEvent.message).toBeDefined(); + expect(startEvent.message.id).toBeDefined(); + expect(startEvent.message.role).toBe('assistant'); + expect(startEvent.message.model).toBeDefined(); + } + }); + + it('should have correct message_stop event structure', async () => { + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'message_stop' + ) { + messageStopEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(messageStopEvents.length).toBeGreaterThan(0); + const event = messageStopEvents[0].event; + expect(event.type).toBe('message_stop'); + }); + + it('should have message_start and message_stop paired by count', async () => { + const startEvents: SDKPartialAssistantMessage[] = []; + const stopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + startEvents.push(message); + } else if (message.event.type === 'message_stop') { + stopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // 
Verify message_start and message_stop appear in pairs (same count) + expect(startEvents.length).toBeGreaterThan(0); + expect(stopEvents.length).toBe(startEvents.length); + + // Verify message_start carries the message id via its nested message.id field + for (const e of startEvents) { + const event = e.event as { + type: 'message_start'; + message: { id: string }; + }; + expect(typeof event.message.id).toBe('string'); + expect(event.message.id.length).toBeGreaterThan(0); + } + }); + }); + + describe('Error Scenarios', () => { + it('should still emit message_stop even when query errors', async () => { + const messageStartEvents: SDKPartialAssistantMessage[] = []; + const messageStopEvents: SDKPartialAssistantMessage[] = []; + + // Use an invalid tool to trigger an error scenario + const q = query({ + prompt: 'Use a non-existent tool', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + coreTools: [], // No tools available + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'message_start') { + messageStartEvents.push(message); + } else if (message.event.type === 'message_stop') { + messageStopEvents.push(message); + } + } + } + } catch { + // Expected to potentially have errors + } finally { + await q.close(); + } + + // Even in error scenarios, if message_start was emitted, message_stop should also be emitted + if (messageStartEvents.length > 0) { + expect(messageStopEvents.length).toBe(messageStartEvents.length); + } + }); + }); + + describe('Content Block Event Pairing', () => { + it('should emit paired content_block_start and content_block_stop for each content block', async () => { + const contentBlockStartEvents: SDKPartialAssistantMessage[] = []; + const contentBlockStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + 
cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'content_block_start') { + contentBlockStartEvents.push(message); + } else if (message.event.type === 'content_block_stop') { + contentBlockStopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify content_block_start and content_block_stop are paired + expect(contentBlockStartEvents.length).toBeGreaterThan(0); + expect(contentBlockStopEvents.length).toBe( + contentBlockStartEvents.length, + ); + }); + + it('should emit content_block_start before content_block_stop', async () => { + const events: Array<{ type: string; index: number; timestamp: number }> = + []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if ( + message.event.type === 'content_block_start' || + message.event.type === 'content_block_stop' + ) { + events.push({ + type: message.event.type, + index: message.event.index, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify events exist + expect(events.length).toBeGreaterThanOrEqual(2); + + // Group events by index + const eventsByIndex = new Map(); + for (const event of events) { + if (!eventsByIndex.has(event.index)) { + eventsByIndex.set(event.index, []); + } + eventsByIndex.get(event.index)!.push(event); + } + + // For each index, verify content_block_start comes before content_block_stop + eventsByIndex.forEach((indexEvents) => { + const startIndex = indexEvents.findIndex( + (e) => e.type === 'content_block_start', + ); + const stopIndex = indexEvents.findIndex( + (e) => e.type === 'content_block_stop', + ); + expect(startIndex).toBeGreaterThanOrEqual(0); + expect(stopIndex).toBeGreaterThanOrEqual(0); + 
expect(startIndex).toBeLessThan(stopIndex); + }); + }); + + it('should have correct content_block_start event structure', async () => { + const contentBlockStartEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'content_block_start' + ) { + contentBlockStartEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(contentBlockStartEvents.length).toBeGreaterThan(0); + + // Verify each content_block_start has correct structure + for (const message of contentBlockStartEvents) { + const event = message.event as { + type: 'content_block_start'; + index: number; + content_block: unknown; + }; + expect(event.type).toBe('content_block_start'); + expect(event).toHaveProperty('index'); + expect(typeof event.index).toBe('number'); + expect(event.index).toBeGreaterThanOrEqual(0); + expect(event).toHaveProperty('content_block'); + expect(event.content_block).toBeDefined(); + } + }); + + it('should have correct content_block_stop event structure', async () => { + const contentBlockStopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if ( + isSDKPartialAssistantMessage(message) && + message.event.type === 'content_block_stop' + ) { + contentBlockStopEvents.push(message); + } + } + } finally { + await q.close(); + } + + expect(contentBlockStopEvents.length).toBeGreaterThan(0); + + // Verify each content_block_stop has correct structure + for (const message of contentBlockStopEvents) { + const event = message.event as { + type: 'content_block_stop'; + index: number; + }; + 
expect(event.type).toBe('content_block_stop'); + expect(event).toHaveProperty('index'); + expect(typeof event.index).toBe('number'); + expect(event.index).toBeGreaterThanOrEqual(0); + } + }); + + it('should have matching index for paired content_block_start and content_block_stop', async () => { + const startEvents: SDKPartialAssistantMessage[] = []; + const stopEvents: SDKPartialAssistantMessage[] = []; + + const q = query({ + prompt: 'Say hello world', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + if (message.event.type === 'content_block_start') { + startEvents.push(message); + } else if (message.event.type === 'content_block_stop') { + stopEvents.push(message); + } + } + } + } finally { + await q.close(); + } + + // Verify events exist and are paired + expect(startEvents.length).toBeGreaterThan(0); + expect(stopEvents.length).toBe(startEvents.length); + + // Extract indices from start and stop events + const startIndices = startEvents.map( + (e) => (e.event as { index: number }).index, + ); + const stopIndices = stopEvents.map( + (e) => (e.event as { index: number }).index, + ); + + // Verify each start index has a matching stop index + expect(new Set(stopIndices)).toEqual(new Set(startIndices)); + + // Verify each index appears the same number of times in both start and stop events + const startIndexCounts = new Map(); + const stopIndexCounts = new Map(); + + for (const idx of startIndices) { + startIndexCounts.set(idx, (startIndexCounts.get(idx) || 0) + 1); + } + for (const idx of stopIndices) { + stopIndexCounts.set(idx, (stopIndexCounts.get(idx) || 0) + 1); + } + + startIndexCounts.forEach((count, idx) => { + expect(stopIndexCounts.get(idx)).toBe(count); + }); + }); + + it('should follow correct event flow: content_block_start -> content_block_delta -> content_block_stop', async () => { + const 
events: Array<{ + type: string; + index: number; + position: number; + }> = []; + + const q = query({ + prompt: 'Write a short story about a cat', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + let pos = 0; + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; + if ( + eventType === 'content_block_start' || + eventType === 'content_block_delta' || + eventType === 'content_block_stop' + ) { + events.push({ + type: eventType, + index: (message.event as { index: number }).index, + position: pos++, + }); + } + } + } + } finally { + await q.close(); + } + + expect(events.length).toBeGreaterThanOrEqual(2); + + // Pair content_block_start/stop sequentially (not by index, since + // block-type transitions reset the blocks array and reuse index 0). + // Each start is matched with the next stop that follows it. + const starts = events.filter((e) => e.type === 'content_block_start'); + const stops = events.filter((e) => e.type === 'content_block_stop'); + expect(starts.length).toBe(stops.length); + + for (let i = 0; i < starts.length; i++) { + const start = starts[i]; + const stop = stops[i]; + + // start must come before the paired stop + expect(start.position).toBeLessThan(stop.position); + + // All deltas between this pair must sit between start and stop + const deltas = events.filter( + (e) => + e.type === 'content_block_delta' && + e.position > start.position && + e.position < stop.position, + ); + for (const delta of deltas) { + expect(delta.position).toBeGreaterThan(start.position); + expect(delta.position).toBeLessThan(stop.position); + } + } + }); + + it('should have content_block_start after message_start and before message_stop', async () => { + const events: Array<{ + type: string; + timestamp: number; + }> = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + 
includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; + if ( + eventType === 'message_start' || + eventType === 'message_stop' || + eventType === 'content_block_start' + ) { + events.push({ + type: eventType, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start exists + const messageStartIndex = events.findIndex( + (e) => e.type === 'message_start', + ); + expect(messageStartIndex).toBeGreaterThanOrEqual(0); + + // Verify message_stop exists + const messageStopIndex = events.findIndex( + (e) => e.type === 'message_stop', + ); + expect(messageStopIndex).toBeGreaterThanOrEqual(0); + + // Verify content_block_start exists + const firstContentBlockStartIndex = events.findIndex( + (e) => e.type === 'content_block_start', + ); + expect(firstContentBlockStartIndex).toBeGreaterThanOrEqual(0); + + // content_block_start should be after message_start + expect(firstContentBlockStartIndex).toBeGreaterThan(messageStartIndex); + + // content_block_start should be before message_stop + expect(firstContentBlockStartIndex).toBeLessThan(messageStopIndex); + }); + + it('should have content_block_stop after message_start and before message_stop', async () => { + const events: Array<{ + type: string; + timestamp: number; + }> = []; + + const q = query({ + prompt: 'Say hello', + options: { + ...SHARED_TEST_OPTIONS, + includePartialMessages: true, + cwd: testDir, + debug: false, + }, + }); + + try { + for await (const message of q) { + if (isSDKPartialAssistantMessage(message)) { + const eventType = message.event.type; + if ( + eventType === 'message_start' || + eventType === 'message_stop' || + eventType === 'content_block_stop' + ) { + events.push({ + type: eventType, + timestamp: Date.now(), + }); + } + } + } + } finally { + await q.close(); + } + + // Verify message_start exists + 
const messageStartIndex = events.findIndex( + (e) => e.type === 'message_start', + ); + expect(messageStartIndex).toBeGreaterThanOrEqual(0); + + // Verify message_stop exists + const messageStopIndex = events.findIndex( + (e) => e.type === 'message_stop', + ); + expect(messageStopIndex).toBeGreaterThanOrEqual(0); + + // Verify content_block_stop exists (use reverse find for ES compatibility) + const lastContentBlockStopIndex = + events + .map((e, i) => ({ ...e, originalIndex: i })) + .reverse() + .find((e) => e.type === 'content_block_stop')?.originalIndex ?? -1; + expect(lastContentBlockStopIndex).toBeGreaterThanOrEqual(0); + + // content_block_stop should be after message_start + expect(lastContentBlockStopIndex).toBeGreaterThan(messageStartIndex); + + // content_block_stop should be before message_stop + expect(lastContentBlockStopIndex).toBeLessThan(messageStopIndex); + }); + }); +}); diff --git a/integration-tests/terminal-capture/scenarios/pr-2371-review.ts b/integration-tests/terminal-capture/scenarios/pr-2371-review.ts new file mode 100644 index 000000000..0752f0a20 --- /dev/null +++ b/integration-tests/terminal-capture/scenarios/pr-2371-review.ts @@ -0,0 +1,18 @@ +import type { ScenarioConfig } from '../scenario-runner.js'; + +export default { + name: 'pr-2371-review', + spawn: ['node', 'dist/cli.js', '--yolo'], + terminal: { title: 'qwen-code', cwd: '../../..' 
}, + flow: [ + { + type: '/review https://github.com/QwenLM/qwen-code/pull/2371', + streaming: { + delayMs: 5000, + intervalMs: 10000, // Every 10s + count: 60, // 10 minutes total (60 * 10s) + gif: true, + }, + }, + ], +} satisfies ScenarioConfig; diff --git a/package-lock.json b/package-lock.json index 5f194731a..fd6cc6624 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.5", + "version": "0.13.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@qwen-code/qwen-code", - "version": "0.12.5", + "version": "0.13.0", "workspaces": [ "packages/*" ], @@ -18784,7 +18784,7 @@ }, "packages/cli": { "name": "@qwen-code/qwen-code", - "version": "0.12.5", + "version": "0.13.0", "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", "@google/genai": "1.30.0", @@ -19441,7 +19441,7 @@ }, "packages/core": { "name": "@qwen-code/qwen-code-core", - "version": "0.12.5", + "version": "0.13.0", "hasInstallScript": true, "dependencies": { "@anthropic-ai/sdk": "^0.36.1", @@ -22872,7 +22872,7 @@ }, "packages/test-utils": { "name": "@qwen-code/qwen-code-test-utils", - "version": "0.12.5", + "version": "0.13.0", "dev": true, "license": "Apache-2.0", "devDependencies": { @@ -22884,7 +22884,7 @@ }, "packages/vscode-ide-companion": { "name": "qwen-code-vscode-ide-companion", - "version": "0.12.5", + "version": "0.13.0", "license": "LICENSE", "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", @@ -23132,7 +23132,7 @@ }, "packages/web-templates": { "name": "@qwen-code/web-templates", - "version": "0.12.5", + "version": "0.13.0", "devDependencies": { "@types/react": "^18.2.0", "@types/react-dom": "^18.2.0", @@ -23660,7 +23660,7 @@ }, "packages/webui": { "name": "@qwen-code/webui", - "version": "0.12.5", + "version": "0.13.0", "license": "MIT", "dependencies": { "markdown-it": "^14.1.0" diff --git a/package.json b/package.json index 24b219589..c1dfa2448 100644 --- a/package.json +++ 
b/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.5", + "version": "0.13.0", "engines": { "node": ">=20.0.0" }, @@ -13,7 +13,7 @@ "url": "git+https://github.com/QwenLM/qwen-code.git" }, "config": { - "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.5" + "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.13.0" }, "scripts": { "start": "cross-env node scripts/start.js", @@ -36,8 +36,8 @@ "test:integration:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests", "test:integration:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env QWEN_SANDBOX=podman vitest run --root ./integration-tests", - "test:integration:sdk:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests sdk-typescript", - "test:integration:sdk:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests sdk-typescript", + "test:integration:sdk:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests --poolOptions.threads.maxThreads 2 sdk-typescript", + "test:integration:sdk:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests --poolOptions.threads.maxThreads 2 sdk-typescript", "test:integration:cli:sandbox:none": "cross-env QWEN_SANDBOX=false vitest run --root ./integration-tests --exclude '**/sdk-typescript/**'", "test:integration:cli:sandbox:docker": "cross-env QWEN_SANDBOX=docker npm run build:sandbox && QWEN_SANDBOX=docker vitest run --root ./integration-tests --exclude '**/sdk-typescript/**'", "test:terminal-bench": "cross-env VERBOSE=true KEEP_OUTPUT=true vitest run --config ./vitest.terminal-bench.config.ts --root ./integration-tests", diff --git a/packages/cli/package.json b/packages/cli/package.json index 
97fab4c5c..fff36c603 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code", - "version": "0.12.5", + "version": "0.13.0", "description": "Qwen Code", "repository": { "type": "git", @@ -33,7 +33,7 @@ "dist" ], "config": { - "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.5" + "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.13.0" }, "dependencies": { "@agentclientprotocol/sdk": "^0.14.1", diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 644fc050c..3e304050a 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -241,6 +241,30 @@ describe('parseArguments', () => { expect(argv.prompt).toBeUndefined(); }); + it('should parse --system-prompt', async () => { + process.argv = [ + 'node', + 'script.js', + '--system-prompt', + 'You are a test system prompt.', + ]; + const argv = await parseArguments(); + expect(argv.systemPrompt).toBe('You are a test system prompt.'); + expect(argv.appendSystemPrompt).toBeUndefined(); + }); + + it('should parse --append-system-prompt', async () => { + process.argv = [ + 'node', + 'script.js', + '--append-system-prompt', + 'Be extra concise.', + ]; + const argv = await parseArguments(); + expect(argv.appendSystemPrompt).toBe('Be extra concise.'); + expect(argv.systemPrompt).toBeUndefined(); + }); + it('should allow -r flag as alias for --resume', async () => { process.argv = [ 'node', @@ -432,6 +456,21 @@ describe('parseArguments', () => { mockExit.mockRestore(); }); + it('should allow --system-prompt and --append-system-prompt together', async () => { + process.argv = [ + 'node', + 'script.js', + '--system-prompt', + 'Override prompt', + '--append-system-prompt', + 'Append prompt', + ]; + + const argv = await parseArguments(); + expect(argv.systemPrompt).toBe('Override prompt'); + expect(argv.appendSystemPrompt).toBe('Append prompt'); + }); + it('should throw an error when 
include-partial-messages is used without stream-json output', async () => { process.argv = ['node', 'script.js', '--include-partial-messages']; diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 84e94c581..34d7c3fe6 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -110,6 +110,8 @@ export interface CliArgs { debug: boolean | undefined; prompt: string | undefined; promptInteractive: string | undefined; + systemPrompt: string | undefined; + appendSystemPrompt: string | undefined; yolo: boolean | undefined; approvalMode: string | undefined; telemetry: boolean | undefined; @@ -289,6 +291,16 @@ export async function parseArguments(): Promise { description: 'Execute the provided prompt and continue in interactive mode', }) + .option('system-prompt', { + type: 'string', + description: + 'Override the main session system prompt for this run. Can be combined with --append-system-prompt.', + }) + .option('append-system-prompt', { + type: 'string', + description: + 'Append instructions to the main session system prompt for this run. 
Can be combined with --system-prompt.', + }) .option('sandbox', { alias: 's', type: 'boolean', @@ -964,6 +976,8 @@ export async function loadCliConfig( importFormat: settings.context?.importFormat || 'tree', debugMode, question, + systemPrompt: argv.systemPrompt, + appendSystemPrompt: argv.appendSystemPrompt, coreTools: argv.coreTools || settings.tools?.core || undefined, allowedTools: argv.allowedTools || settings.tools?.allowed || undefined, excludeTools, diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 9b47de5b5..b9ddb97fa 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -467,6 +467,8 @@ describe('gemini.tsx main function kitty protocol', () => { debug: undefined, prompt: undefined, promptInteractive: undefined, + systemPrompt: undefined, + appendSystemPrompt: undefined, query: undefined, yolo: undefined, approvalMode: undefined, diff --git a/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts index b0d6736a5..dc62f9ae2 100644 --- a/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/BaseJsonOutputAdapter.ts @@ -282,12 +282,12 @@ export abstract class BaseJsonOutputAdapter { return; } - if (lastBlock.type === 'text') { - const index = state.blocks.length - 1; - this.onBlockClosed(state, index, actualParentToolUseId); - this.closeBlock(state, index); - } else if (lastBlock.type === 'thinking') { - const index = state.blocks.length - 1; + const index = state.blocks.length - 1; + if (!state.openBlocks.has(index)) { + return; + } + + if (lastBlock.type === 'text' || lastBlock.type === 'thinking') { this.onBlockClosed(state, index, actualParentToolUseId); this.closeBlock(state, index); } @@ -392,7 +392,9 @@ export abstract class BaseJsonOutputAdapter { } const message = this.buildMessage(parentToolUseId); - this.emitMessageImpl(message); + if (state.messageStarted) { + 
this.emitMessageImpl(message); + } return message; } @@ -656,12 +658,7 @@ export abstract class BaseJsonOutputAdapter { parentToolUseId: string, ): CLIAssistantMessage { const state = this.getMessageState(parentToolUseId); - const message = this.finalizeAssistantMessageInternal( - state, - parentToolUseId, - ); - this.updateLastAssistantMessage(message); - return message; + return this.finalizeAssistantMessageInternal(state, parentToolUseId); } /** diff --git a/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts index a76de53a8..68633675b 100644 --- a/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/JsonOutputAdapter.ts @@ -52,12 +52,10 @@ export class JsonOutputAdapter } finalizeAssistantMessage(): CLIAssistantMessage { - const message = this.finalizeAssistantMessageInternal( + return this.finalizeAssistantMessageInternal( this.mainAgentMessageState, null, ); - this.updateLastAssistantMessage(message); - return message; } emitResult(options: ResultOptions): void { diff --git a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts index 96977d5b0..64448c8a6 100644 --- a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts +++ b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.test.ts @@ -654,6 +654,24 @@ describe('StreamJsonOutputAdapter', () => { 'Message not started', ); }); + + it('should not emit empty assistant message when started but no content processed', () => { + stdoutWriteSpy.mockClear(); + adapter.finalizeAssistantMessage(); + + const assistantCalls = stdoutWriteSpy.mock.calls.filter( + (call: unknown[]) => { + try { + const parsed = JSON.parse(call[0] as string); + return parsed.type === 'assistant'; + } catch { + return false; + } + }, + ); + + expect(assistantCalls).toHaveLength(0); + }); }); describe('emitResult', () => { @@ -1007,56 
+1025,68 @@ describe('StreamJsonOutputAdapter', () => { }); }); - describe('message_id in stream events', () => { + describe('content_block event identification', () => { beforeEach(() => { adapter = new StreamJsonOutputAdapter(mockConfig, true); adapter.startAssistantMessage(); }); - it('should include message_id in stream events after message starts', () => { + it('should not include message_id in content_block events', () => { adapter.processEvent({ type: GeminiEventType.Content, value: 'Text', }); - // Process another event to ensure messageStarted is true adapter.processEvent({ type: GeminiEventType.Content, value: 'More', }); const calls = stdoutWriteSpy.mock.calls; - // Find all delta events - const deltaCalls = calls.filter((call: unknown[]) => { + const contentBlockCalls = calls.filter((call: unknown[]) => { try { const parsed = JSON.parse(call[0] as string); return ( parsed.type === 'stream_event' && - parsed.event.type === 'content_block_delta' + (parsed.event.type === 'content_block_start' || + parsed.event.type === 'content_block_delta' || + parsed.event.type === 'content_block_stop') ); } catch { return false; } }); - expect(deltaCalls.length).toBeGreaterThan(0); - // The second delta event should have message_id (after messageStarted becomes true) - // message_id is added to the event object, so check parsed.event.message_id - if (deltaCalls.length > 1) { - const secondDelta = JSON.parse( - (deltaCalls[1] as unknown[])[0] as string, - ); - // message_id is on the enriched event object - expect( - secondDelta.event.message_id || secondDelta.message_id, - ).toBeTruthy(); - } else { - // If only one delta, check if message_id exists - const delta = JSON.parse((deltaCalls[0] as unknown[])[0] as string); - // message_id is added when messageStarted is true - // First event may or may not have it, but subsequent ones should - expect(delta.event.message_id || delta.message_id).toBeTruthy(); + expect(contentBlockCalls.length).toBeGreaterThan(0); + for (const 
call of contentBlockCalls) { + const parsed = JSON.parse((call as unknown[])[0] as string); + expect(parsed.event.message_id).toBeUndefined(); } }); + + it('should identify content_block events by session_id and index', () => { + adapter.processEvent({ + type: GeminiEventType.Content, + value: 'Text', + }); + + const calls = stdoutWriteSpy.mock.calls; + const blockStartCall = calls.find((call: unknown[]) => { + try { + const parsed = JSON.parse(call[0] as string); + return ( + parsed.type === 'stream_event' && + parsed.event.type === 'content_block_start' + ); + } catch { + return false; + } + }); + + expect(blockStartCall).toBeDefined(); + const parsed = JSON.parse((blockStartCall as unknown[])[0] as string); + expect(parsed.session_id).toBe('test-session-id'); + expect(typeof parsed.event.index).toBe('number'); + }); }); describe('multiple text blocks', () => { diff --git a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts index bf76d025c..c67190e6a 100644 --- a/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts +++ b/packages/cli/src/nonInteractive/io/StreamJsonOutputAdapter.ts @@ -36,6 +36,8 @@ export class StreamJsonOutputAdapter extends BaseJsonOutputAdapter implements JsonOutputAdapterInterface { + private mainTurnMessageStartEmitted = false; + constructor( config: Config, private readonly includePartialMessages: boolean, @@ -68,29 +70,27 @@ export class StreamJsonOutputAdapter return this.includePartialMessages; } + override startAssistantMessage(): void { + this.mainTurnMessageStartEmitted = false; + super.startAssistantMessage(); + } + finalizeAssistantMessage(): CLIAssistantMessage { - const state = this.mainAgentMessageState; - if (state.finalized) { - return this.buildMessage(null); - } - state.finalized = true; - - this.finalizePendingBlocks(state, null); - const orderedOpenBlocks = Array.from(state.openBlocks).sort( - (a, b) => a - b, + const message = 
this.finalizeAssistantMessageInternal( + this.mainAgentMessageState, + null, ); - for (const index of orderedOpenBlocks) { - this.onBlockClosed(state, index, null); - this.closeBlock(state, index); + if (this.mainTurnMessageStartEmitted && this.includePartialMessages) { + const partial: CLIPartialAssistantMessage = { + type: 'stream_event', + uuid: randomUUID(), + session_id: this.getSessionId(), + parent_tool_use_id: null, + event: { type: 'message_stop' }, + }; + this.emitMessageImpl(partial); } - - if (state.messageStarted && this.includePartialMessages) { - this.emitStreamEventIfEnabled({ type: 'message_stop' }, null); - } - - const message = this.buildMessage(null); - this.updateLastAssistantMessage(message); - this.emitMessageImpl(message); + this.mainTurnMessageStartEmitted = false; return message; } @@ -249,14 +249,15 @@ export class StreamJsonOutputAdapter /** * Overrides base class hook to emit message_start event when message is started. - * Only emits for main agent, not for subagents. + * Only emits once per turn for the main agent (guarded by mainTurnMessageStartEmitted), + * so block-type transitions inside a single turn do not produce spurious message_start events. */ protected override onEnsureMessageStarted( state: MessageState, parentToolUseId: string | null, ): void { - // Only emit message_start for main agent, not for subagents - if (parentToolUseId === null) { + if (parentToolUseId === null && !this.mainTurnMessageStartEmitted) { + this.mainTurnMessageStartEmitted = true; this.emitStreamEventIfEnabled( { type: 'message_start', @@ -264,6 +265,7 @@ export class StreamJsonOutputAdapter id: state.messageId!, role: 'assistant', model: this.config.getModel(), + content: [], }, }, null, @@ -311,19 +313,12 @@ export class StreamJsonOutputAdapter return; } - const state = this.getMessageState(parentToolUseId); - const enrichedEvent = state.messageStarted - ? 
({ ...event, message_id: state.messageId } as StreamEvent & { - message_id: string; - }) - : event; - const partial: CLIPartialAssistantMessage = { type: 'stream_event', uuid: randomUUID(), session_id: this.getSessionId(), parent_tool_use_id: parentToolUseId, - event: enrichedEvent, + event, }; this.emitMessageImpl(partial); } diff --git a/packages/cli/src/nonInteractive/types.ts b/packages/cli/src/nonInteractive/types.ts index 84c2d0ff7..69eaa1dcd 100644 --- a/packages/cli/src/nonInteractive/types.ts +++ b/packages/cli/src/nonInteractive/types.ts @@ -201,6 +201,7 @@ export interface MessageStartStreamEvent { id: string; role: 'assistant'; model: string; + content: []; }; } diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index e4c22cebb..bf29f8f0e 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -390,6 +390,16 @@ export async function runNonInteractive( } } } catch (error) { + // Ensure message_start / message_stop (and content_block events) are + // properly paired even when an error aborts the turn mid-stream. + // The call is safe when no message was started (throws → caught) or + // when already finalized (idempotent guard inside the adapter). + try { + adapter.finalizeAssistantMessage(); + } catch { + // Expected when no message was started or already finalized + } + // For JSON and STREAM_JSON modes, compute usage from metrics const message = error instanceof Error ? 
error.message : String(error); const metrics = uiTelemetryService.getMetrics(); diff --git a/packages/cli/src/services/prompt-processors/shellProcessor.ts b/packages/cli/src/services/prompt-processors/shellProcessor.ts index 2a6df7161..a3e30bf66 100644 --- a/packages/cli/src/services/prompt-processors/shellProcessor.ts +++ b/packages/cli/src/services/prompt-processors/shellProcessor.ts @@ -109,10 +109,9 @@ export class ShellProcessor implements IPromptProcessor { return { ...injection, resolvedCommand: undefined }; } - const resolvedCommand = command.replaceAll( - SHORTHAND_ARGS_PLACEHOLDER, - userArgsEscaped, - ); + const resolvedCommand = command + .replaceAll(SHORTHAND_ARGS_PLACEHOLDER, userArgsEscaped) // Replace {{args}} + .replaceAll('$ARGUMENTS', userArgsEscaped); // Replace $ARGUMENTS return { ...injection, resolvedCommand }; }, ); diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx index 61584b8c7..347a1e918 100644 --- a/packages/cli/src/ui/components/InputPrompt.test.tsx +++ b/packages/cli/src/ui/components/InputPrompt.test.tsx @@ -1957,6 +1957,25 @@ describe('InputPrompt', () => { }); describe('command search (Ctrl+R when not in shell)', () => { + it('passes newest-first user history to command search', async () => { + props.shellModeActive = false; + props.userMessages = ['oldest', 'middle', 'newest']; + + const { unmount } = renderWithProviders(); + await wait(); + + const commandSearchCall = + mockedUseReverseSearchCompletion.mock.calls.find( + ([, history]) => + Array.isArray(history) && + history.length === 3 && + history.includes('newest'), + ); + + expect(commandSearchCall?.[1]).toEqual(['newest', 'middle', 'oldest']); + unmount(); + }); + it('enters command search on Ctrl+R and shows suggestions', async () => { props.shellModeActive = false; diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx index 4fd3bb216..52add983b 
100644 --- a/packages/cli/src/ui/components/InputPrompt.tsx +++ b/packages/cli/src/ui/components/InputPrompt.tsx @@ -5,7 +5,7 @@ */ import type React from 'react'; -import { useCallback, useEffect, useState, useRef } from 'react'; +import { useCallback, useEffect, useMemo, useState, useRef } from 'react'; import { Box, Text } from 'ink'; import { SuggestionsDisplay, MAX_WIDTH } from './SuggestionsDisplay.js'; import { theme } from '../semantic-colors.js'; @@ -199,9 +199,14 @@ export const InputPrompt: React.FC = ({ reverseSearchActive, ); + const commandSearchHistory = useMemo( + () => [...userMessages].reverse(), + [userMessages], + ); + const commandSearchCompletion = useReverseSearchCompletion( buffer, - userMessages, + commandSearchHistory, commandSearchActive, ); diff --git a/packages/cli/src/ui/contexts/KeypressContext.test.tsx b/packages/cli/src/ui/contexts/KeypressContext.test.tsx index edf25bead..b662ec7ed 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.test.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.test.tsx @@ -1367,6 +1367,75 @@ describe('KeypressContext - Kitty Protocol', () => { }), ); }); + + it('drops unsupported Kitty CSI-u keys without blocking later input', () => { + const keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => result.current.subscribe(keyHandler)); + + act(() => stdin.sendKittySequence(`\x1b[57358u`)); // CAPS_LOCK + act(() => + stdin.pressKey({ + name: 'a', + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: 'a', + }), + ); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'a', + sequence: 'a', + }), + ); + }); + + it('recovers plain text that arrives in the same chunk after an unsupported CSI-u key', () => { + const keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => 
result.current.subscribe(keyHandler)); + + act(() => + stdin.pressKey({ + name: '', + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: '\x1b[57358ua', + }), + ); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'a', + sequence: 'a', + kittyProtocol: true, + }), + ); + }); + + it('drops unsupported CSI-u variants with event metadata and keeps parsing', () => { + const keyHandler = vi.fn(); + const { result } = renderHook(() => useKeypressContext(), { wrapper }); + act(() => result.current.subscribe(keyHandler)); + + act(() => stdin.sendKittySequence(`\x1b[57358;1:1u\x1b[100u`)); + + expect(keyHandler).toHaveBeenCalledTimes(1); + expect(keyHandler).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'd', + sequence: 'd', + kittyProtocol: true, + }), + ); + }); }); describe('Kitty keypad private-use keys', () => { diff --git a/packages/cli/src/ui/contexts/KeypressContext.tsx b/packages/cli/src/ui/contexts/KeypressContext.tsx index 791602f6a..97db27563 100644 --- a/packages/cli/src/ui/contexts/KeypressContext.tsx +++ b/packages/cli/src/ui/contexts/KeypressContext.tsx @@ -178,6 +178,25 @@ export function KeypressProvider({ let rawDataBuffer = Buffer.alloc(0); let rawFlushTimeout: NodeJS.Timeout | null = null; + const createPrintableKey = (char: string): Key => { + const printableName = + char === ' ' + ? 'space' + : /^[A-Za-z]$/.test(char) + ? char.toLowerCase() + : char; + + return { + name: printableName, + ctrl: false, + meta: false, + shift: false, + paste: false, + sequence: char, + kittyProtocol: true, + }; + }; + // Parse a single complete kitty sequence from the start (prefix) of the // buffer and return both the Key and the number of characters consumed. 
// This lets us "peel off" one complete event when multiple sequences arrive @@ -415,22 +434,11 @@ export function KeypressProvider({ keyCode <= 0x10ffff && !(keyCode >= 0xe000 && keyCode <= 0xf8ff) ) { - const char = String.fromCodePoint(keyCode); - const printableName = - char === ' ' - ? 'space' - : /^[A-Za-z]$/.test(char) - ? char.toLowerCase() - : char; return { key: { - name: printableName, - ctrl: false, + ...createPrintableKey(String.fromCodePoint(keyCode)), meta: alt, shift, - paste: false, - sequence: char, - kittyProtocol: true, }, length: m[0].length, }; @@ -490,6 +498,42 @@ export function KeypressProvider({ return null; }; + const getCompleteCsiSequenceLength = (buffer: string): number | null => { + if (!buffer.startsWith(`${ESC}[`)) { + return null; + } + + for (let i = 2; i < buffer.length; i++) { + const code = buffer.charCodeAt(i); + if (code >= 0x40 && code <= 0x7e) { + return i + 1; + } + if (code < 0x20 || code > 0x3f) { + return 0; + } + } + + return null; + }; + + const parsePlainTextPrefix = ( + buffer: string, + ): { key: Key; length: number } | null => { + if (!buffer || buffer.startsWith(ESC)) { + return null; + } + + const [char] = Array.from(buffer); + if (!char) { + return null; + } + + return { + key: createPrintableKey(char), + length: char.length, + }; + }; + const broadcast = (key: Key) => { for (const handler of subscribers) { handler(key); @@ -653,47 +697,82 @@ export function KeypressProvider({ // start of the buffer. This handles batched inputs cleanly. If the // prefix is incomplete or invalid, skip to the next CSI introducer // (ESC[) so that a following valid sequence can still be parsed. 
- let parsedAny = false; + let bufferedInputHandled = false; while (kittySequenceBuffer) { const parsed = parseKittyPrefix(kittySequenceBuffer); - if (!parsed) { - // Look for the next potential CSI start beyond index 0 - const nextStart = kittySequenceBuffer.indexOf(`${ESC}[`, 1); - if (nextStart > 0) { - if (debugKeystrokeLogging) { + if (parsed) { + if (debugKeystrokeLogging) { + const parsedSequence = kittySequenceBuffer.slice( + 0, + parsed.length, + ); + if (kittySequenceBuffer.length > parsed.length) { debugLogger.debug( - '[DEBUG] Skipping incomplete/invalid CSI prefix:', - kittySequenceBuffer.slice(0, nextStart), + '[DEBUG] Kitty sequence parsed successfully (prefix):', + parsedSequence, + ); + } else { + debugLogger.debug( + '[DEBUG] Kitty sequence parsed successfully:', + parsedSequence, ); } - kittySequenceBuffer = kittySequenceBuffer.slice(nextStart); - continue; } - break; + // Consume the parsed prefix and broadcast it. + kittySequenceBuffer = kittySequenceBuffer.slice(parsed.length); + broadcast(parsed.key); + bufferedInputHandled = true; + continue; } - if (debugKeystrokeLogging) { - const parsedSequence = kittySequenceBuffer.slice( - 0, - parsed.length, + + const completeUnsupportedCsiLength = + getCompleteCsiSequenceLength(kittySequenceBuffer); + if (completeUnsupportedCsiLength) { + if (debugKeystrokeLogging) { + debugLogger.debug( + '[DEBUG] Dropping unsupported complete CSI sequence:', + kittySequenceBuffer.slice(0, completeUnsupportedCsiLength), + ); + } + kittySequenceBuffer = kittySequenceBuffer.slice( + completeUnsupportedCsiLength, ); - if (kittySequenceBuffer.length > parsed.length) { + bufferedInputHandled = true; + continue; + } + + const plainTextPrefix = parsePlainTextPrefix(kittySequenceBuffer); + if (plainTextPrefix) { + if (debugKeystrokeLogging) { debugLogger.debug( - '[DEBUG] Kitty sequence parsed successfully (prefix):', - parsedSequence, - ); - } else { - debugLogger.debug( - '[DEBUG] Kitty sequence parsed successfully:', - 
          parsedSequence,
+          '[DEBUG] Recovered plain text after kitty sequence:',
+          plainTextPrefix.key.sequence,
        );
      }
+      kittySequenceBuffer = kittySequenceBuffer.slice(
+        plainTextPrefix.length,
+      );
+      broadcast(plainTextPrefix.key);
+      bufferedInputHandled = true;
+      continue;
    }
-    // Consume the parsed prefix and broadcast it.
-    kittySequenceBuffer = kittySequenceBuffer.slice(parsed.length);
-    broadcast(parsed.key);
-    parsedAny = true;
+
+    // Look for the next potential CSI start beyond index 0
+    const nextStart = kittySequenceBuffer.indexOf(`${ESC}[`, 1);
+    if (nextStart > 0) {
+      if (debugKeystrokeLogging) {
+        debugLogger.debug(
+          '[DEBUG] Skipping incomplete/invalid CSI prefix:',
+          kittySequenceBuffer.slice(0, nextStart),
+        );
+      }
+      kittySequenceBuffer = kittySequenceBuffer.slice(nextStart);
+      bufferedInputHandled = true;
+      continue;
+    }
+    break;
  }
-  if (parsedAny) return;
+  if (bufferedInputHandled) return;

  if (config?.getDebugMode() || debugKeystrokeLogging) {
    const codes = Array.from(kittySequenceBuffer).map((ch) =>
diff --git a/packages/core/package.json b/packages/core/package.json
index 134da8f48..88e4c5c1d 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@qwen-code/qwen-code-core",
-  "version": "0.12.5",
+  "version": "0.13.0",
   "description": "Qwen Code Core",
   "repository": {
     "type": "git",
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 30b24c086..3024bd2df 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -248,6 +248,26 @@ describe('Server Config (config.ts)', () => {
       );
     });

+    it('should store a system prompt override', () => {
+      const config = new Config({
+        ...baseParams,
+        systemPrompt: 'You are a custom system prompt.',
+      });
+
+      expect(config.getSystemPrompt()).toBe('You are a custom system prompt.');
+      expect(config.getAppendSystemPrompt()).toBeUndefined();
+    });
+
+    it('should store an appended system prompt', () => {
+      const config = new Config({
+        ...baseParams,
+        appendSystemPrompt: 'Be extra concise.',
+      });
+
+      expect(config.getAppendSystemPrompt()).toBe('Be extra concise.');
+      expect(config.getSystemPrompt()).toBeUndefined();
+    });
+
   describe('initialize', () => {
     it('should throw an error if checkpointing is enabled and GitService fails', async () => {
       const gitError = new Error('Git is not installed');
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index b4b449fc0..b2a35229d 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -315,6 +315,8 @@ export interface ConfigParameters {
   debugMode: boolean;
   includePartialMessages?: boolean;
   question?: string;
+  systemPrompt?: string;
+  appendSystemPrompt?: string;
   coreTools?: string[];
   allowedTools?: string[];
   excludeTools?: string[];
@@ -468,6 +470,8 @@ export class Config {
   private readonly outputFormat: OutputFormat;
   private readonly includePartialMessages: boolean;
   private readonly question: string | undefined;
+  private readonly systemPrompt: string | undefined;
+  private readonly appendSystemPrompt: string | undefined;
   private readonly coreTools: string[] | undefined;
   private readonly allowedTools: string[] | undefined;
   private readonly excludeTools: string[] | undefined;
@@ -580,6 +584,8 @@
     this.outputFormat = normalizedOutputFormat ?? OutputFormat.TEXT;
     this.includePartialMessages = params.includePartialMessages ?? false;
     this.question = params.question;
+    this.systemPrompt = params.systemPrompt;
+    this.appendSystemPrompt = params.appendSystemPrompt;
     this.coreTools = params.coreTools;
     this.allowedTools = params.allowedTools;
     this.excludeTools = params.excludeTools;
@@ -1229,6 +1235,14 @@
     return this.question;
   }

+  getSystemPrompt(): string | undefined {
+    return this.systemPrompt;
+  }
+
+  getAppendSystemPrompt(): string | undefined {
+    return this.appendSystemPrompt;
+  }
+
   getCoreTools(): string[] | undefined {
     return this.coreTools;
   }
diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts
index 3f0e17197..16cf3622f 100644
--- a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts
+++ b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts
@@ -328,6 +328,170 @@ describe('AnthropicContentGenerator', () => {
         expect.not.objectContaining({ thinking: expect.anything() }),
       );
     });
+
+    describe('output token limits', () => {
+      it('caps configured samplingParams.max_tokens to model output limit', async () => {
+        const { AnthropicContentGenerator } = await importGenerator();
+        anthropicState.createImpl.mockResolvedValue({
+          id: 'anthropic-1',
+          model: 'claude-sonnet-4',
+          content: [{ type: 'text', text: 'hi' }],
+        });
+
+        const generator = new AnthropicContentGenerator(
+          {
+            model: 'claude-sonnet-4',
+            apiKey: 'test-key',
+            timeout: 10_000,
+            maxRetries: 2,
+            samplingParams: { max_tokens: 200_000 },
+            schemaCompliance: 'auto',
+          },
+          mockConfig,
+        );
+
+        await generator.generateContent({
+          model: 'models/ignored',
+          contents: 'Hello',
+        } as unknown as GenerateContentParameters);
+
+        const [anthropicRequest] =
+          anthropicState.lastCreateArgs as AnthropicCreateArgs;
+        expect(anthropicRequest).toEqual(
+          expect.objectContaining({ max_tokens: 65536 }),
+        );
+      });
+
+      it('caps request.config.maxOutputTokens to model output limit when config max_tokens is missing', async () => {
+        const { AnthropicContentGenerator } = await importGenerator();
+        anthropicState.createImpl.mockResolvedValue({
+          id: 'anthropic-1',
+          model: 'claude-sonnet-4',
+          content: [{ type: 'text', text: 'hi' }],
+        });
+
+        const generator = new AnthropicContentGenerator(
+          {
+            model: 'claude-sonnet-4',
+            apiKey: 'test-key',
+            timeout: 10_000,
+            maxRetries: 2,
+            samplingParams: {},
+            schemaCompliance: 'auto',
+          },
+          mockConfig,
+        );
+
+        await generator.generateContent({
+          model: 'models/ignored',
+          contents: 'Hello',
+          config: { maxOutputTokens: 100_000 },
+        } as unknown as GenerateContentParameters);
+
+        const [anthropicRequest] =
+          anthropicState.lastCreateArgs as AnthropicCreateArgs;
+        expect(anthropicRequest).toEqual(
+          expect.objectContaining({ max_tokens: 65536 }),
+        );
+      });
+
+      it('uses conservative default when max_tokens is not explicitly configured', async () => {
+        const { AnthropicContentGenerator } = await importGenerator();
+        anthropicState.createImpl.mockResolvedValue({
+          id: 'anthropic-1',
+          model: 'claude-sonnet-4',
+          content: [{ type: 'text', text: 'hi' }],
+        });
+
+        const generator = new AnthropicContentGenerator(
+          {
+            model: 'claude-sonnet-4',
+            apiKey: 'test-key',
+            timeout: 10_000,
+            maxRetries: 2,
+            samplingParams: {},
+            schemaCompliance: 'auto',
+          },
+          mockConfig,
+        );
+
+        await generator.generateContent({
+          model: 'models/ignored',
+          contents: 'Hello',
+        } as unknown as GenerateContentParameters);
+
+        const [anthropicRequest] =
+          anthropicState.lastCreateArgs as AnthropicCreateArgs;
+        expect(anthropicRequest).toEqual(
+          expect.objectContaining({ max_tokens: 32000 }),
+        );
+      });
+
+      it('respects configured max_tokens for unknown models', async () => {
+        const { AnthropicContentGenerator } = await importGenerator();
+        anthropicState.createImpl.mockResolvedValue({
+          id: 'anthropic-1',
+          model: 'unknown-model',
+          content: [{ type: 'text', text: 'hi' }],
+        });
+
+        const generator = new AnthropicContentGenerator(
+          {
+            model: 'unknown-model',
+            apiKey: 'test-key',
+            timeout: 10_000,
+            maxRetries: 2,
+            samplingParams: { max_tokens: 100_000 },
+            schemaCompliance: 'auto',
+          },
+          mockConfig,
+        );
+
+        await generator.generateContent({
+          model: 'models/ignored',
+          contents: 'Hello',
+        } as unknown as GenerateContentParameters);
+
+        const [anthropicRequest] =
+          anthropicState.lastCreateArgs as AnthropicCreateArgs;
+        expect(anthropicRequest).toEqual(
+          expect.objectContaining({ max_tokens: 100_000 }),
+        );
+      });
+
+      it('treats null maxOutputTokens as not configured', async () => {
+        const { AnthropicContentGenerator } = await importGenerator();
+        anthropicState.createImpl.mockResolvedValue({
+          id: 'anthropic-1',
+          model: 'claude-sonnet-4',
+          content: [{ type: 'text', text: 'hi' }],
+        });
+
+        const generator = new AnthropicContentGenerator(
+          {
+            model: 'claude-sonnet-4',
+            apiKey: 'test-key',
+            timeout: 10_000,
+            maxRetries: 2,
+            samplingParams: {},
+            schemaCompliance: 'auto',
+          },
+          mockConfig,
+        );
+
+        await generator.generateContent({
+          model: 'models/ignored',
+          contents: 'Hello',
+          config: { maxOutputTokens: null as unknown as undefined },
+        } as unknown as GenerateContentParameters);
+
+        const [anthropicRequest] =
+          anthropicState.lastCreateArgs as AnthropicCreateArgs;
+        expect(anthropicRequest).toEqual(
+          expect.objectContaining({ max_tokens: 32000 }),
+        );
+      });
+    });
   });

   describe('countTokens', () => {
diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts
index 3fcd4b96d..e3c61893e 100644
--- a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts
+++ b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts
@@ -31,6 +31,11 @@ import { AnthropicContentConverter } from './converter.js';
 import { buildRuntimeFetchOptions } from '../../utils/runtimeFetchOptions.js';
 import { DEFAULT_TIMEOUT } from '../openaiContentGenerator/constants.js';
 import { createDebugLogger } from '../../utils/debugLogger.js';
+import {
+  tokenLimit,
+  DEFAULT_OUTPUT_TOKEN_LIMIT,
+  hasExplicitOutputLimit,
+} from '../tokenLimits.js';

 const debugLogger = createDebugLogger('ANTHROPIC');

@@ -223,8 +228,18 @@
       return configValue !== undefined ? configValue : requestValue;
     };

+    // Apply output token limit logic consistent with OpenAI providers
+    const userMaxTokens = getParam('max_tokens', 'maxOutputTokens');
+    const modelId = this.contentGeneratorConfig.model;
+    const modelLimit = tokenLimit(modelId, 'output');
+    const isKnownModel = hasExplicitOutputLimit(modelId);
+
     const maxTokens =
-      getParam('max_tokens', 'maxOutputTokens') ?? 10_000;
+      userMaxTokens !== undefined && userMaxTokens !== null
+        ? isKnownModel
+          ? Math.min(userMaxTokens, modelLimit)
+          : userMaxTokens
+        : Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);

     return {
       max_tokens: maxTokens,
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
index c73142aa4..8d81a5bbb 100644
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -31,7 +31,7 @@ import {
   Turn,
   type ChatCompressionInfo,
 } from './turn.js';
-import { getCoreSystemPrompt } from './prompts.js';
+import { getCoreSystemPrompt, getCustomSystemPrompt } from './prompts.js';
 import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
@@ -314,6 +314,8 @@ describe('Gemini Client (client.ts)', () => {
       getVertexAI: vi.fn().mockReturnValue(false),
       getUserAgent: vi.fn().mockReturnValue('test-agent'),
       getUserMemory: vi.fn().mockReturnValue(''),
+      getSystemPrompt: vi.fn().mockReturnValue(undefined),
+      getAppendSystemPrompt: vi.fn().mockReturnValue(undefined),
       getFullContext: vi.fn().mockReturnValue(false),
       getSessionId: vi.fn().mockReturnValue('test-session-id'),
       getProxy: vi.fn().mockReturnValue(undefined),
@@ -2428,6 +2430,104 @@ Other open files:
        );
      });

+      it('should use config system prompt override when provided', async () => {
+        const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
+        const abortSignal = new AbortController().signal;
+
+        vi.spyOn(client['config'], 'getSystemPrompt').mockReturnValue(
+          'Override prompt',
+        );
+        vi.spyOn(client['config'], 'getUserMemory').mockReturnValue(
+          'Saved memory',
+        );
+        vi.mocked(getCustomSystemPrompt).mockReturnValueOnce(
+          'Override prompt with memory',
+        );
+
+        await client.generateContent(
+          contents,
+          {},
+          abortSignal,
+          DEFAULT_QWEN_FLASH_MODEL,
+        );
+
+        expect(getCustomSystemPrompt).toHaveBeenCalledWith(
+          'Override prompt',
+          'Saved memory',
+          undefined,
+        );
+        expect(mockContentGenerator.generateContent).toHaveBeenCalledWith(
+          expect.objectContaining({
+            config: expect.objectContaining({
+              systemInstruction: 'Override prompt with memory',
+            }),
+          }),
+          'test-session-id',
+        );
+      });
+
+      it('should append config appendSystemPrompt to the core system prompt', async () => {
+        const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
+        const abortSignal = new AbortController().signal;
+
+        vi.mocked(getCoreSystemPrompt).mockClear();
+        vi.spyOn(client['config'], 'getAppendSystemPrompt').mockReturnValue(
+          'Be extra concise.',
+        );
+
+        await client.generateContent(
+          contents,
+          {},
+          abortSignal,
+          DEFAULT_QWEN_FLASH_MODEL,
+        );
+
+        expect(getCoreSystemPrompt).toHaveBeenCalledWith(
+          '',
+          'test-model',
+          'Be extra concise.',
+        );
+      });
+
+      it('should append config appendSystemPrompt after a config system prompt override', async () => {
+        const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
+        const abortSignal = new AbortController().signal;
+
+        vi.spyOn(client['config'], 'getSystemPrompt').mockReturnValue(
+          'Override prompt',
+        );
+        vi.spyOn(client['config'], 'getAppendSystemPrompt').mockReturnValue(
+          'Focus on findings only.',
+        );
+        vi.spyOn(client['config'], 'getUserMemory').mockReturnValue(
+          'Saved memory',
+        );
+        vi.mocked(getCustomSystemPrompt).mockReturnValueOnce(
+          'Override prompt with memory and append',
+        );
+
+        await client.generateContent(
+          contents,
+          {},
+          abortSignal,
+          DEFAULT_QWEN_FLASH_MODEL,
+        );
+
+        expect(getCustomSystemPrompt).toHaveBeenCalledWith(
+          'Override prompt',
+          'Saved memory',
+          'Focus on findings only.',
+        );
+        expect(mockContentGenerator.generateContent).toHaveBeenCalledWith(
+          expect.objectContaining({
+            config: expect.objectContaining({
+              systemInstruction: 'Override prompt with memory and append',
+            }),
+          }),
+          'test-session-id',
+        );
+      });
+
       // Note: there is currently no "fallback mode" model routing; the model used
       // is always the one explicitly requested by the caller.
     });
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 96c9d4ff7..ee87e39cd 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -199,6 +199,26 @@ export class GeminiClient {
     });
   }

+  private getMainSessionSystemInstruction(): string {
+    const userMemory = this.config.getUserMemory();
+    const overrideSystemPrompt = this.config.getSystemPrompt();
+    const appendSystemPrompt = this.config.getAppendSystemPrompt();
+
+    if (overrideSystemPrompt) {
+      return getCustomSystemPrompt(
+        overrideSystemPrompt,
+        userMemory,
+        appendSystemPrompt,
+      );
+    }
+
+    return getCoreSystemPrompt(
+      userMemory,
+      this.config.getModel(),
+      appendSystemPrompt,
+    );
+  }
+
   async startChat(extraHistory?: Content[]): Promise<GeminiChat> {
     this.forceFullIdeContext = true;
     this.hasFailedCompressionAttempt = false;
@@ -210,9 +230,7 @@
     const history = await getInitialChatHistory(this.config, extraHistory);

     try {
-      const userMemory = this.config.getUserMemory();
-      const model = this.config.getModel();
-      const systemInstruction = getCoreSystemPrompt(userMemory, model);
+      const systemInstruction = this.getMainSessionSystemInstruction();

       return new GeminiChat(
         this.config,
@@ -770,7 +788,7 @@ export class GeminiClient {
     const userMemory = this.config.getUserMemory();
     const finalSystemInstruction = generationConfig.systemInstruction
       ? getCustomSystemPrompt(generationConfig.systemInstruction, userMemory)
-      : getCoreSystemPrompt(userMemory, this.config.getModel());
+      : this.getMainSessionSystemInstruction();

     const requestConfig: GenerateContentConfig = {
       abortSignal,
diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts
index 3411fff50..918ade81c 100644
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@@ -2583,3 +2583,229 @@ describe('CoreToolScheduler plan mode with ask_user_question', () => {
     expect(completedCalls[0].status).toBe('cancelled');
   });
 });
+
+describe('Concurrent task tool execution', () => {
+  function createScheduler(
+    tools: Map<string, MockTool>,
+    onAllToolCallsComplete: Mock,
+    onToolCallsUpdate: Mock,
+  ) {
+    const mockToolRegistry = {
+      getTool: (name: string) => tools.get(name),
+      getFunctionDeclarations: () => [],
+      tools,
+      discovery: {},
+      registerTool: () => {},
+      getToolByName: (name: string) => tools.get(name),
+      getToolByDisplayName: () => undefined,
+      getTools: () => [...tools.values()],
+      discoverTools: async () => {},
+      getAllTools: () => [...tools.values()],
+      getToolsByServer: () => [],
+    } as unknown as ToolRegistry;
+
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+      getUsageStatisticsEnabled: () => true,
+      getDebugMode: () => false,
+      getApprovalMode: () => ApprovalMode.AUTO_EDIT,
+      getAllowedTools: () => [],
+      getContentGeneratorConfig: () => ({
+        model: 'test-model',
+        authType: 'gemini',
+      }),
+      getShellExecutionConfig: () => ({
+        terminalWidth: 90,
+        terminalHeight: 30,
+      }),
+      storage: {
+        getProjectTempDir: () => '/tmp',
+      },
+      getTruncateToolOutputThreshold: () =>
+        DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD,
+      getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES,
+      getToolRegistry: () => mockToolRegistry,
+      getUseModelRouter: () => false,
+      getGeminiClient: () => null,
+      getChatRecordingService: () => undefined,
+    } as unknown as Config;
+
+    return new CoreToolScheduler({
+      config: mockConfig,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+      getPreferredEditor: () => 'vscode',
+      onEditorClose: vi.fn(),
+    });
+  }
+
+  it('should execute multiple task tools concurrently', async () => {
+    const executionLog: string[] = [];
+
+    const taskTool = new MockTool({
+      name: 'task',
+      execute: async (params) => {
+        const id = (params as { id: string }).id;
+        executionLog.push(`start:${id}`);
+        // Simulate async work — concurrent tasks will interleave here
+        await new Promise((r) => setTimeout(r, 50));
+        executionLog.push(`end:${id}`);
+        return {
+          llmContent: `Task ${id} done`,
+          returnDisplay: `Task ${id} done`,
+        };
+      },
+    });
+
+    const tools = new Map([['task', taskTool]]);
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+    const scheduler = createScheduler(
+      tools,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+    );
+
+    const abortController = new AbortController();
+    const requests = [
+      {
+        callId: '1',
+        name: 'task',
+        args: { id: 'A' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+      {
+        callId: '2',
+        name: 'task',
+        args: { id: 'B' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+      {
+        callId: '3',
+        name: 'task',
+        args: { id: 'C' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+    ];
+
+    await scheduler.schedule(requests, abortController.signal);
+
+    // All tasks should have completed
+    expect(onAllToolCallsComplete).toHaveBeenCalled();
+    const completedCalls = onAllToolCallsComplete.mock
+      .calls[0][0] as ToolCall[];
+    expect(completedCalls).toHaveLength(3);
+    expect(completedCalls.every((c) => c.status === 'success')).toBe(true);
+
+    // Verify concurrency: all tasks should start before any finishes
+    // With sequential execution, the log would be [start:A, end:A, start:B, end:B, ...]
+    // With concurrent execution, all starts happen before any end
+    const startIndices = executionLog
+      .filter((e) => e.startsWith('start:'))
+      .map((e) => executionLog.indexOf(e));
+    const firstEnd = executionLog.findIndex((e) => e.startsWith('end:'));
+    expect(startIndices.every((i) => i < firstEnd)).toBe(true);
+  });
+
+  it('should run task tools concurrently while other tools run sequentially', async () => {
+    const executionLog: string[] = [];
+
+    const taskTool = new MockTool({
+      name: 'task',
+      execute: async (params) => {
+        const id = (params as { id: string }).id;
+        executionLog.push(`task:start:${id}`);
+        await new Promise((r) => setTimeout(r, 50));
+        executionLog.push(`task:end:${id}`);
+        return {
+          llmContent: `Task ${id} done`,
+          returnDisplay: `Task ${id} done`,
+        };
+      },
+    });
+
+    const readTool = new MockTool({
+      name: 'read_file',
+      execute: async (params) => {
+        const id = (params as { id: string }).id;
+        executionLog.push(`read:start:${id}`);
+        await new Promise((r) => setTimeout(r, 20));
+        executionLog.push(`read:end:${id}`);
+        return {
+          llmContent: `Read ${id} done`,
+          returnDisplay: `Read ${id} done`,
+        };
+      },
+    });
+
+    const tools = new Map([
+      ['task', taskTool],
+      ['read_file', readTool],
+    ]);
+    const onAllToolCallsComplete = vi.fn();
+    const onToolCallsUpdate = vi.fn();
+    const scheduler = createScheduler(
+      tools,
+      onAllToolCallsComplete,
+      onToolCallsUpdate,
+    );
+
+    const abortController = new AbortController();
+    const requests = [
+      {
+        callId: '1',
+        name: 'read_file',
+        args: { id: '1' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+      {
+        callId: '2',
+        name: 'task',
+        args: { id: 'A' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+      {
+        callId: '3',
+        name: 'read_file',
+        args: { id: '2' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+      {
+        callId: '4',
+        name: 'task',
+        args: { id: 'B' },
+        isClientInitiated: false,
+        prompt_id: 'p1',
+      },
+    ];
+
+    await scheduler.schedule(requests, abortController.signal);
+
+    expect(onAllToolCallsComplete).toHaveBeenCalled();
+    const completedCalls = onAllToolCallsComplete.mock
+      .calls[0][0] as ToolCall[];
+    expect(completedCalls).toHaveLength(4);
+    expect(completedCalls.every((c) => c.status === 'success')).toBe(true);
+
+    // Non-task tools should execute sequentially: read:1 finishes before read:2 starts
+    const read1End = executionLog.indexOf('read:end:1');
+    const read2Start = executionLog.indexOf('read:start:2');
+    expect(read1End).toBeLessThan(read2Start);
+
+    // Task tools should execute concurrently: both start before either ends
+    const taskAStart = executionLog.indexOf('task:start:A');
+    const taskBStart = executionLog.indexOf('task:start:B');
+    const firstTaskEnd = Math.min(
+      executionLog.indexOf('task:end:A'),
+      executionLog.indexOf('task:end:B'),
+    );
+    expect(taskAStart).toBeLessThan(firstTaskEnd);
+    expect(taskBStart).toBeLessThan(firstTaskEnd);
+  });
+});
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
index 7a8ab2895..20e60bd4d 100644
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -1081,9 +1081,28 @@ export class CoreToolScheduler {
       (call) => call.status === 'scheduled',
     );

-    for (const toolCall of callsToExecute) {
-      await this.executeSingleToolCall(toolCall, signal);
-    }
+    // Task tools are safe to run concurrently — they spawn independent
+    // sub-agents with no shared mutable state. All other tools run
+    // sequentially in their original order to preserve any implicit
+    // ordering the model may rely on.
+    const taskCalls = callsToExecute.filter(
+      (call) => call.request.name === ToolNames.TASK,
+    );
+    const otherCalls = callsToExecute.filter(
+      (call) => call.request.name !== ToolNames.TASK,
+    );
+
+    const taskPromise = Promise.all(
+      taskCalls.map((tc) => this.executeSingleToolCall(tc, signal)),
+    );
+
+    const othersPromise = (async () => {
+      for (const toolCall of otherCalls) {
+        await this.executeSingleToolCall(toolCall, signal);
+      }
+    })();
+
+    await Promise.all([taskPromise, othersPromise]);
   }
 }
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 0dfe43247..4983b5754 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -17,7 +17,8 @@ import type {
   GenerateContentResponseUsageMetadata,
 } from '@google/genai';
 import { createUserContent } from '@google/genai';
-import { getErrorStatus, retryWithBackoff } from '../utils/retry.js';
+import { retryWithBackoff } from '../utils/retry.js';
+import { getErrorStatus } from '../utils/errors.js';
 import { createDebugLogger } from '../utils/debugLogger.js';
 import { parseAndFormatApiError } from '../utils/errorParsing.js';
 import { isRateLimitError, type RetryInfo } from '../utils/rateLimit.js';
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
index 156b75a01..abf129268 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
@@ -225,7 +225,7 @@ describe('LoggingContentGenerator', () => {
   it('logs errors with status code and request id, then rethrows', async () => {
     const error = Object.assign(new Error('boom'), {
-      code: 429,
+      status: 429,
       request_id: 'req-99',
       type: 'rate_limit',
     });
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
index 1a51846c3..33242a28a 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
@@ -35,13 +35,13 @@ import type {
   ContentGenerator,
   ContentGeneratorConfig,
 } from '../contentGenerator.js';
-import { isStructuredError } from '../../utils/quotaErrorDetection.js';
 import { OpenAIContentConverter } from '../openaiContentGenerator/converter.js';
 import { OpenAILogger } from '../../utils/openaiLogger.js';
-
-interface StructuredError {
-  status: number;
-}
+import {
+  getErrorMessage,
+  getErrorStatus,
+  getErrorType,
+} from '../../utils/errors.js';

 /**
  * A decorator that wraps a ContentGenerator to add logging to API calls.
@@ -108,33 +108,26 @@ export class LoggingContentGenerator implements ContentGenerator {
     model: string,
     prompt_id: string,
   ): void {
-    const errorMessage = error instanceof Error ? error.message : String(error);
-    const errorType =
-      (error as { type?: string })?.type ||
-      (error instanceof Error ? error.name : 'unknown');
+    const errorMessage = getErrorMessage(error);
+    const errorType = getErrorType(error);
     const errorResponseId =
       (error as { requestID?: string; request_id?: string })?.requestID ||
       (error as { requestID?: string; request_id?: string })?.request_id ||
       responseId;
-    const errorStatus =
-      (error as { code?: string | number; status?: number })?.code ??
-      (error as { status?: number })?.status ??
-      (isStructuredError(error)
-        ? (error as StructuredError).status
-        : undefined);
+    const errorStatus = getErrorStatus(error);

     logApiError(
       this.config,
-      new ApiErrorEvent(
-        errorResponseId,
+      new ApiErrorEvent({
+        responseId: errorResponseId,
         model,
-        errorMessage,
         durationMs,
-        prompt_id,
-        this.config.getAuthType(),
+        promptId: prompt_id,
+        authType: this.config.getAuthType(),
+        errorMessage,
         errorType,
-        errorStatus,
-      ),
+        statusCode: errorStatus,
+      }),
     );
   }
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
index 5c6cdc682..4e2d42bd8 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -255,9 +255,23 @@ export class ContentGenerationPipeline {
         .candidates?.[0]?.finishReason;

       if (isFinishChunk) {
-        // This is a finish reason chunk
-        collectedGeminiResponses.push(response);
-        setPendingFinish(response);
+        if (hasPendingFinish) {
+          // Duplicate finish chunk (e.g. from OpenRouter providers that send two
+          // finish_reason chunks for tool calls). The streaming tool call parser
+          // was already reset after the first finish chunk, so the second one
+          // carries no functionCall parts. Merge only usageMetadata and keep the
+          // candidates (including functionCall parts) from the first finish chunk.
+ const lastResponse = + collectedGeminiResponses[collectedGeminiResponses.length - 1]; + if (response.usageMetadata) { + lastResponse.usageMetadata = response.usageMetadata; + } + setPendingFinish(lastResponse); + } else { + // This is a finish reason chunk + collectedGeminiResponses.push(response); + setPendingFinish(response); + } return false; // Don't yield yet, wait for potential subsequent chunks to merge } else if (hasPendingFinish) { // We have a pending finish chunk, merge this chunk's data into it diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts index 024e9a28c..c64ee436d 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts @@ -789,7 +789,7 @@ describe('DashScopeOpenAICompatibleProvider', () => { expect(result.max_tokens).toBe(1000); // Should remain unchanged }); - it('should not add max_tokens when not present in request', () => { + it('should set conservative max_tokens default when not present in request', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'qwen3-max', messages: [{ role: 'user', content: 'Hello' }], @@ -798,31 +798,35 @@ describe('DashScopeOpenAICompatibleProvider', () => { const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBeUndefined(); // Should remain undefined + // Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT) + // qwen3-max has 64K output limit, so min(64K, 32K) = 32K + expect(result.max_tokens).toBe(32000); }); - it('should handle null max_tokens parameter', () => { + it('should set conservative max_tokens when null is provided', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'qwen3-max', messages: [{ role: 'user', content: 'Hello' }], - max_tokens: null, + max_tokens: null as 
unknown as undefined, }; const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBeNull(); // Should remain null + // null is treated as not configured, so set conservative default + expect(result.max_tokens).toBe(32000); }); - it('should use default output limit for unknown models', () => { + it('should respect user max_tokens for unknown models', () => { const request: OpenAI.Chat.ChatCompletionCreateParams = { model: 'unknown-model', messages: [{ role: 'user', content: 'Hello' }], - max_tokens: 20000, // Exceeds the default limit + max_tokens: 40000, // User explicitly sets 40K }; const result = provider.buildRequest(request, 'test-prompt-id'); - expect(result.max_tokens).toBe(16384); // Should be limited to default output limit (16K) + // Unknown models: respect user's configuration (backend may support it) + expect(result.max_tokens).toBe(40000); }); it('should preserve other request parameters when limiting max_tokens', () => { diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts index a889401cf..a94ad0be3 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts @@ -9,27 +9,20 @@ import { DEFAULT_DASHSCOPE_BASE_URL, } from '../constants.js'; import type { - OpenAICompatibleProvider, DashScopeRequestMetadata, ChatCompletionContentPartTextWithCache, ChatCompletionContentPartWithCache, ChatCompletionToolWithCache, } from './types.js'; import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js'; -import { tokenLimit } from '../../tokenLimits.js'; - -export class DashScopeOpenAICompatibleProvider - implements OpenAICompatibleProvider -{ - private contentGeneratorConfig: ContentGeneratorConfig; - private cliConfig: Config; +import { DefaultOpenAICompatibleProvider } from './default.js'; +export class 
DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatibleProvider { constructor( contentGeneratorConfig: ContentGeneratorConfig, cliConfig: Config, ) { - this.cliConfig = cliConfig; - this.contentGeneratorConfig = contentGeneratorConfig; + super(contentGeneratorConfig, cliConfig); } static isDashScopeProvider( @@ -44,7 +37,7 @@ export class DashScopeOpenAICompatibleProvider return /([\w-]+\.)?dashscope(-intl)?\.aliyuncs\.com/i.test(baseUrl); } - buildHeaders(): Record { + override buildHeaders(): Record { const version = this.cliConfig.getCliVersion() || 'unknown'; const userAgent = `QwenCode/${version} (${process.platform}; ${process.arch})`; const { authType, customHeaders } = this.contentGeneratorConfig; @@ -60,7 +53,7 @@ export class DashScopeOpenAICompatibleProvider : defaultHeaders; } - buildClient(): OpenAI { + override buildClient(): OpenAI { const { apiKey, baseUrl = DEFAULT_DASHSCOPE_BASE_URL, @@ -98,7 +91,7 @@ export class DashScopeOpenAICompatibleProvider * @param userPromptId - Unique identifier for the user prompt for session tracking * @returns Configured request with DashScope-specific parameters applied */ - buildRequest( + override buildRequest( request: OpenAI.Chat.ChatCompletionCreateParams, userPromptId: string, ): OpenAI.Chat.ChatCompletionCreateParams { @@ -116,8 +109,9 @@ export class DashScopeOpenAICompatibleProvider tools = updatedTools; } - // Apply output token limits based on model capabilities - // This ensures max_tokens doesn't exceed the model's maximum output limit + // Apply output token limits using parent class logic + // Uses conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT) + // to preserve input quota when user hasn't explicitly configured max_tokens const requestWithTokenLimits = this.applyOutputTokenLimit(request); const extraBody = this.contentGeneratorConfig.extra_body; @@ -155,7 +149,7 @@ export class DashScopeOpenAICompatibleProvider }; } - getDefaultGenerationConfig(): 
GenerateContentConfig { + override getDefaultGenerationConfig(): GenerateContentConfig { return { temperature: 0.3, }; @@ -316,41 +310,6 @@ export class DashScopeOpenAICompatibleProvider return false; } - /** - * Apply output token limit to a request's max_tokens parameter. - * - * Ensures that existing max_tokens parameters don't exceed the model's maximum output - * token limit. Only modifies max_tokens when already present in the request. - * - * @param request - The chat completion request parameters - * @returns The request with max_tokens adjusted to respect the model's limits (if present) - */ - private applyOutputTokenLimit< - T extends { max_tokens?: number | null; model: string }, - >(request: T): T { - const currentMaxTokens = request.max_tokens; - - // Only process if max_tokens is already present in the request - if (currentMaxTokens === undefined || currentMaxTokens === null) { - return request; // No max_tokens parameter, return unchanged - } - - // Dynamically calculate output token limit using tokenLimit function - // This ensures we always use the latest model-specific limits without relying on user configuration - const modelLimit = tokenLimit(request.model, 'output'); - - // If max_tokens exceeds the model limit, cap it to the model's limit - if (currentMaxTokens > modelLimit) { - return { - ...request, - max_tokens: modelLimit, - }; - } - - // If max_tokens is within the limit, return the request unchanged - return request; - } - /** * Check if cache control should be disabled based on configuration. 
 *
diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts
index cc227b464..ce46a3621 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/default.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/default.test.ts
@@ -193,6 +193,76 @@ describe('DefaultOpenAICompatibleProvider', () => {
    expect(result).not.toBe(originalRequest); // Should be a new object
  });

+  it('should set conservative max_tokens default when not configured', () => {
+    const requestWithoutMaxTokens: OpenAI.Chat.ChatCompletionCreateParams = {
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'Hello' }],
+    };
+
+    const result = provider.buildRequest(
+      requestWithoutMaxTokens,
+      'prompt-id',
+    );
+
+    // Should set conservative default (min of model limit and DEFAULT_OUTPUT_TOKEN_LIMIT)
+    // GPT-4 has 16K output limit, so min(16K, 32K) = 16K
+    expect(result.max_tokens).toBe(16384);
+  });
+
+  it('should respect user max_tokens for unknown models (deployment aliases, self-hosted)', () => {
+    // Unknown models: user config is respected entirely (backend may support larger limits)
+    const request: OpenAI.Chat.ChatCompletionCreateParams = {
+      model: 'unknown-model',
+      messages: [{ role: 'user', content: 'Hello' }],
+      max_tokens: 100000,
+    };
+
+    const result = provider.buildRequest(request, 'prompt-id');
+
+    // User's 100K setting is preserved for unknown models
+    expect(result.max_tokens).toBe(100000);
+  });
+
+  it('should use conservative default for unknown models when max_tokens not configured', () => {
+    // Unknown models without user config: use DEFAULT_OUTPUT_TOKEN_LIMIT
+    const request: OpenAI.Chat.ChatCompletionCreateParams = {
+      model: 'custom-deployment-alias',
+      messages: [{ role: 'user', content: 'Hello' }],
+    };
+
+    const result = provider.buildRequest(request, 'prompt-id');
+
+    // Uses conservative default (32K)
+    expect(result.max_tokens).toBe(32000);
+  });
+
+  it('should cap max_tokens for known models to avoid API errors', () => {
+    // Known models (GPT-4): user config is capped at model limit
+    const request: OpenAI.Chat.ChatCompletionCreateParams = {
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'Hello' }],
+      max_tokens: 100000, // Exceeds GPT-4's 16K limit
+    };
+
+    const result = provider.buildRequest(request, 'prompt-id');
+
+    // Capped to GPT-4's output limit (16K)
+    expect(result.max_tokens).toBe(16384);
+  });
+
+  it('should treat null max_tokens as not configured', () => {
+    const request: OpenAI.Chat.ChatCompletionCreateParams = {
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'Hello' }],
+      max_tokens: null as unknown as undefined,
+    };
+
+    const result = provider.buildRequest(request, 'prompt-id');
+
+    // GPT-4 has 16K output limit, so conservative default is still 16K
+    expect(result.max_tokens).toBe(16384);
+  });
+
  it('should preserve all sampling parameters', () => {
    const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
      model: 'gpt-3.5-turbo',
@@ -230,7 +300,10 @@ describe('DefaultOpenAICompatibleProvider', () => {
    const result = provider.buildRequest(minimalRequest, 'prompt-id');

-    expect(result).toEqual(minimalRequest);
+    // Should set conservative max_tokens default
+    expect(result.model).toBe('gpt-4');
+    expect(result.messages).toEqual(minimalRequest.messages);
+    expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
  });

  it('should handle streaming requests', () => {
@@ -242,8 +315,11 @@ describe('DefaultOpenAICompatibleProvider', () => {
    const result = provider.buildRequest(streamingRequest, 'prompt-id');

-    expect(result).toEqual(streamingRequest);
+    // Should set conservative max_tokens default while preserving stream
+    expect(result.model).toBe('gpt-4');
+    expect(result.messages).toEqual(streamingRequest.messages);
    expect(result.stream).toBe(true);
+    expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
  });
  it('should not modify the original request object', () => {
@@ -287,6 +363,7 @@ describe('DefaultOpenAICompatibleProvider', () => {

      expect(result).toEqual({
        ...originalRequest,
+        max_tokens: 16384, // GPT-4 has 16K limit, min(16K, 32K) = 16K
        custom_param: 'custom_value',
        nested: { key: 'value' },
      });
@@ -301,7 +378,11 @@ describe('DefaultOpenAICompatibleProvider', () => {

    const result = provider.buildRequest(originalRequest, 'prompt-id');

-    expect(result).toEqual(originalRequest);
+    // Should preserve original params and set conservative max_tokens default
+    expect(result.model).toBe('gpt-4');
+    expect(result.messages).toEqual(originalRequest.messages);
+    expect(result.temperature).toBe(0.7);
+    expect(result.max_tokens).toBe(16384); // GPT-4 has 16K limit, min(16K, 32K) = 16K
    expect(result).not.toHaveProperty('custom_param');
  });
});
diff --git a/packages/core/src/core/openaiContentGenerator/provider/default.ts b/packages/core/src/core/openaiContentGenerator/provider/default.ts
index 783c962d1..ec7f6946a 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/default.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/default.ts
@@ -5,6 +5,11 @@ import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
import type { OpenAICompatibleProvider } from './types.js';
import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js';
+import {
+  tokenLimit,
+  DEFAULT_OUTPUT_TOKEN_LIMIT,
+  hasExplicitOutputLimit,
+} from '../../tokenLimits.js';

/**
 * Default provider for standard OpenAI-compatible APIs
@@ -65,9 +70,13 @@ export class DefaultOpenAICompatibleProvider
    _userPromptId: string,
  ): OpenAI.Chat.ChatCompletionCreateParams {
    const extraBody = this.contentGeneratorConfig.extra_body;
-    // Default provider doesn't need special enhancements, just pass through all parameters
+
+    // Apply output token limits to ensure max_tokens is set appropriately
+    // This prevents occupying too much context window with output reservation
+    const requestWithTokenLimits = this.applyOutputTokenLimit(request);
+
    return {
-      ...request, // Preserve all original parameters including sampling params
+      ...requestWithTokenLimits,
      ...(extraBody ? extraBody : {}),
    };
  }

@@ -75,4 +84,70 @@
  getDefaultGenerationConfig(): GenerateContentConfig {
    return {};
  }
+
+  /**
+   * Apply output token limit to a request's max_tokens parameter.
+   *
+   * Purpose:
+   * Some APIs (e.g., OpenAI-compatible) default to a very small max_tokens value,
+   * which can cause responses to be truncated mid-output. This function ensures
+   * a reasonable default is set while respecting user configuration.
+   *
+   * Logic:
+   * 1. If user explicitly configured max_tokens:
+   *    - For known models (in OUTPUT_PATTERNS): use the user's value, but cap at
+   *      model's max output limit to avoid API errors
+   *      (input + max_output > contextWindowSize would cause 400 errors on some APIs)
+   *    - For unknown models (deployment aliases, self-hosted): respect user's
+   *      configured value entirely (backend may support larger limits)
+   * 2. If user didn't configure max_tokens:
+   *    - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT)
+   *    - This provides a conservative default (32K) that avoids truncating output
+   *      while preserving input quota (not occupying too much context window)
+   * 3. If model has no specific limit (tokenLimit returns default):
+   *    - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard
+   *
+   * Examples:
+   * - User sets 4K, known model limit 64K → uses 4K (respects user preference)
+   * - User sets 100K, known model limit 64K → uses 64K (capped to avoid API error)
+   * - User sets 100K, unknown model → uses 100K (respects user, backend may support it)
+   * - User not set, model limit 64K → uses 32K (conservative default)
+   * - User not set, model limit 8K → uses 8K (model limit is lower)
+   *
+   * @param request - The chat completion request parameters
+   * @returns The request with max_tokens adjusted according to the logic
+   */
+  protected applyOutputTokenLimit<
+    T extends { max_tokens?: number | null; model: string },
+  >(request: T): T {
+    const userMaxTokens = request.max_tokens;
+
+    // Get model-specific output limit and check if model is known
+    const modelLimit = tokenLimit(request.model, 'output');
+    const isKnownModel = hasExplicitOutputLimit(request.model);
+
+    // Determine the effective max_tokens
+    let effectiveMaxTokens: number;
+
+    if (userMaxTokens !== undefined && userMaxTokens !== null) {
+      // User explicitly configured max_tokens
+      if (isKnownModel) {
+        // Known model: respect user config but cap at model limit to avoid API errors
+        effectiveMaxTokens = Math.min(userMaxTokens, modelLimit);
+      } else {
+        // Unknown model (deployment aliases, self-hosted): respect user's value
+        // The backend may support larger limits than our default
+        effectiveMaxTokens = userMaxTokens;
+      }
+    } else {
+      // User didn't configure, use conservative default:
+      // min(model-specific limit, DEFAULT_OUTPUT_TOKEN_LIMIT)
+      effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
+    }
+
+    return {
+      ...request,
+      max_tokens: effectiveMaxTokens,
+    };
+  }
}
diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts
index 176efeb60..b0947e98f 100644
--- a/packages/core/src/core/prompts.test.ts
+++ b/packages/core/src/core/prompts.test.ts
@@ -80,6 +80,35 @@ describe('Core System Prompt (prompts.ts)', () => {
    expect(prompt).toMatchSnapshot(); // Snapshot the combined prompt
  });

+  it('should append extra system prompt instructions after user memory when provided', () => {
+    vi.stubEnv('SANDBOX', undefined);
+    const memory = 'Remember the project conventions.';
+    const appendInstruction = 'Always answer in exactly one sentence.';
+    const prompt = getCoreSystemPrompt(memory, undefined, appendInstruction);
+
+    expect(prompt).toContain(`\n\n---\n\n${memory}`);
+    expect(prompt).toContain(`\n\n---\n\n${appendInstruction}`);
+    expect(prompt.indexOf(memory)).toBeLessThan(
+      prompt.indexOf(appendInstruction),
+    );
+  });
+
+  it('should append extra instructions after a custom system prompt and user memory', () => {
+    const customInstruction = 'You are a release manager.';
+    const userMemory = 'The repo uses pnpm.';
+    const appendInstruction = 'Only report blocking issues.';
+
+    const result = getCustomSystemPrompt(
+      customInstruction,
+      userMemory,
+      appendInstruction,
+    );
+
+    expect(result).toBe(
+      [customInstruction, userMemory, appendInstruction].join('\n\n---\n\n'),
+    );
+  });
+
  it('should include sandbox-specific instructions when SANDBOX env var is set', () => {
    vi.stubEnv('SANDBOX', 'true'); // Generic sandbox value
    const prompt = getCoreSystemPrompt();
diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts
index 21d21c2c5..b2799b79b 100644
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -72,11 +72,13 @@ export function resolvePathFromEnv(envVar?: string): {
 *
 * @param customInstruction - Custom system instruction (ContentUnion from @google/genai)
 * @param userMemory - User memory to append
- * @returns Processed custom system instruction with user memory appended
+ * @param appendInstruction - Extra instructions to append after user memory
+ * @returns Processed custom system instruction with user memory and extra append instructions applied
 */
export function getCustomSystemPrompt(
  customInstruction: GenerateContentConfig['systemInstruction'],
  userMemory?: string,
+  appendInstruction?: string,
): string {
  // Extract text from custom instruction
  let instructionText = '';
@@ -100,17 +102,20 @@ export function getCustomSystemPrompt(
  }

  // Append user memory using the same pattern as getCoreSystemPrompt
-  const memorySuffix =
-    userMemory && userMemory.trim().length > 0
-      ? `\n\n---\n\n${userMemory.trim()}`
-      : '';
+  const memorySuffix = buildSystemPromptSuffix(userMemory);

-  return `${instructionText}${memorySuffix}`;
+  return `${instructionText}${memorySuffix}${buildSystemPromptSuffix(appendInstruction)}`;
+}
+
+function buildSystemPromptSuffix(text?: string): string {
+  const trimmed = text?.trim();
+  return trimmed ? `\n\n---\n\n${trimmed}` : '';
}

export function getCoreSystemPrompt(
  userMemory?: string,
  model?: string,
+  appendInstruction?: string,
): string {
  // if QWEN_SYSTEM_MD is set (and not 0|false), override system prompt from file
  // default path is .qwen/system.md but can be modified via custom path in QWEN_SYSTEM_MD
@@ -338,10 +343,11 @@ Your core function is efficient and safe assistance. Balance extreme conciseness

  const memorySuffix =
    userMemory && userMemory.trim().length > 0
-      ? `\n\n---\n\n${userMemory.trim()}`
+      ? buildSystemPromptSuffix(userMemory)
      : '';
+  const appendSuffix = buildSystemPromptSuffix(appendInstruction);

-  return `${basePrompt}${memorySuffix}`;
+  return `${basePrompt}${memorySuffix}${appendSuffix}`;
}

/**
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index b566a01dc..2e923ab73 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -9,7 +9,7 @@ type TokenCount = number;
export type TokenLimitType = 'input' | 'output';

export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 16_384; // 16K tokens
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 32_000; // 32K tokens

/**
 * Accurate numeric limits:
@@ -184,6 +184,19 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  [/^kimi-k2\.5/, LIMITS['32k']],
];

+/**
+ * Check if a model has an explicitly defined output token limit.
+ * This distinguishes between models with known limits in OUTPUT_PATTERNS
+ * and unknown models that would fallback to DEFAULT_OUTPUT_TOKEN_LIMIT.
+ *
+ * @param model - The model name to check
+ * @returns true if the model has an explicit output limit definition, false if it uses the default fallback
+ */
+export function hasExplicitOutputLimit(model: Model): boolean {
+  const norm = normalize(model);
+  return OUTPUT_PATTERNS.some(([regex]) => regex.test(norm));
+}
+
/**
 * Return the token limit for a model string based on the specified type.
 *
diff --git a/packages/core/src/extension/claude-converter.test.ts b/packages/core/src/extension/claude-converter.test.ts
index 502e8196e..c984b17bc 100644
--- a/packages/core/src/extension/claude-converter.test.ts
+++ b/packages/core/src/extension/claude-converter.test.ts
@@ -17,6 +17,7 @@ import {
  type ClaudeMarketplacePluginConfig,
  type ClaudeMarketplaceConfig,
} from './claude-converter.js';
+import { HookType } from '../hooks/types.js';

describe('convertClaudeToQwenConfig', () => {
  it('should convert basic Claude config', () => {
@@ -433,4 +434,140 @@ describe('convertClaudePluginPackage', () => {
    // Clean up
    fs.rmSync(result.convertedDir, { recursive: true, force: true });
  });
+
+  it('should convert hooks from Claude plugin format to Qwen format with variable substitution', async () => {
+    // Setup: Create a plugin with hooks in Claude format
+    const pluginSourceDir = path.join(testDir, 'plugin-with-hooks');
+    fs.mkdirSync(pluginSourceDir, { recursive: true });
+
+    // Create hooks directory with hooks.json in Claude format
+    const hooksDir = path.join(pluginSourceDir, 'hooks');
+    fs.mkdirSync(hooksDir, { recursive: true });
+
+    const hooksJson = {
+      hooks: {
+        PostToolUse: [
+          {
+            matcher: 'post-install-matcher', // Part of HookDefinition
+            sequential: true, // Part of HookDefinition
+            description: 'Run after installation',
+            hooks: [
+              // HookConfig[] array inside HookDefinition
+              {
+                type: HookType.Command,
+                command: '${CLAUDE_PLUGIN_ROOT}/scripts/post-install.sh',
+              },
+            ],
+          },
+        ],
+      },
+    };
+
+    fs.writeFileSync(
+      path.join(hooksDir, 'hooks.json'),
+      JSON.stringify(hooksJson),
+      'utf-8',
+    );
+
+    // Create marketplace.json
+    const marketplaceDir = path.join(pluginSourceDir, '.claude-plugin');
+    fs.mkdirSync(marketplaceDir, { recursive: true });
+
+    const marketplaceConfig: ClaudeMarketplaceConfig = {
+      name: 'test-marketplace',
+      owner: { name: 'Test Owner', email: 'test@example.com' },
+      plugins: [
+        {
+          name: 'hooks-plugin',
+          version: '1.0.0',
+          source: './',
+          strict: false,
+          hooks: './hooks/hooks.json', // Reference hooks from file
+        },
+      ],
+    };
+
+    fs.writeFileSync(
+      path.join(marketplaceDir, 'marketplace.json'),
+      JSON.stringify(marketplaceConfig, null, 2),
+      'utf-8',
+    );
+
+    // Execute: Convert the plugin
+    const result = await convertClaudePluginPackage(
+      pluginSourceDir,
+      'hooks-plugin',
+    );
+
+    // Verify: The converted config should contain processed hooks
+    expect(result.config.hooks).toBeDefined();
+    expect(result.config.hooks!['PostToolUse']).toHaveLength(1);
+    // Check that the variable was substituted
+    expect(result.config.hooks!['PostToolUse']![0].hooks![0].command).toBe(
+      `${pluginSourceDir}/scripts/post-install.sh`,
+    );
+
+    // Clean up converted directory
+    fs.rmSync(result.convertedDir, { recursive: true, force: true });
+  });
+
+  it('should handle hooks defined directly in marketplace config', async () => {
+    // Setup: Create a plugin with hooks defined directly in marketplace config
+    const pluginSourceDir = path.join(testDir, 'direct-hooks-plugin');
+    fs.mkdirSync(pluginSourceDir, { recursive: true });
+
+    // Create marketplace.json with hooks defined directly
+    const marketplaceDir = path.join(pluginSourceDir, '.claude-plugin');
+    fs.mkdirSync(marketplaceDir, { recursive: true });
+
+    const marketplaceConfig: ClaudeMarketplaceConfig = {
+      name: 'test-marketplace',
+      owner: { name: 'Test Owner', email: 'test@example.com' },
+      plugins: [
+        {
+          name: 'direct-hooks-plugin',
+          version: '1.0.0',
+          source: './',
+          strict: false,
+          hooks: {
+            PreToolUse: [
+              {
+                matcher: '*', // Part of HookDefinition
+                sequential: true, // Part of HookDefinition
+                hooks: [
+                  // HookConfig[] array inside HookDefinition
+                  {
+                    type: HookType.Command,
+                    command: 'npm install',
+                  },
+                ],
+              },
+            ],
+          },
+        },
+      ],
+    };
+
+    fs.writeFileSync(
+      path.join(marketplaceDir, 'marketplace.json'),
+      JSON.stringify(marketplaceConfig, null, 2),
+      'utf-8',
+    );
+
+    // Execute: Convert the plugin
+    const result = await convertClaudePluginPackage(
+      pluginSourceDir,
+      'direct-hooks-plugin',
+    );
+
+    // Verify: The converted config should contain the hooks
+    expect(result.config.hooks).toBeDefined();
+    expect(result.config.hooks!['PreToolUse']).toHaveLength(1);
+    expect(result.config.hooks!['PreToolUse']![0].hooks![0].command).toBe(
+      'npm install',
+    );
+
+    // Clean up converted directory
+    fs.rmSync(result.convertedDir, { recursive: true, force: true });
+  });
});
diff --git a/packages/core/src/extension/claude-converter.ts b/packages/core/src/extension/claude-converter.ts
index 6c333c9aa..ff5ba72a9 100644
--- a/packages/core/src/extension/claude-converter.ts
+++ b/packages/core/src/extension/claude-converter.ts
@@ -16,6 +16,7 @@ import type {
  ExtensionInstallMetadata,
  MCPServerConfig,
} from '../config/config.js';
+import type { HookEventName, HookDefinition } from '../hooks/types.js';
import { cloneFromGit, downloadFromGitHubRelease } from './github.js';
import { createHash } from 'node:crypto';
import { copyDirectory } from './gemini-converter.js';
@@ -25,9 +26,121 @@ import {
} from '../utils/yaml-parser.js';
import { createDebugLogger } from '../utils/debugLogger.js';
import { normalizeContent } from '../utils/textUtils.js';
+import { substituteHookVariables } from './variables.js';

const debugLogger = createDebugLogger('CLAUDE_CONVERTER');

+/**
+ * Perform variable replacement in all markdown and shell script files of the extension.
+ * This is done during the conversion phase to avoid modifying files during every extension load.
+ * @param extensionPath - The path to the extension directory
+ */
+export function performVariableReplacement(extensionPath: string): void {
+  // Process markdown files
+  const mdGlobPattern = '**/*.md';
+  const mdGlobOptions = {
+    cwd: extensionPath,
+    nodir: true,
+  };
+
+  try {
+    const mdFiles = glob.sync(mdGlobPattern, mdGlobOptions);
+
+    for (const file of mdFiles) {
+      const filePath = path.join(extensionPath, file);
+
+      try {
+        const content = fs.readFileSync(filePath, 'utf8');
+
+        // Replace ${CLAUDE_PLUGIN_ROOT} with the actual extension path
+        const updatedContent = content.replace(
+          /\$\{CLAUDE_PLUGIN_ROOT\}/g,
+          extensionPath,
+        );
+
+        // Replace Markdown shell syntax ```! ... ``` with system-recognized !{...} syntax
+        // This regex finds code blocks with ! language identifier and captures their content
+        const updatedMdContent = updatedContent.replace(
+          /```!(?:\s*\n)?([\s\S]*?)\n*```/g,
+          '!{$1}',
+        );
+
+        // Only write if content was actually changed
+        if (updatedMdContent !== content) {
+          fs.writeFileSync(filePath, updatedMdContent, 'utf8');
+          debugLogger.debug(
+            `Updated variables and syntax in file: ${filePath}`,
+          );
+        }
+      } catch (error) {
+        debugLogger.warn(
+          `Failed to process file ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
+        );
+      }
+    }
+  } catch (error) {
+    debugLogger.warn(
+      `Failed to scan markdown files in extension directory ${extensionPath}: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+
+  // Process shell script files
+  const scriptGlobPattern = '**/*.sh';
+  const scriptGlobOptions = {
+    cwd: extensionPath,
+    nodir: true,
+  };
+
+  try {
+    const scriptFiles = glob.sync(scriptGlobPattern, scriptGlobOptions);
+
+    for (const file of scriptFiles) {
+      const filePath = path.join(extensionPath, file);
+
+      try {
+        const content = fs.readFileSync(filePath, 'utf8');
+
+        // Replace references to "role":"assistant" with "type":"assistant" in shell scripts
+        const updatedScriptContent = content.replace(
+          /"role":"assistant"/g,
+          '"type":"assistant"',
+        );
+
+        // Replace transcript parsing logic to adapt to actual transcript structure
+        // Change from .message.content | map(select(.type == "text")) to .message.parts | map(select(has("text")))
+        const adaptedScriptContent = updatedScriptContent.replace(
+          /\.message\.content\s*\|\s*map\(select\(\.type\s*==\s*"text"\)\)/g,
+          '.message.parts | map(select(has("text")))',
+        );
+
+        // Replace references to ".claude" directory with ".qwen" in shell scripts
+        // Only match path references (e.g., ~/.claude/, $HOME/.claude, ./.claude/)
+        // Avoid matching URLs, comments, or string literals containing .claude
+        const finalScriptContent = adaptedScriptContent.replace(
+          /(\$\{?HOME\}?\/|~\/)?\.claude(\/|$)/g,
+          '$1.qwen$2',
+        );
+
+        // Only write if content was actually changed
+        if (finalScriptContent !== content) {
+          fs.writeFileSync(filePath, finalScriptContent, 'utf8');
+          debugLogger.debug(
+            `Updated transcript format and replaced .claude with .qwen in shell script: ${filePath}`,
+          );
+        }
+      } catch (error) {
+        debugLogger.warn(
+          `Failed to process shell script file ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
+        );
+      }
+    }
+  } catch (error) {
+    debugLogger.warn(
+      `Failed to scan shell script files in extension directory ${extensionPath}: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+}
+
export interface ClaudePluginConfig {
  name: string;
  version: string;
@@ -40,7 +153,7 @@ export interface ClaudePluginConfig {
  commands?: string | string[];
  agents?: string | string[];
  skills?: string | string[];
-  hooks?: string;
+  hooks?: string | { [K in HookEventName]?: HookDefinition[] };
  mcpServers?: string | Record;
  outputStyles?: string | string[];
  lspServers?: string | Record;
@@ -312,12 +425,21 @@ export function convertClaudeToQwenConfig(
    }
  }

-  // Warn about unsupported fields
+  // Parse hooks
+  let hooks: { [K in HookEventName]?: HookDefinition[] } | undefined;
  if (claudeConfig.hooks) {
-    debugLogger.warn(
-      `[Claude Converter] Hooks are not yet supported in ${claudeConfig.name}`,
-    );
+    if (typeof claudeConfig.hooks === 'string') {
+      // If it's a string, it's a file path, we handle it later in the conversion process
+      // hooks will be loaded from file path in the convertClaudePluginPackage function
+    } else {
+      // Assume it's already in the correct format
+      hooks = claudeConfig.hooks as { [K in HookEventName]?: HookDefinition[] };
+    }
+  } else {
+    hooks = undefined;
  }
+
+  // Warn about unsupported fields
  if (claudeConfig.outputStyles) {
    debugLogger.warn(
      `[Claude Converter] Output styles are not yet supported in ${claudeConfig.name}`,
@@ -329,6 +451,7 @@ export function convertClaudeToQwenConfig(
    version: claudeConfig.version,
    mcpServers,
    lspServers: claudeConfig.lspServers,
+    hooks, // Assign the properly typed hooks variable
  };
}

@@ -461,10 +584,49 @@ export async function convertClaudePluginPackage(
    // Otherwise, keep the existing folder from pluginSource (default behavior)
  }

+  // Step 7: Handle hooks from file paths if needed
+  if (mergedConfig.hooks && typeof mergedConfig.hooks === 'string') {
+    const hooksPath = path.isAbsolute(mergedConfig.hooks)
+      ? mergedConfig.hooks
+      : path.join(pluginSource, mergedConfig.hooks);
+
+    if (fs.existsSync(hooksPath)) {
+      try {
+        const hooksContent = fs.readFileSync(hooksPath, 'utf-8');
+        const parsedHooks = JSON.parse(hooksContent);
+
+        // Check if the file has a top-level "hooks" property (like Claude plugins use)
+        // or if the entire file content is the hooks object
+        let hooksData;
+        if (parsedHooks.hooks && typeof parsedHooks.hooks === 'object') {
+          hooksData = parsedHooks.hooks as {
+            [K in HookEventName]?: HookDefinition[];
+          };
+        } else {
+          // Assume the entire file content is the hooks object
+          hooksData = parsedHooks as {
+            [K in HookEventName]?: HookDefinition[];
+          };
+        }
+
+        // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT}
+        mergedConfig.hooks = substituteHookVariables(hooksData, pluginSource);
+      } catch (error) {
+        debugLogger.warn(
+          `Failed to parse hooks file ${hooksPath}: ${error instanceof Error ? error.message : String(error)}`,
+        );
+      }
+    }
+  }
+
  // Step 9.1: Convert collected agent files from Claude format to Qwen format
  const agentsDestDir = path.join(tmpDir, 'agents');
  await convertAgentFiles(agentsDestDir);

+  // Step 9.2: Perform variable replacement in markdown and shell script files
+  // This is done during conversion to avoid modifying files during every extension load
+  performVariableReplacement(tmpDir);
+
  // Step 10: Convert to Qwen format config
  const qwenConfig = convertClaudeToQwenConfig(mergedConfig);
diff --git a/packages/core/src/extension/extensionManager.test.ts b/packages/core/src/extension/extensionManager.test.ts
index be94f9056..8ef27da30 100644
--- a/packages/core/src/extension/extensionManager.test.ts
+++ b/packages/core/src/extension/extensionManager.test.ts
@@ -757,4 +757,139 @@ describe('extension tests', () => {
      });
    });
  });
+
+  describe('hooks loading and processing', () => {
+    it('should load hooks from qwen-extension.json', async () => {
+      const extensionDir = path.join(userExtensionsDir, 'hooks-extension');
+      fs.mkdirSync(extensionDir, { recursive: true });
+
+      // Create qwen-extension.json with hooks
+      const configWithHooks = {
+        name: 'hooks-extension',
+        version: '1.0.0',
+        hooks: {
+          PreToolUse: [
+            {
+              description: 'Run before tool start',
+              hooks: [
+                {
+                  type: 'command',
+                  command: 'echo "hello"',
+                },
+              ],
+            },
+          ],
+        },
+      };
+
+      fs.writeFileSync(
+        path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME),
+        JSON.stringify(configWithHooks),
+      );
+
+      const manager = createExtensionManager();
+      await manager.refreshCache();
+      const extensions = manager.getLoadedExtensions();
+
+      expect(extensions).toHaveLength(1);
+      expect(extensions[0].hooks).toBeDefined();
+      expect(extensions[0].hooks!['PreToolUse']).toHaveLength(1);
+      expect(extensions[0].hooks!['PreToolUse']![0].hooks![0].command).toBe(
+        'echo "hello"',
+      );
+    });
+
+    it('should load hooks from hooks/hooks.json when not in main config', async () => {
+      const extensionDir = path.join(
+        userExtensionsDir,
+        'hooks-from-file-extension',
+      );
+      fs.mkdirSync(extensionDir, { recursive: true });
+
+      // Create qwen-extension.json without hooks
+      const configWithoutHooks = {
+        name: 'hooks-from-file-extension',
+        version: '1.0.0',
+      };
+
+      fs.writeFileSync(
+        path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME),
+        JSON.stringify(configWithoutHooks),
+      );
+
+      // Create hooks directory and hooks.json
+      const hooksDir = path.join(extensionDir, 'hooks');
+      fs.mkdirSync(hooksDir, { recursive: true });
+
+      const hooksJson = {
+        PostToolUse: [
+          {
+            description: 'Run after install',
+            hooks: [
+              {
+                type: 'command',
+                command: `echo "installed in ${extensionDir}"`,
+              },
+            ],
+          },
+        ],
+      };
+
+      fs.writeFileSync(
+        path.join(hooksDir, 'hooks.json'),
+        JSON.stringify(hooksJson),
+      );
+
+      const manager = createExtensionManager();
+      await manager.refreshCache();
+      const extensions = manager.getLoadedExtensions();
+
+      expect(extensions).toHaveLength(1);
+      expect(extensions[0].hooks).toBeDefined();
+      expect(extensions[0].hooks!['PostToolUse']).toHaveLength(1);
+      expect(extensions[0].hooks!['PostToolUse']![0].hooks![0].command).toBe(
+        `echo "installed in ${extensionDir}"`,
+      );
+    });
+
+    it('should substitute ${CLAUDE_PLUGIN_ROOT} variable in hooks', async () => {
+      const extensionDir = path.join(userExtensionsDir, 'hooks-var-extension');
+      fs.mkdirSync(extensionDir, { recursive: true });
+
+      // Create qwen-extension.json with hooks using ${CLAUDE_PLUGIN_ROOT}
+      const configWithHooks = {
+        name: 'hooks-var-extension',
+        version: '1.0.0',
+        hooks: {
+          PreToolUse: [
+            {
+              description: 'Run before start with var',
+              hooks: [
+                {
+                  type: 'command',
+                  command: '${CLAUDE_PLUGIN_ROOT}/scripts/setup.sh',
+                },
+              ],
+            },
+          ],
+        },
+      };
+
+      fs.writeFileSync(
+        path.join(extensionDir, EXTENSIONS_CONFIG_FILENAME),
+        JSON.stringify(configWithHooks),
+      );
+
+      const manager = createExtensionManager();
+      await manager.refreshCache();
+      const extensions = manager.getLoadedExtensions();
+
+      expect(extensions).toHaveLength(1);
+      expect(extensions[0].hooks).toBeDefined();
+      expect(extensions[0].hooks!['PreToolUse']).toHaveLength(1);
+      expect(extensions[0].hooks!['PreToolUse']![0].hooks![0].command).toBe(
+        `${extensionDir}/scripts/setup.sh`,
+      );
+    });
+  });
});
diff --git a/packages/core/src/extension/extensionManager.ts b/packages/core/src/extension/extensionManager.ts
index 3af573ac7..d0382347e 100644
--- a/packages/core/src/extension/extensionManager.ts
+++ b/packages/core/src/extension/extensionManager.ts
@@ -11,6 +11,7 @@ import type {
  SubagentConfig,
  ClaudeMarketplaceConfig,
} from '../index.js';
+import type { HookEventName, HookDefinition } from '../hooks/types.js';
import {
  Storage,
  Config,
@@ -28,6 +29,7 @@ import {
  EXTENSIONS_CONFIG_FILENAME,
  INSTALL_METADATA_FILENAME,
  recursivelyHydrateStrings,
+  substituteHookVariables,
} from './variables.js';
import { resolveEnvVarsInObject } from '../utils/envVarResolver.js';
import {
@@ -100,6 +102,7 @@ export interface Extension {
  commands?: string[];
  skills?: SkillConfig[];
  agents?: SubagentConfig[];
+  hooks?: { [K in HookEventName]?: HookDefinition[] };
}

export interface ExtensionConfig {
@@ -112,6 +115,7 @@ export interface ExtensionConfig {
  skills?: string | string[];
  agents?: string | string[];
  settings?: ExtensionSetting[];
+  hooks?: { [K in HookEventName]?: HookDefinition[] };
}

export interface ExtensionUpdateInfo {
@@ -662,6 +666,50 @@ export class ExtensionManager {
      `${effectiveExtensionPath}/agents`,
    );

+    if (config.hooks) {
+      // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT}
+      extension.hooks = this.substituteHookVariables(
+        config.hooks,
+        effectiveExtensionPath,
+      );
+    }
+
+    // Also load hooks from hooks directory if available and not already set
+    if (!extension.hooks) {
+      const hooksDir = path.join(effectiveExtensionPath, 'hooks');
+      const hooksJsonPath = path.join(hooksDir, 'hooks.json');
+
+      if (fs.existsSync(hooksJsonPath)) {
+        try {
+          const hooksContent = fs.readFileSync(hooksJsonPath, 'utf-8');
+          const parsedHooks = JSON.parse(hooksContent);
+
+          // Check if the file has a top-level "hooks" property or if the entire file content is the hooks object
+          let hooksData;
+          if (parsedHooks.hooks && typeof parsedHooks.hooks === 'object') {
+            hooksData = parsedHooks.hooks as {
+              [K in HookEventName]?: HookDefinition[];
+            };
+          } else {
+            // Assume the entire file content is the hooks object
+            hooksData = parsedHooks as {
+              [K in HookEventName]?: HookDefinition[];
+            };
+          }
+
+          // Process the hooks to substitute variables like ${CLAUDE_PLUGIN_ROOT}
+          extension.hooks = this.substituteHookVariables(
+            hooksData,
+            effectiveExtensionPath,
+          );
+        } catch (error) {
+          debugLogger.warn(
+            `Failed to parse hooks file ${hooksJsonPath}: ${error instanceof Error ? error.message : String(error)}`,
+          );
+        }
+      }
+    }
+
    return extension;
  } catch (e) {
    debugLogger.warn(
@@ -673,6 +721,16 @@ export class ExtensionManager {
    }
  }

+  /**
+   * Substitute variables in hook configurations, particularly ${CLAUDE_PLUGIN_ROOT}
+   */
+  private substituteHookVariables(
+    hooks: { [K in HookEventName]?: HookDefinition[] } | undefined,
+    extensionPath: string,
+  ): { [K in HookEventName]?: HookDefinition[] } | undefined {
+    return substituteHookVariables(hooks, extensionPath);
+  }
+
  loadInstallMetadata(
    extensionDir: string,
  ): ExtensionInstallMetadata | undefined {
diff --git a/packages/core/src/extension/variables.test.ts b/packages/core/src/extension/variables.test.ts
index d2015f4f9..e8a1db714 100644
--- a/packages/core/src/extension/variables.test.ts
+++ b/packages/core/src/extension/variables.test.ts
@@ -5,7 +5,8 @@
 */

import { expect, describe, it } from 'vitest';
-import { hydrateString } from './variables.js';
+import { hydrateString, substituteHookVariables } from './variables.js';
+import { HookType } from '../hooks/types.js';

describe('hydrateString', () => {
  it('should replace a single variable', () => {
@@ -16,3 +17,180 @@ describe('hydrateString', () => {
    expect(result).toBe('Hello, path/my-extension!');
  });
});
+
+describe('substituteHookVariables', () => {
+  it('should substitute ${CLAUDE_PLUGIN_ROOT} with the actual path in hooks', () => {
+    const basePath = '/path/to/plugin';
+
+    const hooks = {
+      PreToolUse: [
+        {
+          description: 'Setup before start',
+          hooks: [
+            {
+              type: HookType.Command,
+              command: '${CLAUDE_PLUGIN_ROOT}/scripts/setup.sh',
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = substituteHookVariables(hooks, basePath);
+
+    expect(result).toBeDefined();
+    expect(result!['PreToolUse']).toHaveLength(1);
+    expect(result!['PreToolUse']![0].hooks![0].command).toBe(
+      '/path/to/plugin/scripts/setup.sh',
+    );
+  });
+
+  it('should handle multiple hooks with variables', () => {
+    const basePath = '/project/plugins/my-plugin';
+
const hooks = { + PostToolUse: [ + { + description: 'Post install hook 1', + hooks: [ + { + type: HookType.Command, + command: '${CLAUDE_PLUGIN_ROOT}/bin/init.sh', + }, + ], + }, + { + description: 'Post install hook 2', + hooks: [ + { + type: HookType.Command, + command: 'chmod +x ${CLAUDE_PLUGIN_ROOT}/bin/executable.sh', + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['PostToolUse']).toHaveLength(2); + expect(result!['PostToolUse']![0].hooks![0].command).toBe( + '/project/plugins/my-plugin/bin/init.sh', + ); + expect(result!['PostToolUse']![1].hooks![0].command).toBe( + 'chmod +x /project/plugins/my-plugin/bin/executable.sh', + ); + }); + + it('should handle multiple event types with hooks', () => { + const basePath = '/home/user/.qwen/extensions/my-extension'; + + const hooks = { + PreToolUse: [ + { + matcher: 'test-matcher', // Part of HookDefinition + sequential: true, // Part of HookDefinition + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, // HookType.Command + command: '${CLAUDE_PLUGIN_ROOT}/scripts/pre-start.sh', + }, + ], + }, + ], + UserPromptSubmit: [ + { + matcher: 'another-matcher', // Part of HookDefinition + sequential: false, // Part of HookDefinition + hooks: [ + // HookConfig[] array inside HookDefinition + { + type: HookType.Command, // HookType.Command + command: '${CLAUDE_PLUGIN_ROOT}/setup/install.py', + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['PreToolUse']).toHaveLength(1); + expect(result!['PreToolUse']![0].hooks![0].command).toBe( + '/home/user/.qwen/extensions/my-extension/scripts/pre-start.sh', + ); + expect(result!['UserPromptSubmit']).toHaveLength(1); + expect(result!['UserPromptSubmit']![0].hooks![0].command).toBe( + '/home/user/.qwen/extensions/my-extension/setup/install.py', + ); + }); + + it('should not modify 
non-command hooks', () => { + const basePath = '/path/to/extension'; + + const hooks = { + SessionStart: [ + { + matcher: 'test-matcher', // This is part of HookDefinition + sequential: true, // This is part of HookDefinition + hooks: [ + // This is the HookConfig[] array inside HookDefinition + { + type: HookType.Command, // This is part of HookConfig + command: '${CLAUDE_PLUGIN_ROOT}/scripts/run.sh', // This is part of HookConfig + }, + { + type: 'non-command' as HookType.Command, // Non-command type won't be processed + command: '${CLAUDE_PLUGIN_ROOT}/not-affected', // Should not be modified + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result!['SessionStart']).toHaveLength(1); + expect(result!['SessionStart']![0].hooks![0].command).toBe( + '/path/to/extension/scripts/run.sh', + ); + expect(result!['SessionStart']![0].hooks![1].command).toBe( + '${CLAUDE_PLUGIN_ROOT}/not-affected', + ); // Non-command type won't be processed + }); + + it('should return undefined when hooks is undefined', () => { + const result = substituteHookVariables(undefined, '/some/path'); + expect(result).toBeUndefined(); + }); + + it('should return original hooks when no ${CLAUDE_PLUGIN_ROOT} found', () => { + const basePath = '/path/to/plugin'; + + const hooks = { + Stop: [ + { + matcher: 'test-matcher', // This is part of HookDefinition + sequential: true, // This is part of HookDefinition + hooks: [ + // This is the HookConfig[] array inside HookDefinition + { + type: HookType.Command, // This is part of CommandHookConfig + command: 'echo "hello world"', // This is part of CommandHookConfig + }, + ], + }, + ], + }; + + const result = substituteHookVariables(hooks, basePath); + + expect(result).toBeDefined(); + expect(result).toEqual(hooks); // Should be equal but not the same object (deep clone) + expect(result!['Stop']![0].hooks![0].command).toBe('echo "hello world"'); + }); +}); diff --git 
a/packages/core/src/extension/variables.ts b/packages/core/src/extension/variables.ts index ccac1c65f..7bdc60d13 100644 --- a/packages/core/src/extension/variables.ts +++ b/packages/core/src/extension/variables.ts @@ -7,6 +7,10 @@ import { type VariableSchema, VARIABLE_SCHEMA } from './variableSchema.js'; import path from 'node:path'; import { QWEN_DIR } from '../config/storage.js'; +import type { HookEventName, HookDefinition } from '../hooks/types.js'; + +// Re-export types for substituteHookVariables +export type { HookEventName, HookDefinition }; export const EXTENSIONS_DIRECTORY_NAME = path.join(QWEN_DIR, 'extensions'); export const EXTENSIONS_CONFIG_FILENAME = 'qwen-extension.json'; @@ -70,3 +74,40 @@ export function recursivelyHydrateStrings( } return obj; } + +/** + * Substitute variables in hook configurations, particularly ${CLAUDE_PLUGIN_ROOT} + * @param hooks - The hooks configuration object + * @param basePath - The path to substitute for ${CLAUDE_PLUGIN_ROOT} + * @returns A deep cloned hooks object with variables substituted + */ +export function substituteHookVariables( + hooks: { [K in HookEventName]?: HookDefinition[] } | undefined, + basePath: string, +): { [K in HookEventName]?: HookDefinition[] } | undefined { + if (!hooks) return hooks; + + // Deep clone the hooks to avoid modifying the original + const clonedHooks = JSON.parse(JSON.stringify(hooks)); + + // Replace ${CLAUDE_PLUGIN_ROOT} with the actual extension path in all command hooks + for (const eventName in clonedHooks) { + const eventHooks = clonedHooks[eventName as HookEventName]; + if (eventHooks && Array.isArray(eventHooks)) { + for (const hookDef of eventHooks) { + if (hookDef.hooks && Array.isArray(hookDef.hooks)) { + for (const hook of hookDef.hooks) { + if (hook.type === 'command' && hook.command) { + hook.command = hook.command.replace( + /\$\{CLAUDE_PLUGIN_ROOT\}/g, + basePath, + ); + } + } + } + } + } + } + + return clonedHooks; +} diff --git 
a/packages/core/src/models/modelsConfig.test.ts b/packages/core/src/models/modelsConfig.test.ts index 004acb230..87c8aaf34 100644 --- a/packages/core/src/models/modelsConfig.test.ts +++ b/packages/core/src/models/modelsConfig.test.ts @@ -1507,8 +1507,8 @@ describe('ModelsConfig', () => { }); }); - describe('max_tokens fallback', () => { - it('should auto-detect max_tokens when samplingParams is undefined', async () => { + describe('max_tokens in modelsConfig', () => { + it('should not auto-fill max_tokens when samplingParams is undefined', async () => { const modelProvidersConfig: ModelProvidersConfig = { openai: [ { @@ -1528,20 +1528,10 @@ describe('ModelsConfig', () => { await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); const gc = currentGenerationConfig(modelsConfig); - // GPT-4 output limit is 16K per tokenLimits.ts - expect(gc.samplingParams?.max_tokens).toBe(16384); - expect(gc.samplingParams?.temperature).toBeUndefined(); - - const sources = modelsConfig.getGenerationConfigSources(); - expect(sources['samplingParams']?.kind).toBe('computed'); - // Even when samplingParams is not explicitly defined in provider config, - // the field is still tracked as from modelProviders, so the detail reflects that - expect(sources['samplingParams']?.detail).toBe( - 'max_tokens auto-detected from model (other params from modelProviders)', - ); + expect(gc.samplingParams).toBeUndefined(); }); - it('should auto-detect max_tokens when samplingParams exists but max_tokens is missing', async () => { + it('should not auto-fill max_tokens when samplingParams exists but max_tokens is missing', async () => { const modelProvidersConfig: ModelProvidersConfig = { openai: [ { @@ -1563,15 +1553,12 @@ describe('ModelsConfig', () => { await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); const gc = currentGenerationConfig(modelsConfig); - // Should preserve temperature from provider and add max_tokens + // Should preserve existing sampling params but not inject 
max_tokens expect(gc.samplingParams?.temperature).toBe(0.7); - expect(gc.samplingParams?.max_tokens).toBe(16384); + expect(gc.samplingParams?.max_tokens).toBeUndefined(); const sources = modelsConfig.getGenerationConfigSources(); - expect(sources['samplingParams']?.kind).toBe('computed'); - expect(sources['samplingParams']?.detail).toBe( - 'max_tokens auto-detected from model (other params from modelProviders)', - ); + expect(sources['samplingParams']?.kind).toBe('modelProviders'); }); it('should not override existing max_tokens from modelProviders', async () => { @@ -1604,7 +1591,7 @@ describe('ModelsConfig', () => { expect(sources['samplingParams']?.kind).toBe('modelProviders'); }); - it('should use correct output limit for different model families', async () => { + it('should not auto-fill max_tokens for different model families', async () => { const modelProvidersConfig: ModelProvidersConfig = { anthropic: [ { @@ -1622,7 +1609,7 @@ describe('ModelsConfig', () => { ], }; - // Test Claude model (64K output limit) + // Test Claude model without provider max_tokens const claudeConfig = new ModelsConfig({ initialAuthType: AuthType.USE_ANTHROPIC, modelProvidersConfig, @@ -1631,9 +1618,9 @@ describe('ModelsConfig', () => { await claudeConfig.switchModel(AuthType.USE_ANTHROPIC, 'claude-3-opus'); let gc = currentGenerationConfig(claudeConfig); - expect(gc.samplingParams?.max_tokens).toBe(65536); // 64K = 2^16 + expect(gc.samplingParams).toBeUndefined(); - // Test Gemini model (8K output limit) + // Test Gemini model without provider max_tokens const geminiConfig = new ModelsConfig({ initialAuthType: AuthType.USE_GEMINI, modelProvidersConfig, @@ -1642,7 +1629,7 @@ describe('ModelsConfig', () => { await geminiConfig.switchModel(AuthType.USE_GEMINI, 'gemini-pro'); gc = currentGenerationConfig(geminiConfig); - expect(gc.samplingParams?.max_tokens).toBe(8192); + expect(gc.samplingParams).toBeUndefined(); }); }); }); diff --git a/packages/core/src/models/modelsConfig.ts 
b/packages/core/src/models/modelsConfig.ts index d9749bb96..d22cc790c 100644 --- a/packages/core/src/models/modelsConfig.ts +++ b/packages/core/src/models/modelsConfig.ts @@ -772,25 +772,6 @@ export class ModelsConfig { }; } - // max_tokens fallback: auto-detect from model when not set by provider. - // Without this, requests to non-Qwen models (Claude, GPT, etc.) may omit - // max_tokens entirely, causing the API to use a small default (e.g. 4096) - // and truncating long responses mid-tool-call. - if (!this._generationConfig.samplingParams?.max_tokens) { - const outputLimit = tokenLimit(model.id, 'output'); - if (!this._generationConfig.samplingParams) { - this._generationConfig.samplingParams = {}; - } - this._generationConfig.samplingParams.max_tokens = outputLimit; - const existingSource = this.generationConfigSources['samplingParams']; - this.generationConfigSources['samplingParams'] = { - kind: 'computed', - detail: existingSource - ? `max_tokens auto-detected from model (other params from ${existingSource.kind})` - : 'max_tokens auto-detected from model', - }; - } - // modalities fallback: auto-detect from model when not set by provider if (gc.modalities === undefined) { this._generationConfig.modalities = defaultModalities(model.id); diff --git a/packages/core/src/skills/bundled/review/SKILL.md b/packages/core/src/skills/bundled/review/SKILL.md index 14e5f27e6..957031c7a 100644 --- a/packages/core/src/skills/bundled/review/SKILL.md +++ b/packages/core/src/skills/bundled/review/SKILL.md @@ -15,15 +15,16 @@ You are an expert code reviewer. Your job is to review code changes and provide ## Step 1: Determine what to review -Based on the arguments provided: +Your goal here is to understand the scope of changes so you can dispatch agents effectively in Step 2. 
Based on the arguments provided: - **No arguments**: Review local uncommitted changes - Run `git diff` and `git diff --staged` to get all changes - If both diffs are empty, inform the user there are no changes to review and stop here — do not proceed to the review agents - **PR number or URL** (e.g., `123` or `https://github.com/.../pull/123`): - - Run `gh pr view <number>` to get PR details - - Run `gh pr diff <number>` to get the diff + - Save the current branch name, stash any local changes (`git stash --include-untracked`), then `gh pr checkout <number>` + - Run `gh pr view <number>` and save the output (title, description, base branch, etc.) to a temp file (e.g., `/tmp/pr-review-context.md`) so agents can read it without you repeating it in each prompt + - Note the base branch (e.g., `main`) — agents will use `git diff <base>...HEAD` to get the diff and can read files directly - **File path** (e.g., `src/foo.ts`): - Run `git diff HEAD -- <file>` to get recent changes @@ -33,6 +34,8 @@ Based on the arguments provided: Launch **four parallel review agents** to analyze the changes from different angles. Each agent should focus exclusively on its dimension. +**IMPORTANT**: Do NOT paste the full diff into each agent's prompt — this duplicates it 4x. Instead, give each agent the command to obtain the diff, a concise summary of what the changes are about, and its review focus. Each agent can read files and search the codebase on its own. + ### Agent 1: Correctness & Security Focus areas: @@ -77,9 +80,11 @@ Focus areas: - Unexpected side effects or hidden coupling - Anything else that looks off — trust your instincts -## Step 3: Aggregate and present findings +## Step 3: Restore environment and present findings -Combine results from all four agents into a single, well-organized review. Use this format: +If you checked out a PR branch in Step 1, restore the original state first: check out the original branch, `git stash pop` if changes were stashed, and remove the temp file.
+ +Then combine results from all four agents into a single, well-organized review. Use this format: ### Summary diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 0ed3dbf8c..0a7842f38 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -384,7 +384,7 @@ export function logApiError(config: Config, event: ApiErrorEvent): void { ...event, 'event.name': EVENT_API_ERROR, 'event.timestamp': new Date().toISOString(), - ['error.message']: event.error, + ['error.message']: event.error_message, model_name: event.model, duration: event.duration_ms, }; @@ -398,7 +398,7 @@ export function logApiError(config: Config, event: ApiErrorEvent): void { const logger = logs.getLogger(SERVICE_NAME); const logRecord: LogRecord = { - body: `API error for ${event.model}. Error: ${event.error}. Duration: ${event.duration_ms}ms.`, + body: `API error for ${event.model}. Error: ${event.error_message}. Duration: ${event.duration_ms}ms.`, attributes, }; logger.emit(logRecord); diff --git a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts index 1776c1823..b0bb22bb0 100644 --- a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts +++ b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts @@ -645,12 +645,13 @@ export class QwenLogger { status_code: event.status_code?.toString() ?? 
'', duration: event.duration_ms, success: 0, - message: event.error, + message: event.error_message, trace_id: event.response_id, properties: { auth_type: event.auth_type, model: event.model, prompt_id: event.prompt_id, + error_message: event.error_message, error_type: event.error_type, }, }); diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index bf82ae90e..39b6c5c48 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -254,33 +254,36 @@ export class ApiErrorEvent implements BaseTelemetryEvent { 'event.timestamp': string; // ISO 8601 response_id?: string; model: string; - error: string; - error_type?: string; - status_code?: number | string; duration_ms: number; prompt_id: string; auth_type?: string; + // Human-readable error message (e.g. "Request failed with status 429") + error_message: string; + // Error class or category (e.g. "RateLimitError", "invalid_request_error") + error_type?: string; + // HTTP status code from the API response (e.g. 
429, 500) + status_code?: number | string; - constructor( - response_id: string | undefined, - model: string, - error: string, - duration_ms: number, - prompt_id: string, - auth_type?: string, - error_type?: string, - status_code?: number | string, - ) { + constructor(opts: { + responseId?: string; + model: string; + durationMs: number; + promptId: string; + authType?: string; + errorMessage: string; + errorType?: string; + statusCode?: number | string; + }) { this['event.name'] = 'api_error'; this['event.timestamp'] = new Date().toISOString(); - this.response_id = response_id; - this.model = model; - this.error = error; - this.error_type = error_type; - this.status_code = status_code; - this.duration_ms = duration_ms; - this.prompt_id = prompt_id; - this.auth_type = auth_type; + this.response_id = opts.responseId; + this.model = opts.model; + this.duration_ms = opts.durationMs; + this.prompt_id = opts.promptId; + this.auth_type = opts.authType; + this.error_message = opts.errorMessage; + this.error_type = opts.errorType; + this.status_code = opts.statusCode; } } diff --git a/packages/core/src/telemetry/uiTelemetry.test.ts b/packages/core/src/telemetry/uiTelemetry.test.ts index e45032619..37542273a 100644 --- a/packages/core/src/telemetry/uiTelemetry.test.ts +++ b/packages/core/src/telemetry/uiTelemetry.test.ts @@ -301,7 +301,7 @@ describe('UiTelemetryService', () => { 'event.name': EVENT_API_ERROR, model: 'gemini-2.5-pro', duration_ms: 300, - error: 'Something went wrong', + error_message: 'Something went wrong', } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; service.addEvent(event); @@ -342,7 +342,7 @@ describe('UiTelemetryService', () => { 'event.name': EVENT_API_ERROR, model: 'gemini-2.5-pro', duration_ms: 300, - error: 'Something went wrong', + error_message: 'Something went wrong', } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; service.addEvent(responseEvent); diff --git a/packages/core/src/tools/mcp-tool.test.ts 
b/packages/core/src/tools/mcp-tool.test.ts index 005623afe..1826ff197 100644 --- a/packages/core/src/tools/mcp-tool.test.ts +++ b/packages/core/src/tools/mcp-tool.test.ts @@ -18,6 +18,8 @@ import { ToolConfirmationOutcome } from './tools.js'; import type { CallableTool, Part } from '@google/genai'; import { ToolErrorType } from './tool-error.js'; +vi.mock('node:fs/promises'); + // Mock @google/genai mcpToTool and CallableTool // We only need to mock the parts of CallableTool that DiscoveredMCPTool uses. const mockCallTool = vi.fn(); @@ -147,7 +149,7 @@ describe('DiscoveredMCPTool', () => { expect(toolResult.returnDisplay).toBe(stringifiedResponseContent); }); - it('should handle empty result from getStringifiedResultForDisplay', async () => { + it('should handle empty result from getDisplayFromParts', async () => { const params = { param: 'testValue' }; const mockMcpToolResponsePartsEmpty: Part[] = []; mockCallTool.mockResolvedValue(mockMcpToolResponsePartsEmpty); @@ -155,7 +157,9 @@ describe('DiscoveredMCPTool', () => { const toolResult: ToolResult = await invocation.execute( new AbortController().signal, ); - expect(toolResult.returnDisplay).toBe('```json\n[]\n```'); + expect(toolResult.returnDisplay).toBe( + '[Error: Could not parse tool response]', + ); expect(toolResult.llmContent).toEqual([ { text: '[Error: Could not parse tool response]' }, ]); @@ -339,7 +343,9 @@ describe('DiscoveredMCPTool', () => { }, }, ]); - expect(toolResult.returnDisplay).toBe('[Audio: audio/mp3]'); + expect(toolResult.returnDisplay).toBe( + `[Tool '${serverToolName}' provided the following audio data with mime-type: audio/mp3]\n[audio/mp3]`, + ); }); it('should handle a ResourceLinkBlock response', async () => { @@ -372,7 +378,7 @@ describe('DiscoveredMCPTool', () => { }, ]); expect(toolResult.returnDisplay).toBe( - '[Link to My Resource: file:///path/to/thing]', + 'Resource Link: My Resource at file:///path/to/thing', ); }); @@ -446,7 +452,7 @@ describe('DiscoveredMCPTool', () => { 
}, ]); expect(toolResult.returnDisplay).toBe( - '[Embedded Resource: application/octet-stream]', + `[Tool '${serverToolName}' provided the following embedded resource with mime-type: application/octet-stream]\n[application/octet-stream]`, ); }); @@ -489,7 +495,7 @@ describe('DiscoveredMCPTool', () => { { text: 'Second part.' }, ]); expect(toolResult.returnDisplay).toBe( - 'First part.\n[Image: image/jpeg]\nSecond part.', + `First part.\n[Tool '${serverToolName}' provided the following image data with mime-type: image/jpeg]\n[image/jpeg]\nSecond part.`, ); }); @@ -514,9 +520,7 @@ describe('DiscoveredMCPTool', () => { const toolResult = await invocation.execute(new AbortController().signal); expect(toolResult.llmContent).toEqual([{ text: 'Valid part.' }]); - expect(toolResult.returnDisplay).toBe( - 'Valid part.\n[Unknown content type: future_block]', - ); + expect(toolResult.returnDisplay).toBe('Valid part.'); }); it('should handle a complex mix of content block types', async () => { @@ -574,7 +578,7 @@ describe('DiscoveredMCPTool', () => { }, ]); expect(toolResult.returnDisplay).toBe( - 'Here is a resource.\n[Link to My Resource: file:///path/to/resource]\nEmbedded text content.\n[Image: image/jpeg]', + `Here is a resource.\nResource Link: My Resource at file:///path/to/resource\nEmbedded text content.\n[Tool '${serverToolName}' provided the following image data with mime-type: image/jpeg]\n[image/jpeg]`, ); }); @@ -964,6 +968,223 @@ describe('DiscoveredMCPTool', () => { }); }); + describe('output truncation for large MCP results', () => { + const THRESHOLD = 1000; + const TRUNCATE_LINES = 50; + + const mockConfigWithTruncation = { + getTruncateToolOutputThreshold: () => THRESHOLD, + getTruncateToolOutputLines: () => TRUNCATE_LINES, + getUsageStatisticsEnabled: () => false, + storage: { + getProjectTempDir: () => '/tmp/test-project', + }, + isTrustedFolder: () => true, + } as any; + + it('should truncate large text results from direct client execution', async () => 
{ + const largeText = 'Line of text content\n'.repeat(200); // ~4200 chars, well over THRESHOLD + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: largeText }], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, // trust + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // The text part in llmContent should be truncated + const textParts = (result.llmContent as Part[]).filter( + (p: Part) => p.text, + ); + const combinedText = textParts.map((p: Part) => p.text).join(''); + expect(combinedText.length).toBeLessThan(largeText.length); + expect(combinedText).toContain('CONTENT TRUNCATED'); + expect(result.returnDisplay).toContain('CONTENT TRUNCATED'); + }); + + it('should truncate large text results from callable tool execution', async () => { + const largeText = 'Line of text content\n'.repeat(200); + const mockMcpToolResponseParts: Part[] = [ + { + functionResponse: { + name: serverToolName, + response: { + content: [{ type: 'text', text: largeText }], + }, + }, + }, + ]; + mockCallTool.mockResolvedValue(mockMcpToolResponseParts); + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + const textParts = (result.llmContent as Part[]).filter( + (p: Part) => p.text, + ); + const combinedText = textParts.map((p: Part) => p.text).join(''); + expect(combinedText.length).toBeLessThan(largeText.length); + expect(combinedText).toContain('CONTENT TRUNCATED'); + 
expect(result.returnDisplay).toContain('CONTENT TRUNCATED'); + }); + + it('should not truncate small text results', async () => { + const smallText = 'Small response'; + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: smallText }], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toEqual([{ text: smallText }]); + expect(result.returnDisplay).not.toContain('Output too long'); + }); + + it('should not truncate non-text content (images, audio)', async () => { + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [ + { + type: 'image', + data: 'x'.repeat(5000), // large base64 data + mimeType: 'image/png', + }, + ], + })), + }; + + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // Image data should not be truncated + const inlineDataParts = (result.llmContent as Part[]).filter( + (p: Part) => p.inlineData, + ); + expect(inlineDataParts[0].inlineData!.data).toBe('x'.repeat(5000)); + }); + + it('should truncate only text parts in mixed content', async () => { + const largeText = 'Line of text content\n'.repeat(200); + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [ + { type: 'text', text: largeText }, + { + type: 'image', + data: 'IMAGE_DATA', + mimeType: 'image/png', + }, + ], + })), + }; + + const truncTool = new 
DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + true, + undefined, + mockConfigWithTruncation, + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + const parts = result.llmContent as Part[]; + // Text should be truncated + const textPart = parts.find( + (p: Part) => p.text && !p.text.startsWith('[Tool'), + ); + expect(textPart!.text!.length).toBeLessThan(largeText.length); + expect(textPart!.text).toContain('CONTENT TRUNCATED'); + // Image should be preserved + const imagePart = parts.find((p: Part) => p.inlineData); + expect(imagePart!.inlineData!.data).toBe('IMAGE_DATA'); + }); + + it('should not truncate when config is not provided', async () => { + const largeText = 'Line of text content\n'.repeat(200); + const mockMcpClient: McpDirectClient = { + callTool: vi.fn(async () => ({ + content: [{ type: 'text', text: largeText }], + })), + }; + + // No cliConfig provided + const truncTool = new DiscoveredMCPTool( + mockCallableToolInstance, + serverName, + serverToolName, + baseDescription, + inputSchema, + undefined, + undefined, + undefined, // no config + mockMcpClient, + ); + + const invocation = truncTool.build({ param: 'test' }); + const result = await invocation.execute(new AbortController().signal); + + // Without config, should return untouched + expect(result.llmContent).toEqual([{ text: largeText }]); + }); + }); + describe('streaming progress for long-running MCP tools', () => { it('should have canUpdateOutput set to true so the scheduler creates liveOutputCallback', () => { // For long-running MCP tools (e.g., browseruse), the scheduler needs diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts index 5d48b68c7..73ba1ece4 100644 --- a/packages/core/src/tools/mcp-tool.ts +++ b/packages/core/src/tools/mcp-tool.ts @@ -23,6 +23,7 @@ import { import type { 
CallableTool, FunctionCall, Part } from '@google/genai'; import { ToolErrorType } from './tool-error.js'; import type { Config } from '../config/config.js'; +import { truncateToolOutput } from '../utils/truncation.js'; type ToolParams = Record<string, unknown>; @@ -263,10 +264,11 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< } const transformedParts = transformMcpContentToParts(rawResponseParts); + const truncatedParts = await this.truncateTextParts(transformedParts); return { - llmContent: transformedParts, - returnDisplay: getStringifiedResultForDisplay(rawResponseParts), + llmContent: truncatedParts, + returnDisplay: getDisplayFromParts(truncatedParts), }; } @@ -333,13 +335,39 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< } const transformedParts = transformMcpContentToParts(rawResponseParts); + const truncatedParts = await this.truncateTextParts(transformedParts); return { - llmContent: transformedParts, - returnDisplay: getStringifiedResultForDisplay(rawResponseParts), + llmContent: truncatedParts, + returnDisplay: getDisplayFromParts(truncatedParts), }; } + /** + * Truncates text parts in the transformed result if they exceed the + * configured threshold. Non-text parts (images, audio, etc.) are preserved. + */ + private async truncateTextParts(parts: Part[]): Promise<Part[]> { + if (!this.cliConfig) { + return parts; + } + + const result: Part[] = []; + for (const part of parts) { + if (part.text && !part.inlineData) { + const truncated = await truncateToolOutput( + this.cliConfig, + `mcp__${this.serverName}__${this.serverToolName}`, + part.text, + ); + result.push({ text: truncated.content }); + } else { + result.push(part); + } + } + return result; + } + getDescription(): string { return safeJsonStringify(this.params); } @@ -524,43 +552,22 @@ function transformMcpContentToParts(sdkResponse: Part[]): Part[] { } /** - * Processes the raw response from the MCP tool to generate a clean, - * human-readable string for display in the CLI.
It summarizes non-text - * content and presents text directly. - * - * @param rawResponse The raw Part[] array from the GenAI SDK. - * @returns A formatted string representing the tool's output. + * Builds a human-readable display string from transformed Part[]. + * Text parts are shown directly; inline data is summarized by mime type. */ -function getStringifiedResultForDisplay(rawResponse: Part[]): string { - const mcpContent = rawResponse?.[0]?.functionResponse?.response?.[ - 'content' - ] as McpContentBlock[]; - - if (!Array.isArray(mcpContent)) { - return '```json\n' + JSON.stringify(rawResponse, null, 2) + '\n```'; +function getDisplayFromParts(parts: Part[]): string { + if (parts.length === 0) { + return ''; } - const displayParts = mcpContent.map((block: McpContentBlock): string => { - switch (block.type) { - case 'text': - return block.text; - case 'image': - return `[Image: ${block.mimeType}]`; - case 'audio': - return `[Audio: ${block.mimeType}]`; - case 'resource_link': - return `[Link to ${block.title || block.name}: ${block.uri}]`; - case 'resource': - if (block.resource?.text) { - return block.resource.text; - } - return `[Embedded Resource: ${ - block.resource?.mimeType || 'unknown type' - }]`; - default: - return `[Unknown content type: ${(block as { type: string }).type}]`; + const displayParts: string[] = []; + for (const part of parts) { + if (part.text !== undefined) { + displayParts.push(part.text); + } else if (part.inlineData) { + displayParts.push(`[${part.inlineData.mimeType}]`); } - }); + } return displayParts.join('\n'); } diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index f6f140afc..1878c3805 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -94,6 +94,14 @@ describe('ReadFileTool', () => { expect(typeof result).not.toBe('string'); }); + it('should allow access to files in OS temp directory', () => { + const params: 
ReadFileToolParams = { + absolute_path: path.join(os.tmpdir(), 'pr-review-context.md'), + }; + const result = tool.build(params); + expect(typeof result).not.toBe('string'); + }); + it('should show temp directory in error message when path is outside workspace and temp dir', () => { const params: ReadFileToolParams = { absolute_path: '/completely/outside/path.txt', @@ -427,6 +435,28 @@ describe('ReadFileTool', () => { expect(result.returnDisplay).toBe(''); }); + it('should successfully read files from OS temp directory', async () => { + const osTempFile = await fsp.mkdtemp( + path.join(os.tmpdir(), 'read-file-test-'), + ); + const tempFilePath = path.join(osTempFile, 'pr-review-context.md'); + const tempFileContent = '## PR #123\nFix encoding issues'; + await fsp.writeFile(tempFilePath, tempFileContent, 'utf-8'); + + try { + const params: ReadFileToolParams = { absolute_path: tempFilePath }; + const invocation = tool.build(params) as ToolInvocation< + ReadFileToolParams, + ToolResult + >; + + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toBe(tempFileContent); + } finally { + await fsp.rm(osTempFile, { recursive: true, force: true }); + } + }); + describe('with .qwenignore', () => { beforeEach(async () => { await fsp.writeFile( diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 82457d234..2e0a1c228 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import os from 'node:os'; import path from 'node:path'; import { makeRelative, shortenPath } from '../utils/paths.js'; import type { ToolInvocation, ToolLocation, ToolResult } from './tools.js'; @@ -189,9 +190,11 @@ export class ReadFileTool extends BaseDeclarativeTool< const userSkillsDir = this.config.storage.getUserSkillsDir(); const arenaDir = Storage.getGlobalArenaDir(); const resolvedFilePath = path.resolve(filePath); + const osTempDir = 
os.tmpdir(); const isWithinTempDir = isSubpath(projectTempDir, resolvedFilePath) || - isSubpath(globalTempDir, resolvedFilePath); + isSubpath(globalTempDir, resolvedFilePath) || + isSubpath(osTempDir, resolvedFilePath); const isWithinArenaDir = isSubpath(arenaDir, resolvedFilePath); const isWithinUserSkills = isSubpath(userSkillsDir, resolvedFilePath); diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index 1de48b599..54ecf30f8 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -26,9 +26,7 @@ import { Kind, } from './tools.js'; import { getErrorMessage } from '../utils/errors.js'; -import { truncateAndSaveToFile } from '../utils/truncation.js'; -import { logToolOutputTruncated } from '../telemetry/loggers.js'; -import { ToolOutputTruncatedEvent } from '../telemetry/types.js'; +import { truncateToolOutput } from '../utils/truncation.js'; import type { ShellExecutionConfig, ShellOutputEvent, @@ -381,21 +379,11 @@ export class ShellToolInvocation extends BaseToolInvocation< } // Truncate large output and save full content to a temp file. - const truncateThreshold = this.config.getTruncateToolOutputThreshold(); - const truncateLines = this.config.getTruncateToolOutputLines(); - if ( - typeof llmContent === 'string' && - truncateThreshold > 0 && - truncateLines > 0 - ) { - const originalContentLength = llmContent.length; - const fileName = `shell_${crypto.randomBytes(6).toString('hex')}`; - const truncatedResult = await truncateAndSaveToFile( + if (typeof llmContent === 'string') { + const truncatedResult = await truncateToolOutput( + this.config, + ShellTool.Name, llmContent, - fileName, - this.config.storage.getProjectTempDir(), - truncateThreshold, - truncateLines, ); if (truncatedResult.outputFile) { @@ -403,17 +391,6 @@ export class ShellToolInvocation extends BaseToolInvocation< returnDisplayMessage += (returnDisplayMessage ? 
'\n' : '') + `Output too long and was saved to: ${truncatedResult.outputFile}`; - - logToolOutputTruncated( - this.config, - new ToolOutputTruncatedEvent('', { - toolName: ShellTool.Name, - originalContentLength, - truncatedContentLength: truncatedResult.content.length, - threshold: truncateThreshold, - lines: truncateLines, - }), - ); } } diff --git a/packages/core/src/utils/errors.ts b/packages/core/src/utils/errors.ts index b0ba031dd..790123508 100644 --- a/packages/core/src/utils/errors.ts +++ b/packages/core/src/utils/errors.ts @@ -38,6 +38,10 @@ export function isAbortError(error: unknown): boolean { export function getErrorMessage(error: unknown): string { if (error instanceof Error) { + const cause = error.cause; + if (cause instanceof Error && cause.message !== error.message) { + return `${error.message} (cause: ${cause.message})`; + } return error.message; } try { @@ -47,6 +51,80 @@ export function getErrorMessage(error: unknown): string { } } +/** + * Extracts the HTTP status code from an error object. + * + * Checks the following properties in order of priority: + * 1. `error.status` - OpenAI, Anthropic, Gemini SDK errors + * 2. `error.statusCode` - Some HTTP client libraries + * 3. `error.response.status` - Axios-style errors + * 4. `error.error.code` - Nested error objects + * + * @returns The HTTP status code (100-599), or undefined if not found. + */ +export function getErrorStatus(error: unknown): number | undefined { + if (typeof error !== 'object' || error === null) { + return undefined; + } + + const err = error as { + status?: unknown; + statusCode?: unknown; + response?: { status?: unknown }; + error?: { code?: unknown }; + }; + + const value = + err.status ?? err.statusCode ?? err.response?.status ?? err.error?.code; + + return typeof value === 'number' && value >= 100 && value <= 599 + ? value + : undefined; +} + +/** + * Extracts a descriptive error type string from an error object. + * + * Uses the error's constructor name (e.g. 
"APIConnectionError", + * "APIConnectionTimeoutError") which is more specific than the generic + * `.type` field. Falls back to `.type` for SDK errors that set it, + * then to `error.name`, then "unknown". + * + * For network errors, appends the cause code (e.g. "ECONNREFUSED") + * when available. + * + * @returns A string identifying the error type. + */ +export function getErrorType(error: unknown): string { + if (typeof error !== 'object' || error === null) { + return 'unknown'; + } + + // Prefer the constructor name — SDK subclasses like APIConnectionError, + // RateLimitError etc. have meaningful names. + const constructorName = + error instanceof Error && error.constructor.name !== 'Error' + ? error.constructor.name + : undefined; + + // .type is set by OpenAI SDK (e.g. "invalid_request_error") + const sdkType = (error as { type?: string }).type; + + const baseType = + constructorName ?? + sdkType ?? + (error instanceof Error ? error.name : 'unknown'); + + // For network errors, append the cause code (e.g. ECONNREFUSED, ETIMEDOUT) + const cause = error instanceof Error ? error.cause : undefined; + const causeCode = + cause && typeof cause === 'object' && 'code' in cause + ? (cause as { code?: string }).code + : undefined; + + return causeCode ? 
`${baseType}:${causeCode}` : baseType; +} + export class FatalError extends Error { constructor( message: string, diff --git a/packages/core/src/utils/quotaErrorDetection.test.ts b/packages/core/src/utils/quotaErrorDetection.test.ts index 01dccec24..0da986623 100644 --- a/packages/core/src/utils/quotaErrorDetection.test.ts +++ b/packages/core/src/utils/quotaErrorDetection.test.ts @@ -16,52 +16,55 @@ import { describe('quotaErrorDetection', () => { describe('isQwenQuotaExceededError', () => { - it('should detect insufficient_quota error message', () => { - const error = new Error('insufficient_quota'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect free allocated quota exceeded error message', () => { - const error = new Error('Free allocated quota exceeded.'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded error message', () => { - const error = new Error('quota exceeded'); - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in string error', () => { - const error = 'insufficient_quota'; - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in structured error', () => { - const error = { message: 'Free allocated quota exceeded.', status: 429 }; - expect(isQwenQuotaExceededError(error)).toBe(true); - }); - - it('should detect quota exceeded in API error', () => { - const error: ApiError = { - error: { - code: 429, - message: 'insufficient_quota', - status: 'RESOURCE_EXHAUSTED', - details: [], - }, + it('should detect the Qwen insufficient_quota error', () => { + const error = { + status: 429, + code: 'insufficient_quota', + message: 'Free allocated quota exceeded.', }; expect(isQwenQuotaExceededError(error)).toBe(true); }); - it('should not detect throttling errors as quota exceeded', () => { - const error = new Error('requests throttling triggered'); + it('should not match when status is not 
429', () => { + const error = { + status: 400, + code: 'insufficient_quota', + message: 'Free allocated quota exceeded.', + }; expect(isQwenQuotaExceededError(error)).toBe(false); }); - it('should not detect unrelated errors', () => { - const error = new Error('Network error'); + it('should not match temporary throttling (concurrency 429)', () => { + const error = { + status: 429, + code: 'rate_limit_exceeded', + message: 'Rate limit exceeded', + }; expect(isQwenQuotaExceededError(error)).toBe(false); }); + + it('should not match paid account quota exceeded', () => { + const error = { + status: 429, + code: 'insufficient_quota', + message: 'You exceeded your current quota.', + }; + expect(isQwenQuotaExceededError(error)).toBe(false); + }); + + it('should not match plain Error objects', () => { + const error = new Error('insufficient_quota'); + expect(isQwenQuotaExceededError(error)).toBe(false); + }); + + it('should not match string errors', () => { + expect(isQwenQuotaExceededError('insufficient_quota')).toBe(false); + }); + + it('should not match null or undefined', () => { + expect(isQwenQuotaExceededError(null)).toBe(false); + expect(isQwenQuotaExceededError(undefined)).toBe(false); + }); }); describe('isProQuotaExceededError', () => { diff --git a/packages/core/src/utils/quotaErrorDetection.ts b/packages/core/src/utils/quotaErrorDetection.ts index 1c8af9cd3..87e50aa98 100644 --- a/packages/core/src/utils/quotaErrorDetection.ts +++ b/packages/core/src/utils/quotaErrorDetection.ts @@ -100,27 +100,20 @@ export function isGenericQuotaExceededError(error: unknown): boolean { } export function isQwenQuotaExceededError(error: unknown): boolean { - // Check for Qwen insufficient quota errors (should not retry) - const checkMessage = (message: string): boolean => { - const lowerMessage = message.toLowerCase(); - return ( - lowerMessage.includes('insufficient_quota') || - lowerMessage.includes('free allocated quota exceeded') || - (lowerMessage.includes('quota') && 
lowerMessage.includes('exceeded')) - ); + // Match the specific Qwen free-tier quota error to distinguish it from + // temporary throttling (429 due to concurrency) or paid account quota limits. + if (typeof error !== 'object' || error === null) { + return false; + } + const { status, code, message } = error as { + status?: number; + code?: string; + message?: string; }; - - if (typeof error === 'string') { - return checkMessage(error); - } - - if (isStructuredError(error)) { - return checkMessage(error.message); - } - - if (isApiError(error)) { - return checkMessage(error.error.message); - } - - return false; + return ( + status === 429 && + code === 'insufficient_quota' && + typeof message === 'string' && + message.toLowerCase().includes('free allocated quota exceeded') + ); } diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index a628719a5..a0e269950 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -7,7 +7,8 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { HttpError } from './retry.js'; -import { getErrorStatus, retryWithBackoff } from './retry.js'; +import { retryWithBackoff } from './retry.js'; +import { getErrorStatus } from './errors.js'; import { setSimulate429 } from './testUtils.js'; import { AuthType } from '../core/contentGenerator.js'; @@ -312,7 +313,10 @@ describe('retryWithBackoff', () => { }); it('should throw immediately for Qwen OAuth with insufficient_quota message', async () => { - const errorWithInsufficientQuota = new Error('insufficient_quota'); + const errorWithInsufficientQuota = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, + ); const fn = vi.fn().mockRejectedValue(errorWithInsufficientQuota); @@ -330,8 +334,9 @@ describe('retryWithBackoff', () => { }); it('should throw immediately for Qwen 
OAuth with free allocated quota exceeded message', async () => { - const errorWithQuotaExceeded = new Error( - 'Free allocated quota exceeded.', + const errorWithQuotaExceeded = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, ); const fn = vi.fn().mockRejectedValue(errorWithQuotaExceeded); @@ -403,7 +408,10 @@ describe('retryWithBackoff', () => { }); it('should throw immediately for Qwen OAuth with quota message', async () => { - const errorWithQuota = new Error('quota exceeded'); + const errorWithQuota = Object.assign( + new Error('Free allocated quota exceeded.'), + { status: 429, code: 'insufficient_quota' }, + ); const fn = vi.fn().mockRejectedValue(errorWithQuota); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 5ce79f08f..e03a3d682 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -8,6 +8,7 @@ import type { GenerateContentResponse } from '@google/genai'; import { AuthType } from '../core/contentGenerator.js'; import { isQwenQuotaExceededError } from './quotaErrorDetection.js'; import { createDebugLogger } from './debugLogger.js'; +import { getErrorStatus } from './errors.js'; const debugLogger = createDebugLogger('RETRY'); @@ -151,38 +152,6 @@ export async function retryWithBackoff( throw new Error('Retry attempts exhausted'); } -/** - * Extracts the HTTP status code from an error object. - * - * Checks the following properties in order of priority: - * 1. `error.status` - OpenAI, Anthropic, Gemini SDK errors - * 2. `error.statusCode` - Some HTTP client libraries - * 3. `error.response.status` - Axios-style errors - * 4. `error.error.code` - Nested error objects - * - * @param error The error object. - * @returns The HTTP status code (100-599), or undefined if not found. 
- */ -export function getErrorStatus(error: unknown): number | undefined { - if (typeof error !== 'object' || error === null) { - return undefined; - } - - const err = error as { - status?: unknown; - statusCode?: unknown; - response?: { status?: unknown }; - error?: { code?: unknown }; - }; - - const value = - err.status ?? err.statusCode ?? err.response?.status ?? err.error?.code; - - return typeof value === 'number' && value >= 100 && value <= 599 - ? value - : undefined; -} - /** * Extracts the Retry-After delay from an error object's headers. * @param error The error object. diff --git a/packages/core/src/utils/truncation.ts b/packages/core/src/utils/truncation.ts index 47a21ef60..6672a1f83 100644 --- a/packages/core/src/utils/truncation.ts +++ b/packages/core/src/utils/truncation.ts @@ -6,7 +6,11 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; +import * as crypto from 'node:crypto'; import { ReadFileTool } from '../tools/read-file.js'; +import type { Config } from '../config/config.js'; +import { logToolOutputTruncated } from '../telemetry/loggers.js'; +import { ToolOutputTruncatedEvent } from '../telemetry/types.js'; /** * Truncates large tool output and saves the full content to a temp file. @@ -100,3 +104,50 @@ ${truncatedContent}`, }; } } + +/** + * High-level truncation helper that reads thresholds from Config, + * truncates if needed, saves full output to a temp file, and logs + * telemetry. Returns the (possibly truncated) content and an optional + * output file path. + * + * Callers no longer need to duplicate config extraction, file naming, + * or telemetry logging. 
+ */ +export async function truncateToolOutput( + config: Config, + toolName: string, + content: string, +): Promise<{ content: string; outputFile?: string }> { + const threshold = config.getTruncateToolOutputThreshold(); + const lines = config.getTruncateToolOutputLines(); + + if (threshold <= 0 || lines <= 0) { + return { content }; + } + + const originalLength = content.length; + const fileName = `${toolName}_${crypto.randomBytes(6).toString('hex')}`; + const result = await truncateAndSaveToFile( + content, + fileName, + config.storage.getProjectTempDir(), + threshold, + lines, + ); + + if (result.outputFile) { + logToolOutputTruncated( + config, + new ToolOutputTruncatedEvent('', { + toolName, + originalContentLength: originalLength, + truncatedContentLength: result.content.length, + threshold, + lines, + }), + ); + } + + return result; +} diff --git a/packages/sdk-typescript/README.md b/packages/sdk-typescript/README.md index 292a7550a..96e5db072 100644 --- a/packages/sdk-typescript/README.md +++ b/packages/sdk-typescript/README.md @@ -60,6 +60,7 @@ Creates a new query session with the Qwen Code. | `permissionMode` | `'default' \| 'plan' \| 'auto-edit' \| 'yolo'` | `'default'` | Permission mode controlling tool execution approval. See [Permission Modes](#permission-modes) for details. | | `canUseTool` | `CanUseTool` | - | Custom permission handler for tool execution approval. Invoked when a tool requires confirmation. Must respond within 60 seconds or the request will be auto-denied. See [Custom Permission Handler](#custom-permission-handler). | | `env` | `Record` | - | Environment variables to pass to the Qwen Code process. Merged with the current process environment. | +| `systemPrompt` | `string \| QuerySystemPromptPreset` | - | System prompt configuration for the main session. Use a string to fully override the built-in Qwen Code system prompt, or a preset object to keep the built-in prompt and append extra instructions. 
|
| `mcpServers` | `Record<string, McpServerConfig>` | - | MCP (Model Context Protocol) servers to connect. Supports external servers (stdio/SSE/HTTP) and SDK-embedded servers. External servers are configured with transport options like `command`, `args`, `url`, `httpUrl`, etc. SDK servers use `{ type: 'sdk', name: string, instance: Server }`. |
| `abortController` | `AbortController` | - | Controller to cancel the query session. Call `abortController.abort()` to terminate the session and cleanup resources. |
| `debug` | `boolean` | `false` | Enable debug mode for verbose logging from the CLI process. |
@@ -247,6 +248,36 @@ const result = query({
 });
 ```

+### Override the System Prompt
+
+```typescript
+import { query } from '@qwen-code/sdk';
+
+const result = query({
+  prompt: 'Say hello in one sentence.',
+  options: {
+    systemPrompt: 'You are a terse assistant. Answer in exactly one sentence.',
+  },
+});
+```
+
+### Append to the Built-in System Prompt
+
+```typescript
+import { query } from '@qwen-code/sdk';
+
+const result = query({
+  prompt: 'Review the current directory.',
+  options: {
+    systemPrompt: {
+      type: 'preset',
+      preset: 'qwen_code',
+      append: 'Be terse and focus on concrete findings.',
+    },
+  },
+});
+```
+
 ### With SDK-Embedded MCP Servers

 The SDK provides `tool` and `createSdkMcpServer` to create MCP servers that run in the same process as your SDK application. This is useful when you want to expose custom tools to the AI without running a separate server process.
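The README's two `systemPrompt` shapes above map onto the CLI flags added later in this diff (`--system-prompt` and `--append-system-prompt`). A minimal runnable sketch of that mapping, mirroring `resolveSystemPromptOption` from `createQuery.ts`; the `ResolvedPrompt` interface name is invented here for illustration:

```typescript
// Sketch of how a `systemPrompt` option resolves to CLI flags.
// Mirrors resolveSystemPromptOption in the createQuery.ts hunk of this diff;
// `ResolvedPrompt` is a hypothetical local name, not part of the SDK API.

interface QuerySystemPromptPreset {
  type: 'preset';
  preset: 'qwen_code';
  append?: string;
}
type QuerySystemPrompt = string | QuerySystemPromptPreset;

interface ResolvedPrompt {
  systemPrompt?: string; // forwarded as `--system-prompt` (full override)
  appendSystemPrompt?: string; // forwarded as `--append-system-prompt`
}

function resolveSystemPromptOption(
  systemPrompt: QuerySystemPrompt | undefined,
): ResolvedPrompt {
  if (!systemPrompt) {
    return {};
  }
  if (typeof systemPrompt === 'string') {
    // A plain string replaces the built-in Qwen Code prompt entirely.
    return { systemPrompt };
  }
  // A preset keeps the built-in prompt; `append` adds extra instructions.
  return systemPrompt.append
    ? { appendSystemPrompt: systemPrompt.append }
    : {};
}

console.log(resolveSystemPromptOption('Be terse.'));
console.log(
  resolveSystemPromptOption({
    type: 'preset',
    preset: 'qwen_code',
    append: 'Focus on concrete findings.',
  }),
);
```

Note the two branches are mutually exclusive: a string never produces `appendSystemPrompt`, and a preset never produces `systemPrompt`.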
diff --git a/packages/sdk-typescript/src/index.ts b/packages/sdk-typescript/src/index.ts index 4ae465975..805d03cfb 100644 --- a/packages/sdk-typescript/src/index.ts +++ b/packages/sdk-typescript/src/index.ts @@ -55,6 +55,8 @@ export type { PermissionMode, CanUseTool, PermissionResult, + QuerySystemPrompt, + QuerySystemPromptPreset, CLIMcpServerConfig, McpServerConfig, McpOAuthConfig, diff --git a/packages/sdk-typescript/src/query/createQuery.ts b/packages/sdk-typescript/src/query/createQuery.ts index 5ffcd1dda..42d332b15 100644 --- a/packages/sdk-typescript/src/query/createQuery.ts +++ b/packages/sdk-typescript/src/query/createQuery.ts @@ -7,7 +7,11 @@ import { serializeJsonLine } from '../utils/jsonLines.js'; import { ProcessTransport } from '../transport/ProcessTransport.js'; import { prepareSpawnInfo, type SpawnInfo } from '../utils/cliPath.js'; import { Query } from './Query.js'; -import type { QueryOptions } from '../types/types.js'; +import type { + QueryOptions, + QuerySystemPrompt, + TransportOptions, +} from '../types/types.js'; import { QueryOptionsSchema } from '../types/queryOptionsSchema.js'; import { SdkLogger } from '../utils/logger.js'; import { randomUUID } from 'node:crypto'; @@ -44,6 +48,7 @@ export function query({ // Generate or use provided session ID for SDK-CLI alignment const sessionId = options.resume ?? options.sessionId ?? 
randomUUID();
+  const resolvedSystemPrompt = resolveSystemPromptOption(options.systemPrompt);

   const transport = new ProcessTransport({
     pathToQwenExecutable,
@@ -52,6 +57,7 @@
     model: options.model,
     permissionMode: options.permissionMode,
     env: options.env,
+    ...resolvedSystemPrompt,
     abortController,
     debug: options.debug,
     stderr: options.stderr,
@@ -112,6 +118,20 @@
   return queryInstance;
 }

+function resolveSystemPromptOption(
+  systemPrompt: QuerySystemPrompt | undefined,
+): Pick<TransportOptions, 'systemPrompt' | 'appendSystemPrompt'> {
+  if (!systemPrompt) {
+    return {};
+  }
+
+  if (typeof systemPrompt === 'string') {
+    return { systemPrompt };
+  }
+
+  return systemPrompt.append ? { appendSystemPrompt: systemPrompt.append } : {};
+}
+
 function validateOptions(options: QueryOptions): SpawnInfo | undefined {
   const validationResult = QueryOptionsSchema.safeParse(options);
   if (!validationResult.success) {
diff --git a/packages/sdk-typescript/src/transport/ProcessTransport.ts b/packages/sdk-typescript/src/transport/ProcessTransport.ts
index a763a519c..fa55d0327 100644
--- a/packages/sdk-typescript/src/transport/ProcessTransport.ts
+++ b/packages/sdk-typescript/src/transport/ProcessTransport.ts
@@ -232,6 +232,14 @@ export class ProcessTransport implements Transport {
       args.push('--model', this.options.model);
     }

+    if (this.options.systemPrompt) {
+      args.push('--system-prompt', this.options.systemPrompt);
+    }
+
+    if (this.options.appendSystemPrompt) {
+      args.push('--append-system-prompt', this.options.appendSystemPrompt);
+    }
+
     if (this.options.permissionMode) {
       args.push('--approval-mode', this.options.permissionMode);
     }
diff --git a/packages/sdk-typescript/src/types/queryOptionsSchema.ts b/packages/sdk-typescript/src/types/queryOptionsSchema.ts
index 6781bb6dc..823bc7085 100644
--- a/packages/sdk-typescript/src/types/queryOptionsSchema.ts
+++ b/packages/sdk-typescript/src/types/queryOptionsSchema.ts
@@ -123,12 +123,29 @@ export const TimeoutConfigSchema = z.object({
   streamClose:
z.number().positive().optional(),
 });

+const QuerySystemPromptPresetSchema = z
+  .object({
+    type: z.literal('preset'),
+    preset: z.literal('qwen_code'),
+    append: z
+      .string()
+      .min(1, 'systemPrompt.append must be a non-empty string')
+      .optional(),
+  })
+  .strict();
+
 export const QueryOptionsSchema = z
   .object({
     cwd: z.string().optional(),
     model: z.string().optional(),
     pathToQwenExecutable: z.string().optional(),
     env: z.record(z.string(), z.string()).optional(),
+    systemPrompt: z
+      .union([
+        z.string().min(1, 'systemPrompt must be a non-empty string'),
+        QuerySystemPromptPresetSchema,
+      ])
+      .optional(),
     permissionMode: z.enum(['default', 'plan', 'auto-edit', 'yolo']).optional(),
     canUseTool: z
       .custom<CanUseTool>((val) => typeof val === 'function', {
diff --git a/packages/sdk-typescript/src/types/types.ts b/packages/sdk-typescript/src/types/types.ts
index e726f4a2c..b532adc8f 100644
--- a/packages/sdk-typescript/src/types/types.ts
+++ b/packages/sdk-typescript/src/types/types.ts
@@ -16,6 +16,8 @@ export type TransportOptions = {
   model?: string;
   permissionMode?: PermissionMode;
   env?: Record<string, string>;
+  systemPrompt?: string;
+  appendSystemPrompt?: string;
   abortController?: AbortController;
   debug?: boolean;
   stderr?: (message: string) => void;
@@ -46,6 +48,14 @@ export type TransportOptions = {
   sessionId?: string;
 };

+export interface QuerySystemPromptPreset {
+  type: 'preset';
+  preset: 'qwen_code';
+  append?: string;
+}
+
+export type QuerySystemPrompt = string | QuerySystemPromptPreset;
+
 type ToolInput = Record<string, unknown>;

 export type CanUseTool = (
@@ -226,6 +236,16 @@ export interface QueryOptions {
    */
   env?: Record<string, string>;

+  /**
+   * System prompt configuration for the Qwen CLI session.
+ * + * - `string`: fully overrides the main session system prompt + * - `{ type: 'preset', preset: 'qwen_code', append?: string }`: + * uses Qwen Code's built-in prompt as the base and optionally appends extra + * instructions for the main session + */ + systemPrompt?: QuerySystemPrompt; + /** * Permission mode controlling how the SDK handles tool execution approval. * diff --git a/packages/sdk-typescript/test/unit/ProcessTransport.test.ts b/packages/sdk-typescript/test/unit/ProcessTransport.test.ts index 327166528..b5e6c19c0 100644 --- a/packages/sdk-typescript/test/unit/ProcessTransport.test.ts +++ b/packages/sdk-typescript/test/unit/ProcessTransport.test.ts @@ -196,6 +196,84 @@ describe('ProcessTransport', () => { ); }); + it('should pass systemPrompt through --system-prompt', () => { + mockPrepareSpawnInfo.mockReturnValue({ + command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + systemPrompt: 'You are a test system prompt.', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining([ + '--system-prompt', + 'You are a test system prompt.', + ]), + expect.any(Object), + ); + }); + + it('should pass appendSystemPrompt through --append-system-prompt', () => { + mockPrepareSpawnInfo.mockReturnValue({ + command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + appendSystemPrompt: 'Be extra concise.', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining(['--append-system-prompt', 'Be extra concise.']), + expect.any(Object), + ); + }); + + it('should pass both systemPrompt and appendSystemPrompt when provided', () => { + mockPrepareSpawnInfo.mockReturnValue({ + 
command: 'qwen', + args: [], + type: 'native', + originalInput: 'qwen', + }); + mockSpawn.mockReturnValue(mockChildProcess); + + const options: TransportOptions = { + pathToQwenExecutable: 'qwen', + systemPrompt: 'Override prompt', + appendSystemPrompt: 'Append prompt', + }; + + new ProcessTransport(options); + + expect(mockSpawn).toHaveBeenCalledWith( + 'qwen', + expect.arrayContaining([ + '--system-prompt', + 'Override prompt', + '--append-system-prompt', + 'Append prompt', + ]), + expect.any(Object), + ); + }); + it('should include --resume argument when provided', () => { mockPrepareSpawnInfo.mockReturnValue({ command: 'qwen', diff --git a/packages/sdk-typescript/test/unit/createQuery.test.ts b/packages/sdk-typescript/test/unit/createQuery.test.ts new file mode 100644 index 000000000..66b48e938 --- /dev/null +++ b/packages/sdk-typescript/test/unit/createQuery.test.ts @@ -0,0 +1,97 @@ +/** + * Unit tests for query() option mapping + */ + +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import type { QueryOptions } from '../../src/query/createQuery.js'; + +const mockProcessTransport = vi.fn(); +const mockQuery = vi.fn(); +const mockPrepareSpawnInfo = vi.fn(); + +vi.mock('../../src/transport/ProcessTransport.js', () => ({ + ProcessTransport: mockProcessTransport, +})); + +vi.mock('../../src/query/Query.js', () => ({ + Query: mockQuery, +})); + +vi.mock('../../src/utils/cliPath.js', () => ({ + prepareSpawnInfo: mockPrepareSpawnInfo, +})); + +describe('query()', () => { + beforeEach(() => { + vi.clearAllMocks(); + + mockPrepareSpawnInfo.mockReturnValue(undefined); + mockProcessTransport.mockImplementation(() => ({ + write: vi.fn(), + readMessages: vi.fn(), + close: vi.fn(), + waitForExit: vi.fn(), + endInput: vi.fn(), + exitError: null, + })); + mockQuery.mockImplementation(() => ({ + initialized: Promise.resolve(), + getSessionId: () => 'test-session-id', + streamInput: vi.fn(), + })); + }); + + it('maps string systemPrompt to 
TransportOptions.systemPrompt', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + query({ + prompt: 'hello', + options: { + systemPrompt: 'You are a strict reviewer.', + } satisfies QueryOptions, + }); + + expect(mockProcessTransport).toHaveBeenCalledWith( + expect.objectContaining({ + systemPrompt: 'You are a strict reviewer.', + }), + ); + }); + + it('maps preset systemPrompt append to TransportOptions.appendSystemPrompt', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + query({ + prompt: 'hello', + options: { + systemPrompt: { + type: 'preset', + preset: 'qwen_code', + append: 'Be terse.', + }, + } satisfies QueryOptions, + }); + + const transportOptions = mockProcessTransport.mock.calls[0]?.[0]; + + expect(transportOptions.appendSystemPrompt).toBe('Be terse.'); + expect(transportOptions.systemPrompt).toBeUndefined(); + }); + + it('rejects non-qwen preset names at runtime validation', async () => { + const { query } = await import('../../src/query/createQuery.js'); + + expect(() => + query({ + prompt: 'hello', + options: { + systemPrompt: { + type: 'preset', + preset: 'claude_code', + append: 'Be terse.', + } as never, + } satisfies QueryOptions, + }), + ).toThrow(/systemPrompt/); + }); +}); diff --git a/packages/test-utils/package.json b/packages/test-utils/package.json index ec41df8fb..d4d5c1d85 100644 --- a/packages/test-utils/package.json +++ b/packages/test-utils/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/qwen-code-test-utils", - "version": "0.12.5", + "version": "0.13.0", "private": true, "main": "src/index.ts", "license": "Apache-2.0", diff --git a/packages/vscode-ide-companion/package.json b/packages/vscode-ide-companion/package.json index f344a48e0..a7c18ab4b 100644 --- a/packages/vscode-ide-companion/package.json +++ b/packages/vscode-ide-companion/package.json @@ -2,7 +2,7 @@ "name": "qwen-code-vscode-ide-companion", "displayName": "Qwen Code Companion", "description": 
"Enable Qwen Code with direct access to your VS Code workspace.", - "version": "0.12.5", + "version": "0.13.0", "publisher": "qwenlm", "icon": "assets/icon.png", "repository": { diff --git a/packages/web-templates/package.json b/packages/web-templates/package.json index 3cb48bdc4..fbedb34d0 100644 --- a/packages/web-templates/package.json +++ b/packages/web-templates/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/web-templates", - "version": "0.12.5", + "version": "0.13.0", "description": "Web templates bundled as embeddable JS/CSS strings", "repository": { "type": "git", diff --git a/packages/webui/package.json b/packages/webui/package.json index 1f1df2e72..da5a463ab 100644 --- a/packages/webui/package.json +++ b/packages/webui/package.json @@ -1,6 +1,6 @@ { "name": "@qwen-code/webui", - "version": "0.12.5", + "version": "0.13.0", "description": "Shared UI components for Qwen Code packages", "type": "module", "main": "./dist/index.cjs",