diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
index a1fce9231..7aee9b928 100644
--- a/docs/users/configuration/settings.md
+++ b/docs/users/configuration/settings.md
@@ -553,25 +553,26 @@ For authentication-related variables (like `OPENAI_*`) and the recommended `.qwe
 
 ### Environment Variables Table
 
-| Variable                       | Description                                                                                                                                                                                                                                                                    | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
-| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `QWEN_TELEMETRY_ENABLED`       | Set to `true` or `1` to enable telemetry. Any other value is treated as disabling it.                                                                                                                                                                                          | Overrides the `telemetry.enabled` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
-| `QWEN_TELEMETRY_TARGET`        | Sets the telemetry target (`local` or `gcp`).                                                                                                                                                                                                                                  | Overrides the `telemetry.target` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| `QWEN_TELEMETRY_OTLP_ENDPOINT` | Sets the OTLP endpoint for telemetry.                                                                                                                                                                                                                                          | Overrides the `telemetry.otlpEndpoint` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| `QWEN_TELEMETRY_OTLP_PROTOCOL` | Sets the OTLP protocol (`grpc` or `http`).                                                                                                                                                                                                                                     | Overrides the `telemetry.otlpProtocol` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| `QWEN_TELEMETRY_LOG_PROMPTS`   | Set to `true` or `1` to enable or disable logging of user prompts. Any other value is treated as disabling it.                                                                                                                                                                 | Overrides the `telemetry.logPrompts` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| `QWEN_TELEMETRY_OUTFILE`       | Sets the file path to write telemetry to when the target is `local`.                                                                                                                                                                                                           | Overrides the `telemetry.outfile` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
-| `QWEN_TELEMETRY_USE_COLLECTOR` | Set to `true` or `1` to enable or disable using an external OTLP collector. Any other value is treated as disabling it.                                                                                                                                                        | Overrides the `telemetry.useCollector` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| `QWEN_SANDBOX`                 | Alternative to the `sandbox` setting in `settings.json`.                                                                                                                                                                                                                       | Accepts `true`, `false`, `docker`, `podman`, or a custom command string.                                                                                                                                                                                                                                                                                                                                                                                                           |
-| `QWEN_SANDBOX_IMAGE`           | Overrides sandbox image selection for Docker/Podman.                                                                                                                                                                                                                           | Takes precedence over `tools.sandboxImage`.                                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| `SEATBELT_PROFILE`             | (macOS specific) Switches the Seatbelt (`sandbox-exec`) profile on macOS.                                                                                                                                                                                                      | `permissive-open`: (Default) Restricts writes to the project folder (and a few other folders, see `packages/cli/src/utils/sandbox-macos-permissive-open.sb`) but allows other operations. `strict`: Uses a strict profile that declines operations by default. `<profile_name>`: Uses a custom profile. To define a custom profile, create a file named `sandbox-macos-<profile_name>.sb` in your project's `.qwen/` directory (e.g., `my-project/.qwen/sandbox-macos-custom.sb`). |
-| `DEBUG` or `DEBUG_MODE`        | (often used by underlying libraries or the CLI itself) Set to `true` or `1` to enable verbose debug logging, which can be helpful for troubleshooting.                                                                                                                         | **Note:** These variables are automatically excluded from project `.env` files by default to prevent interference with the CLI behavior. Use `.qwen/.env` files if you need to set these for Qwen Code specifically.                                                                                                                                                                                                                                                               |
-| `NO_COLOR`                     | Set to any value to disable all color output in the CLI.                                                                                                                                                                                                                       |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| `CLI_TITLE`                    | Set to a string to customize the title of the CLI.                                                                                                                                                                                                                             |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
-| `CODE_ASSIST_ENDPOINT`         | Specifies the endpoint for the code assist server.                                                                                                                                                                                                                             | This is useful for development and testing.                                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| `QWEN_CODE_MAX_OUTPUT_TOKENS`  | Overrides the default maximum output tokens per response. When not set, Qwen Code uses an adaptive strategy: starts with 8K tokens and automatically retries with 64K if the response is truncated. Set this to a specific value (e.g., `16000`) to use a fixed limit instead. | Takes precedence over the capped default (8K) but is overridden by `samplingParams.max_tokens` in settings. Disables automatic escalation when set. Example: `export QWEN_CODE_MAX_OUTPUT_TOKENS=16000`                                                                                                                                                                                                                                                                            |
-| `TAVILY_API_KEY`               | Your API key for the Tavily web search service.                                                                                                                                                                                                                                | Used to enable the `web_search` tool functionality. Example: `export TAVILY_API_KEY="tvly-your-api-key-here"`                                                                                                                                                                                                                                                                                                                                                                      |
-| `QWEN_CODE_PROFILE_STARTUP`    | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations.                                                                                                                                           | Only active inside the sandbox child process. Zero overhead when not set. Example: `export QWEN_CODE_PROFILE_STARTUP=1`                                                                                                                                                                                                                                                                                                                                                            |
+| Variable                       | Description                                                                                                                                                                                                                                                                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
+| ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `QWEN_TELEMETRY_ENABLED`       | Set to `true` or `1` to enable telemetry. Any other value is treated as disabling it.                                                                                                                                                                                             | Overrides the `telemetry.enabled` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| `QWEN_TELEMETRY_TARGET`        | Sets the telemetry target (`local` or `gcp`).                                                                                                                                                                                                                                     | Overrides the `telemetry.target` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| `QWEN_TELEMETRY_OTLP_ENDPOINT` | Sets the OTLP endpoint for telemetry.                                                                                                                                                                                                                                             | Overrides the `telemetry.otlpEndpoint` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| `QWEN_TELEMETRY_OTLP_PROTOCOL` | Sets the OTLP protocol (`grpc` or `http`).                                                                                                                                                                                                                                        | Overrides the `telemetry.otlpProtocol` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| `QWEN_TELEMETRY_LOG_PROMPTS`   | Set to `true` or `1` to enable or disable logging of user prompts. Any other value is treated as disabling it.                                                                                                                                                                    | Overrides the `telemetry.logPrompts` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| `QWEN_TELEMETRY_OUTFILE`       | Sets the file path to write telemetry to when the target is `local`.                                                                                                                                                                                                              | Overrides the `telemetry.outfile` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| `QWEN_TELEMETRY_USE_COLLECTOR` | Set to `true` or `1` to enable or disable using an external OTLP collector. Any other value is treated as disabling it.                                                                                                                                                           | Overrides the `telemetry.useCollector` setting.                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| `QWEN_SANDBOX`                 | Alternative to the `sandbox` setting in `settings.json`.                                                                                                                                                                                                                          | Accepts `true`, `false`, `docker`, `podman`, or a custom command string.                                                                                                                                                                                                                                                                                                                                                                                                           |
+| `QWEN_SANDBOX_IMAGE`           | Overrides sandbox image selection for Docker/Podman.                                                                                                                                                                                                                              | Takes precedence over `tools.sandboxImage`.                                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `SEATBELT_PROFILE`             | (macOS specific) Switches the Seatbelt (`sandbox-exec`) profile on macOS.                                                                                                                                                                                                         | `permissive-open`: (Default) Restricts writes to the project folder (and a few other folders, see `packages/cli/src/utils/sandbox-macos-permissive-open.sb`) but allows other operations. `strict`: Uses a strict profile that declines operations by default. `<profile_name>`: Uses a custom profile. To define a custom profile, create a file named `sandbox-macos-<profile_name>.sb` in your project's `.qwen/` directory (e.g., `my-project/.qwen/sandbox-macos-custom.sb`). |
+| `DEBUG` or `DEBUG_MODE`        | (often used by underlying libraries or the CLI itself) Set to `true` or `1` to enable verbose debug logging, which can be helpful for troubleshooting.                                                                                                                            | **Note:** These variables are automatically excluded from project `.env` files by default to prevent interference with the CLI behavior. Use `.qwen/.env` files if you need to set these for Qwen Code specifically.                                                                                                                                                                                                                                                               |
+| `NO_COLOR`                     | Set to any value to disable all color output in the CLI.                                                                                                                                                                                                                          |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| `CLI_TITLE`                    | Set to a string to customize the title of the CLI.                                                                                                                                                                                                                                |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| `CODE_ASSIST_ENDPOINT`         | Specifies the endpoint for the code assist server.                                                                                                                                                                                                                                | This is useful for development and testing.                                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `QWEN_CODE_MAX_OUTPUT_TOKENS`  | Overrides the default maximum output tokens per response. When not set, Qwen Code uses an adaptive strategy: starts with 8K tokens and automatically retries with 64K if the response is truncated. Set this to a specific value (e.g., `16000`) to use a fixed limit instead.    | Takes precedence over the capped default (8K) but is overridden by `samplingParams.max_tokens` in settings. Disables automatic escalation when set. Example: `export QWEN_CODE_MAX_OUTPUT_TOKENS=16000`                                                                                                                                                                                                                                                                            |
+| `TAVILY_API_KEY`               | Your API key for the Tavily web search service.                                                                                                                                                                                                                                   | Used to enable the `web_search` tool functionality. Example: `export TAVILY_API_KEY="tvly-your-api-key-here"`                                                                                                                                                                                                                                                                                                                                                                      |
+| `QWEN_CODE_UNATTENDED_RETRY`   | Set to `true` or `1` to enable persistent retry mode. When enabled, transient API capacity errors (HTTP 429 Rate Limit and 529 Overloaded) are retried indefinitely with exponential backoff (capped at 5 minutes per retry) and heartbeat keepalives every 30 seconds on stderr. | Designed for CI/CD pipelines and background automation where long-running tasks should survive temporary API outages. Must be set explicitly — `CI=true` alone does **not** activate this mode. See [Headless Mode](../features/headless#persistent-retry-mode) for details. Example: `export QWEN_CODE_UNATTENDED_RETRY=1`                                                                                                                                                        |
+| `QWEN_CODE_PROFILE_STARTUP`    | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations.                                                                                                                                              | Only active inside the sandbox child process. Zero overhead when not set. Example: `export QWEN_CODE_PROFILE_STARTUP=1`                                                                                                                                                                                                                                                                                                                                                            |
 
 ## Command-Line Arguments
 
diff --git a/docs/users/features/headless.md b/docs/users/features/headless.md
index 12172f121..e6e0492d5 100644
--- a/docs/users/features/headless.md
+++ b/docs/users/features/headless.md
@@ -310,6 +310,67 @@ echo "Recent usage trends:"
 tail -5 usage.log
 ```
 
+## Persistent Retry Mode
+
+When Qwen Code runs in CI/CD pipelines or as a background daemon, a brief API outage (rate limiting or overload) should not kill a multi-hour task. **Persistent retry mode** makes Qwen Code retry transient API errors indefinitely until the service recovers.
+
+### How it works
+
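+- **Transient errors only**: HTTP 429 (Rate Limit) and 529 (Overloaded) are retried indefinitely. Other errors (400, 500, etc.) are handled exactly as they are without this mode: they get at most the normal, bounded number of retry attempts and then fail.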
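+- **Exponential backoff with cap**: Retry delays grow exponentially but are capped at **5 minutes** per retry. If a 429 response includes a `Retry-After` header, that server-specified wait is used instead and is capped only at 6 hours.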
+- **Heartbeat keepalive**: During long waits, a status line is printed to stderr every **30 seconds** to prevent CI runners from killing the process due to inactivity.
+- **Graceful degradation**: Non-transient errors and interactive mode are completely unaffected.
+
+### Activation
+
+Set the `QWEN_CODE_UNATTENDED_RETRY` environment variable to `true` or `1` (strict match, case-sensitive):
+
+```bash
+export QWEN_CODE_UNATTENDED_RETRY=1
+```
+
+> [!important]
+> Persistent retry requires an **explicit opt-in**. `CI=true` alone does **not** activate it — silently turning a fast-fail CI job into an infinite-wait job would be dangerous. Always set `QWEN_CODE_UNATTENDED_RETRY` explicitly in your pipeline configuration.
+
+### Examples
+
+#### GitHub Actions
+
+```yaml
+- name: Automated code review
+  env:
+    QWEN_CODE_UNATTENDED_RETRY: '1'
+  run: |
+    qwen -p "Review all files in src/ for security issues" \
+      --output-format json \
+      --yolo > review.json
+```
+
+#### Overnight batch processing
+
+```bash
+export QWEN_CODE_UNATTENDED_RETRY=1
+qwen -p "Migrate all callback-style functions to async/await in src/" --yolo
+```
+
+#### Background daemon
+
+```bash
+QWEN_CODE_UNATTENDED_RETRY=1 nohup qwen -p "Audit all dependencies for known CVEs" \
+  --output-format json > audit.json 2> audit.log &
+```
+
+### Monitoring
+
+During persistent retry, heartbeat messages are printed to **stderr**:
+
+```text
+[qwen-code] Waiting for API capacity... attempt 3, retry in 45s
+[qwen-code] Waiting for API capacity... attempt 3, retry in 15s
+```
+
+These messages keep CI runners alive and let you monitor progress. They do not appear in stdout, so JSON output piped to other tools remains clean.
+
 ## Resources
 
 - [CLI Configuration](../configuration/settings#command-line-arguments) - Complete configuration guide
diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts
index df8d82cf9..a760a33ca 100644
--- a/packages/core/src/core/baseLlmClient.test.ts
+++ b/packages/core/src/core/baseLlmClient.test.ts
@@ -39,6 +39,7 @@ vi.mock('../utils/generateContentResponseUtilities.js', () => ({
 
 vi.mock('../utils/retry.js', () => ({
   retryWithBackoff: vi.fn(async (fn) => await fn()),
+  isUnattendedMode: vi.fn(() => false),
 }));
 
 const mockGenerateContent = vi.fn();
diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts
index 53df44fa5..a7ac0402d 100644
--- a/packages/core/src/core/baseLlmClient.ts
+++ b/packages/core/src/core/baseLlmClient.ts
@@ -17,7 +17,7 @@ import type { Config } from '../config/config.js';
 import type { ContentGenerator } from './contentGenerator.js';
 import { reportError } from '../utils/errorReporting.js';
 import { getErrorMessage } from '../utils/errors.js';
-import { retryWithBackoff } from '../utils/retry.js';
+import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js';
 import { getFunctionCalls } from '../utils/generateContentResponseUtilities.js';
 
 const DEFAULT_MAX_ATTEMPTS = 7;
@@ -117,6 +117,13 @@ export class BaseLlmClient {
 
       const result = await retryWithBackoff(apiCall, {
         maxAttempts: maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
+        persistentMode: isUnattendedMode(),
+        signal: abortSignal,
+        heartbeatFn: (info) => {
+          process.stderr.write(
+            `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`,
+          );
+        },
       });
 
       const functionCalls = getFunctionCalls(result);
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index c58b3db73..8c648f59d 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -77,7 +77,7 @@ import { getErrorMessage } from '../utils/errors.js';
 import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
 import { flatMapTextParts } from '../utils/partUtils.js';
 import { promptIdContext } from '../utils/promptIdContext.js';
-import { retryWithBackoff } from '../utils/retry.js';
+import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js';
 
 // Hook types and utilities
 import {
@@ -1137,6 +1137,13 @@ export class GeminiClient {
       };
       const result = await retryWithBackoff(apiCall, {
         authType: this.config.getContentGeneratorConfig()?.authType,
+        persistentMode: isUnattendedMode(),
+        signal: abortSignal,
+        heartbeatFn: (info) => {
+          process.stderr.write(
+            `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`,
+          );
+        },
       });
       return result;
     } catch (error: unknown) {
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 932fc7301..ff76eb5c6 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -17,7 +17,7 @@ import type {
   GenerateContentResponseUsageMetadata,
 } from '@google/genai';
 import { createUserContent, FinishReason } from '@google/genai';
-import { retryWithBackoff } from '../utils/retry.js';
+import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js';
 import { getErrorStatus } from '../utils/errors.js';
 import { createDebugLogger } from '../utils/debugLogger.js';
 import { parseAndFormatApiError } from '../utils/errorParsing.js';
@@ -726,6 +726,13 @@ export class GeminiChat {
         return false;
       },
       authType: this.config.getContentGeneratorConfig()?.authType,
+      persistentMode: isUnattendedMode(),
+      signal: params.config?.abortSignal,
+      heartbeatFn: (info) => {
+        process.stderr.write(
+          `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`,
+        );
+      },
     });
 
     return this.processStreamResponse(model, streamResponse);
diff --git a/packages/core/src/utils/editor.test.ts b/packages/core/src/utils/editor.test.ts
index 851336941..14a88686f 100644
--- a/packages/core/src/utils/editor.test.ts
+++ b/packages/core/src/utils/editor.test.ts
@@ -462,7 +462,9 @@ describe('editor utils', () => {
           throw new Error(); // CLI not found
         });
         // Accept any path containing Zed.app
-        (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app'));
+        (existsSync as Mock).mockImplementation((path: string) =>
+          path.includes('Zed.app'),
+        );
 
         const mockSpawnOn = vi.fn((event, cb) => {
           if (event === 'close') {
@@ -716,7 +718,9 @@ describe('editor utils', () => {
           throw new Error(); // CLI not found
         });
         // Accept any path containing Zed.app (the CLI check will be for Contents/MacOS/cli)
-        (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app'));
+        (existsSync as Mock).mockImplementation((path: string) =>
+          path.includes('Zed.app'),
+        );
 
         const diffCommand = getDiffCommand('old.txt', 'new.txt', 'zed');
         expect(diffCommand).not.toBeNull();
@@ -759,7 +763,9 @@ describe('editor utils', () => {
           throw new Error(); // CLI not found
         });
         // Accept any path containing Zed.app
-        (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app'));
+        (existsSync as Mock).mockImplementation((path: string) =>
+          path.includes('Zed.app'),
+        );
 
         const diffCommand = getDiffCommand('old.txt', 'new.txt', 'zed');
         expect(diffCommand).not.toBeNull();
diff --git a/packages/core/src/utils/editor.ts b/packages/core/src/utils/editor.ts
index d5d22623a..c19963169 100644
--- a/packages/core/src/utils/editor.ts
+++ b/packages/core/src/utils/editor.ts
@@ -125,7 +125,7 @@ export function getEditorExecutable(editorType: EditorType): string | null {
     return found;
   }
 
-    // Special handling for Zed on macOS: check app bundle CLI as fallback
+  // Special handling for Zed on macOS: check app bundle CLI as fallback
   if (editorType === 'zed' && process.platform === 'darwin') {
     for (const appPath of getZedAppPaths()) {
       const cliPath = join(appPath, 'Contents/MacOS/cli');
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index d9aaa5ba1..0fd247860 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -5,9 +5,21 @@
  */
 
 /* eslint-disable @typescript-eslint/no-explicit-any */
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  describe,
+  it,
+  expect,
+  vi,
+  beforeEach,
+  afterEach,
+  afterAll,
+} from 'vitest';
 import type { HttpError } from './retry.js';
-import { retryWithBackoff } from './retry.js';
+import {
+  retryWithBackoff,
+  isTransientCapacityError,
+  isUnattendedMode,
+} from './retry.js';
 import { getErrorStatus } from './errors.js';
 import { setSimulate429 } from './testUtils.js';
 import { AuthType } from '../core/contentGenerator.js';
@@ -462,6 +474,447 @@ describe('retryWithBackoff', () => {
   });
 });
 
+describe('isTransientCapacityError', () => {
+  it('should return true for 429 errors', () => {
+    const error = { status: 429 };
+    expect(isTransientCapacityError(error)).toBe(true);
+  });
+
+  it('should return true for 529 errors', () => {
+    const error = { status: 529 };
+    expect(isTransientCapacityError(error)).toBe(true);
+  });
+
+  it('should return false for 500 errors', () => {
+    const error = { status: 500 };
+    expect(isTransientCapacityError(error)).toBe(false);
+  });
+
+  it('should return false for 400 errors', () => {
+    const error = { status: 400 };
+    expect(isTransientCapacityError(error)).toBe(false);
+  });
+
+  it('should return false for errors without status', () => {
+    expect(isTransientCapacityError(new Error('generic'))).toBe(false);
+    expect(isTransientCapacityError(null)).toBe(false);
+  });
+});
+
+describe('isUnattendedMode', () => {
+  const originalEnv = process.env;
+
+  beforeEach(() => {
+    process.env = { ...originalEnv };
+    delete process.env['QWEN_CODE_UNATTENDED_RETRY'];
+  });
+
+  afterAll(() => {
+    process.env = originalEnv;
+  });
+
+  it('should return true when QWEN_CODE_UNATTENDED_RETRY=1', () => {
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = '1';
+    expect(isUnattendedMode()).toBe(true);
+  });
+
+  it('should return true when QWEN_CODE_UNATTENDED_RETRY=true', () => {
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'true';
+    expect(isUnattendedMode()).toBe(true);
+  });
+
+  it('should return false when no env vars are set', () => {
+    expect(isUnattendedMode()).toBe(false);
+  });
+
+  it('should NOT activate on CI=true alone', () => {
+    process.env['CI'] = 'true';
+    expect(isUnattendedMode()).toBe(false);
+  });
+
+  it('should return false for non-matching values', () => {
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = '0';
+    expect(isUnattendedMode()).toBe(false);
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'false';
+    expect(isUnattendedMode()).toBe(false);
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = '';
+    expect(isUnattendedMode()).toBe(false);
+  });
+
+  it('should use strict matching consistent with parseBooleanEnvFlag', () => {
+    // Only 'true' and '1' are accepted — matches project convention
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'TRUE';
+    expect(isUnattendedMode()).toBe(false); // strict: not 'true'
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = ' 1 ';
+    expect(isUnattendedMode()).toBe(false); // strict: not '1'
+    process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'yes';
+    expect(isUnattendedMode()).toBe(false);
+  });
+});
+
+describe('retryWithBackoff - persistent mode', () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+    setSimulate429(false);
+    console.warn = vi.fn();
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    vi.useRealTimers();
+  });
+
+  it('should retry indefinitely for 429 errors in persistent mode', async () => {
+    // Fail 10 times with 429, then succeed
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 10) {
+        const error: HttpError = new Error('Rate limited');
+        error.status = 429;
+        throw error;
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3, // Would normally fail after 3
+      initialDelayMs: 10,
+      persistentMode: true,
+    });
+
+    await vi.runAllTimersAsync();
+    const result = await promise;
+
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(11); // 10 failures + 1 success
+  });
+
+  it('should retry indefinitely for 529 errors in persistent mode', async () => {
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 8) {
+        const error: HttpError = new Error('Overloaded');
+        error.status = 529;
+        throw error;
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10,
+      persistentMode: true,
+    });
+
+    await vi.runAllTimersAsync();
+    const result = await promise;
+
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(9);
+  });
+
+  it('should NOT retry indefinitely for 500 errors in persistent mode', async () => {
+    const fn = vi.fn(async () => {
+      const error: HttpError = new Error('Internal Server Error');
+      error.status = 500;
+      throw error;
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10,
+      persistentMode: true,
+    });
+
+    // eslint-disable-next-line vitest/valid-expect
+    const assertionPromise = expect(promise).rejects.toThrow(
+      'Internal Server Error',
+    );
+    await vi.runAllTimersAsync();
+    await assertionPromise;
+
+    // Should stop at maxAttempts for non-transient errors
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('should cap single retry backoff at persistentMaxBackoffMs', async () => {
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 20) {
+        const error: HttpError = new Error('Rate limited');
+        error.status = 429;
+        throw error;
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      persistentMode: true,
+      persistentMaxBackoffMs: 5000, // 5 seconds cap for test
+    });
+
+    await vi.runAllTimersAsync();
+    await promise;
+
+    // Jitter is re-capped, so no delay should exceed the cap itself
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    for (const d of delays) {
+      expect(d).toBeLessThanOrEqual(5000 + 1); // cap + rounding tolerance
+    }
+  });
+
+  it('should call heartbeatFn during persistent retry waits', async () => {
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 2) {
+        const error: HttpError = new Error('Rate limited');
+        error.status = 429;
+        throw error;
+      }
+      return 'success';
+    });
+
+    const heartbeatFn = vi.fn();
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      persistentMode: true,
+      heartbeatIntervalMs: 30, // Short interval for test
+      heartbeatFn,
+    });
+
+    await vi.runAllTimersAsync();
+    await promise;
+
+    // Heartbeat should have been called at least once during waits > heartbeatInterval
+    expect(heartbeatFn).toHaveBeenCalled();
+    // Verify heartbeat info structure
+    const call = heartbeatFn.mock.calls[0][0];
+    expect(call).toHaveProperty('attempt');
+    expect(call).toHaveProperty('remainingMs');
+    expect(call).toHaveProperty('error');
+  });
+
+  it('should abort persistent retry when signal is aborted', async () => {
+    const controller = new AbortController();
+    const fn = vi.fn(async () => {
+      const error: HttpError = new Error('Rate limited');
+      error.status = 429;
+      throw error;
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10000, // Long delay so abort happens during sleep
+      persistentMode: true,
+      heartbeatIntervalMs: 50,
+      signal: controller.signal,
+    });
+
+    // Abort after the first retry starts waiting
+    setTimeout(() => controller.abort(), 100);
+
+    // eslint-disable-next-line vitest/valid-expect
+    const assertionPromise = expect(promise).rejects.toThrow(
+      'Retry aborted by signal',
+    );
+    await vi.runAllTimersAsync();
+    await assertionPromise;
+  });
+
+  it('should respect shouldRetryOnError even in persistent mode', async () => {
+    // Caller explicitly says "don't retry 429" — persistent mode must obey
+    const fn = vi.fn(async () => {
+      const error: HttpError = new Error('Rate limited');
+      error.status = 429;
+      throw error;
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10,
+      persistentMode: true,
+      shouldRetryOnError: () => false, // force fast-fail
+    });
+
+    // eslint-disable-next-line vitest/valid-expect
+    const assertionPromise = expect(promise).rejects.toThrow('Rate limited');
+    await vi.runAllTimersAsync();
+    await assertionPromise;
+
+    // Should fail on first attempt — shouldRetryOnError trumps persistent mode
+    expect(fn).toHaveBeenCalledTimes(1);
+  });
+
+  it('should not infinite-loop when heartbeatIntervalMs is 0', async () => {
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 2) {
+        const error: HttpError = new Error('Rate limited');
+        error.status = 429;
+        throw error;
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10,
+      persistentMode: true,
+      heartbeatIntervalMs: 0, // Would cause infinite loop without Math.max(1, ...)
+    });
+
+    await vi.runAllTimersAsync();
+    const result = await promise;
+
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+
+  it('should not affect normal mode behavior when persistentMode is false', async () => {
+    const fn = vi.fn(async () => {
+      const error: HttpError = new Error('Rate limited');
+      error.status = 429;
+      throw error;
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 10,
+      persistentMode: false,
+    });
+
+    // eslint-disable-next-line vitest/valid-expect
+    const assertionPromise = expect(promise).rejects.toThrow('Rate limited');
+    await vi.runAllTimersAsync();
+    await assertionPromise;
+
+    expect(fn).toHaveBeenCalledTimes(3);
+  });
+});
+
+describe('retryWithBackoff - Retry-After handling in persistent mode', () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+    setSimulate429(false);
+    console.warn = vi.fn();
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    vi.useRealTimers();
+  });
+
+  // Helper: create a 429 error with Retry-After header
+  function make429WithRetryAfter(seconds: number): HttpError {
+    const error: HttpError & { response: { headers: Record<string, string> } } =
+      Object.assign(new Error('Rate limited'), {
+        status: 429,
+        response: { headers: { 'retry-after': String(seconds) } },
+      });
+    return error;
+  }
+
+  it('should respect Retry-After and NOT cap at maxBackoff', async () => {
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 1) {
+        throw make429WithRetryAfter(600); // server says wait 10 minutes
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      persistentMode: true,
+      persistentMaxBackoffMs: 5000, // 5 seconds — Retry-After must NOT be capped to this
+    });
+
+    await vi.runAllTimersAsync();
+    await promise;
+
+    // The wait is slept in heartbeat-sized chunks, so sum them: ~600s, not 5s
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    const totalWaitMs = delays.reduce((sum, d) => sum + d, 0);
+    expect(totalWaitMs).toBeGreaterThan(5000); // NOT capped at maxBackoff
+    expect(totalWaitMs).toBeLessThanOrEqual(600 * 1000); // respects server value
+  });
+
+  it('should cap Retry-After at persistentCapMs', async () => {
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+    let attempts = 0;
+    const fn = vi.fn(async () => {
+      attempts++;
+      if (attempts <= 1) {
+        throw make429WithRetryAfter(100); // server says wait 100s
+      }
+      return 'success';
+    });
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      persistentMode: true,
+      persistentCapMs: 50_000, // absolute cap 50s — less than Retry-After
+    });
+
+    await vi.runAllTimersAsync();
+    await promise;
+
+    // Sum all sleep chunks: the total wait must be capped at 50s, not 100s
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    const totalWaitMs = delays.reduce((sum, d) => sum + d, 0);
+    expect(totalWaitMs).toBeLessThanOrEqual(50_000 + 1);
+  });
+
+  it('should NOT add jitter to Retry-After delays', async () => {
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+    // Run multiple times to check for jitter variance
+    const observedDelays: number[] = [];
+
+    for (let run = 0; run < 5; run++) {
+      setTimeoutSpy.mockClear();
+      let attempts = 0;
+      const fn = vi.fn(async () => {
+        attempts++;
+        if (attempts <= 1) {
+          throw make429WithRetryAfter(10); // 10 seconds
+        }
+        return 'success';
+      });
+
+      const promise = retryWithBackoff(fn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        persistentMode: true,
+      });
+
+      await vi.runAllTimersAsync();
+      await promise;
+
+      const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+      observedDelays.push(delays[0]);
+    }
+
+    // All delays should be exactly 10000ms — no jitter
+    for (const d of observedDelays) {
+      expect(d).toBe(10_000);
+    }
+  });
+});
+
 describe('getErrorStatus', () => {
   it('should extract status from error.status (OpenAI/Anthropic/Gemini style)', () => {
     expect(getErrorStatus({ status: 429 })).toBe(429);
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index d1c3e7ba6..54b0b6db9 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -12,10 +12,21 @@ import { getErrorStatus } from './errors.js';
 
 const debugLogger = createDebugLogger('RETRY');
 
+// Persistent retry mode constants
+const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000; // 5 minutes — single retry backoff cap
+const PERSISTENT_CAP_MS = 6 * 60 * 60 * 1000; // 6 hours — absolute single wait cap
+const HEARTBEAT_INTERVAL_MS = 30_000; // 30 seconds
+
 export interface HttpError extends Error {
   status?: number;
 }
 
+export interface HeartbeatInfo {
+  attempt: number;
+  remainingMs: number;
+  error: unknown;
+}
+
 export interface RetryOptions {
   maxAttempts: number;
   initialDelayMs: number;
@@ -23,6 +34,13 @@ export interface RetryOptions {
   shouldRetryOnError: (error: Error) => boolean;
   shouldRetryOnContent?: (content: GenerateContentResponse) => boolean;
   authType?: string;
+  // Persistent retry mode options
+  persistentMode?: boolean;
+  persistentMaxBackoffMs?: number;
+  persistentCapMs?: number;
+  heartbeatIntervalMs?: number;
+  heartbeatFn?: (info: HeartbeatInfo) => void;
+  signal?: AbortSignal;
 }
 
 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
@@ -45,6 +63,27 @@ function defaultShouldRetry(error: Error | unknown): boolean {
   );
 }
 
+/**
+ * Returns true when getErrorStatus(error) is 429 (Rate Limit) or 529
+ * (Overloaded) — the only statuses eligible for persistent retry. HTTP 500 is
+ * excluded because it may indicate a permanent server bug.
+ */
+export function isTransientCapacityError(error: unknown): boolean {
+  const status = getErrorStatus(error);
+  return status === 429 || status === 529;
+}
+
+/**
+ * Reports whether persistent retry was explicitly requested. CI=true is
+ * intentionally ignored: silently turning a fast-fail CI job into an
+ * infinite-wait job would be surprising and dangerous.
+ * @returns true only if QWEN_CODE_UNATTENDED_RETRY is exactly 'true' or '1'.
+ */
+export function isUnattendedMode(): boolean {
+  const val = process.env['QWEN_CODE_UNATTENDED_RETRY'];
+  return val === 'true' || val === '1';
+}
+
 /**
  * Delays execution for a specified number of milliseconds.
  * @param ms The number of milliseconds to delay.
@@ -54,8 +93,45 @@ function delay(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+/**
+ * Sleeps totalMs in heartbeatInterval-sized chunks, calling heartbeatFn between
+ * chunks. ctx.signal is polled only before each chunk, so abort is not instant.
+ */
+async function sleepWithHeartbeat(
+  totalMs: number,
+  ctx: {
+    attempt: number;
+    error: unknown;
+    heartbeatInterval: number;
+    heartbeatFn?: (info: HeartbeatInfo) => void;
+    signal?: AbortSignal;
+  },
+): Promise<void> {
+  let remaining = totalMs; // milliseconds still left to wait
+
+  while (remaining > 0) {
+    if (ctx.signal?.aborted) {
+      throw new Error('Retry aborted by signal');
+    }
+    // Floor the chunk at 1ms so a heartbeatInterval of 0 cannot loop forever.
+    const chunk = Math.max(1, Math.min(remaining, ctx.heartbeatInterval));
+    await delay(chunk); // plain setTimeout wait; not cancellable mid-chunk
+    remaining -= chunk;
+
+    if (remaining > 0 && ctx.heartbeatFn) { // no heartbeat after final chunk
+      ctx.heartbeatFn({
+        attempt: ctx.attempt,
+        remainingMs: remaining,
+        error: ctx.error,
+      });
+    }
+  }
+}
+
 /**
  * Retries a function with exponential backoff and jitter.
+ * Supports persistent retry mode for unattended/CI environments where transient
+ * capacity errors (429/529) should be retried indefinitely rather than failing.
  * @param fn The asynchronous function to retry.
  * @param options Optional retry configuration.
  * @returns A promise that resolves with the result of the function if successful.
@@ -80,12 +156,24 @@ export async function retryWithBackoff<T>(
     authType,
     shouldRetryOnError,
     shouldRetryOnContent,
+    persistentMode,
+    persistentMaxBackoffMs,
+    persistentCapMs,
+    heartbeatIntervalMs,
+    heartbeatFn,
+    signal,
   } = {
     ...DEFAULT_RETRY_OPTIONS,
     ...cleanOptions,
   };
 
+  const persistent = persistentMode ?? false;
+  const maxBackoff = persistentMaxBackoffMs ?? PERSISTENT_MAX_BACKOFF_MS;
+  const capMs = persistentCapMs ?? PERSISTENT_CAP_MS;
+  const heartbeatInterval = heartbeatIntervalMs ?? HEARTBEAT_INTERVAL_MS;
+
   let attempt = 0;
+  let persistentAttempt = 0;
   let currentDelay = initialDelayMs;
 
   while (attempt < maxAttempts) {
@@ -120,31 +208,86 @@ export async function retryWithBackoff<T>(
         );
       }
 
+      // Determine if this error qualifies for persistent retry.
+      // Persistent mode still respects shouldRetryOnError — callers can force
+      // fast-fail even for transient errors if they explicitly return false.
+      const isTransient = isTransientCapacityError(error);
+      const callerAllowsRetry = shouldRetryOnError(error as Error);
+      const shouldPersist = persistent && isTransient && callerAllowsRetry;
+
       // Check if we've exhausted retries or shouldn't retry
-      if (attempt >= maxAttempts || !shouldRetryOnError(error as Error)) {
-        throw error;
+      if (!shouldPersist) {
+        if (attempt >= maxAttempts || !callerAllowsRetry) {
+          throw error;
+        }
       }
 
-      const retryAfterMs =
-        errorStatus === 429 ? getRetryAfterDelayMs(error) : 0;
+      // === Calculate delay ===
+      let delayMs: number;
 
-      if (retryAfterMs > 0) {
-        // Respect Retry-After header if present and parsed
+      if (shouldPersist) {
+        persistentAttempt++;
+
+        // Prefer Retry-After header for 429 errors
+        const retryAfterMs =
+          errorStatus === 429 ? getRetryAfterDelayMs(error) : 0;
+
+        if (retryAfterMs > 0) {
+          // Retry-After is a server-specified wait — respect it, only cap at
+          // the absolute limit (capMs/6h), NOT at maxBackoff (5min).
+          delayMs = Math.min(retryAfterMs, capMs);
+        } else {
+          // Exponential backoff — cap at maxBackoff (5min) then absolute cap
+          delayMs = Math.min(
+            initialDelayMs * Math.pow(2, persistentAttempt - 1),
+            maxBackoff,
+          );
+          delayMs = Math.min(delayMs, capMs);
+
+          // Add jitter (±25%), then re-apply caps so delay never exceeds limits
+          delayMs += delayMs * 0.25 * (Math.random() * 2 - 1);
+          delayMs = Math.min(Math.max(0, delayMs), maxBackoff, capMs);
+        }
+
+        const reportedAttempt = persistentAttempt;
         debugLogger.warn(
-          `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${retryAfterMs}ms...`,
+          `[Persistent] Attempt ${reportedAttempt} failed with status ${errorStatus ?? 'unknown'}. ` +
+            `Retrying in ${Math.ceil(delayMs / 1000)}s...`,
           error,
         );
-        await delay(retryAfterMs);
-        // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time
-        currentDelay = initialDelayMs;
+
+        // Heartbeat sleep — chunked to keep CI alive
+        await sleepWithHeartbeat(delayMs, {
+          attempt: reportedAttempt,
+          error,
+          heartbeatInterval,
+          heartbeatFn,
+          signal,
+        });
+
+        // Clamp attempt so the while-loop never exits
+        if (attempt >= maxAttempts) {
+          attempt = maxAttempts - 1;
+        }
       } else {
-        // Fallback to exponential backoff with jitter
-        logRetryAttempt(attempt, error, errorStatus);
-        // Add jitter: +/- 30% of currentDelay
-        const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
-        const delayWithJitter = Math.max(0, currentDelay + jitter);
-        await delay(delayWithJitter);
-        currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+        // Normal retry path (unchanged behavior)
+        const retryAfterMs =
+          errorStatus === 429 ? getRetryAfterDelayMs(error) : 0;
+
+        if (retryAfterMs > 0) {
+          debugLogger.warn(
+            `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${retryAfterMs}ms...`,
+            error,
+          );
+          await delay(retryAfterMs);
+          currentDelay = initialDelayMs;
+        } else {
+          logRetryAttempt(attempt, error, errorStatus);
+          const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1);
+          const delayWithJitter = Math.max(0, currentDelay + jitter);
+          await delay(delayWithJitter);
+          currentDelay = Math.min(maxDelayMs, currentDelay * 2);
+        }
       }
     }
   }