diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index a1fce9231..7aee9b928 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -553,25 +553,26 @@ For authentication-related variables (like `OPENAI_*`) and the recommended `.qwe ### Environment Variables Table -| Variable | Description | Notes | -| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `QWEN_TELEMETRY_ENABLED` | Set to `true` or `1` to enable telemetry. Any other value is treated as disabling it. | Overrides the `telemetry.enabled` setting. | -| `QWEN_TELEMETRY_TARGET` | Sets the telemetry target (`local` or `gcp`). | Overrides the `telemetry.target` setting. | -| `QWEN_TELEMETRY_OTLP_ENDPOINT` | Sets the OTLP endpoint for telemetry. | Overrides the `telemetry.otlpEndpoint` setting. | -| `QWEN_TELEMETRY_OTLP_PROTOCOL` | Sets the OTLP protocol (`grpc` or `http`). | Overrides the `telemetry.otlpProtocol` setting. | -| `QWEN_TELEMETRY_LOG_PROMPTS` | Set to `true` or `1` to enable or disable logging of user prompts. Any other value is treated as disabling it. | Overrides the `telemetry.logPrompts` setting. | -| `QWEN_TELEMETRY_OUTFILE` | Sets the file path to write telemetry to when the target is `local`. 
| Overrides the `telemetry.outfile` setting. | -| `QWEN_TELEMETRY_USE_COLLECTOR` | Set to `true` or `1` to enable or disable using an external OTLP collector. Any other value is treated as disabling it. | Overrides the `telemetry.useCollector` setting. | -| `QWEN_SANDBOX` | Alternative to the `sandbox` setting in `settings.json`. | Accepts `true`, `false`, `docker`, `podman`, or a custom command string. | -| `QWEN_SANDBOX_IMAGE` | Overrides sandbox image selection for Docker/Podman. | Takes precedence over `tools.sandboxImage`. | -| `SEATBELT_PROFILE` | (macOS specific) Switches the Seatbelt (`sandbox-exec`) profile on macOS. | `permissive-open`: (Default) Restricts writes to the project folder (and a few other folders, see `packages/cli/src/utils/sandbox-macos-permissive-open.sb`) but allows other operations. `strict`: Uses a strict profile that declines operations by default. ``: Uses a custom profile. To define a custom profile, create a file named `sandbox-macos-.sb` in your project's `.qwen/` directory (e.g., `my-project/.qwen/sandbox-macos-custom.sb`). | -| `DEBUG` or `DEBUG_MODE` | (often used by underlying libraries or the CLI itself) Set to `true` or `1` to enable verbose debug logging, which can be helpful for troubleshooting. | **Note:** These variables are automatically excluded from project `.env` files by default to prevent interference with the CLI behavior. Use `.qwen/.env` files if you need to set these for Qwen Code specifically. | -| `NO_COLOR` | Set to any value to disable all color output in the CLI. | | -| `CLI_TITLE` | Set to a string to customize the title of the CLI. | | -| `CODE_ASSIST_ENDPOINT` | Specifies the endpoint for the code assist server. | This is useful for development and testing. | -| `QWEN_CODE_MAX_OUTPUT_TOKENS` | Overrides the default maximum output tokens per response. When not set, Qwen Code uses an adaptive strategy: starts with 8K tokens and automatically retries with 64K if the response is truncated. 
Set this to a specific value (e.g., `16000`) to use a fixed limit instead. | Takes precedence over the capped default (8K) but is overridden by `samplingParams.max_tokens` in settings. Disables automatic escalation when set. Example: `export QWEN_CODE_MAX_OUTPUT_TOKENS=16000` | -| `TAVILY_API_KEY` | Your API key for the Tavily web search service. | Used to enable the `web_search` tool functionality. Example: `export TAVILY_API_KEY="tvly-your-api-key-here"` | -| `QWEN_CODE_PROFILE_STARTUP` | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations. | Only active inside the sandbox child process. Zero overhead when not set. Example: `export QWEN_CODE_PROFILE_STARTUP=1` | +| Variable | Description | Notes | +| ------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `QWEN_TELEMETRY_ENABLED` | Set to `true` or `1` to enable telemetry. Any other value is treated as disabling it. | Overrides the `telemetry.enabled` setting. | +| `QWEN_TELEMETRY_TARGET` | Sets the telemetry target (`local` or `gcp`). | Overrides the `telemetry.target` setting. | +| `QWEN_TELEMETRY_OTLP_ENDPOINT` | Sets the OTLP endpoint for telemetry. | Overrides the `telemetry.otlpEndpoint` setting. 
| +| `QWEN_TELEMETRY_OTLP_PROTOCOL` | Sets the OTLP protocol (`grpc` or `http`). | Overrides the `telemetry.otlpProtocol` setting. | +| `QWEN_TELEMETRY_LOG_PROMPTS` | Set to `true` or `1` to enable logging of user prompts. Any other value is treated as disabling it. | Overrides the `telemetry.logPrompts` setting. | +| `QWEN_TELEMETRY_OUTFILE` | Sets the file path to write telemetry to when the target is `local`. | Overrides the `telemetry.outfile` setting. | +| `QWEN_TELEMETRY_USE_COLLECTOR` | Set to `true` or `1` to enable using an external OTLP collector. Any other value is treated as disabling it. | Overrides the `telemetry.useCollector` setting. | +| `QWEN_SANDBOX` | Alternative to the `sandbox` setting in `settings.json`. | Accepts `true`, `false`, `docker`, `podman`, or a custom command string. | +| `QWEN_SANDBOX_IMAGE` | Overrides sandbox image selection for Docker/Podman. | Takes precedence over `tools.sandboxImage`. | +| `SEATBELT_PROFILE` | (macOS specific) Switches the Seatbelt (`sandbox-exec`) profile on macOS. | `permissive-open`: (Default) Restricts writes to the project folder (and a few other folders, see `packages/cli/src/utils/sandbox-macos-permissive-open.sb`) but allows other operations. `strict`: Uses a strict profile that declines operations by default. `<profile_name>`: Uses a custom profile. To define a custom profile, create a file named `sandbox-macos-<profile_name>.sb` in your project's `.qwen/` directory (e.g., `my-project/.qwen/sandbox-macos-custom.sb`). | +| `DEBUG` or `DEBUG_MODE` | (often used by underlying libraries or the CLI itself) Set to `true` or `1` to enable verbose debug logging, which can be helpful for troubleshooting. | **Note:** These variables are automatically excluded from project `.env` files by default to prevent interference with the CLI behavior. Use `.qwen/.env` files if you need to set these for Qwen Code specifically. | +| `NO_COLOR` | Set to any value to disable all color output in the CLI.
| | +| `CLI_TITLE` | Set to a string to customize the title of the CLI. | | +| `CODE_ASSIST_ENDPOINT` | Specifies the endpoint for the code assist server. | This is useful for development and testing. | +| `QWEN_CODE_MAX_OUTPUT_TOKENS` | Overrides the default maximum output tokens per response. When not set, Qwen Code uses an adaptive strategy: starts with 8K tokens and automatically retries with 64K if the response is truncated. Set this to a specific value (e.g., `16000`) to use a fixed limit instead. | Takes precedence over the capped default (8K) but is overridden by `samplingParams.max_tokens` in settings. Disables automatic escalation when set. Example: `export QWEN_CODE_MAX_OUTPUT_TOKENS=16000` | +| `TAVILY_API_KEY` | Your API key for the Tavily web search service. | Used to enable the `web_search` tool functionality. Example: `export TAVILY_API_KEY="tvly-your-api-key-here"` | +| `QWEN_CODE_UNATTENDED_RETRY` | Set to `true` or `1` to enable persistent retry mode. When enabled, transient API capacity errors (HTTP 429 Rate Limit and 529 Overloaded) are retried indefinitely with exponential backoff (capped at 5 minutes per retry; a `Retry-After` header on a 429 response is honored instead, up to 6 hours) and heartbeat keepalives every 30 seconds on stderr. | Designed for CI/CD pipelines and background automation where long-running tasks should survive temporary API outages. Must be set explicitly — `CI=true` alone does **not** activate this mode. See [Headless Mode](../features/headless#persistent-retry-mode) for details. Example: `export QWEN_CODE_UNATTENDED_RETRY=1` | +| `QWEN_CODE_PROFILE_STARTUP` | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations. | Only active inside the sandbox child process. Zero overhead when not set.
Example: `export QWEN_CODE_PROFILE_STARTUP=1` | ## Command-Line Arguments diff --git a/docs/users/features/headless.md b/docs/users/features/headless.md index 12172f121..e6e0492d5 100644 --- a/docs/users/features/headless.md +++ b/docs/users/features/headless.md @@ -310,6 +310,67 @@ echo "Recent usage trends:" tail -5 usage.log ``` +## Persistent Retry Mode + +When Qwen Code runs in CI/CD pipelines or as a background daemon, a brief API outage (rate limiting or overload) should not kill a multi-hour task. **Persistent retry mode** makes Qwen Code retry transient API errors indefinitely until the service recovers. + +### How it works + +- **Transient errors only**: HTTP 429 (Rate Limit) and 529 (Overloaded) are retried indefinitely. Other errors (400, 500, etc.) still fail normally. +- **Exponential backoff with cap**: Retry delays grow exponentially but are capped at **5 minutes** per retry. If a 429 response includes a `Retry-After` header, that server-specified wait is used instead (without jitter), up to an absolute limit of **6 hours**. +- **Heartbeat keepalive**: During long waits, a status line is printed to stderr every **30 seconds** to prevent CI runners from killing the process due to inactivity. +- **Graceful degradation**: Non-transient errors and interactive mode are completely unaffected. + +### Activation + +Set the `QWEN_CODE_UNATTENDED_RETRY` environment variable to `true` or `1` (strict match, case-sensitive): + +```bash +export QWEN_CODE_UNATTENDED_RETRY=1 +``` + +> [!important] +> Persistent retry requires an **explicit opt-in**. `CI=true` alone does **not** activate it — silently turning a fast-fail CI job into an infinite-wait job would be dangerous. Always set `QWEN_CODE_UNATTENDED_RETRY` explicitly in your pipeline configuration.
+ +### Examples + +#### GitHub Actions + +```yaml +- name: Automated code review + env: + QWEN_CODE_UNATTENDED_RETRY: '1' + run: | + qwen -p "Review all files in src/ for security issues" \ + --output-format json \ + --yolo > review.json +``` + +#### Overnight batch processing + +```bash +export QWEN_CODE_UNATTENDED_RETRY=1 +qwen -p "Migrate all callback-style functions to async/await in src/" --yolo +``` + +#### Background daemon + +```bash +QWEN_CODE_UNATTENDED_RETRY=1 nohup qwen -p "Audit all dependencies for known CVEs" \ + --output-format json > audit.json 2> audit.log & +``` + +### Monitoring + +During persistent retry, heartbeat messages are printed to **stderr**: + +``` +[qwen-code] Waiting for API capacity... attempt 3, retry in 45s +[qwen-code] Waiting for API capacity... attempt 3, retry in 15s +``` + +These messages keep CI runners alive and let you monitor progress. They do not appear in stdout, so JSON output piped to other tools remains clean. + ## Resources - [CLI Configuration](../configuration/settings#command-line-arguments) - Complete configuration guide diff --git a/packages/core/src/core/baseLlmClient.test.ts b/packages/core/src/core/baseLlmClient.test.ts index df8d82cf9..a760a33ca 100644 --- a/packages/core/src/core/baseLlmClient.test.ts +++ b/packages/core/src/core/baseLlmClient.test.ts @@ -39,6 +39,7 @@ vi.mock('../utils/generateContentResponseUtilities.js', () => ({ vi.mock('../utils/retry.js', () => ({ retryWithBackoff: vi.fn(async (fn) => await fn()), + isUnattendedMode: vi.fn(() => false), })); const mockGenerateContent = vi.fn(); diff --git a/packages/core/src/core/baseLlmClient.ts b/packages/core/src/core/baseLlmClient.ts index 53df44fa5..a7ac0402d 100644 --- a/packages/core/src/core/baseLlmClient.ts +++ b/packages/core/src/core/baseLlmClient.ts @@ -17,7 +17,7 @@ import type { Config } from '../config/config.js'; import type { ContentGenerator } from './contentGenerator.js'; import { reportError } from '../utils/errorReporting.js'; 
import { getErrorMessage } from '../utils/errors.js'; -import { retryWithBackoff } from '../utils/retry.js'; +import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js'; import { getFunctionCalls } from '../utils/generateContentResponseUtilities.js'; const DEFAULT_MAX_ATTEMPTS = 7; @@ -117,6 +117,13 @@ export class BaseLlmClient { const result = await retryWithBackoff(apiCall, { maxAttempts: maxAttempts ?? DEFAULT_MAX_ATTEMPTS, + persistentMode: isUnattendedMode(), + signal: abortSignal, + heartbeatFn: (info) => { + process.stderr.write( + `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`, + ); + }, }); const functionCalls = getFunctionCalls(result); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index c58b3db73..8c648f59d 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -77,7 +77,7 @@ import { getErrorMessage } from '../utils/errors.js'; import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js'; import { flatMapTextParts } from '../utils/partUtils.js'; import { promptIdContext } from '../utils/promptIdContext.js'; -import { retryWithBackoff } from '../utils/retry.js'; +import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js'; // Hook types and utilities import { @@ -1137,6 +1137,13 @@ export class GeminiClient { }; const result = await retryWithBackoff(apiCall, { authType: this.config.getContentGeneratorConfig()?.authType, + persistentMode: isUnattendedMode(), + signal: abortSignal, + heartbeatFn: (info) => { + process.stderr.write( + `[qwen-code] Waiting for API capacity... 
attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`, + ); + }, }); return result; } catch (error: unknown) { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 932fc7301..ff76eb5c6 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -17,7 +17,7 @@ import type { GenerateContentResponseUsageMetadata, } from '@google/genai'; import { createUserContent, FinishReason } from '@google/genai'; -import { retryWithBackoff } from '../utils/retry.js'; +import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js'; import { getErrorStatus } from '../utils/errors.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import { parseAndFormatApiError } from '../utils/errorParsing.js'; @@ -726,6 +726,13 @@ export class GeminiChat { return false; }, authType: this.config.getContentGeneratorConfig()?.authType, + persistentMode: isUnattendedMode(), + signal: params.config?.abortSignal, + heartbeatFn: (info) => { + process.stderr.write( + `[qwen-code] Waiting for API capacity... 
attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`, + ); + }, }); return this.processStreamResponse(model, streamResponse); diff --git a/packages/core/src/utils/editor.test.ts b/packages/core/src/utils/editor.test.ts index 851336941..14a88686f 100644 --- a/packages/core/src/utils/editor.test.ts +++ b/packages/core/src/utils/editor.test.ts @@ -462,7 +462,9 @@ describe('editor utils', () => { throw new Error(); // CLI not found }); // Accept any path containing Zed.app - (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app')); + (existsSync as Mock).mockImplementation((path: string) => + path.includes('Zed.app'), + ); const mockSpawnOn = vi.fn((event, cb) => { if (event === 'close') { @@ -716,7 +718,9 @@ describe('editor utils', () => { throw new Error(); // CLI not found }); // Accept any path containing Zed.app (the CLI check will be for Contents/MacOS/cli) - (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app')); + (existsSync as Mock).mockImplementation((path: string) => + path.includes('Zed.app'), + ); const diffCommand = getDiffCommand('old.txt', 'new.txt', 'zed'); expect(diffCommand).not.toBeNull(); @@ -759,7 +763,9 @@ describe('editor utils', () => { throw new Error(); // CLI not found }); // Accept any path containing Zed.app - (existsSync as Mock).mockImplementation((path: string) => path.includes('Zed.app')); + (existsSync as Mock).mockImplementation((path: string) => + path.includes('Zed.app'), + ); const diffCommand = getDiffCommand('old.txt', 'new.txt', 'zed'); expect(diffCommand).not.toBeNull(); diff --git a/packages/core/src/utils/editor.ts b/packages/core/src/utils/editor.ts index d5d22623a..c19963169 100644 --- a/packages/core/src/utils/editor.ts +++ b/packages/core/src/utils/editor.ts @@ -125,7 +125,7 @@ export function getEditorExecutable(editorType: EditorType): string | null { return found; } - // Special handling for Zed on macOS: check app bundle CLI as fallback 
+ // Special handling for Zed on macOS: check app bundle CLI as fallback if (editorType === 'zed' && process.platform === 'darwin') { for (const appPath of getZedAppPaths()) { const cliPath = join(appPath, 'Contents/MacOS/cli'); diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index d9aaa5ba1..0fd247860 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -5,9 +5,21 @@ */ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { + describe, + it, + expect, + vi, + beforeEach, + afterEach, + afterAll, +} from 'vitest'; import type { HttpError } from './retry.js'; -import { retryWithBackoff } from './retry.js'; +import { + retryWithBackoff, + isTransientCapacityError, + isUnattendedMode, +} from './retry.js'; import { getErrorStatus } from './errors.js'; import { setSimulate429 } from './testUtils.js'; import { AuthType } from '../core/contentGenerator.js'; @@ -462,6 +474,447 @@ describe('retryWithBackoff', () => { }); }); +describe('isTransientCapacityError', () => { + it('should return true for 429 errors', () => { + const error = { status: 429 }; + expect(isTransientCapacityError(error)).toBe(true); + }); + + it('should return true for 529 errors', () => { + const error = { status: 529 }; + expect(isTransientCapacityError(error)).toBe(true); + }); + + it('should return false for 500 errors', () => { + const error = { status: 500 }; + expect(isTransientCapacityError(error)).toBe(false); + }); + + it('should return false for 400 errors', () => { + const error = { status: 400 }; + expect(isTransientCapacityError(error)).toBe(false); + }); + + it('should return false for errors without status', () => { + expect(isTransientCapacityError(new Error('generic'))).toBe(false); + expect(isTransientCapacityError(null)).toBe(false); + }); +}); + +describe('isUnattendedMode', () => { + const originalEnv = 
process.env; + + beforeEach(() => { + process.env = { ...originalEnv }; + delete process.env['QWEN_CODE_UNATTENDED_RETRY']; + }); + + afterAll(() => { + process.env = originalEnv; + }); + + it('should return true when QWEN_CODE_UNATTENDED_RETRY=1', () => { + process.env['QWEN_CODE_UNATTENDED_RETRY'] = '1'; + expect(isUnattendedMode()).toBe(true); + }); + + it('should return true when QWEN_CODE_UNATTENDED_RETRY=true', () => { + process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'true'; + expect(isUnattendedMode()).toBe(true); + }); + + it('should return false when no env vars are set', () => { + expect(isUnattendedMode()).toBe(false); + }); + + it('should NOT activate on CI=true alone', () => { + process.env['CI'] = 'true'; + expect(isUnattendedMode()).toBe(false); + }); + + it('should return false for non-matching values', () => { + process.env['QWEN_CODE_UNATTENDED_RETRY'] = '0'; + expect(isUnattendedMode()).toBe(false); + process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'false'; + expect(isUnattendedMode()).toBe(false); + process.env['QWEN_CODE_UNATTENDED_RETRY'] = ''; + expect(isUnattendedMode()).toBe(false); + }); + + it('should use strict matching consistent with parseBooleanEnvFlag', () => { + // Only 'true' and '1' are accepted — matches project convention + process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'TRUE'; + expect(isUnattendedMode()).toBe(false); // strict: not 'true' + process.env['QWEN_CODE_UNATTENDED_RETRY'] = ' 1 '; + expect(isUnattendedMode()).toBe(false); // strict: not '1' + process.env['QWEN_CODE_UNATTENDED_RETRY'] = 'yes'; + expect(isUnattendedMode()).toBe(false); + }); +}); + +describe('retryWithBackoff - persistent mode', () => { + beforeEach(() => { + vi.useFakeTimers(); + setSimulate429(false); + console.warn = vi.fn(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + + it('should retry indefinitely for 429 errors in persistent mode', async () => { + // Fail 10 times with 429, then succeed + let attempts = 0; + const fn 
= vi.fn(async () => { + attempts++; + if (attempts <= 10) { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, // Would normally fail after 3 + initialDelayMs: 10, + persistentMode: true, + }); + + await vi.runAllTimersAsync(); + const result = await promise; + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(11); // 10 failures + 1 success + }); + + it('should retry indefinitely for 529 errors in persistent mode', async () => { + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 8) { + const error: HttpError = new Error('Overloaded'); + error.status = 529; + throw error; + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10, + persistentMode: true, + }); + + await vi.runAllTimersAsync(); + const result = await promise; + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(9); + }); + + it('should NOT retry indefinitely for 500 errors in persistent mode', async () => { + const fn = vi.fn(async () => { + const error: HttpError = new Error('Internal Server Error'); + error.status = 500; + throw error; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10, + persistentMode: true, + }); + + // eslint-disable-next-line vitest/valid-expect + const assertionPromise = expect(promise).rejects.toThrow( + 'Internal Server Error', + ); + await vi.runAllTimersAsync(); + await assertionPromise; + + // Should stop at maxAttempts for non-transient errors + expect(fn).toHaveBeenCalledTimes(3); + }); + + it('should cap single retry backoff at persistentMaxBackoffMs', async () => { + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 20) { + const error: HttpError = new Error('Rate limited'); + error.status = 
429; + throw error; + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + persistentMode: true, + persistentMaxBackoffMs: 5000, // 5 seconds cap for test + }); + + await vi.runAllTimersAsync(); + await promise; + + // Jitter is re-capped, so no delay should exceed the cap itself + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + for (const d of delays) { + expect(d).toBeLessThanOrEqual(5000 + 1); // cap + rounding tolerance + } + }); + + it('should call heartbeatFn during persistent retry waits', async () => { + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 2) { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + } + return 'success'; + }); + + const heartbeatFn = vi.fn(); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + persistentMode: true, + heartbeatIntervalMs: 30, // Short interval for test + heartbeatFn, + }); + + await vi.runAllTimersAsync(); + await promise; + + // Heartbeat should have been called at least once during waits > heartbeatInterval + expect(heartbeatFn).toHaveBeenCalled(); + // Verify heartbeat info structure + const call = heartbeatFn.mock.calls[0][0]; + expect(call).toHaveProperty('attempt'); + expect(call).toHaveProperty('remainingMs'); + expect(call).toHaveProperty('error'); + }); + + it('should abort persistent retry when signal is aborted', async () => { + const controller = new AbortController(); + const fn = vi.fn(async () => { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10000, // Long delay so abort happens during sleep + persistentMode: true, + heartbeatIntervalMs: 50, + signal: controller.signal, + }); + + // Abort after the first retry starts waiting + setTimeout(() => controller.abort(), 100); + + // 
eslint-disable-next-line vitest/valid-expect + const assertionPromise = expect(promise).rejects.toThrow( + 'Retry aborted by signal', + ); + await vi.runAllTimersAsync(); + await assertionPromise; + }); + + it('should respect shouldRetryOnError even in persistent mode', async () => { + // Caller explicitly says "don't retry 429" — persistent mode must obey + const fn = vi.fn(async () => { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10, + persistentMode: true, + shouldRetryOnError: () => false, // force fast-fail + }); + + // eslint-disable-next-line vitest/valid-expect + const assertionPromise = expect(promise).rejects.toThrow('Rate limited'); + await vi.runAllTimersAsync(); + await assertionPromise; + + // Should fail on first attempt — shouldRetryOnError trumps persistent mode + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should not infinite-loop when heartbeatIntervalMs is 0', async () => { + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 2) { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10, + persistentMode: true, + heartbeatIntervalMs: 0, // Would cause infinite loop without Math.max(1, ...) 
+ }); + + await vi.runAllTimersAsync(); + const result = await promise; + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(3); + }); + + it('should not affect normal mode behavior when persistentMode is false', async () => { + const fn = vi.fn(async () => { + const error: HttpError = new Error('Rate limited'); + error.status = 429; + throw error; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 10, + persistentMode: false, + }); + + // eslint-disable-next-line vitest/valid-expect + const assertionPromise = expect(promise).rejects.toThrow('Rate limited'); + await vi.runAllTimersAsync(); + await assertionPromise; + + expect(fn).toHaveBeenCalledTimes(3); + }); +}); + +describe('retryWithBackoff - Retry-After handling in persistent mode', () => { + beforeEach(() => { + vi.useFakeTimers(); + setSimulate429(false); + console.warn = vi.fn(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + + // Helper: create a 429 error with Retry-After header + function make429WithRetryAfter(seconds: number): HttpError { + const error: HttpError & { response: { headers: Record } } = + Object.assign(new Error('Rate limited'), { + status: 429, + response: { headers: { 'retry-after': String(seconds) } }, + }); + return error; + } + + it('should respect Retry-After and NOT cap at maxBackoff', async () => { + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 1) { + throw make429WithRetryAfter(600); // server says wait 10 minutes + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + persistentMode: true, + persistentMaxBackoffMs: 5000, // 5 seconds — Retry-After must NOT be capped to this + }); + + await vi.runAllTimersAsync(); + await promise; + + // The first retry delay should be ~600s (600000ms), not 5s (5000ms) + const delays = 
setTimeoutSpy.mock.calls.map((call) => call[1] as number); + const firstRetryDelay = delays[0]; + expect(firstRetryDelay).toBeGreaterThan(5000); // NOT capped at maxBackoff + expect(firstRetryDelay).toBeLessThanOrEqual(600 * 1000); // respects server value + }); + + it('should cap Retry-After at persistentCapMs', async () => { + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 1) { + throw make429WithRetryAfter(100); // server says wait 100s + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + persistentMode: true, + persistentCapMs: 50_000, // absolute cap 50s — less than Retry-After + }); + + await vi.runAllTimersAsync(); + await promise; + + // Delay should be capped at persistentCapMs (50s), not the full 100s + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + const firstRetryDelay = delays[0]; + expect(firstRetryDelay).toBeLessThanOrEqual(50_000 + 1); + }); + + it('should NOT add jitter to Retry-After delays', async () => { + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + // Run multiple times to check for jitter variance + const observedDelays: number[] = []; + + for (let run = 0; run < 5; run++) { + setTimeoutSpy.mockClear(); + let attempts = 0; + const fn = vi.fn(async () => { + attempts++; + if (attempts <= 1) { + throw make429WithRetryAfter(10); // 10 seconds + } + return 'success'; + }); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + persistentMode: true, + }); + + await vi.runAllTimersAsync(); + await promise; + + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + observedDelays.push(delays[0]); + } + + // All delays should be exactly 10000ms — no jitter + for (const d of observedDelays) { + expect(d).toBe(10_000); + } + }); +}); + describe('getErrorStatus', () => { it('should extract status from 
error.status (OpenAI/Anthropic/Gemini style)', () => { expect(getErrorStatus({ status: 429 })).toBe(429); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index d1c3e7ba6..54b0b6db9 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -12,10 +12,21 @@ import { getErrorStatus } from './errors.js'; const debugLogger = createDebugLogger('RETRY'); +// Persistent retry mode constants +const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000; // 5 minutes — single retry backoff cap +const PERSISTENT_CAP_MS = 6 * 60 * 60 * 1000; // 6 hours — absolute single wait cap +const HEARTBEAT_INTERVAL_MS = 30_000; // 30 seconds + export interface HttpError extends Error { status?: number; } +export interface HeartbeatInfo { + attempt: number; + remainingMs: number; + error: unknown; +} + export interface RetryOptions { maxAttempts: number; initialDelayMs: number; @@ -23,6 +34,13 @@ export interface RetryOptions { shouldRetryOnError: (error: Error) => boolean; shouldRetryOnContent?: (content: GenerateContentResponse) => boolean; authType?: string; + // Persistent retry mode options + persistentMode?: boolean; + persistentMaxBackoffMs?: number; + persistentCapMs?: number; + heartbeatIntervalMs?: number; + heartbeatFn?: (info: HeartbeatInfo) => void; + signal?: AbortSignal; } const DEFAULT_RETRY_OPTIONS: RetryOptions = { @@ -45,6 +63,27 @@ function defaultShouldRetry(error: Error | unknown): boolean { ); } +/** + * Determines if an error is a transient capacity error eligible for persistent retry. + * Only 429 (Rate Limit) and 529 (Overloaded) qualify — HTTP 500 is excluded + * because it may indicate a permanent server bug. + */ +export function isTransientCapacityError(error: unknown): boolean { + const status = getErrorStatus(error); + return status === 429 || status === 529; +} + +/** + * Detects whether persistent retry mode is explicitly enabled. 
+ * Requires the user to opt in via QWEN_CODE_UNATTENDED_RETRY — we intentionally + * do NOT auto-activate on CI=true, because silently turning a fast-fail CI job + * into an infinite-wait job would be surprising and dangerous. + */ +export function isUnattendedMode(): boolean { + const val = process.env['QWEN_CODE_UNATTENDED_RETRY']; + return val === 'true' || val === '1'; +} + /** * Delays execution for a specified number of milliseconds. * @param ms The number of milliseconds to delay. @@ -54,8 +93,45 @@ function delay(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } +/** + * Sleeps in chunks, emitting heartbeat callbacks at regular intervals. + * Supports AbortSignal for graceful cancellation. + */ +async function sleepWithHeartbeat( + totalMs: number, + ctx: { + attempt: number; + error: unknown; + heartbeatInterval: number; + heartbeatFn?: (info: HeartbeatInfo) => void; + signal?: AbortSignal; + }, +): Promise { + let remaining = totalMs; + + while (remaining > 0) { + if (ctx.signal?.aborted) { + throw new Error('Retry aborted by signal'); + } + + const chunk = Math.max(1, Math.min(remaining, ctx.heartbeatInterval)); + await delay(chunk); + remaining -= chunk; + + if (remaining > 0 && ctx.heartbeatFn) { + ctx.heartbeatFn({ + attempt: ctx.attempt, + remainingMs: remaining, + error: ctx.error, + }); + } + } +} + /** * Retries a function with exponential backoff and jitter. + * Supports persistent retry mode for unattended/CI environments where transient + * capacity errors (429/529) should be retried indefinitely rather than failing. * @param fn The asynchronous function to retry. * @param options Optional retry configuration. * @returns A promise that resolves with the result of the function if successful. 
@@ -80,12 +156,24 @@ export async function retryWithBackoff( authType, shouldRetryOnError, shouldRetryOnContent, + persistentMode, + persistentMaxBackoffMs, + persistentCapMs, + heartbeatIntervalMs, + heartbeatFn, + signal, } = { ...DEFAULT_RETRY_OPTIONS, ...cleanOptions, }; + const persistent = persistentMode ?? false; + const maxBackoff = persistentMaxBackoffMs ?? PERSISTENT_MAX_BACKOFF_MS; + const capMs = persistentCapMs ?? PERSISTENT_CAP_MS; + const heartbeatInterval = heartbeatIntervalMs ?? HEARTBEAT_INTERVAL_MS; + let attempt = 0; + let persistentAttempt = 0; let currentDelay = initialDelayMs; while (attempt < maxAttempts) { @@ -120,31 +208,86 @@ export async function retryWithBackoff( ); } + // Determine if this error qualifies for persistent retry. + // Persistent mode still respects shouldRetryOnError — callers can force + // fast-fail even for transient errors if they explicitly return false. + const isTransient = isTransientCapacityError(error); + const callerAllowsRetry = shouldRetryOnError(error as Error); + const shouldPersist = persistent && isTransient && callerAllowsRetry; + // Check if we've exhausted retries or shouldn't retry - if (attempt >= maxAttempts || !shouldRetryOnError(error as Error)) { - throw error; + if (!shouldPersist) { + if (attempt >= maxAttempts || !callerAllowsRetry) { + throw error; + } } - const retryAfterMs = - errorStatus === 429 ? getRetryAfterDelayMs(error) : 0; + // === Calculate delay === + let delayMs: number; - if (retryAfterMs > 0) { - // Respect Retry-After header if present and parsed + if (shouldPersist) { + persistentAttempt++; + + // Prefer Retry-After header for 429 errors + const retryAfterMs = + errorStatus === 429 ? getRetryAfterDelayMs(error) : 0; + + if (retryAfterMs > 0) { + // Retry-After is a server-specified wait — respect it, only cap at + // the absolute limit (capMs/6h), NOT at maxBackoff (5min). 
+ delayMs = Math.min(retryAfterMs, capMs); + } else { + // Exponential backoff — cap at maxBackoff (5min) then absolute cap + delayMs = Math.min( + initialDelayMs * Math.pow(2, persistentAttempt - 1), + maxBackoff, + ); + delayMs = Math.min(delayMs, capMs); + + // Add jitter (±25%), then re-apply caps so delay never exceeds limits + delayMs += delayMs * 0.25 * (Math.random() * 2 - 1); + delayMs = Math.min(Math.max(0, delayMs), maxBackoff, capMs); + } + + const reportedAttempt = persistentAttempt; debugLogger.warn( - `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${retryAfterMs}ms...`, + `[Persistent] Attempt ${reportedAttempt} failed with status ${errorStatus ?? 'unknown'}. ` + + `Retrying in ${Math.ceil(delayMs / 1000)}s...`, error, ); - await delay(retryAfterMs); - // Reset currentDelay for next potential non-429 error, or if Retry-After is not present next time - currentDelay = initialDelayMs; + + // Heartbeat sleep — chunked to keep CI alive + await sleepWithHeartbeat(delayMs, { + attempt: reportedAttempt, + error, + heartbeatInterval, + heartbeatFn, + signal, + }); + + // Clamp attempt so the while-loop never exits + if (attempt >= maxAttempts) { + attempt = maxAttempts - 1; + } } else { - // Fallback to exponential backoff with jitter - logRetryAttempt(attempt, error, errorStatus); - // Add jitter: +/- 30% of currentDelay - const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); - const delayWithJitter = Math.max(0, currentDelay + jitter); - await delay(delayWithJitter); - currentDelay = Math.min(maxDelayMs, currentDelay * 2); + // Normal retry path (unchanged behavior) + const retryAfterMs = + errorStatus === 429 ? getRetryAfterDelayMs(error) : 0; + + if (retryAfterMs > 0) { + debugLogger.warn( + `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. 
Retrying after explicit delay of ${retryAfterMs}ms...`, + error, + ); + await delay(retryAfterMs); + currentDelay = initialDelayMs; + } else { + logRetryAttempt(attempt, error, errorStatus); + const jitter = currentDelay * 0.3 * (Math.random() * 2 - 1); + const delayWithJitter = Math.max(0, currentDelay + jitter); + await delay(delayWithJitter); + currentDelay = Math.min(maxDelayMs, currentDelay * 2); + } } } }