diff --git a/packages/cli/src/i18n/locales/en.js b/packages/cli/src/i18n/locales/en.js index 79af44452..f119dd3e9 100644 --- a/packages/cli/src/i18n/locales/en.js +++ b/packages/cli/src/i18n/locales/en.js @@ -1362,4 +1362,11 @@ export default { 'Opening extensions page in your browser: {{url}}', 'Failed to open browser. Check out the extensions gallery at {{url}}': 'Failed to open browser. Check out the extensions gallery at {{url}}', + + // ============================================================================ + // Retry / Rate Limit + // ============================================================================ + 'Rate limit error: {{reason}}': 'Rate limit error: {{reason}}', + 'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})': + 'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})', }; diff --git a/packages/cli/src/i18n/locales/zh.js b/packages/cli/src/i18n/locales/zh.js index 10530a4ac..1af3b5425 100644 --- a/packages/cli/src/i18n/locales/zh.js +++ b/packages/cli/src/i18n/locales/zh.js @@ -1198,4 +1198,11 @@ export default { '正在浏览器中打开扩展页面:{{url}}', 'Failed to open browser. Check out the extensions gallery at {{url}}': '打开浏览器失败。请访问扩展市场:{{url}}', + + // ============================================================================ + // Retry / Rate Limit + // ============================================================================ + 'Rate limit error: {{reason}}': '触发限流:{{reason}}', + 'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})': + '将于 {{seconds}} 秒后重试…(第 {{attempt}}/{{maxRetries}} 次)', }; diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx index a4fa9ee7c..73bdd6de3 100644 --- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx +++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx @@ -20,6 +20,7 @@ import { GeminiThoughtMessageContent } from './messages/GeminiThoughtMessageCont import { CompressionMessage } from './messages/CompressionMessage.js'; import { SummaryMessage } from './messages/SummaryMessage.js'; import { WarningMessage } from './messages/WarningMessage.js'; +import { RetryCountdownMessage } from './messages/RetryCountdownMessage.js'; import { Box } from 'ink'; import { AboutBox } from './AboutBox.js'; import { StatsDisplay } from './StatsDisplay.js'; @@ -126,6 +127,9 @@ const HistoryItemDisplayComponent: React.FC = ({ {itemForDisplay.type === 'error' && ( )} + {itemForDisplay.type === 'retry_countdown' && ( + + )} {itemForDisplay.type === 'about' && ( )} diff --git a/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx b/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx new file mode 100644 index 000000000..0f4727574 --- /dev/null +++ b/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import type React from 'react'; +import { Text, Box } from 'ink'; +import { theme } from '../../semantic-colors.js'; + +interface RetryCountdownMessageProps { + text: string; +} + +/** + * Displays a retry countdown message in a dimmed/secondary style + * to visually distinguish it from error messages. + */ +export const RetryCountdownMessage: React.FC = ({ + text, +}) => { + if (!text || text.trim() === '') { + return null; + } + + const prefix = '↻ '; + const prefixWidth = prefix.length; + + return ( + + + {prefix} + + + + {text} + + + + ); +}; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 2d90012cd..ab88ec4cf 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -2296,6 +2296,107 @@ describe('useGeminiStream', () => { }); }); + it('should show a retry countdown and update pending history over time', async () => { + vi.useFakeTimers(); + try { + let resolveStream: (() => void) | undefined; + mockSendMessageStream.mockReturnValue( + (async function* () { + yield { + type: ServerGeminiEventType.Retry, + retryInfo: { + reason: 'Rate limit exceeded', + attempt: 1, + maxRetries: 3, + delayMs: 3000, + }, + }; + await new Promise((resolve) => { + resolveStream = resolve; + }); + yield { + type: ServerGeminiEventType.Finished, + value: { reason: 'STOP', usageMetadata: undefined }, + }; + })(), + ); + + const { result } = renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + false, // visionModelPreviewEnabled + () => {}, + 80, + 24, + ), + ); + + act(() => { + void result.current.submitQuery('Trigger retry'); + }); + + await act(async () => { + await Promise.resolve(); + }); + + // Error line should be rendered as ERROR type + const errorItem = result.current.pendingHistoryItems.find( + (item) => item.type === MessageType.ERROR, + ); + expect(errorItem?.text).toContain('Rate limit exceeded'); + + // Countdown line should be rendered as retry_countdown type + const countdownItem = result.current.pendingHistoryItems.find( + (item) => item.type === ('retry_countdown' as MessageType), + ); + expect(countdownItem?.text).toContain('Retrying in 3 seconds'); + + await act(async () => { + await vi.advanceTimersByTimeAsync(1000); + }); + + const countdownAfterOneSecond = result.current.pendingHistoryItems.find( + (item) => item.type === ('retry_countdown' as MessageType), + ); + expect(countdownAfterOneSecond?.text).toContain( + 'Retrying in 2 seconds', + ); + + resolveStream?.(); + + await act(async () => { + await Promise.resolve(); + await vi.runAllTimersAsync(); + }); + + // Both error and countdown should be cleared after retry succeeds + const remainingError = result.current.pendingHistoryItems.find( + (item) => item.type === MessageType.ERROR, + ); + const remainingCountdown = result.current.pendingHistoryItems.find( + (item) => item.type === ('retry_countdown' as MessageType), + ); + expect(remainingError).toBeUndefined(); + expect(remainingCountdown).toBeUndefined(); + } finally { + vi.useRealTimers(); + } + }); + it('should memoize pendingHistoryItems', () => { mockUseReactToolScheduler.mockReturnValue([ [], diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index e142d91f0..fa2866528 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -65,6 +65,7 @@ import path from 'node:path'; import { useSessionStats } from '../contexts/SessionContext.js'; import { useKeypress } from './useKeypress.js'; import type { LoadedSettings } from '../../config/settings.js'; +import { t } from '../../i18n/index.js'; const debugLogger = createDebugLogger('GEMINI_STREAM'); @@ -125,6 +126,13 @@ export const useGeminiStream = ( const [thought, setThought] = useState(null); const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] = useStateAndRef(null); + const [pendingRetryErrorItem, setPendingRetryErrorItem] = + useState(null); + const [pendingRetryCountdownItem, setPendingRetryCountdownItem] = + useState(null); + const retryCountdownTimerRef = useRef | null>( + null, + ); const processedMemoryToolsRef = useRef>(new Set()); const { startNewPrompt, @@ -189,6 +197,67 @@ export const useGeminiStream = ( onComplete: (result: { userSelection: 'disable' | 'keep' }) => void; } | null>(null); + const stopRetryCountdownTimer = useCallback(() => { + if (retryCountdownTimerRef.current) { + clearInterval(retryCountdownTimerRef.current); + retryCountdownTimerRef.current = null; + } + }, []); + + const clearRetryCountdown = useCallback(() => { + stopRetryCountdownTimer(); + setPendingRetryErrorItem(null); + setPendingRetryCountdownItem(null); + }, [stopRetryCountdownTimer]); + + const startRetryCountdown = useCallback( + (retryInfo: { + reason: string; + attempt: number; + maxRetries: number; + delayMs: number; + }) => { + stopRetryCountdownTimer(); + const startTime = Date.now(); + const { reason, attempt, maxRetries, delayMs } = retryInfo; + + // Error line stays static (red with ✕ prefix) + setPendingRetryErrorItem({ + type: MessageType.ERROR, + text: t('Rate limit error: {{reason}}', { reason }), + }); + + // Countdown line updates every second (dim/secondary color) + const updateCountdown = () => { + const elapsedMs = Date.now() - startTime; + const remainingMs = Math.max(0, delayMs - elapsedMs); + const remainingSec = Math.ceil(remainingMs / 1000); + + setPendingRetryCountdownItem({ + type: 'retry_countdown', + text: t( + 'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})', + { + seconds: String(remainingSec), + attempt: String(attempt), + maxRetries: String(maxRetries), + }, + ), + } as HistoryItemWithoutId); + + if (remainingMs <= 0) { + stopRetryCountdownTimer(); + } + }; + + updateCountdown(); + retryCountdownTimerRef.current = setInterval(updateCountdown, 1000); + }, + [stopRetryCountdownTimer], + ); + + useEffect(() => () => stopRetryCountdownTimer(), [stopRetryCountdownTimer]); + const onExec = useCallback(async (done: Promise) => { setIsResponding(true); await done; @@ -295,6 +364,7 @@ export const useGeminiStream = ( Date.now(), ); setPendingHistoryItem(null); + clearRetryCountdown(); onCancelSubmit(); setIsResponding(false); setShellInputFocused(false); @@ -305,6 +375,7 @@ export const useGeminiStream = ( onCancelSubmit, pendingHistoryItemRef, setShellInputFocused, + clearRetryCountdown, config, getPromptCount, ]); @@ -609,10 +680,17 @@ export const useGeminiStream = ( { type: MessageType.INFO, text: 'User cancelled the request.' }, userMessageTimestamp, ); + clearRetryCountdown(); setIsResponding(false); setThought(null); // Reset thought when user cancels }, - [addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought], + [ + addItem, + pendingHistoryItemRef, + setPendingHistoryItem, + setThought, + clearRetryCountdown, + ], ); const handleErrorEvent = useCallback( @@ -631,9 +709,17 @@ export const useGeminiStream = ( }, userMessageTimestamp, ); + clearRetryCountdown(); setThought(null); // Reset thought when there's an error }, - [addItem, pendingHistoryItemRef, setPendingHistoryItem, config, setThought], + [ + addItem, + pendingHistoryItemRef, + setPendingHistoryItem, + config, + setThought, + clearRetryCountdown, + ], ); const handleCitationEvent = useCallback( @@ -693,8 +779,9 @@ export const useGeminiStream = ( userMessageTimestamp, ); } + clearRetryCountdown(); }, - [addItem], + [addItem, clearRetryCountdown], ); const handleChatCompressionEvent = useCallback( @@ -853,7 +940,16 @@ export const useGeminiStream = ( loopDetectedRef.current = true; break; case ServerGeminiEventType.Retry: - // Will add the missing logic later + // Clear any pending partial content from the failed attempt + if (pendingHistoryItemRef.current) { + setPendingHistoryItem(null); + } + // Show retry info if available (rate-limit / throttling errors) + if (event.retryInfo) { + startRetryCountdown(event.retryInfo); + } else { + clearRetryCountdown(); + } break; default: { // enforces exhaustive switch-case @@ -878,7 +974,11 @@ export const useGeminiStream = ( handleMaxSessionTurnsEvent, handleSessionTokenLimitExceededEvent, handleCitationEvent, + startRetryCountdown, + clearRetryCountdown, setThought, + pendingHistoryItemRef, + setPendingHistoryItem, ], ); @@ -1216,10 +1316,18 @@ export const useGeminiStream = ( const pendingHistoryItems = useMemo( () => - [pendingHistoryItem, pendingToolCallGroupDisplay].filter( - (i) => i !== undefined && i !== null, - ), - [pendingHistoryItem, pendingToolCallGroupDisplay], + [ + pendingHistoryItem, + pendingRetryErrorItem, + pendingRetryCountdownItem, + pendingToolCallGroupDisplay, + ].filter((i) => i !== undefined && i !== null), + [ + pendingHistoryItem, + pendingRetryErrorItem, + pendingRetryCountdownItem, + pendingToolCallGroupDisplay, + ], ); useEffect(() => { diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index b111f9ac7..ae799bfa6 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -128,6 +128,11 @@ export type HistoryItemWarning = HistoryItemBase & { text: string; }; +export type HistoryItemRetryCountdown = HistoryItemBase & { + type: 'retry_countdown'; + text: string; +}; + export type HistoryItemAbout = HistoryItemBase & { type: 'about'; systemInfo: { @@ -265,6 +270,7 @@ export type HistoryItemWithoutId = | HistoryItemInfo | HistoryItemError | HistoryItemWarning + | HistoryItemRetryCountdown | HistoryItemAbout | HistoryItemHelp | HistoryItemToolGroup diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 538d782e6..a5b0c9612 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -938,7 +938,7 @@ describe('GeminiChat', () => { const tpmError = new StreamContentError('Throttling: TPM(1/1)'); async function* failingStreamGenerator() { throw tpmError; - + yield {} as GenerateContentResponse; } const failingStream = failingStreamGenerator(); @@ -1005,6 +1005,91 @@ describe('GeminiChat', () => { } }); + it('should retry on GLM rate limit StreamContentError with backoff delay', async () => { + vi.useFakeTimers(); + + try { + const glmError = new StreamContentError( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + async function* failingStreamGenerator() { + throw glmError; + + yield {} as GenerateContentResponse; + } + const failingStream = failingStreamGenerator(); + const successStream = (async function* () { + yield { + candidates: [ + { + content: { parts: [{ text: 'Success after GLM retry' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(); + + vi.mocked(mockContentGenerator.generateContentStream) + .mockResolvedValueOnce(failingStream) + .mockResolvedValueOnce(successStream); + + const stream = await chat.sendMessageStream( + 'test-model', + { message: 'test' }, + 'prompt-id-glm-retry', + ); + + const iterator = stream[Symbol.asyncIterator](); + const first = await iterator.next(); + + expect(first.done).toBe(false); + expect(first.value.type).toBe(StreamEventType.RETRY); + + // Resume generator to schedule the rate limit delay, then advance timers. + const secondPromise = iterator.next(); + await vi.advanceTimersByTimeAsync(1_500); + const second = await secondPromise; + + expect(second.done).toBe(false); + expect(second.value.type).toBe(StreamEventType.RETRY); + + // Verify retryInfo contains the GLM error reason + if ( + second.value.type === StreamEventType.RETRY && + second.value.retryInfo + ) { + expect(second.value.retryInfo.reason).toContain('速率限制'); + expect(second.value.retryInfo.attempt).toBe(1); + expect(second.value.retryInfo.maxRetries).toBe(3); + expect(second.value.retryInfo.delayMs).toBe(1500); + } + + const events: StreamEvent[] = [first.value, second.value]; + for (;;) { + const next = await iterator.next(); + if (next.done) break; + events.push(next.value); + } + + expect( + mockContentGenerator.generateContentStream, + ).toHaveBeenCalledTimes(2); + expect( + events.filter((e) => e.type === StreamEventType.RETRY), + ).toHaveLength(2); + expect( + events.some( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Success after GLM retry', + ), + ).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + describe('API error retry behavior', () => { beforeEach(() => { // Use a more direct mock for retry testing diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index ee16bb669..853eed0b0 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -20,9 +20,8 @@ import { createUserContent } from '@google/genai'; import { getErrorStatus, retryWithBackoff, - isTPMThrottlingError, + getRateLimitRetryInfo, } from '../utils/retry.js'; -import { StreamContentError } from './openaiContentGenerator/pipeline.js'; import { createDebugLogger } from '../utils/debugLogger.js'; import type { Config } from '../config/config.js'; import { hasCycleInSchema } from '../tools/tools.js'; @@ -48,9 +47,20 @@ export enum StreamEventType { RETRY = 'retry', } +export interface RetryInfo { + /** Human-readable error reason. */ + reason: string; + /** Current retry attempt (1-based). */ + attempt: number; + /** Max retries allowed. */ + maxRetries: number; + /** Delay in milliseconds before the retry happens. */ + delayMs: number; +} + export type StreamEvent = | { type: StreamEventType.CHUNK; value: GenerateContentResponse } - | { type: StreamEventType.RETRY }; + | { type: StreamEventType.RETRY; retryInfo?: RetryInfo }; /** * Options for retrying due to invalid content from the model. @@ -68,14 +78,22 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = { }; /** - * Options for retrying on TPM (Tokens Per Minute) throttling errors. - * These errors occur when the API rate limit is exceeded and are returned - * as stream content (finish_reason="error_finish") rather than HTTP errors. + * Options for retrying on rate-limit throttling errors returned as stream content. */ -const TPM_RETRY_OPTIONS = { +const RATE_LIMIT_RETRY_OPTIONS = { maxRetries: 3, - delayMs: 60_000, // 1 minute - TPM quota typically resets within a minute window }; + +const RATE_LIMIT_BACKOFF_OPTIONS = { + initialDelayMs: 1500, + maxDelayMs: 30000, +}; + +function getRateLimitBackoffDelay(retryCount: number): number { + const delay = + RATE_LIMIT_BACKOFF_OPTIONS.initialDelayMs * 2 ** (retryCount - 1); + return Math.min(RATE_LIMIT_BACKOFF_OPTIONS.maxDelayMs, delay); +} /** * Returns true if the response is valid, false otherwise. * @@ -286,7 +304,7 @@ export class GeminiChat { return (async function* () { try { let lastError: unknown = new Error('Request failed after all retries.'); - let tpmRetryCount = 0; + let rateLimitRetryCount = 0; for ( let attempt = 0; @@ -294,7 +312,7 @@ export class GeminiChat { attempt++ ) { try { - if (attempt > 0 || tpmRetryCount > 0) { + if (attempt > 0 || rateLimitRetryCount > 0) { yield { type: StreamEventType.RETRY }; } @@ -314,25 +332,35 @@ export class GeminiChat { } catch (error) { lastError = error; - // Handle TPM throttling errors returned as stream content. + // Handle rate-limit / throttling errors returned as stream content. // These arrive as StreamContentError with finish_reason="error_finish" // from the pipeline, containing the throttling message in the content. + // Covers TPM throttling, GLM rate limits, and other provider throttling. + const rateLimitInfo = getRateLimitRetryInfo(error); if ( - (error instanceof StreamContentError || - isTPMThrottlingError(error)) && - tpmRetryCount < TPM_RETRY_OPTIONS.maxRetries + rateLimitInfo && + rateLimitRetryCount < RATE_LIMIT_RETRY_OPTIONS.maxRetries ) { - tpmRetryCount++; + rateLimitRetryCount++; + const delayMs = + rateLimitInfo.delayMs ?? + getRateLimitBackoffDelay(rateLimitRetryCount); debugLogger.warn( - `TPM throttling detected (retry ${tpmRetryCount}/${TPM_RETRY_OPTIONS.maxRetries}). ` + - `Waiting ${TPM_RETRY_OPTIONS.delayMs / 1000}s before retrying...`, + `Rate limit throttling detected (retry ${rateLimitRetryCount}/${RATE_LIMIT_RETRY_OPTIONS.maxRetries}). ` + + `Waiting ${delayMs / 1000}s before retrying...`, ); - yield { type: StreamEventType.RETRY }; - // Don't count TPM retries against the content retry limit + yield { + type: StreamEventType.RETRY, + retryInfo: { + reason: rateLimitInfo.reason, + attempt: rateLimitRetryCount, + maxRetries: RATE_LIMIT_RETRY_OPTIONS.maxRetries, + delayMs, + }, + }; + // Don't count rate-limit retries against the content retry limit attempt--; - await new Promise((res) => - setTimeout(res, TPM_RETRY_OPTIONS.delayMs), - ); + await new Promise((res) => setTimeout(res, delayMs)); continue; } diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index b600d3d99..9b50a16b5 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -26,7 +26,7 @@ import { UnauthorizedError, toFriendlyError, } from '../utils/errors.js'; -import type { GeminiChat } from './geminiChat.js'; +import type { GeminiChat, RetryInfo } from './geminiChat.js'; import { getThoughtText, parseThought, @@ -67,6 +67,7 @@ export enum GeminiEventType { export type ServerGeminiRetryEvent = { type: GeminiEventType.Retry; + retryInfo?: RetryInfo; }; export interface StructuredError { @@ -255,7 +256,10 @@ export class Turn { // Handle the new RETRY event if (streamEvent.type === 'retry') { - yield { type: GeminiEventType.Retry }; + yield { + type: GeminiEventType.Retry, + retryInfo: streamEvent.retryInfo, + }; continue; // Skip to the next event in the stream } diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index 26cb52aa5..5dfabebbb 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -9,7 +9,10 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { HttpError } from './retry.js'; import { getErrorStatus, + getRateLimitRetryInfo, + isGLMRateLimitError, isTPMThrottlingError, + isRateLimitThrottlingError, retryWithBackoff, } from './retry.js'; import { setSimulate429 } from './testUtils.js'; @@ -578,6 +581,119 @@ describe('isTPMThrottlingError', () => { }); }); +describe('isRateLimitThrottlingError', () => { + it('should detect TPM throttling errors (superset of isTPMThrottlingError)', () => { + expect( + isRateLimitThrottlingError('Throttling: TPM(10680324/10000000)'), + ).toBe(true); + expect( + isRateLimitThrottlingError( + new Error('Throttling: TPM(10680324/10000000)'), + ), + ).toBe(true); + }); + + it('should detect GLM rate limit error (Chinese message)', () => { + const glmError = new Error( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + expect(isRateLimitThrottlingError(glmError)).toBe(true); + }); + + it('should detect GLM rate limit from nested error object', () => { + const error = { + error: { + message: '您的账户已达到速率限制,请您控制请求频率', + code: '1302', + }, + }; + expect(isRateLimitThrottlingError(error)).toBe(true); + }); + + it('should detect general Throttling: prefix errors', () => { + expect( + isRateLimitThrottlingError(new Error('Throttling: RPM exceeded')), + ).toBe(true); + expect( + isRateLimitThrottlingError('Throttling: concurrent limit reached'), + ).toBe(true); + }); + + it('should detect English rate limit errors', () => { + expect(isRateLimitThrottlingError(new Error('Rate limit exceeded'))).toBe( + true, + ); + expect( + isRateLimitThrottlingError({ + error: { message: 'API rate limit reached. Please slow down.' }, + }), + ).toBe(true); + }); + + it('should return false for non-rate-limit errors', () => { + expect(isRateLimitThrottlingError('Regular error message')).toBe(false); + expect(isRateLimitThrottlingError(new Error('Connection refused'))).toBe( + false, + ); + expect(isRateLimitThrottlingError(null)).toBe(false); + expect(isRateLimitThrottlingError(undefined)).toBe(false); + expect(isRateLimitThrottlingError(429)).toBe(false); + }); +}); + +describe('isGLMRateLimitError', () => { + it('should detect GLM rate limit error from JSON string', () => { + const glmError = + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}'; + expect(isGLMRateLimitError(glmError)).toBe(true); + }); + + it('should detect GLM rate limit error from Error object', () => { + const glmError = new Error( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + expect(isGLMRateLimitError(glmError)).toBe(true); + }); + + it('should detect GLM rate limit from nested error object', () => { + const error = { + error: { + message: '您的账户已达到速率限制,请您控制请求频率', + code: '1302', + }, + }; + expect(isGLMRateLimitError(error)).toBe(true); + }); + + it('should return false for non-GLM errors', () => { + expect(isGLMRateLimitError('Rate limit exceeded')).toBe(false); + expect(isGLMRateLimitError(new Error('Throttling: TPM(1/1)'))).toBe(false); + }); +}); + +describe('getRateLimitRetryInfo', () => { + it('should return fixed delay for TPM throttling errors', () => { + const info = getRateLimitRetryInfo('Throttling: TPM(1/1)'); + expect(info).not.toBeNull(); + expect(info?.delayMs).toBe(60000); + }); + + it('should return no fixed delay for GLM 1302 rate limit errors', () => { + const info = getRateLimitRetryInfo( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + expect(info).not.toBeNull(); + expect(info?.delayMs).toBeUndefined(); + }); + + it('should extract a human-readable reason from JSON error strings', () => { + const info = getRateLimitRetryInfo( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + expect(info?.reason).toBe('您的账户已达到速率限制,请您控制请求频率'); + }); +}); + describe('TPM throttling retry handling', () => { beforeEach(() => { vi.useFakeTimers(); @@ -751,4 +867,89 @@ describe('TPM throttling retry handling', () => { await expect(promise).rejects.toThrow('Throttling: TPM(10680324/10000000)'); expect(fn).toHaveBeenCalledTimes(3); }); + + it('should use exponential backoff for GLM rate limit errors when delay is unknown', async () => { + const glmError = new Error( + '{"error":{"code":"1302","message":"您的账户已达到速率限制,请您控制请求频率"}}', + ); + + const fn = vi + .fn() + .mockRejectedValueOnce(glmError) + .mockResolvedValue('success'); + + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + maxDelayMs: 1000, + }); + + await vi.runAllTimersAsync(); + + await expect(promise).resolves.toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7); + expect(delays[0]).toBeLessThanOrEqual(100 * 1.3); + }); + + it('should use exponential backoff for general English rate limit errors', async () => { + const rateLimitError = new Error('Rate limit exceeded. Please slow down.'); + + const fn = vi + .fn() + .mockRejectedValueOnce(rateLimitError) + .mockResolvedValue('success'); + + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + maxDelayMs: 1000, + }); + + await vi.runAllTimersAsync(); + + await expect(promise).resolves.toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7); + expect(delays[0]).toBeLessThanOrEqual(100 * 1.3); + }); + + it('should retry nested GLM rate limit error objects with backoff', async () => { + const nestedGlmError = { + error: { + message: '您的账户已达到速率限制,请您控制请求频率', + code: '1302', + }, + }; + + const fn = vi + .fn() + .mockRejectedValueOnce(nestedGlmError) + .mockResolvedValue('success'); + + const setTimeoutSpy = vi.spyOn(global, 'setTimeout'); + + const promise = retryWithBackoff(fn, { + maxAttempts: 3, + initialDelayMs: 100, + maxDelayMs: 1000, + }); + + await vi.runAllTimersAsync(); + + await expect(promise).resolves.toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + + const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number); + expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7); + expect(delays[0]).toBeLessThanOrEqual(100 * 1.3); + }); }); diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index 6c580548c..73ed194a5 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -25,6 +25,11 @@ export interface RetryOptions { authType?: string; } +export interface RateLimitRetryInfo { + reason: string; + delayMs?: number; +} + const DEFAULT_RETRY_OPTIONS: RetryOptions = { maxAttempts: 7, initialDelayMs: 1500, @@ -32,6 +37,12 @@ const DEFAULT_RETRY_OPTIONS: RetryOptions = { shouldRetryOnError: defaultShouldRetry, }; +// Z.AI GLM rate limit code reference: https://docs.z.ai/api-reference/api-code +const GLM_RATE_LIMIT_CODE = '1302'; + +// DashScope/Model Studio TPM rate limit reference: https://help.aliyun.com/zh/model-studio/rate-limit +const TPM_RATE_LIMIT_DELAY_MS = 60000; + /** * Default predicate function to determine if a retry should be attempted. * Retries on 429 (Too Many Requests) and 5xx server errors. @@ -120,23 +131,24 @@ export async function retryWithBackoff( throw error; } - // Check for TPM throttling error - use fixed 1 minute delay - // This check is prioritized over shouldRetryOnError because TPM errors - // may not have a standard HTTP status code (like 429) but still need retry - if (isTPMThrottlingError(error)) { - const tpmDelayMs = 60000; // 1 minute + // Check for rate-limit / throttling errors with a fixed delay. + // This check is prioritized over shouldRetryOnError because provider + // rate-limit errors may not have a standard HTTP status code (like 429) + // but still need retry (e.g., TPM throttling). + const rateLimitInfo = getRateLimitRetryInfo(error); + if (rateLimitInfo?.delayMs !== undefined) { debugLogger.warn( - `Attempt ${attempt} failed with TPM throttling error. Retrying after ${tpmDelayMs}ms (1 minute)...`, + `Attempt ${attempt} failed with rate limit error. Retrying after ${rateLimitInfo.delayMs}ms...`, error, ); - await delay(tpmDelayMs); - // Reset currentDelay for next potential non-TPM error + await delay(rateLimitInfo.delayMs); + // Reset currentDelay for next potential non-rate-limit error currentDelay = initialDelayMs; continue; } // Check if we shouldn't retry based on error type - if (!shouldRetryOnError(error as Error)) { + if (!rateLimitInfo && !shouldRetryOnError(error as Error)) { throw error; } @@ -179,12 +191,162 @@ export function isTPMThrottlingError(error: unknown): boolean { const checkMessage = (msg: string) => msg.includes('Throttling: TPM('); if (typeof error === 'string') return checkMessage(error); + if (error instanceof Error) return checkMessage(error.message); if (isStructuredError(error)) return checkMessage(error.message); if (isApiError(error)) return checkMessage(error.error.message); return false; } +/** + * Checks if an error is a GLM rate limit error (code 1302). + * + * @param error The error object. + * @returns True if the error matches GLM rate limit code 1302. + */ +export function isGLMRateLimitError(error: unknown): boolean { + const matchesCode = (code: unknown): boolean => + code !== undefined && String(code) === GLM_RATE_LIMIT_CODE; + + if (isApiError(error)) { + return matchesCode(error.error.code); + } + + if (isStructuredError(error) && !(error instanceof Error)) { + return false; + } + + const message = getErrorMessage(error); + if (!message) { + return false; + } + + const parsed = extractErrorDetailsFromString(message); + if (parsed && matchesCode(parsed.code)) { + return true; + } + + return ( + message.includes(`"code":"${GLM_RATE_LIMIT_CODE}"`) || + message.includes(`"code":${GLM_RATE_LIMIT_CODE}`) + ); +} + +/** + * Checks if an error is a rate-limit / throttling error from any provider. + * This is a superset of isTPMThrottlingError that also covers: + * - GLM rate limit: {"error":{"code":"1302","message":"您的账户已达到速率限制..."}} + * - General throttling: "Throttling: ..." + * - English rate limit messages + * + * @param error The error object. + * @returns True if the error is a rate-limit or throttling error. + */ +export function isRateLimitThrottlingError(error: unknown): boolean { + if (isTPMThrottlingError(error)) return true; + if (isGLMRateLimitError(error)) return true; + + const checkMessage = (msg: string): boolean => { + const lower = msg.toLowerCase(); + return ( + lower.includes('速率限制') || + lower.includes('throttling:') || + (lower.includes('rate') && lower.includes('limit')) + ); + }; + + const message = getErrorMessage(error); + if (message) return checkMessage(message); + + return false; +} + +/** + * Returns rate-limit retry info when an error is detected as rate-limited. + * For TPM throttling errors, a fixed 60s delay is returned. For other + * provider rate-limit errors, delayMs is left undefined so callers can apply + * their own backoff strategy. + */ +export function getRateLimitRetryInfo( + error: unknown, +): RateLimitRetryInfo | null { + if (!isRateLimitThrottlingError(error)) { + return null; + } + + return { + reason: getRateLimitReason(error), + delayMs: isTPMThrottlingError(error) ? TPM_RATE_LIMIT_DELAY_MS : undefined, + }; +} + +function getRateLimitReason(error: unknown): string { + if (isApiError(error)) { + return error.error.message; + } + + if (isStructuredError(error)) { + return error.message; + } + + if (error instanceof Error) { + return extractReasonFromString(error.message); + } + + if (typeof error === 'string') { + return extractReasonFromString(error); + } + + return String(error); +} + +function getErrorMessage(error: unknown): string | undefined { + if (typeof error === 'string') return error; + if (error instanceof Error) return error.message; + if (isStructuredError(error)) return error.message; + if (isApiError(error)) return error.error.message; + return undefined; +} + +function extractReasonFromString(message: string): string { + const parsed = extractErrorDetailsFromString(message); + if (parsed?.message) { + return parsed.message; + } + return message; +} + +function extractErrorDetailsFromString( + message: string, +): { code?: unknown; message?: string } | null { + const trimmed = message.trim().replace(/^data:\s*/i, ''); + if (!trimmed.startsWith('{')) { + return null; + } + try { + const parsed = JSON.parse(trimmed) as unknown; + if (!parsed || typeof parsed !== 'object') { + return null; + } + const errorObject = + 'error' in parsed && + typeof (parsed as { error?: unknown }).error === 'object' + ? (parsed as { error: Record }).error + : (parsed as Record); + const code = errorObject?.['code']; + const messageValue = + typeof errorObject?.['message'] === 'string' + ? errorObject['message'] + : undefined; + if (code === undefined && messageValue === undefined) { + return null; + } + return { code, message: messageValue }; + } catch { + return null; + } +} + /** * Extracts the HTTP status code from an error object. *