diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 75a1c5364..7cf270ce6 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -9,6 +9,7 @@ import type { Config, EditorType, GeminiClient, + RetryInfo, ServerGeminiChatCompressedEvent, ServerGeminiContentEvent as ContentEvent, ServerGeminiFinishedEvent, @@ -268,6 +269,7 @@ export const useGeminiStream = ( */ const clearRetryCountdown = useCallback(() => { stopRetryCountdownTimer(); + skipRetryDelayRef.current = null; setPendingRetryErrorItem(null); setPendingRetryCountdownItem(null); }, [ @@ -276,14 +278,14 @@ export const useGeminiStream = ( stopRetryCountdownTimer, ]); + // Holds the skipDelay callback from the current rate-limit RetryInfo. + // Managed symmetrically: set in startRetryCountdown, cleared in clearRetryCountdown. + const skipRetryDelayRef = useRef<(() => void) | null>(null); + const startRetryCountdown = useCallback( - (retryInfo: { - message?: string; - attempt: number; - maxRetries: number; - delayMs: number; - }) => { + (retryInfo: RetryInfo) => { stopRetryCountdownTimer(); + skipRetryDelayRef.current = retryInfo.skipDelay; const startTime = Date.now(); const { message, attempt, maxRetries, delayMs } = retryInfo; const retryReasonText = @@ -1293,6 +1295,15 @@ export const useGeminiStream = ( * when the user presses Ctrl+Y (bound to Command.RETRY_LAST in keyBindings.ts). */ const retryLastPrompt = useCallback(async () => { + // During a rate-limit retry countdown, skip the delay so the generator + // retries immediately — no abort/re-submit needed. 
+ if (skipRetryDelayRef.current) { + skipRetryDelayRef.current(); + skipRetryDelayRef.current = null; + clearRetryCountdown(); + return; + } + if ( streamingState === StreamingState.Responding || streamingState === StreamingState.WaitingForConfirmation diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 8422968e7..e5d92f5e9 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -1098,6 +1098,80 @@ describe('GeminiChat', async () => { } }); + it('should retry immediately when skipDelay is called during rate-limit wait', async () => { + vi.useFakeTimers(); + + try { + const tpmError = new StreamContentError( + '{"error":{"code":"429","message":"Throttling: TPM(1/1)"}}', + ); + const successStream = (async function* () { + yield { + candidates: [ + { + content: { parts: [{ text: 'Success after skip' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(); + + vi.mocked(mockContentGenerator.generateContentStream) + .mockResolvedValueOnce( + (async function* () { + throw tpmError; + + yield {} as GenerateContentResponse; + })(), + ) + .mockResolvedValueOnce(successStream); + + const stream = await chat.sendMessageStream( + 'test-model', + { message: 'test' }, + 'prompt-id-skip-delay', + ); + + const iterator = stream[Symbol.asyncIterator](); + // First event: RETRY with retryInfo containing skipDelay + const first = await iterator.next(); + expect(first.value.type).toBe(StreamEventType.RETRY); + const skipDelay = first.value.retryInfo!.skipDelay!; + + // Resume generator — it's now awaiting the 60s delay. + // Call skipDelay() to resolve it immediately instead of advancing timers. 
+ const secondPromise = iterator.next(); + skipDelay(); + const second = await secondPromise; + + // The generator should have continued to the next attempt immediately + expect(second.done).toBe(false); + expect(second.value.type).toBe(StreamEventType.RETRY); // retry-start marker + + // Consume remaining events + const events: StreamEvent[] = [first.value, second.value]; + for (;;) { + const next = await iterator.next(); + if (next.done) break; + events.push(next.value); + } + + expect( + mockContentGenerator.generateContentStream, + ).toHaveBeenCalledTimes(2); + expect( + events.some( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Success after skip', + ), + ).toBe(true); + } finally { + vi.useRealTimers(); + } + }); + it('should retry on GLM rate limit StreamContentError with backoff delay', async () => { vi.useFakeTimers(); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 03b78f06c..874a1725f 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -84,6 +84,29 @@ const RATE_LIMIT_RETRY_OPTIONS = { delayMs: 60000, }; +/** + * Creates a promise that resolves after the specified delay, but can be + * resolved early by calling the returned `skip` function. + */ +function skippableDelay(delayMs: number): { + promise: Promise<void>; + skip: () => void; +} { + let resolveRef: () => void; + let timeoutId: ReturnType<typeof setTimeout>; + const promise = new Promise<void>((resolve) => { + resolveRef = resolve; + timeoutId = setTimeout(resolve, delayMs); + }); + return { + promise, + skip: () => { + clearTimeout(timeoutId); + resolveRef(); + }, + }; +} + /** * Returns true if the response is valid, false otherwise. * @@ -348,6 +371,7 @@ export class GeminiChat { `Rate limit throttling detected (retry ${rateLimitRetryCount}/${maxRateLimitRetries}). 
` + `Waiting ${delayMs / 1000}s before retrying...`, ); + const { promise: delayPromise, skip } = skippableDelay(delayMs); yield { type: StreamEventType.RETRY, retryInfo: { @@ -355,11 +379,12 @@ export class GeminiChat { attempt: rateLimitRetryCount, maxRetries: maxRateLimitRetries, delayMs, + skipDelay: skip, }, }; // Don't count rate-limit retries against the content retry limit attempt--; - await new Promise((res) => setTimeout(res, delayMs)); + await delayPromise; continue; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e1fe65d2f..6b3efb346 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -210,6 +210,7 @@ export * from './utils/paths.js'; export * from './utils/projectSummary.js'; export * from './utils/promptIdContext.js'; export * from './utils/quotaErrorDetection.js'; +export * from './utils/rateLimit.js'; export * from './utils/readManyFiles.js'; export * from './utils/request-tokenizer/supportedImageFormats.js'; export * from './utils/retry.js'; diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index 2c90c6e71..823b947f2 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -124,8 +124,8 @@ const setupConflictingPathEnv = () => { }; const expectNormalizedWindowsPathEnv = (env: NodeJS.ProcessEnv) => { - expect(env.PATH).toBe(EXPECTED_MERGED_WINDOWS_PATH); - expect(env.Path).toBeUndefined(); + expect(env['PATH']).toBe(EXPECTED_MERGED_WINDOWS_PATH); + expect(env['Path']).toBeUndefined(); }; describe('ShellExecutionService', () => { diff --git a/packages/core/src/utils/rateLimit.ts b/packages/core/src/utils/rateLimit.ts index 19466e90f..3ccd7e4d3 100644 --- a/packages/core/src/utils/rateLimit.ts +++ b/packages/core/src/utils/rateLimit.ts @@ -22,6 +22,8 @@ export interface RetryInfo { maxRetries: number; /** Delay in milliseconds before the retry 
happens. */ delayMs: number; + /** When called, resolves the delay promise early so the retry happens immediately. */ + skipDelay: () => void; } /**