diff --git a/packages/cli/src/i18n/locales/en.js b/packages/cli/src/i18n/locales/en.js
index 79af44452..f119dd3e9 100644
--- a/packages/cli/src/i18n/locales/en.js
+++ b/packages/cli/src/i18n/locales/en.js
@@ -1362,4 +1362,11 @@ export default {
     'Opening extensions page in your browser: {{url}}',
   'Failed to open browser. Check out the extensions gallery at {{url}}':
     'Failed to open browser. Check out the extensions gallery at {{url}}',
+
+  // ============================================================================
+  // Retry / Rate Limit
+  // ============================================================================
+  'Rate limit error: {{reason}}': 'Rate limit error: {{reason}}',
+  'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})':
+    'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})',
 };
diff --git a/packages/cli/src/i18n/locales/zh.js b/packages/cli/src/i18n/locales/zh.js
index 10530a4ac..1af3b5425 100644
--- a/packages/cli/src/i18n/locales/zh.js
+++ b/packages/cli/src/i18n/locales/zh.js
@@ -1198,4 +1198,11 @@ export default {
     '正在浏览器中打开扩展页面：{{url}}',
   'Failed to open browser. Check out the extensions gallery at {{url}}':
     '打开浏览器失败。请访问扩展市场：{{url}}',
+
+  // ============================================================================
+  // Retry / Rate Limit
+  // ============================================================================
+  'Rate limit error: {{reason}}': '触发限流：{{reason}}',
+  'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})':
+    '将于 {{seconds}} 秒后重试…（第 {{attempt}}/{{maxRetries}} 次）',
 };
diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
index a4fa9ee7c..73bdd6de3 100644
--- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx
+++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
@@ -20,6 +20,7 @@ import { GeminiThoughtMessageContent } from './messages/GeminiThoughtMessageCont
 import { CompressionMessage } from './messages/CompressionMessage.js';
 import { SummaryMessage } from './messages/SummaryMessage.js';
 import { WarningMessage } from './messages/WarningMessage.js';
+import { RetryCountdownMessage } from './messages/RetryCountdownMessage.js';
 import { Box } from 'ink';
 import { AboutBox } from './AboutBox.js';
 import { StatsDisplay } from './StatsDisplay.js';
@@ -126,6 +127,9 @@ const HistoryItemDisplayComponent: React.FC<HistoryItemDisplayProps> = ({
       {itemForDisplay.type === 'error' && (
         <ErrorMessage text={itemForDisplay.text} />
       )}
+      {itemForDisplay.type === 'retry_countdown' && (
+        <RetryCountdownMessage text={itemForDisplay.text} />
+      )}
       {itemForDisplay.type === 'about' && (
         <AboutBox {...itemForDisplay.systemInfo} width={boxWidth} />
       )}
diff --git a/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx b/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx
new file mode 100644
index 000000000..0f4727574
--- /dev/null
+++ b/packages/cli/src/ui/components/messages/RetryCountdownMessage.tsx
@@ -0,0 +1,41 @@
+/**
+ * @license
+ * Copyright 2025 Qwen
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type React from 'react';
+import { Text, Box } from 'ink';
+import { theme } from '../../semantic-colors.js';
+
+interface RetryCountdownMessageProps {
+  text: string; // message to show; blank or whitespace-only renders nothing
+}
+
+/**
+ * Renders a retry-countdown line: a "↻ " marker column followed by the wrapped
+ * message, both in the secondary text color. Blank text renders nothing.
+ */
+export const RetryCountdownMessage: React.FC<RetryCountdownMessageProps> = ({
+  text: message,
+}) => {
+  if (!message || message.trim().length === 0) {
+    return null;
+  }
+
+  const marker = '↻ ';
+  const secondary = theme.text.secondary;
+
+  return (
+    <Box flexDirection="row">
+      <Box width={marker.length}>
+        <Text color={secondary}>{marker}</Text>
+      </Box>
+      <Box flexGrow={1}>
+        <Text wrap="wrap" color={secondary}>
+          {message}
+        </Text>
+      </Box>
+    </Box>
+  );
+};
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 2d90012cd..ab88ec4cf 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -2296,6 +2296,107 @@ describe('useGeminiStream', () => {
       });
     });
 
+    it('should show a retry countdown and update pending history over time', async () => {
+      vi.useFakeTimers();
+      try {
+        let resolveStream: (() => void) | undefined;
+        mockSendMessageStream.mockReturnValue(
+          (async function* () {
+            yield {
+              type: ServerGeminiEventType.Retry,
+              retryInfo: {
+                reason: 'Rate limit exceeded',
+                attempt: 1,
+                maxRetries: 3,
+                delayMs: 3000,
+              },
+            };
+            await new Promise<void>((resolve) => {
+              resolveStream = resolve;
+            });
+            yield {
+              type: ServerGeminiEventType.Finished,
+              value: { reason: 'STOP', usageMetadata: undefined },
+            };
+          })(),
+        );
+
+        const { result } = renderHook(() =>
+          useGeminiStream(
+            new MockedGeminiClientClass(mockConfig),
+            [],
+            mockAddItem,
+            mockConfig,
+            mockLoadedSettings,
+            mockOnDebugMessage,
+            mockHandleSlashCommand,
+            false,
+            () => 'vscode' as EditorType,
+            () => {},
+            () => Promise.resolve(),
+            false,
+            () => {},
+            () => {},
+            () => {},
+            false, // visionModelPreviewEnabled
+            () => {},
+            80,
+            24,
+          ),
+        );
+
+        act(() => {
+          void result.current.submitQuery('Trigger retry');
+        });
+
+        await act(async () => {
+          await Promise.resolve();
+        });
+
+        // The rate-limit reason is surfaced as a pending ERROR item.
+        const errorItem = result.current.pendingHistoryItems.find(
+          (item) => item.type === MessageType.ERROR,
+        );
+        expect(errorItem?.text).toContain('Rate limit exceeded');
+
+        // The countdown is a separate pending item of type 'retry_countdown'.
+        const countdownItem = result.current.pendingHistoryItems.find(
+          (item) => item.type === 'retry_countdown',
+        );
+        expect(countdownItem?.text).toContain('Retrying in 3 seconds');
+
+        await act(async () => {
+          await vi.advanceTimersByTimeAsync(1000);
+        });
+
+        const countdownAfterOneSecond = result.current.pendingHistoryItems.find(
+          (item) => item.type === 'retry_countdown',
+        );
+        expect(countdownAfterOneSecond?.text).toContain(
+          'Retrying in 2 seconds',
+        );
+        // Unblock the mocked stream so it can emit its Finished event.
+        resolveStream?.();
+
+        await act(async () => {
+          await Promise.resolve();
+          await vi.runAllTimersAsync();
+        });
+
+        // Both the error and the countdown item must be gone once the stream ends.
+        const remainingError = result.current.pendingHistoryItems.find(
+          (item) => item.type === MessageType.ERROR,
+        );
+        const remainingCountdown = result.current.pendingHistoryItems.find(
+          (item) => item.type === 'retry_countdown',
+        );
+        expect(remainingError).toBeUndefined();
+        expect(remainingCountdown).toBeUndefined();
+      } finally {
+        vi.useRealTimers();
+      }
+    });
+
     it('should memoize pendingHistoryItems', () => {
       mockUseReactToolScheduler.mockReturnValue([
         [],
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index e142d91f0..fa2866528 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -65,6 +65,7 @@ import path from 'node:path';
 import { useSessionStats } from '../contexts/SessionContext.js';
 import { useKeypress } from './useKeypress.js';
 import type { LoadedSettings } from '../../config/settings.js';
+import { t } from '../../i18n/index.js';
 
 const debugLogger = createDebugLogger('GEMINI_STREAM');
 
@@ -125,6 +126,13 @@ export const useGeminiStream = (
   const [thought, setThought] = useState<ThoughtSummary | null>(null);
   const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] =
     useStateAndRef<HistoryItemWithoutId | null>(null);
+  const [pendingRetryErrorItem, setPendingRetryErrorItem] =
+    useState<HistoryItemWithoutId | null>(null);
+  const [pendingRetryCountdownItem, setPendingRetryCountdownItem] =
+    useState<HistoryItemWithoutId | null>(null);
+  const retryCountdownTimerRef = useRef<ReturnType<typeof setInterval> | null>(
+    null,
+  );
   const processedMemoryToolsRef = useRef<Set<string>>(new Set());
   const {
     startNewPrompt,
@@ -189,6 +197,67 @@ export const useGeminiStream = (
     onComplete: (result: { userSelection: 'disable' | 'keep' }) => void;
   } | null>(null);
 
+  // Cancels the once-per-second countdown interval, if one is running.
+  const stopRetryCountdownTimer = useCallback(() => {
+    if (retryCountdownTimerRef.current) {
+      clearInterval(retryCountdownTimerRef.current);
+      retryCountdownTimerRef.current = null;
+    }
+  }, []);
+
+  // Stops the timer and drops both pending retry items (error + countdown).
+  const clearRetryCountdown = useCallback(() => {
+    stopRetryCountdownTimer();
+    setPendingRetryErrorItem(null);
+    setPendingRetryCountdownItem(null);
+  }, [stopRetryCountdownTimer]);
+
+  // Publishes a static error item plus a countdown item whose text is rebuilt
+  // from elapsed wall-clock time every second until delayMs has passed.
+  const startRetryCountdown = useCallback(
+    (retryInfo: {
+      reason: string;
+      attempt: number;
+      maxRetries: number;
+      delayMs: number;
+    }) => {
+      stopRetryCountdownTimer();
+      const startTime = Date.now();
+      const { reason, attempt, maxRetries, delayMs } = retryInfo;
+      setPendingRetryErrorItem({
+        type: MessageType.ERROR,
+        text: t('Rate limit error: {{reason}}', { reason }),
+      });
+
+      const updateCountdown = () => {
+        const elapsedMs = Date.now() - startTime;
+        const remainingMs = Math.max(0, delayMs - elapsedMs);
+        const remainingSec = Math.ceil(remainingMs / 1000);
+        // 'retry_countdown' is a HistoryItemWithoutId member, so no cast is needed.
+        setPendingRetryCountdownItem({
+          type: 'retry_countdown',
+          text: t(
+            'Retrying in {{seconds}} seconds… (attempt {{attempt}}/{{maxRetries}})',
+            {
+              seconds: String(remainingSec),
+              attempt: String(attempt),
+              maxRetries: String(maxRetries),
+            },
+          ),
+        });
+        if (remainingMs <= 0) {
+          stopRetryCountdownTimer();
+        }
+      };
+
+      updateCountdown();
+      retryCountdownTimerRef.current = setInterval(updateCountdown, 1000);
+    },
+    [stopRetryCountdownTimer],
+  );
+  // Unmount cleanup: never leave the interval running.
+  useEffect(() => () => stopRetryCountdownTimer(), [stopRetryCountdownTimer]);
+
   const onExec = useCallback(async (done: Promise<void>) => {
     setIsResponding(true);
     await done;
@@ -295,6 +364,7 @@ export const useGeminiStream = (
       Date.now(),
     );
     setPendingHistoryItem(null);
+    clearRetryCountdown();
     onCancelSubmit();
     setIsResponding(false);
     setShellInputFocused(false);
@@ -305,6 +375,7 @@ export const useGeminiStream = (
     onCancelSubmit,
     pendingHistoryItemRef,
     setShellInputFocused,
+    clearRetryCountdown,
     config,
     getPromptCount,
   ]);
@@ -609,10 +680,17 @@ export const useGeminiStream = (
         { type: MessageType.INFO, text: 'User cancelled the request.' },
         userMessageTimestamp,
       );
+      clearRetryCountdown();
       setIsResponding(false);
       setThought(null); // Reset thought when user cancels
     },
-    [addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought],
+    [
+      addItem,
+      pendingHistoryItemRef,
+      setPendingHistoryItem,
+      setThought,
+      clearRetryCountdown,
+    ],
   );
 
   const handleErrorEvent = useCallback(
@@ -631,9 +709,17 @@ export const useGeminiStream = (
         },
         userMessageTimestamp,
       );
+      clearRetryCountdown();
       setThought(null); // Reset thought when there's an error
     },
-    [addItem, pendingHistoryItemRef, setPendingHistoryItem, config, setThought],
+    [
+      addItem,
+      pendingHistoryItemRef,
+      setPendingHistoryItem,
+      config,
+      setThought,
+      clearRetryCountdown,
+    ],
   );
 
   const handleCitationEvent = useCallback(
@@ -693,8 +779,9 @@ export const useGeminiStream = (
           userMessageTimestamp,
         );
       }
+      clearRetryCountdown();
     },
-    [addItem],
+    [addItem, clearRetryCountdown],
   );
 
   const handleChatCompressionEvent = useCallback(
@@ -853,7 +940,16 @@ export const useGeminiStream = (
             loopDetectedRef.current = true;
             break;
           case ServerGeminiEventType.Retry:
-            // Will add the missing logic later
+            // Clear any pending partial content from the failed attempt
+            if (pendingHistoryItemRef.current) {
+              setPendingHistoryItem(null);
+            }
+            // Show retry info if available (rate-limit / throttling errors)
+            if (event.retryInfo) {
+              startRetryCountdown(event.retryInfo);
+            } else {
+              clearRetryCountdown();
+            }
             break;
           default: {
             // enforces exhaustive switch-case
@@ -878,7 +974,11 @@ export const useGeminiStream = (
       handleMaxSessionTurnsEvent,
       handleSessionTokenLimitExceededEvent,
       handleCitationEvent,
+      startRetryCountdown,
+      clearRetryCountdown,
       setThought,
+      pendingHistoryItemRef,
+      setPendingHistoryItem,
     ],
   );
 
@@ -1216,10 +1316,18 @@ export const useGeminiStream = (
 
   const pendingHistoryItems = useMemo(
     () =>
-      [pendingHistoryItem, pendingToolCallGroupDisplay].filter(
-        (i) => i !== undefined && i !== null,
-      ),
-    [pendingHistoryItem, pendingToolCallGroupDisplay],
+      [
+        pendingHistoryItem,
+        pendingRetryErrorItem,
+        pendingRetryCountdownItem,
+        pendingToolCallGroupDisplay,
+      ].filter((i) => i !== undefined && i !== null),
+    [
+      pendingHistoryItem,
+      pendingRetryErrorItem,
+      pendingRetryCountdownItem,
+      pendingToolCallGroupDisplay,
+    ],
   );
 
   useEffect(() => {
diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts
index b111f9ac7..ae799bfa6 100644
--- a/packages/cli/src/ui/types.ts
+++ b/packages/cli/src/ui/types.ts
@@ -128,6 +128,11 @@ export type HistoryItemWarning = HistoryItemBase & {
   text: string;
 };
 
+export type HistoryItemRetryCountdown = HistoryItemBase & {
+  type: 'retry_countdown';
+  text: string;
+};
+
 export type HistoryItemAbout = HistoryItemBase & {
   type: 'about';
   systemInfo: {
@@ -265,6 +270,7 @@ export type HistoryItemWithoutId =
   | HistoryItemInfo
   | HistoryItemError
   | HistoryItemWarning
+  | HistoryItemRetryCountdown
   | HistoryItemAbout
   | HistoryItemHelp
   | HistoryItemToolGroup
diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts
index 538d782e6..a5b0c9612 100644
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -938,7 +938,7 @@ describe('GeminiChat', () => {
         const tpmError = new StreamContentError('Throttling: TPM(1/1)');
         async function* failingStreamGenerator() {
           throw tpmError;
-           
+
           yield {} as GenerateContentResponse;
         }
         const failingStream = failingStreamGenerator();
@@ -1005,6 +1005,91 @@ describe('GeminiChat', () => {
       }
     });
 
+    it('should retry on GLM rate limit StreamContentError with backoff delay', async () => {
+      vi.useFakeTimers();
+
+      try {
+        const glmError = new StreamContentError(
+          '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+        );
+        async function* failingStreamGenerator() {
+          throw glmError;
+          // Never reached; present only so the generator body contains a yield.
+          yield {} as GenerateContentResponse;
+        }
+        const failingStream = failingStreamGenerator();
+        const successStream = (async function* () {
+          yield {
+            candidates: [
+              {
+                content: { parts: [{ text: 'Success after GLM retry' }] },
+                finishReason: 'STOP',
+              },
+            ],
+          } as unknown as GenerateContentResponse;
+        })();
+
+        vi.mocked(mockContentGenerator.generateContentStream)
+          .mockResolvedValueOnce(failingStream)
+          .mockResolvedValueOnce(successStream);
+
+        const stream = await chat.sendMessageStream(
+          'test-model',
+          { message: 'test' },
+          'prompt-id-glm-retry',
+        );
+
+        const iterator = stream[Symbol.asyncIterator]();
+        const first = await iterator.next();
+
+        expect(first.done).toBe(false);
+        expect(first.value.type).toBe(StreamEventType.RETRY);
+
+        // Resume generator to schedule the rate limit delay, then advance timers.
+        const secondPromise = iterator.next();
+        await vi.advanceTimersByTimeAsync(1_500);
+        const second = await secondPromise;
+
+        expect(second.done).toBe(false);
+        expect(second.value.type).toBe(StreamEventType.RETRY);
+
+        // retryInfo travels on the first RETRY event, the one emitted before the
+        // backoff delay; read it unguarded so a missing payload fails the test.
+        const retryInfo =
+          first.value.type === StreamEventType.RETRY
+            ? first.value.retryInfo
+            : undefined;
+        expect(retryInfo?.reason).toContain('速率限制');
+        expect(retryInfo?.attempt).toBe(1);
+        expect(retryInfo?.maxRetries).toBe(3);
+        expect(retryInfo?.delayMs).toBe(1500);
+
+        const events: StreamEvent[] = [first.value, second.value];
+        for (;;) {
+          const next = await iterator.next();
+          if (next.done) break;
+          events.push(next.value);
+        }
+
+        expect(
+          mockContentGenerator.generateContentStream,
+        ).toHaveBeenCalledTimes(2);
+        expect(
+          events.filter((e) => e.type === StreamEventType.RETRY),
+        ).toHaveLength(2);
+        expect(
+          events.some(
+            (e) =>
+              e.type === StreamEventType.CHUNK &&
+              e.value.candidates?.[0]?.content?.parts?.[0]?.text ===
+                'Success after GLM retry',
+          ),
+        ).toBe(true);
+      } finally {
+        vi.useRealTimers();
+      }
+    });
+
     describe('API error retry behavior', () => {
       beforeEach(() => {
         // Use a more direct mock for retry testing
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index ee16bb669..853eed0b0 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -20,9 +20,8 @@ import { createUserContent } from '@google/genai';
 import {
   getErrorStatus,
   retryWithBackoff,
-  isTPMThrottlingError,
+  getRateLimitRetryInfo,
 } from '../utils/retry.js';
-import { StreamContentError } from './openaiContentGenerator/pipeline.js';
 import { createDebugLogger } from '../utils/debugLogger.js';
 import type { Config } from '../config/config.js';
 import { hasCycleInSchema } from '../tools/tools.js';
@@ -48,9 +47,20 @@ export enum StreamEventType {
   RETRY = 'retry',
 }
 
+export interface RetryInfo {
+  /** Human-readable error reason. */
+  reason: string;
+  /** Current retry attempt (1-based). */
+  attempt: number;
+  /** Max retries allowed. */
+  maxRetries: number;
+  /** Delay in milliseconds before the retry happens. */
+  delayMs: number;
+}
+
 export type StreamEvent =
   | { type: StreamEventType.CHUNK; value: GenerateContentResponse }
-  | { type: StreamEventType.RETRY };
+  | { type: StreamEventType.RETRY; retryInfo?: RetryInfo };
 
 /**
  * Options for retrying due to invalid content from the model.
@@ -68,14 +78,22 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
 };
 
 /**
- * Options for retrying on TPM (Tokens Per Minute) throttling errors.
- * These errors occur when the API rate limit is exceeded and are returned
- * as stream content (finish_reason="error_finish") rather than HTTP errors.
+ * Options for retrying on rate-limit throttling errors returned as stream content.
  */
-const TPM_RETRY_OPTIONS = {
+const RATE_LIMIT_RETRY_OPTIONS = {
   maxRetries: 3,
-  delayMs: 60_000, // 1 minute - TPM quota typically resets within a minute window
 };
+
+// Bounds (ms) for the backoff used when no fixed retry delay is known.
+const RATE_LIMIT_BACKOFF_OPTIONS = { initialDelayMs: 1500, maxDelayMs: 30000 };
+
+// Delay for a 1-based retry count: initial delay doubled per retry, capped.
+function getRateLimitBackoffDelay(retryCount: number): number {
+  const { initialDelayMs, maxDelayMs } = RATE_LIMIT_BACKOFF_OPTIONS;
+  const doublings = retryCount - 1;
+  const uncapped = initialDelayMs * Math.pow(2, doublings);
+  return uncapped > maxDelayMs ? maxDelayMs : uncapped;
+}
 /**
  * Returns true if the response is valid, false otherwise.
  *
@@ -286,7 +304,7 @@ export class GeminiChat {
     return (async function* () {
       try {
         let lastError: unknown = new Error('Request failed after all retries.');
-        let tpmRetryCount = 0;
+        let rateLimitRetryCount = 0;
 
         for (
           let attempt = 0;
@@ -294,7 +312,7 @@ export class GeminiChat {
           attempt++
         ) {
           try {
-            if (attempt > 0 || tpmRetryCount > 0) {
+            if (attempt > 0 || rateLimitRetryCount > 0) {
               yield { type: StreamEventType.RETRY };
             }
 
@@ -314,25 +332,35 @@ export class GeminiChat {
           } catch (error) {
             lastError = error;
 
-            // Handle TPM throttling errors returned as stream content.
+            // Handle rate-limit / throttling errors returned as stream content.
             // These arrive as StreamContentError with finish_reason="error_finish"
             // from the pipeline, containing the throttling message in the content.
+            // Covers TPM throttling, GLM rate limits, and other provider throttling.
+            const rateLimitInfo = getRateLimitRetryInfo(error);
             if (
-              (error instanceof StreamContentError ||
-                isTPMThrottlingError(error)) &&
-              tpmRetryCount < TPM_RETRY_OPTIONS.maxRetries
+              rateLimitInfo &&
+              rateLimitRetryCount < RATE_LIMIT_RETRY_OPTIONS.maxRetries
             ) {
-              tpmRetryCount++;
+              rateLimitRetryCount++;
+              const delayMs =
+                rateLimitInfo.delayMs ??
+                getRateLimitBackoffDelay(rateLimitRetryCount);
               debugLogger.warn(
-                `TPM throttling detected (retry ${tpmRetryCount}/${TPM_RETRY_OPTIONS.maxRetries}). ` +
-                  `Waiting ${TPM_RETRY_OPTIONS.delayMs / 1000}s before retrying...`,
+                `Rate limit throttling detected (retry ${rateLimitRetryCount}/${RATE_LIMIT_RETRY_OPTIONS.maxRetries}). ` +
+                  `Waiting ${delayMs / 1000}s before retrying...`,
               );
-              yield { type: StreamEventType.RETRY };
-              // Don't count TPM retries against the content retry limit
+              yield {
+                type: StreamEventType.RETRY,
+                retryInfo: {
+                  reason: rateLimitInfo.reason,
+                  attempt: rateLimitRetryCount,
+                  maxRetries: RATE_LIMIT_RETRY_OPTIONS.maxRetries,
+                  delayMs,
+                },
+              };
+              // Don't count rate-limit retries against the content retry limit
               attempt--;
-              await new Promise((res) =>
-                setTimeout(res, TPM_RETRY_OPTIONS.delayMs),
-              );
+              await new Promise((res) => setTimeout(res, delayMs));
               continue;
             }
 
diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts
index b600d3d99..9b50a16b5 100644
--- a/packages/core/src/core/turn.ts
+++ b/packages/core/src/core/turn.ts
@@ -26,7 +26,7 @@ import {
   UnauthorizedError,
   toFriendlyError,
 } from '../utils/errors.js';
-import type { GeminiChat } from './geminiChat.js';
+import type { GeminiChat, RetryInfo } from './geminiChat.js';
 import {
   getThoughtText,
   parseThought,
@@ -67,6 +67,7 @@ export enum GeminiEventType {
 
 export type ServerGeminiRetryEvent = {
   type: GeminiEventType.Retry;
+  retryInfo?: RetryInfo;
 };
 
 export interface StructuredError {
@@ -255,7 +256,10 @@ export class Turn {
 
         // Handle the new RETRY event
         if (streamEvent.type === 'retry') {
-          yield { type: GeminiEventType.Retry };
+          yield {
+            type: GeminiEventType.Retry,
+            retryInfo: streamEvent.retryInfo,
+          };
           continue; // Skip to the next event in the stream
         }
 
diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts
index 26cb52aa5..5dfabebbb 100644
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -9,7 +9,10 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import type { HttpError } from './retry.js';
 import {
   getErrorStatus,
+  getRateLimitRetryInfo,
+  isGLMRateLimitError,
   isTPMThrottlingError,
+  isRateLimitThrottlingError,
   retryWithBackoff,
 } from './retry.js';
 import { setSimulate429 } from './testUtils.js';
@@ -578,6 +581,119 @@ describe('isTPMThrottlingError', () => {
   });
 });
 
+describe('isRateLimitThrottlingError', () => {
+  it('should detect TPM throttling errors (superset of isTPMThrottlingError)', () => {
+    expect(
+      isRateLimitThrottlingError('Throttling: TPM(10680324/10000000)'),
+    ).toBe(true);
+    expect(
+      isRateLimitThrottlingError(
+        new Error('Throttling: TPM(10680324/10000000)'),
+      ),
+    ).toBe(true);
+  });
+
+  it('should detect GLM rate limit error (Chinese message)', () => {
+    const glmError = new Error(
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+    );
+    expect(isRateLimitThrottlingError(glmError)).toBe(true);
+  });
+
+  it('should detect GLM rate limit from nested error object', () => {
+    const error = {
+      error: {
+        message: '您的账户已达到速率限制，请您控制请求频率',
+        code: '1302',
+      },
+    };
+    expect(isRateLimitThrottlingError(error)).toBe(true);
+  });
+
+  it('should detect general Throttling: prefix errors', () => {
+    expect(
+      isRateLimitThrottlingError(new Error('Throttling: RPM exceeded')),
+    ).toBe(true);
+    expect(
+      isRateLimitThrottlingError('Throttling: concurrent limit reached'),
+    ).toBe(true);
+  });
+
+  it('should detect English rate limit errors', () => {
+    expect(isRateLimitThrottlingError(new Error('Rate limit exceeded'))).toBe(
+      true,
+    );
+    expect(
+      isRateLimitThrottlingError({
+        error: { message: 'API rate limit reached. Please slow down.' },
+      }),
+    ).toBe(true);
+  });
+
+  it('should return false for non-rate-limit errors', () => {
+    expect(isRateLimitThrottlingError('Regular error message')).toBe(false);
+    expect(isRateLimitThrottlingError(new Error('Connection refused'))).toBe(
+      false,
+    );
+    expect(isRateLimitThrottlingError(null)).toBe(false);
+    expect(isRateLimitThrottlingError(undefined)).toBe(false);
+    expect(isRateLimitThrottlingError(429)).toBe(false);
+  });
+});
+
+describe('isGLMRateLimitError', () => {
+  it('should detect GLM rate limit error from JSON string', () => {
+    const glmError =
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}';
+    expect(isGLMRateLimitError(glmError)).toBe(true);
+  });
+
+  it('should detect GLM rate limit error from Error object', () => {
+    const glmError = new Error(
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+    );
+    expect(isGLMRateLimitError(glmError)).toBe(true);
+  });
+
+  it('should detect GLM rate limit from nested error object', () => {
+    const error = {
+      error: {
+        message: '您的账户已达到速率限制，请您控制请求频率',
+        code: '1302',
+      },
+    };
+    expect(isGLMRateLimitError(error)).toBe(true);
+  });
+
+  it('should return false for non-GLM errors', () => {
+    expect(isGLMRateLimitError('Rate limit exceeded')).toBe(false);
+    expect(isGLMRateLimitError(new Error('Throttling: TPM(1/1)'))).toBe(false);
+  });
+});
+
+describe('getRateLimitRetryInfo', () => {
+  it('should return fixed delay for TPM throttling errors', () => {
+    const info = getRateLimitRetryInfo('Throttling: TPM(1/1)');
+    expect(info).not.toBeNull();
+    expect(info?.delayMs).toBe(60000);
+  });
+
+  it('should return no fixed delay for GLM 1302 rate limit errors', () => {
+    const info = getRateLimitRetryInfo(
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+    );
+    expect(info).not.toBeNull();
+    expect(info?.delayMs).toBeUndefined();
+  });
+
+  it('should extract a human-readable reason from JSON error strings', () => {
+    const info = getRateLimitRetryInfo(
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+    );
+    expect(info?.reason).toBe('您的账户已达到速率限制，请您控制请求频率');
+  });
+});
+
 describe('TPM throttling retry handling', () => {
   beforeEach(() => {
     vi.useFakeTimers();
@@ -751,4 +867,89 @@ describe('TPM throttling retry handling', () => {
     await expect(promise).rejects.toThrow('Throttling: TPM(10680324/10000000)');
     expect(fn).toHaveBeenCalledTimes(3);
   });
+
+  it('should use exponential backoff for GLM rate limit errors when delay is unknown', async () => {
+    const glmError = new Error(
+      '{"error":{"code":"1302","message":"您的账户已达到速率限制，请您控制请求频率"}}',
+    );
+
+    const fn = vi
+      .fn()
+      .mockRejectedValueOnce(glmError)
+      .mockResolvedValue('success');
+
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      maxDelayMs: 1000,
+    });
+
+    await vi.runAllTimersAsync();
+
+    await expect(promise).resolves.toBe('success');
+    expect(fn).toHaveBeenCalledTimes(2);
+
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7);
+    expect(delays[0]).toBeLessThanOrEqual(100 * 1.3);
+  });
+
+  it('should use exponential backoff for general English rate limit errors', async () => {
+    const rateLimitError = new Error('Rate limit exceeded. Please slow down.');
+
+    const fn = vi
+      .fn()
+      .mockRejectedValueOnce(rateLimitError)
+      .mockResolvedValue('success');
+
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      maxDelayMs: 1000,
+    });
+
+    await vi.runAllTimersAsync();
+
+    await expect(promise).resolves.toBe('success');
+    expect(fn).toHaveBeenCalledTimes(2);
+
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7);
+    expect(delays[0]).toBeLessThanOrEqual(100 * 1.3);
+  });
+
+  it('should retry nested GLM rate limit error objects with backoff', async () => {
+    const nestedGlmError = {
+      error: {
+        message: '您的账户已达到速率限制，请您控制请求频率',
+        code: '1302',
+      },
+    };
+
+    const fn = vi
+      .fn()
+      .mockRejectedValueOnce(nestedGlmError)
+      .mockResolvedValue('success');
+
+    const setTimeoutSpy = vi.spyOn(global, 'setTimeout');
+
+    const promise = retryWithBackoff(fn, {
+      maxAttempts: 3,
+      initialDelayMs: 100,
+      maxDelayMs: 1000,
+    });
+
+    await vi.runAllTimersAsync();
+
+    await expect(promise).resolves.toBe('success');
+    expect(fn).toHaveBeenCalledTimes(2);
+
+    const delays = setTimeoutSpy.mock.calls.map((call) => call[1] as number);
+    expect(delays[0]).toBeGreaterThanOrEqual(100 * 0.7);
+    expect(delays[0]).toBeLessThanOrEqual(100 * 1.3);
+  });
 });
diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts
index 6c580548c..73ed194a5 100644
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -25,6 +25,11 @@ export interface RetryOptions {
   authType?: string;
 }
 
+export interface RateLimitRetryInfo {
+  reason: string; // Message text describing the rate limit
+  delayMs?: number; // Fixed retry delay; undefined => use regular backoff
+}
+
 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
   maxAttempts: 7,
   initialDelayMs: 1500,
@@ -32,6 +37,12 @@ const DEFAULT_RETRY_OPTIONS: RetryOptions = {
   shouldRetryOnError: defaultShouldRetry,
 };
 
+// Z.AI GLM rate limit code reference: https://docs.z.ai/api-reference/api-code
+const GLM_RATE_LIMIT_CODE = '1302';
+
+// DashScope/Model Studio TPM rate limit reference: https://help.aliyun.com/zh/model-studio/rate-limit
+const TPM_RATE_LIMIT_DELAY_MS = 60000; // 1 minute, used as the fixed delay for TPM throttling
+
 /**
  * Default predicate function to determine if a retry should be attempted.
  * Retries on 429 (Too Many Requests) and 5xx server errors.
@@ -120,23 +131,24 @@ export async function retryWithBackoff<T>(
         throw error;
       }
 
-      // Check for TPM throttling error - use fixed 1 minute delay
-      // This check is prioritized over shouldRetryOnError because TPM errors
-      // may not have a standard HTTP status code (like 429) but still need retry
-      if (isTPMThrottlingError(error)) {
-        const tpmDelayMs = 60000; // 1 minute
+      // Check for rate-limit / throttling errors with a fixed delay.
+      // This check is prioritized over shouldRetryOnError because provider
+      // rate-limit errors may not have a standard HTTP status code (like 429)
+      // but still need retry (e.g., TPM throttling).
+      const rateLimitInfo = getRateLimitRetryInfo(error);
+      if (rateLimitInfo?.delayMs !== undefined) {
         debugLogger.warn(
-          `Attempt ${attempt} failed with TPM throttling error. Retrying after ${tpmDelayMs}ms (1 minute)...`,
+          `Attempt ${attempt} failed with rate limit error. Retrying after ${rateLimitInfo.delayMs}ms...`,
           error,
         );
-        await delay(tpmDelayMs);
-        // Reset currentDelay for next potential non-TPM error
+        await delay(rateLimitInfo.delayMs);
+        // Reset currentDelay for next potential non-rate-limit error
         currentDelay = initialDelayMs;
         continue;
       }
 
       // Check if we shouldn't retry based on error type
-      if (!shouldRetryOnError(error as Error)) {
+      if (!rateLimitInfo && !shouldRetryOnError(error as Error)) {
         throw error;
       }
 
@@ -179,12 +191,162 @@ export function isTPMThrottlingError(error: unknown): boolean {
   const checkMessage = (msg: string) => msg.includes('Throttling: TPM(');
 
   if (typeof error === 'string') return checkMessage(error);
+  if (error instanceof Error) return checkMessage(error.message);
   if (isStructuredError(error)) return checkMessage(error.message);
   if (isApiError(error)) return checkMessage(error.error.message);
 
   return false;
 }
 
+/**
+ * Checks if an error is a GLM rate limit error (code 1302), given either as
+ * an API error object or as JSON text inside a string / Error message.
+ * @param error The error object.
+ * @returns True if the error matches GLM rate limit code 1302.
+ */
+export function isGLMRateLimitError(error: unknown): boolean {
+  const matchesCode = (code: unknown): boolean =>
+    code !== undefined && String(code) === GLM_RATE_LIMIT_CODE;
+
+  if (isApiError(error)) {
+    return matchesCode(error.error.code);
+  }
+  // Plain structured errors (non-Error objects) are not inspected further.
+  if (isStructuredError(error) && !(error instanceof Error)) {
+    return false;
+  }
+
+  const message = getErrorMessage(error);
+  if (!message) {
+    return false;
+  }
+
+  const parsed = extractErrorDetailsFromString(message);
+  if (parsed && matchesCode(parsed.code)) {
+    return true;
+  }
+
+  // Fallback for JSON embedded in longer text: tolerate whitespace around the
+  // colon and reject longer codes (e.g. 13020) that merely start with 1302.
+  const pattern = `"code"\\s*:\\s*"?${GLM_RATE_LIMIT_CODE}"?(?![\\w"])`;
+  return new RegExp(pattern).test(message);
+}
+
+/**
+ * Checks if an error is a rate-limit / throttling error from any provider.
+ * This is a superset of isTPMThrottlingError that also covers:
+ * - GLM rate limit: {"error":{"code":"1302","message":"您的账户已达到速率限制..."}}
+ * - General throttling: "Throttling: ..."
+ * - English rate limit messages: "limit" plus "rate" at the start of a word
+ *   (so "generate" or "accurate" next to "limit" is not a match)
+ * @param error The error object.
+ * @returns True if the error is a rate-limit or throttling error.
+ */
+export function isRateLimitThrottlingError(error: unknown): boolean {
+  if (isTPMThrottlingError(error)) return true;
+  if (isGLMRateLimitError(error)) return true;
+  // "rate" must not be preceded by a letter, which excludes e.g. "generate".
+  const checkMessage = (msg: string): boolean => {
+    const lower = msg.toLowerCase();
+    return (
+      lower.includes('速率限制') ||
+      lower.includes('throttling:') ||
+      (/(?:^|[^a-z])rate/.test(lower) && lower.includes('limit'))
+    );
+  };
+
+  const message = getErrorMessage(error);
+  if (message) return checkMessage(message);
+
+  return false;
+}
+
+/**
+ * Builds retry metadata for an error recognised as rate-limited; returns
+ * null for any other error. TPM throttling errors get the fixed
+ * TPM_RATE_LIMIT_DELAY_MS delay, while every other rate-limit error leaves
+ * delayMs undefined so the caller can apply its own backoff.
+ */
+export function getRateLimitRetryInfo(
+  error: unknown,
+): RateLimitRetryInfo | null {
+  if (isRateLimitThrottlingError(error)) {
+    const reason = getRateLimitReason(error);
+    // Only TPM throttling carries a fixed delay.
+    const isTpm = isTPMThrottlingError(error);
+    return { reason, delayMs: isTpm ? TPM_RATE_LIMIT_DELAY_MS : undefined };
+  }
+
+  return null;
+}
+
+// Derives the reason text reported for a rate-limit error.
+function getRateLimitReason(error: unknown): string {
+  if (isApiError(error)) {
+    return error.error.message;
+  }
+  // Error instances first: they may also pass isStructuredError, which would
+  // return a JSON-encoded message raw instead of unwrapping it.
+  if (error instanceof Error) {
+    return extractReasonFromString(error.message);
+  }
+  if (isStructuredError(error)) {
+    return error.message;
+  }
+  if (typeof error === 'string') {
+    return extractReasonFromString(error);
+  }
+
+  return String(error);
+}
+
+// Reads the message text carried by a string, Error, structured error or
+// API error; yields undefined for any other value.
+function getErrorMessage(error: unknown): string | undefined {
+  if (typeof error === 'string') return error;
+  if (error instanceof Error || isStructuredError(error)) return error.message;
+  return isApiError(error) ? error.error.message : undefined;
+}
+
+// Prefers the message found in a JSON error payload; otherwise returns the
+// input string unchanged.
+function extractReasonFromString(message: string): string {
+  const details = extractErrorDetailsFromString(message);
+  // A missing or empty parsed message falls back to the raw input.
+  return details?.message ? details.message : message;
+}
+
+// Parses a JSON error body (optional "data:" prefix) to code/message or null.
+function extractErrorDetailsFromString(
+  message: string,
+): { code?: unknown; message?: string } | null {
+  const trimmed = message.trim().replace(/^data:\s*/i, '');
+  if (!trimmed.startsWith('{')) {
+    return null;
+  }
+  try {
+    const parsed = JSON.parse(trimmed) as unknown;
+    if (!parsed || typeof parsed !== 'object') {
+      return null;
+    }
+    const root = parsed as Record<string, unknown>;
+    // typeof null is 'object': skip a null `error` so top-level fields count.
+    const errorObject =
+      root['error'] && typeof root['error'] === 'object'
+        ? (root['error'] as Record<string, unknown>)
+        : root;
+    const code = errorObject['code'];
+    const text = errorObject['message'];
+    const messageValue = typeof text === 'string' ? text : undefined;
+    if (code === undefined && messageValue === undefined) {
+      return null;
+    }
+    return { code, message: messageValue };
+  } catch {
+    return null;
+  }
+}
+
 /**
  * Extracts the HTTP status code from an error object.
  *