fix(core): align retry diagnostics with retry policy

2026-05-19 07:54:38 +00:00 · 2026-05-18 13:26:54 +08:00 · 2026-05-18 13:26:54 +08:00 · b8b3ac8b2e
commit b8b3ac8b2e
parent f3c8d0ca55
5 changed files with 68 additions and 25 deletions
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@ -796,6 +796,7 @@ export class GeminiChat {
              const details = getRateLimitErrorDetails(error);
              const classification = classifyRetryError(error, {
                authType: cgConfig?.authType,
+                extraRetryErrorCodes,
              });
              // The classifier is observation-only here; stream retry control
              // remains governed by isRateLimitError and the retry budget.
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@ -243,7 +243,7 @@ export async function retryWithBackoff<T>(

      // Classification is diagnostic-only in this PR; retry control still
      // follows shouldRetryOnError and the persistent retry policy below.
-      const retryClassification = classifyRetryError(error, { authType });
+      const retryDiagnostics = classifyRetryError(error, { authType });

      // Determine if this error qualifies for persistent retry.
      // Persistent mode still respects shouldRetryOnError — callers can force
@ -286,7 +286,7 @@ export async function retryWithBackoff<T>(
        debugLogger.warn(
          `[Persistent] Attempt ${reportedAttempt} failed with status ${errorStatus ?? 'unknown'}. ` +
            `Retrying in ${Math.ceil(delayMs / 1000)}s...`,
-          retryClassification,
+          retryDiagnostics,
          error,
        );

@ -311,7 +311,7 @@ export async function retryWithBackoff<T>(
        if (retryAfterMs !== null && retryAfterMs > 0) {
          debugLogger.warn(
            `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${retryAfterMs}ms...`,
-            retryClassification,
+            retryDiagnostics,
            error,
          );
          // Normal HTTP retries intentionally preserve provider-directed
@ -321,7 +321,7 @@ export async function retryWithBackoff<T>(
          await delay(retryAfterMs, signal);
          currentDelay = initialDelayMs;
        } else {
-          logRetryAttempt(attempt, error, retryClassification, errorStatus);
+          logRetryAttempt(attempt, error, retryDiagnostics, errorStatus);
          const delayMs = getRetryDelayMs({
            // attempt: 1 — currentDelay already tracks exponential growth;
            // getRetryDelayMs is called here only for jitter calculation.
@ -350,7 +350,7 @@ export async function retryWithBackoff<T>(
 function logRetryAttempt(
  attempt: number,
  error: unknown,
-  retryClassification: ReturnType<typeof classifyRetryError>,
+  retryDiagnostics: ReturnType<typeof classifyRetryError>,
  errorStatus?: number,
 ): void {
  const message = errorStatus
@ -358,10 +358,10 @@ function logRetryAttempt(
    : `Attempt ${attempt} failed. Retrying with backoff...`;

  if (errorStatus === 429) {
-    debugLogger.warn(message, retryClassification, error);
+    debugLogger.warn(message, retryDiagnostics, error);
  } else if (errorStatus && errorStatus >= 500 && errorStatus < 600) {
-    debugLogger.error(message, retryClassification, error);
+    debugLogger.error(message, retryDiagnostics, error);
  } else {
-    debugLogger.warn(message, retryClassification, error);
+    debugLogger.warn(message, retryDiagnostics, error);
  }
 }
--- a/packages/core/src/utils/retryErrorClassification.test.ts
+++ b/packages/core/src/utils/retryErrorClassification.test.ts
@ -59,6 +59,21 @@ describe('classifyRetryError', () => {
    });
  });

+  it('honors caller-provided extra rate-limit codes in diagnostics', () => {
+    expect(
+      classifyRetryError(
+        { error: { code: 4999, message: 'Provider-specific throttle' } },
+        { extraRetryErrorCodes: [4999] },
+      ),
+    ).toMatchObject({
+      kind: 'provider',
+      diagnosis: 'retryable',
+      providerCode: '4999',
+      providerMessage: 'Provider-specific throttle',
+      reason: 'rate-limit',
+    });
+  });
+
  it('classifies SSE-embedded non-quota 429 errors as retryable rate limiting', () => {
    const error = new Error(
      'id:1\nevent:error\n:HTTP_STATUS/429\ndata:{"request_id":"req-1","code":"Throttling.RateLimit","message":"Rate limit exceeded"}',
@ -277,6 +292,22 @@ describe('classifyRetryError', () => {
    expect(classification).not.toHaveProperty('providerMessage');
  });

+  it('extracts provider fields from Error instances with direct SDK properties', () => {
+    const error = Object.assign(new Error('Provider-specific throttle'), {
+      code: 'Throttling.Custom',
+      request_id: 'req-direct-error',
+    });
+
+    expect(classifyRetryError(error)).toMatchObject({
+      kind: 'unknown',
+      diagnosis: 'unknown',
+      providerCode: 'Throttling.Custom',
+      providerMessage: 'Provider-specific throttle',
+      requestId: 'req-direct-error',
+      reason: 'unclassified',
+    });
+  });
+
  it('does not copy unparsed SSE frames into providerMessage', () => {
    const classification = classifyRetryError(
      new Error('id:1\nevent:error\n:HTTP_STATUS/429\ndata:not-json'),
--- a/packages/core/src/utils/retryErrorClassification.ts
+++ b/packages/core/src/utils/retryErrorClassification.ts
@ -26,6 +26,7 @@ export type RetryErrorDiagnosis =

 export interface RetryErrorClassificationContext {
  authType?: AuthType | string;
+  extraRetryErrorCodes?: readonly number[];
 }

 export interface RetryErrorClassification {
@ -92,7 +93,7 @@ export function classifyRetryError(
    };
  }

-  if (isRateLimitError(error)) {
+  if (isRateLimitError(error, context.extraRetryErrorCodes)) {
    const kind: RetryErrorKind =
      details.transport === 'sse'
        ? 'sse-provider'
@ -237,28 +238,37 @@ function getProviderFields(error: unknown): ProviderFields {
    return {};
  }

-  if (error instanceof Error) {
-    return {};
-  }
-
  const source = error as {
    code?: unknown;
    message?: unknown;
    request_id?: unknown;
    requestId?: unknown;
  };
+  const rawCode =
+    typeof source.code === 'string' || typeof source.code === 'number'
+      ? String(source.code)
+      : undefined;
+  const providerCode =
+    error instanceof Error && rawCode?.startsWith('ERR_')
+      ? undefined
+      : rawCode;
+  const requestId =
+    typeof source.request_id === 'string'
+      ? source.request_id
+      : typeof source.requestId === 'string'
+        ? source.requestId
+        : undefined;
+  const providerMessage =
+    typeof source.message === 'string' &&
+    (!(error instanceof Error) ||
+      providerCode !== undefined ||
+      requestId !== undefined)
+      ? source.message
+      : undefined;

  return {
-    ...(typeof source.code === 'string' || typeof source.code === 'number'
-      ? { providerCode: String(source.code) }
-      : {}),
-    ...(typeof source.message === 'string'
-      ? { providerMessage: source.message }
-      : {}),
-    ...(typeof source.request_id === 'string'
-      ? { requestId: source.request_id }
-      : typeof source.requestId === 'string'
-        ? { requestId: source.requestId }
-        : {}),
+    ...(providerCode !== undefined ? { providerCode } : {}),
+    ...(providerMessage !== undefined ? { providerMessage } : {}),
+    ...(requestId !== undefined ? { requestId } : {}),
  };
 }
--- a/packages/core/src/utils/retryPolicy.ts
+++ b/packages/core/src/utils/retryPolicy.ts
@ -22,7 +22,8 @@ export interface RetryDelayPolicyOptions {
 *
 * Retry-After handling depends on `retryAfterMode`:
 *   - `'ignore'` (default): do not parse Retry-After; always return the
- *     exponential delay (with optional jitter).
+ *     exponential delay (with optional jitter). Passing `error` alone does not
+ *     enable Retry-After handling.
 *   - `'minimum'`: use Retry-After as a floor on the exponential delay.
 *
 * When Retry-After is honored, `jitterRatio` is intentionally not applied —