fix(core): align retry diagnostics with retry policy

This commit is contained in:
yiliang114 2026-05-18 13:26:54 +08:00
parent f3c8d0ca55
commit b8b3ac8b2e
5 changed files with 68 additions and 25 deletions

View file

@ -796,6 +796,7 @@ export class GeminiChat {
const details = getRateLimitErrorDetails(error);
const classification = classifyRetryError(error, {
authType: cgConfig?.authType,
extraRetryErrorCodes,
});
// The classifier is observation-only here; stream retry control
// remains governed by isRateLimitError and the retry budget.

View file

@ -243,7 +243,7 @@ export async function retryWithBackoff<T>(
// Classification is diagnostic-only in this PR; retry control still
// follows shouldRetryOnError and the persistent retry policy below.
const retryClassification = classifyRetryError(error, { authType });
const retryDiagnostics = classifyRetryError(error, { authType });
// Determine if this error qualifies for persistent retry.
// Persistent mode still respects shouldRetryOnError — callers can force
@ -286,7 +286,7 @@ export async function retryWithBackoff<T>(
debugLogger.warn(
`[Persistent] Attempt ${reportedAttempt} failed with status ${errorStatus ?? 'unknown'}. ` +
`Retrying in ${Math.ceil(delayMs / 1000)}s...`,
retryClassification,
retryDiagnostics,
error,
);
@ -311,7 +311,7 @@ export async function retryWithBackoff<T>(
if (retryAfterMs !== null && retryAfterMs > 0) {
debugLogger.warn(
`Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${retryAfterMs}ms...`,
retryClassification,
retryDiagnostics,
error,
);
// Normal HTTP retries intentionally preserve provider-directed
@ -321,7 +321,7 @@ export async function retryWithBackoff<T>(
await delay(retryAfterMs, signal);
currentDelay = initialDelayMs;
} else {
logRetryAttempt(attempt, error, retryClassification, errorStatus);
logRetryAttempt(attempt, error, retryDiagnostics, errorStatus);
const delayMs = getRetryDelayMs({
// attempt: 1 — currentDelay already tracks exponential growth;
// getRetryDelayMs is called here only for jitter calculation.
@ -350,7 +350,7 @@ export async function retryWithBackoff<T>(
function logRetryAttempt(
attempt: number,
error: unknown,
retryClassification: ReturnType<typeof classifyRetryError>,
retryDiagnostics: ReturnType<typeof classifyRetryError>,
errorStatus?: number,
): void {
const message = errorStatus
@ -358,10 +358,10 @@ function logRetryAttempt(
: `Attempt ${attempt} failed. Retrying with backoff...`;
if (errorStatus === 429) {
debugLogger.warn(message, retryClassification, error);
debugLogger.warn(message, retryDiagnostics, error);
} else if (errorStatus && errorStatus >= 500 && errorStatus < 600) {
debugLogger.error(message, retryClassification, error);
debugLogger.error(message, retryDiagnostics, error);
} else {
debugLogger.warn(message, retryClassification, error);
debugLogger.warn(message, retryDiagnostics, error);
}
}

View file

@ -59,6 +59,21 @@ describe('classifyRetryError', () => {
});
});
it('honors caller-provided extra rate-limit codes in diagnostics', () => {
expect(
classifyRetryError(
{ error: { code: 4999, message: 'Provider-specific throttle' } },
{ extraRetryErrorCodes: [4999] },
),
).toMatchObject({
kind: 'provider',
diagnosis: 'retryable',
providerCode: '4999',
providerMessage: 'Provider-specific throttle',
reason: 'rate-limit',
});
});
it('classifies SSE-embedded non-quota 429 errors as retryable rate limiting', () => {
const error = new Error(
'id:1\nevent:error\n:HTTP_STATUS/429\ndata:{"request_id":"req-1","code":"Throttling.RateLimit","message":"Rate limit exceeded"}',
@ -277,6 +292,22 @@ describe('classifyRetryError', () => {
expect(classification).not.toHaveProperty('providerMessage');
});
it('extracts provider fields from Error instances with direct SDK properties', () => {
const error = Object.assign(new Error('Provider-specific throttle'), {
code: 'Throttling.Custom',
request_id: 'req-direct-error',
});
expect(classifyRetryError(error)).toMatchObject({
kind: 'unknown',
diagnosis: 'unknown',
providerCode: 'Throttling.Custom',
providerMessage: 'Provider-specific throttle',
requestId: 'req-direct-error',
reason: 'unclassified',
});
});
it('does not copy unparsed SSE frames into providerMessage', () => {
const classification = classifyRetryError(
new Error('id:1\nevent:error\n:HTTP_STATUS/429\ndata:not-json'),

View file

@ -26,6 +26,7 @@ export type RetryErrorDiagnosis =
export interface RetryErrorClassificationContext {
authType?: AuthType | string;
extraRetryErrorCodes?: readonly number[];
}
export interface RetryErrorClassification {
@ -92,7 +93,7 @@ export function classifyRetryError(
};
}
if (isRateLimitError(error)) {
if (isRateLimitError(error, context.extraRetryErrorCodes)) {
const kind: RetryErrorKind =
details.transport === 'sse'
? 'sse-provider'
@ -237,28 +238,37 @@ function getProviderFields(error: unknown): ProviderFields {
return {};
}
if (error instanceof Error) {
return {};
}
const source = error as {
code?: unknown;
message?: unknown;
request_id?: unknown;
requestId?: unknown;
};
const rawCode =
typeof source.code === 'string' || typeof source.code === 'number'
? String(source.code)
: undefined;
const providerCode =
error instanceof Error && rawCode?.startsWith('ERR_')
? undefined
: rawCode;
const requestId =
typeof source.request_id === 'string'
? source.request_id
: typeof source.requestId === 'string'
? source.requestId
: undefined;
const providerMessage =
typeof source.message === 'string' &&
(!(error instanceof Error) ||
providerCode !== undefined ||
requestId !== undefined)
? source.message
: undefined;
return {
...(typeof source.code === 'string' || typeof source.code === 'number'
? { providerCode: String(source.code) }
: {}),
...(typeof source.message === 'string'
? { providerMessage: source.message }
: {}),
...(typeof source.request_id === 'string'
? { requestId: source.request_id }
: typeof source.requestId === 'string'
? { requestId: source.requestId }
: {}),
...(providerCode !== undefined ? { providerCode } : {}),
...(providerMessage !== undefined ? { providerMessage } : {}),
...(requestId !== undefined ? { requestId } : {}),
};
}

View file

@ -22,7 +22,8 @@ export interface RetryDelayPolicyOptions {
*
* Retry-After handling depends on `retryAfterMode`:
* - `'ignore'` (default): do not parse Retry-After; always return the
* exponential delay (with optional jitter).
* exponential delay (with optional jitter). Passing `error` alone does not
* enable Retry-After handling.
* - `'minimum'`: use Retry-After as a floor on the exponential delay.
*
* When Retry-After is honored, `jitterRatio` is intentionally not applied