fix: add exponential backoff to withRetry, bump install retries to 4 (#2634)

Fixes "Connection reset by peer" failures on spotty networks by doubling
the delay on each retry (10s→20s→40s for installAgent, 5s→10s→20s for
uploadConfigFile) and giving both 4 attempts instead of 2.

Fixes #2631

Agent: ux-engineer

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
A 2026-03-14 16:11:53 -07:00 committed by GitHub
parent cef7c69522
commit 5ceffbc519
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 7 additions and 4 deletions

View file

@ -51,7 +51,7 @@ async function installAgent(
): Promise<void> {
logStep(`Installing ${agentName}...`);
const r = await asyncTryCatch(() =>
withRetry(`${agentName} install`, () => wrapSshCall(runner.runServer(installCmd, timeoutSecs)), 2, 10),
withRetry(`${agentName} install`, () => wrapSshCall(runner.runServer(installCmd, timeoutSecs)), 4, 10, true),
);
if (!r.ok) {
logError(`${agentName} installation failed`);
@ -82,8 +82,9 @@ async function uploadConfigFile(runner: CloudRunner, content: string, remotePath
);
})(),
),
2,
4,
5,
true,
),
);
tryCatchIf(isOperationalError, () => unlinkSync(tmpFile));

View file

@ -213,6 +213,7 @@ export async function withRetry<T>(
fn: () => Promise<Result<T>>,
maxAttempts = 3,
delaySec = 5,
exponential = false,
): Promise<T> {
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
const result = await fn(); // throws → not retried (non-retryable)
@ -222,8 +223,9 @@ export async function withRetry<T>(
if (attempt >= maxAttempts) {
throw result.error;
}
logWarn(`${label} failed (attempt ${attempt}/${maxAttempts}), retrying in ${delaySec}s...`);
await new Promise((r) => setTimeout(r, delaySec * 1000));
const delay = exponential ? delaySec * 2 ** (attempt - 1) : delaySec;
logWarn(`${label} failed (attempt ${attempt}/${maxAttempts}), retrying in ${delay}s...`);
await new Promise((r) => setTimeout(r, delay * 1000));
}
throw new Error("unreachable");
}