diff --git a/cli/src/commands.ts b/cli/src/commands.ts
index 919863bb..f40b5864 100644
--- a/cli/src/commands.ts
+++ b/cli/src/commands.ts
@@ -657,6 +657,46 @@ export function isRetryableExitCode(errMsg: string): boolean {
   return code === 255;
 }
 
+/**
+ * Warn the user and exit with status 130 when the script was interrupted (Ctrl+C).
+ * Returns normally for any other error message so the caller can keep handling it.
+ */
+function handleUserInterrupt(errMsg: string): void {
+  if (!errMsg.includes("interrupted by user")) return;
+  console.error();
+  p.log.warn("Script interrupted (Ctrl+C).");
+  p.log.warn("If a server was already created, it may still be running.");
+  p.log.warn(` Check your cloud provider dashboard to stop or delete any unused servers.`);
+  process.exit(130);
+}
+
+/**
+ * Run the script via runBash, making up to MAX_RETRIES + 1 attempts and retrying
+ * only failures accepted by isRetryableExitCode, waiting RETRY_DELAYS[attempt - 1] seconds.
+ * @returns undefined on success, otherwise the error message of the last failed attempt.
+ */
+async function runWithRetries(script: string, prompt?: string): Promise<string | undefined> {
+  for (let attempt = 1; attempt <= MAX_RETRIES + 1; attempt++) {
+    try {
+      await runBash(script, prompt);
+      return undefined; // success
+    } catch (err) {
+      const errMsg = getErrorMessage(err);
+      handleUserInterrupt(errMsg);
+
+      if (attempt <= MAX_RETRIES && isRetryableExitCode(errMsg)) {
+        const delay = RETRY_DELAYS[attempt - 1];
+        p.log.warn(`Script failed (${errMsg}). Retrying in ${delay}s (attempt ${attempt + 1}/${MAX_RETRIES + 1})...`);
+        await new Promise(r => setTimeout(r, delay * 1000));
+        continue;
+      }
+
+      return errMsg;
+    }
+  }
+  // Reached only if the loop body never runs (MAX_RETRIES < 0); every iteration returns or continues.
+  return "Script failed after all retries";
+}
+
 async function execScript(cloud: string, agent: string, prompt?: string, authHint?: string): Promise {
   const url = `https://openrouter.ai/labs/spawn/${cloud}/${agent}.sh`;
   const ghUrl = `${RAW_BASE}/${cloud}/${agent}.sh`;
@@ -680,36 +720,11 @@ async function execScript(cloud: string, agent: string, prompt?: string, authHin
     // Non-fatal: don't block the spawn if history write fails
   }
 
-  let lastErr: string | undefined;
-  for (let attempt = 1; attempt <= MAX_RETRIES + 1; attempt++) {
-    try {
-      await runBash(scriptContent, prompt);
-      return; // success
-    } catch (err) {
-      const errMsg = getErrorMessage(err);
-      if (errMsg.includes("interrupted by user")) {
-        console.error();
-        p.log.warn("Script interrupted (Ctrl+C).");
-        p.log.warn("If a server was already created, it may still be running.");
-        p.log.warn(` Check your cloud provider dashboard to stop or delete any unused servers.`);
-        process.exit(130);
-      }
-      lastErr = errMsg;
-
-      // Only retry for potentially transient failures
-      if (attempt <= MAX_RETRIES && isRetryableExitCode(errMsg)) {
-        const delay = RETRY_DELAYS[attempt - 1];
-        p.log.warn(`Script failed (${errMsg}). Retrying in ${delay}s (attempt ${attempt + 1}/${MAX_RETRIES + 1})...`);
-        await new Promise(r => setTimeout(r, delay * 1000));
-        continue;
-      }
-
-      // Non-retryable or out of retries
-      break;
-    }
+  const lastErr = await runWithRetries(scriptContent, prompt);
+  // Compare against undefined: a failure with an empty error message must still be reported.
+  if (lastErr !== undefined) {
+    reportScriptFailure(lastErr, cloud, agent, authHint, prompt);
   }
-
-  reportScriptFailure(lastErr!, cloud, agent, authHint, prompt);
 }
 
 function runBash(script: string, prompt?: string): Promise {
diff --git a/shared/common.sh b/shared/common.sh
index 2619f8d5..b602d945 100644
--- a/shared/common.sh
+++ b/shared/common.sh
@@ -1176,6 +1176,36 @@ _make_api_request() {
 # Retries on: 429 (rate limit), 503 (service unavailable), network errors
 # Internal retry loop shared by generic_cloud_api and generic_cloud_api_custom_auth
 # Usage: _cloud_api_retry_loop REQUEST_FUNC MAX_RETRIES API_DESCRIPTION [REQUEST_FUNC_ARGS...]
+# Classify the result of an API request attempt.
+# Returns a retry reason string on stdout if the request failed with a retryable error,
+# or empty string on success. Caller checks the return string.
+# Args: $1 = exit status of the request function ("0" means it completed); also reads API_HTTP_CODE.
+_classify_api_result() {
+    local curl_ok="${1}"
+    if [[ "${curl_ok}" != "0" ]]; then
+        echo "Cloud API network error"
+    elif [[ "${API_HTTP_CODE}" == "429" ]]; then
+        echo "Cloud API returned rate limit (HTTP 429)"
+    elif [[ "${API_HTTP_CODE}" == "503" ]]; then
+        echo "Cloud API returned service unavailable (HTTP 503)"
+    fi
+}
+
+# Report a final API failure after retries are exhausted
+# Args: $1 = retry reason, $2 = max retries. Echoes API_RESPONSE_BODY on stdout unless it was a network error.
+_report_api_failure() {
+    local retry_reason="${1}"
+    local max_retries="${2}"
+    log_error "${retry_reason} after ${max_retries} attempts"
+    if [[ "${retry_reason}" == "Cloud API network error" ]]; then
+        log_warn "Check your internet connection and verify the provider's API is reachable."
+    else
+        log_warn "This is usually caused by rate limiting or temporary provider issues."
+        log_warn "Wait a minute and try again, or check the provider's status page."
+        echo "${API_RESPONSE_BODY}"
+    fi
+}
+
 _cloud_api_retry_loop() {
     local request_func="${1}"
     local max_retries="${2}"
@@ -1187,30 +1217,19 @@ _cloud_api_retry_loop() {
     local max_interval=30
 
     while [[ "${attempt}" -le "${max_retries}" ]]; do
-        local retry_reason=""
+        local curl_ok=0
+        "${request_func}" "$@" || curl_ok=$?
 
-        if ! "${request_func}" "$@"; then
-            retry_reason="Cloud API network error"
-        elif [[ "${API_HTTP_CODE}" == "429" ]]; then
-            retry_reason="Cloud API returned rate limit (HTTP 429)"
-        elif [[ "${API_HTTP_CODE}" == "503" ]]; then
-            retry_reason="Cloud API returned service unavailable (HTTP 503)"
-        fi
+        local retry_reason
+        retry_reason=$(_classify_api_result "${curl_ok}")
 
-        # Success — no retryable error
         if [[ -z "${retry_reason}" ]]; then
             echo "${API_RESPONSE_BODY}"
             return 0
         fi
 
-        # Retry or fail
         if ! _api_should_retry_on_error "${attempt}" "${max_retries}" "${interval}" "${max_interval}" "${retry_reason}"; then
-            log_error "${retry_reason} after ${max_retries} attempts"
-            if [[ "${retry_reason}" == "Cloud API network error" ]]; then
-                log_warn "Check your internet connection and verify the provider's API is reachable."
-            else
-                echo "${API_RESPONSE_BODY}"
-            fi
+            _report_api_failure "${retry_reason}" "${max_retries}"
             return 1
         fi
         _update_retry_interval interval max_interval