mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-28 03:49:31 +00:00
The _run_with_restart wrapper in all 8 DigitalOcean agent scripts catches SIGTERM/SIGKILL exit codes (143/137) and retries the orchestration process. In headless mode (E2E tests), when the provision timeout kills the process, this restart loop would re-run main.ts, creating duplicate droplets and exhausting the account's droplet quota — causing ALL subsequent DO agents to fail provisioning. Skip the restart loop entirely when SPAWN_HEADLESS=1 (set by runScriptHeadless in the CLI). The restart behavior is only useful for interactive sessions where the user's SSH connection drops. Fixes #2794 Agent: code-health Co-authored-by: B <6723574+louisgv@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
84 lines
3.4 KiB
Bash
Executable file
84 lines
3.4 KiB
Bash
Executable file
#!/bin/bash
|
|
set -eo pipefail
|
|
|
|
# Thin shim: ensures bun is available, runs bundled digitalocean.js (local or from GitHub release)
|
|
# Includes restart loop for SIGTERM recovery on DigitalOcean
|
|
|
|
_AGENT_NAME="openclaw"
|
|
_MAX_RETRIES=3
|
|
|
|
_ensure_bun() {
|
|
if command -v bun &>/dev/null; then return 0; fi
|
|
printf '\033[0;36mInstalling bun...\033[0m\n' >&2
|
|
curl -fsSL --proto '=https' --show-error https://bun.sh/install?version=1.3.9 | bash >/dev/null || { printf '\033[0;31mFailed to install bun\033[0m\n' >&2; exit 1; }
|
|
export PATH="$HOME/.bun/bin:$PATH"
|
|
command -v bun &>/dev/null || { printf '\033[0;31mbun not found after install\033[0m\n' >&2; exit 1; }
|
|
}
|
|
|
|
# Run command in the foreground so bun gets full terminal access (raw mode,
|
|
# arrow keys for interactive prompts). The old pattern backgrounded the child
|
|
# with & + wait so a SIGTERM trap could forward the signal, but that removed
|
|
# bun from the foreground process group and broke @clack/prompts multiselect.
|
|
# Now SIGTERM is detected from exit code 143 (128 + 15) after the child exits.
|
|
_run_with_restart() {
|
|
# In headless mode (E2E / --headless), skip the restart loop entirely.
|
|
# Restarting in headless mode creates duplicate droplets, exhausting the
|
|
# account's droplet quota and causing all subsequent agents to fail.
|
|
if [ "${SPAWN_HEADLESS:-}" = "1" ]; then
|
|
"$@"
|
|
return $?
|
|
fi
|
|
|
|
local attempt=0
|
|
local backoff=2
|
|
while [ "$attempt" -lt "$_MAX_RETRIES" ]; do
|
|
attempt=$((attempt + 1))
|
|
|
|
"$@"
|
|
local exit_code=$?
|
|
|
|
# Normal exit
|
|
if [ "$exit_code" -eq 0 ]; then
|
|
return 0
|
|
fi
|
|
|
|
# SIGTERM (143) or SIGKILL (137) — attempt restart
|
|
if [ "$exit_code" -eq 143 ] || [ "$exit_code" -eq 137 ]; then
|
|
printf '\033[0;33m[spawn/%s] Agent process terminated (exit %s). The droplet is likely still running.\033[0m\n' \
|
|
"$_AGENT_NAME" "$exit_code" >&2
|
|
printf '\033[0;33m[spawn/%s] Check your DigitalOcean dashboard: https://cloud.digitalocean.com/droplets\033[0m\n' \
|
|
"$_AGENT_NAME" >&2
|
|
if [ "$attempt" -lt "$_MAX_RETRIES" ]; then
|
|
printf '\033[0;33m[spawn/%s] Restarting (attempt %s/%s, backoff %ss)...\033[0m\n' \
|
|
"$_AGENT_NAME" "$((attempt + 1))" "$_MAX_RETRIES" "$backoff" >&2
|
|
sleep "$backoff"
|
|
backoff=$((backoff * 2))
|
|
continue
|
|
else
|
|
printf '\033[0;31m[spawn/%s] Max restart attempts reached (%s). Giving up.\033[0m\n' \
|
|
"$_AGENT_NAME" "$_MAX_RETRIES" >&2
|
|
return "$exit_code"
|
|
fi
|
|
fi
|
|
|
|
# Other failure — exit with the original code
|
|
return "$exit_code"
|
|
done
|
|
}
|
|
|
|
_ensure_bun
|
|
|
|
# SPAWN_CLI_DIR override — force local source (used by e2e tests)
|
|
if [[ -n "${SPAWN_CLI_DIR:-}" && -f "$SPAWN_CLI_DIR/packages/cli/src/digitalocean/main.ts" ]]; then
|
|
_run_with_restart bun run "$SPAWN_CLI_DIR/packages/cli/src/digitalocean/main.ts" "$_AGENT_NAME" "$@"
|
|
exit $?
|
|
fi
|
|
|
|
# Remote — download bundled digitalocean.js from GitHub release
|
|
DO_JS=$(mktemp)
|
|
trap 'rm -f "$DO_JS"' EXIT
|
|
curl -fsSL --proto '=https' "https://github.com/OpenRouterTeam/spawn/releases/download/digitalocean-latest/digitalocean.js" -o "$DO_JS" \
|
|
|| { printf '\033[0;31mFailed to download digitalocean.js\033[0m\n' >&2; exit 1; }
|
|
|
|
_run_with_restart bun run "$DO_JS" "$_AGENT_NAME" "$@"
|
|
exit $?
|