spawn/sh/digitalocean/openclaw.sh
A 66036bfac9
fix(do): skip _run_with_restart in headless mode to prevent duplicate droplets (#2805)
The _run_with_restart wrapper in all 8 DigitalOcean agent scripts catches
SIGTERM/SIGKILL exit codes (143/137) and retries the orchestration process.
In headless mode (E2E tests), when the provision timeout kills the process,
this restart loop would re-run main.ts, creating duplicate droplets and
exhausting the account's droplet quota — causing ALL subsequent DO agents
to fail provisioning.

Skip the restart loop entirely when SPAWN_HEADLESS=1 (set by runScriptHeadless
in the CLI). The restart behavior is only useful for interactive sessions
where the user's SSH connection drops.

Fixes #2794

Agent: code-health

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 16:12:25 -07:00

84 lines
3.4 KiB
Bash
Executable file

#!/bin/bash
# Abort on any unhandled command failure, including a failure in any stage of
# a pipeline.
set -eo pipefail
# Thin shim: ensures bun is available, runs bundled digitalocean.js (local or from GitHub release)
# Includes restart loop for SIGTERM recovery on DigitalOcean
# Agent name: passed as the first argument to the DigitalOcean entry point and
# used as the "[spawn/<name>]" prefix in restart log messages.
_AGENT_NAME="openclaw"
# Upper bound on how many times _run_with_restart invokes the command.
_MAX_RETRIES=3
# Make sure a `bun` executable is reachable through PATH, installing it with
# the official installer when it is missing.
#
# Globals:   BUN_INSTALL (read) - bun install root; defaults to $HOME/.bun
#            PATH (written)     - the install root's bin dir is prepended when
#                                 bun is found or installed there
# Arguments: none
# Outputs:   status messages on stderr
# Returns:   0 when bun is available; exits the script with status 1 otherwise
_ensure_bun() {
  if command -v bun &>/dev/null; then return 0; fi

  # The bun installer is expected to place the binary under
  # ${BUN_INSTALL:-$HOME/.bun}/bin, so honour a custom BUN_INSTALL instead of
  # assuming $HOME/.bun.
  local bun_bin="${BUN_INSTALL:-$HOME/.bun}/bin"

  # A previous install may exist without being on PATH (the installer only
  # edits shell rc files, which non-interactive shells do not read). Reuse it
  # rather than downloading and reinstalling on every run.
  if [ -x "$bun_bin/bun" ]; then
    export PATH="$bun_bin:$PATH"
    return 0
  fi

  printf '\033[0;36mInstalling bun...\033[0m\n' >&2
  # The URL is quoted so the `?` is never treated as a glob character.
  curl -fsSL --proto '=https' --show-error 'https://bun.sh/install?version=1.3.9' | bash >/dev/null \
    || { printf '\033[0;31mFailed to install bun\033[0m\n' >&2; exit 1; }
  export PATH="$bun_bin:$PATH"
  command -v bun &>/dev/null || { printf '\033[0;31mbun not found after install\033[0m\n' >&2; exit 1; }
}
# Run a command in the foreground and restart it when it was killed by a signal.
#
# The command runs in the foreground so bun gets full terminal access (raw
# mode, arrow keys for interactive prompts). The old pattern backgrounded the
# child with & + wait so a SIGTERM trap could forward the signal, but that
# removed bun from the foreground process group and broke @clack/prompts
# multiselect. Termination is instead detected from the exit status after the
# child exits: 143 (128 + 15, SIGTERM) or 137 (128 + 9, SIGKILL).
#
# Globals:   _AGENT_NAME (read)    - label used in log messages
#            _MAX_RETRIES (read)   - maximum number of invocations
#            SPAWN_HEADLESS (read) - "1" disables the restart loop
# Arguments: the command to run, followed by its arguments
# Outputs:   progress and error messages on stderr
# Returns:   0 on success, otherwise the exit status of the last invocation
_run_with_restart() {
  local exit_code=0

  # In headless mode (E2E / --headless), skip the restart loop entirely.
  # Restarting in headless mode creates duplicate droplets, exhausting the
  # account's droplet quota and causing all subsequent agents to fail.
  if [ "${SPAWN_HEADLESS:-}" = "1" ]; then
    "$@" || exit_code=$?
    return "$exit_code"
  fi

  local attempt=0
  local backoff=2
  while [ "$attempt" -lt "$_MAX_RETRIES" ]; do
    attempt=$((attempt + 1))

    # Capture the status with `||`: the script runs under `set -e`, so a bare
    # failing command would abort the whole shell right here, before the
    # restart logic below ever got to look at the exit code.
    exit_code=0
    "$@" || exit_code=$?

    # Normal exit
    if [ "$exit_code" -eq 0 ]; then
      return 0
    fi

    # SIGTERM (143) or SIGKILL (137) — attempt restart
    if [ "$exit_code" -eq 143 ] || [ "$exit_code" -eq 137 ]; then
      printf '\033[0;33m[spawn/%s] Agent process terminated (exit %s). The droplet is likely still running.\033[0m\n' \
        "$_AGENT_NAME" "$exit_code" >&2
      printf '\033[0;33m[spawn/%s] Check your DigitalOcean dashboard: https://cloud.digitalocean.com/droplets\033[0m\n' \
        "$_AGENT_NAME" >&2
      if [ "$attempt" -lt "$_MAX_RETRIES" ]; then
        printf '\033[0;33m[spawn/%s] Restarting (attempt %s/%s, backoff %ss)...\033[0m\n' \
          "$_AGENT_NAME" "$((attempt + 1))" "$_MAX_RETRIES" "$backoff" >&2
        sleep "$backoff"
        # Exponential backoff: 2s, 4s, ...
        backoff=$((backoff * 2))
        continue
      else
        printf '\033[0;31m[spawn/%s] Max restart attempts reached (%s). Giving up.\033[0m\n' \
          "$_AGENT_NAME" "$_MAX_RETRIES" >&2
        return "$exit_code"
      fi
    fi

    # Other failure — exit with the original code
    return "$exit_code"
  done
}
# Entry point: make sure bun exists, then launch the DigitalOcean entry point
# for this agent, forwarding the script's own arguments.
_ensure_bun

# Local-source mode (used by e2e tests): when SPAWN_CLI_DIR is set and the
# checkout contains the DigitalOcean entry point, run it directly.
if [ -n "${SPAWN_CLI_DIR:-}" ]; then
  if [ -f "${SPAWN_CLI_DIR}/packages/cli/src/digitalocean/main.ts" ]; then
    _run_with_restart bun run "${SPAWN_CLI_DIR}/packages/cli/src/digitalocean/main.ts" "$_AGENT_NAME" "$@"
    exit $?
  fi
fi

# Remote mode: fetch the bundled digitalocean.js from the GitHub release into
# a temp file that is removed when the script exits.
DO_JS="$(mktemp)"
trap 'rm -f "$DO_JS"' EXIT
if ! curl -fsSL --proto '=https' -o "$DO_JS" "https://github.com/OpenRouterTeam/spawn/releases/download/digitalocean-latest/digitalocean.js"; then
  printf '\033[0;31mFailed to download digitalocean.js\033[0m\n' >&2
  exit 1
fi

_run_with_restart bun run "$DO_JS" "$_AGENT_NAME" "$@"
exit $?