mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-05-08 01:51:14 +00:00
fix: Use Sprite public URL for keep-alive instead of localhost
Localhost pings (curl http://localhost:8080/health) bypass the Sprite proxy entirely and don't register as "actively servicing HTTP requests." Per Sprite lifecycle rules, VMs pause when there's no inbound HTTP through the proxy and no detachable session output — so the old keep-alive was doing nothing. Now both discovery.sh and refactor.sh resolve the Sprite's public URL via `sprite-env info` and ping that instead. The request routes through the Sprite proxy, which counts as real activity and prevents pause. Also adds keep-alive to discovery.sh (previously had none at all). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b20a172ba4
commit
4d15470f8f
2 changed files with 66 additions and 6 deletions
|
|
@ -39,11 +39,46 @@ log_info() { printf "${GREEN}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m
|
|||
log_warn() { printf "${YELLOW}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m-%d %H:%M:%S')] [discovery] WARN: $1" >> "${LOG_FILE}"; }
|
||||
log_error() { printf "${RED}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m-%d %H:%M:%S')] [discovery] ERROR: $1" >> "${LOG_FILE}"; }
|
||||
|
||||
# --- Keep-alive: ping the Sprite's PUBLIC URL to prevent VM pause ---
|
||||
# Sprite only counts inbound HTTP requests through its proxy as "active."
|
||||
# Localhost requests bypass the proxy and do NOT prevent the VM from pausing.
|
||||
# We must hit the public URL so the request routes through the Sprite proxy.
|
||||
KEEPALIVE_PID=""
|
||||
SPRITE_PUBLIC_URL=""
|
||||
start_keepalive() {
|
||||
SPRITE_PUBLIC_URL=$(sprite-env info 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['sprite_url'])" 2>/dev/null) || SPRITE_PUBLIC_URL=""
|
||||
|
||||
if [[ -z "${SPRITE_PUBLIC_URL}" ]]; then
|
||||
log_warn "Could not resolve Sprite public URL — keep-alive will use localhost (may not prevent pause)"
|
||||
SPRITE_PUBLIC_URL="http://localhost:8080"
|
||||
else
|
||||
log_info "Keep-alive will ping: ${SPRITE_PUBLIC_URL}/health"
|
||||
fi
|
||||
|
||||
(
|
||||
while true; do
|
||||
curl -sf "${SPRITE_PUBLIC_URL}/health" >/dev/null 2>&1 || true
|
||||
sleep 30
|
||||
done
|
||||
) &
|
||||
KEEPALIVE_PID=$!
|
||||
}
|
||||
stop_keepalive() {
|
||||
if [[ -n "${KEEPALIVE_PID}" ]]; then
|
||||
kill "${KEEPALIVE_PID}" 2>/dev/null || true
|
||||
wait "${KEEPALIVE_PID}" 2>/dev/null || true
|
||||
KEEPALIVE_PID=""
|
||||
fi
|
||||
}
|
||||
|
||||
# --- Cleanup trap (from refactor.sh) ---
|
||||
cleanup() {
|
||||
local exit_code=$?
|
||||
log_info "Running cleanup (exit_code=${exit_code})..."
|
||||
|
||||
# Stop keep-alive loop
|
||||
stop_keepalive
|
||||
|
||||
cd "${REPO_ROOT}" 2>/dev/null || true
|
||||
|
||||
# Prune worktrees and clean up only OUR worktree base
|
||||
|
|
@ -543,6 +578,10 @@ run_team_cycle() {
|
|||
# Substitute WORKTREE_BASE_PLACEHOLDER with actual worktree path
|
||||
sed -i "s|WORKTREE_BASE_PLACEHOLDER|${WORKTREE_BASE}|g" "${PROMPT_FILE}"
|
||||
|
||||
# Start keep-alive before launching claude (prevents Sprite from pausing the VM)
|
||||
start_keepalive
|
||||
log_info "Keep-alive started (pid=${KEEPALIVE_PID})"
|
||||
|
||||
log_info "Launching agent team..."
|
||||
log_info "Worktree base: ${WORKTREE_BASE}"
|
||||
log_info "Cycle timeout: ${CYCLE_TIMEOUT}s"
|
||||
|
|
@ -558,6 +597,9 @@ run_team_cycle() {
|
|||
claude -p "$(cat "${PROMPT_FILE}")" --dangerously-skip-permissions --model sonnet \
|
||||
2>&1 | tee -a "${LOG_FILE}" || CLAUDE_EXIT=$?
|
||||
|
||||
# Stop keep-alive now that the cycle is done
|
||||
stop_keepalive
|
||||
|
||||
if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then
|
||||
log_info "Cycle completed successfully"
|
||||
|
||||
|
|
@ -594,6 +636,10 @@ run_single_cycle() {
|
|||
PROMPT_FILE=$(mktemp /tmp/discovery-prompt-XXXXXX.md)
|
||||
build_single_prompt > "${PROMPT_FILE}"
|
||||
|
||||
# Start keep-alive before launching claude
|
||||
start_keepalive
|
||||
log_info "Keep-alive started (pid=${KEEPALIVE_PID})"
|
||||
|
||||
log_info "Launching single agent..."
|
||||
log_info "Cycle timeout: ${SINGLE_TIMEOUT}s"
|
||||
echo ""
|
||||
|
|
@ -606,6 +652,8 @@ run_single_cycle() {
|
|||
claude --print -p "$(cat "${PROMPT_FILE}")" --model sonnet \
|
||||
2>&1 | tee -a "${LOG_FILE}" || CLAUDE_EXIT=$?
|
||||
|
||||
stop_keepalive
|
||||
|
||||
if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then
|
||||
log_info "Single cycle completed successfully"
|
||||
sprite-env checkpoint create --comment "discovery single cycle complete" 2>&1 | tee -a "${LOG_FILE}" || true
|
||||
|
|
|
|||
|
|
@ -60,16 +60,28 @@ cleanup() {
|
|||
|
||||
trap cleanup EXIT SIGTERM SIGINT
|
||||
|
||||
# --- Keep-alive: ping the trigger server's /health endpoint periodically ---
|
||||
# Sprite pauses/stops VMs that have no HTTP activity. When claude is running
|
||||
# (waiting on API calls), there may be no inbound requests for long stretches,
|
||||
# causing Sprite to freeze the VM mid-cycle. This background loop ensures
|
||||
# continuous HTTP activity so Sprite keeps the VM alive.
|
||||
# --- Keep-alive: ping the Sprite's PUBLIC URL to prevent VM pause ---
|
||||
# Sprite only counts inbound HTTP requests through its proxy as "active."
|
||||
# Localhost requests (curl http://localhost:8080/health) bypass the proxy
|
||||
# entirely and do NOT prevent the VM from pausing. We must hit the public
|
||||
# URL so the request routes through the Sprite proxy infrastructure.
|
||||
KEEPALIVE_PID=""
|
||||
SPRITE_PUBLIC_URL=""
|
||||
start_keepalive() {
|
||||
# Resolve the Sprite's public URL from sprite-env info
|
||||
SPRITE_PUBLIC_URL=$(sprite-env info 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['sprite_url'])" 2>/dev/null) || SPRITE_PUBLIC_URL=""
|
||||
|
||||
if [[ -z "${SPRITE_PUBLIC_URL}" ]]; then
|
||||
log "WARNING: Could not resolve Sprite public URL — keep-alive will use localhost (may not prevent pause)"
|
||||
SPRITE_PUBLIC_URL="http://localhost:8080"
|
||||
else
|
||||
log "Keep-alive will ping: ${SPRITE_PUBLIC_URL}/health"
|
||||
fi
|
||||
|
||||
(
|
||||
while true; do
|
||||
curl -sf http://localhost:8080/health >/dev/null 2>&1 || true
|
||||
# Ping via public URL (routes through Sprite proxy, counts as active HTTP)
|
||||
curl -sf "${SPRITE_PUBLIC_URL}/health" >/dev/null 2>&1 || true
|
||||
sleep 30
|
||||
done
|
||||
) &
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue