From 4d15470f8f0bda8b97ebb9dd7ff8c84598f68bec Mon Sep 17 00:00:00 2001 From: B <6723574+louisgv@users.noreply.github.com> Date: Tue, 10 Feb 2026 17:58:38 +0000 Subject: [PATCH] fix: Use Sprite public URL for keep-alive instead of localhost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Localhost pings (curl http://localhost:8080/health) bypass the Sprite proxy entirely and don't register as "actively servicing HTTP requests." Per Sprite lifecycle rules, VMs pause when there's no inbound HTTP through the proxy and no detachable session output — so the old keep-alive was doing nothing. Now both discovery.sh and refactor.sh resolve the Sprite's public URL via `sprite-env info` and ping that instead. The request routes through the Sprite proxy, which counts as real activity and prevents pause. Also adds keep-alive to discovery.sh (previously had none at all). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../skills/setup-trigger-service/discovery.sh | 48 +++++++++++++++++++ .../skills/setup-trigger-service/refactor.sh | 24 +++++++--- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/.claude/skills/setup-trigger-service/discovery.sh b/.claude/skills/setup-trigger-service/discovery.sh index d522aefc..7bfd44a7 100755 --- a/.claude/skills/setup-trigger-service/discovery.sh +++ b/.claude/skills/setup-trigger-service/discovery.sh @@ -39,11 +39,46 @@ log_info() { printf "${GREEN}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m log_warn() { printf "${YELLOW}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m-%d %H:%M:%S')] [discovery] WARN: $1" >> "${LOG_FILE}"; } log_error() { printf "${RED}[discovery]${NC} %s\n" "$1"; echo "[$(date +'%Y-%m-%d %H:%M:%S')] [discovery] ERROR: $1" >> "${LOG_FILE}"; } +# --- Keep-alive: ping the Sprite's PUBLIC URL to prevent VM pause --- +# Sprite only counts inbound HTTP requests through its proxy as "active." 
+# Localhost requests bypass the proxy and do NOT prevent the VM from pausing. +# We must hit the public URL so the request routes through the Sprite proxy. +KEEPALIVE_PID="" +SPRITE_PUBLIC_URL="" +start_keepalive() { + SPRITE_PUBLIC_URL=$(sprite-env info 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['sprite_url'])" 2>/dev/null) || SPRITE_PUBLIC_URL="" + + if [[ -z "${SPRITE_PUBLIC_URL}" ]]; then + log_warn "Could not resolve Sprite public URL — keep-alive will use localhost (may not prevent pause)" + SPRITE_PUBLIC_URL="http://localhost:8080" + else + log_info "Keep-alive will ping: ${SPRITE_PUBLIC_URL}/health" + fi + + ( + while true; do + curl -sf --max-time 10 "${SPRITE_PUBLIC_URL}/health" >/dev/null 2>&1 || true # cap each ping so a stalled request cannot block the loop + sleep 30 + done + ) & + KEEPALIVE_PID=$! +} +stop_keepalive() { + if [[ -n "${KEEPALIVE_PID}" ]]; then + kill "${KEEPALIVE_PID}" 2>/dev/null || true + wait "${KEEPALIVE_PID}" 2>/dev/null || true + KEEPALIVE_PID="" + fi +} + # --- Cleanup trap (from refactor.sh) --- cleanup() { local exit_code=$? log_info "Running cleanup (exit_code=${exit_code})..." + # Stop keep-alive loop + stop_keepalive + cd "${REPO_ROOT}" 2>/dev/null || true # Prune worktrees and clean up only OUR worktree base @@ -543,6 +578,10 @@ run_team_cycle() { # Substitute WORKTREE_BASE_PLACEHOLDER with actual worktree path sed -i "s|WORKTREE_BASE_PLACEHOLDER|${WORKTREE_BASE}|g" "${PROMPT_FILE}" + # Start keep-alive before launching claude (prevents Sprite from pausing the VM) + start_keepalive + log_info "Keep-alive started (pid=${KEEPALIVE_PID})" + log_info "Launching agent team..." log_info "Worktree base: ${WORKTREE_BASE}" log_info "Cycle timeout: ${CYCLE_TIMEOUT}s" @@ -558,6 +597,9 @@ run_team_cycle() { claude -p "$(cat "${PROMPT_FILE}")" --dangerously-skip-permissions --model sonnet \ 2>&1 | tee -a "${LOG_FILE}" || CLAUDE_EXIT=$?
+ # Stop keep-alive now that the cycle is done + stop_keepalive + if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then log_info "Cycle completed successfully" @@ -594,6 +636,10 @@ run_single_cycle() { PROMPT_FILE=$(mktemp /tmp/discovery-prompt-XXXXXX.md) build_single_prompt > "${PROMPT_FILE}" + # Start keep-alive before launching claude + start_keepalive + log_info "Keep-alive started (pid=${KEEPALIVE_PID})" + log_info "Launching single agent..." log_info "Cycle timeout: ${SINGLE_TIMEOUT}s" echo "" @@ -606,6 +652,8 @@ run_single_cycle() { claude --print -p "$(cat "${PROMPT_FILE}")" --model sonnet \ 2>&1 | tee -a "${LOG_FILE}" || CLAUDE_EXIT=$? + stop_keepalive + if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then log_info "Single cycle completed successfully" sprite-env checkpoint create --comment "discovery single cycle complete" 2>&1 | tee -a "${LOG_FILE}" || true diff --git a/.claude/skills/setup-trigger-service/refactor.sh b/.claude/skills/setup-trigger-service/refactor.sh index 29d0652d..aa30872c 100755 --- a/.claude/skills/setup-trigger-service/refactor.sh +++ b/.claude/skills/setup-trigger-service/refactor.sh @@ -60,16 +60,28 @@ cleanup() { trap cleanup EXIT SIGTERM SIGINT -# --- Keep-alive: ping the trigger server's /health endpoint periodically --- -# Sprite pauses/stops VMs that have no HTTP activity. When claude is running -# (waiting on API calls), there may be no inbound requests for long stretches, -# causing Sprite to freeze the VM mid-cycle. This background loop ensures -# continuous HTTP activity so Sprite keeps the VM alive. +# --- Keep-alive: ping the Sprite's PUBLIC URL to prevent VM pause --- +# Sprite only counts inbound HTTP requests through its proxy as "active." +# Localhost requests (curl http://localhost:8080/health) bypass the proxy +# entirely and do NOT prevent the VM from pausing. We must hit the public +# URL so the request routes through the Sprite proxy infrastructure. 
KEEPALIVE_PID="" +SPRITE_PUBLIC_URL="" start_keepalive() { + # Resolve the Sprite's public URL from sprite-env info + SPRITE_PUBLIC_URL=$(sprite-env info 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['sprite_url'])" 2>/dev/null) || SPRITE_PUBLIC_URL="" + + if [[ -z "${SPRITE_PUBLIC_URL}" ]]; then + log "WARNING: Could not resolve Sprite public URL — keep-alive will use localhost (may not prevent pause)" + SPRITE_PUBLIC_URL="http://localhost:8080" + else + log "Keep-alive will ping: ${SPRITE_PUBLIC_URL}/health" + fi + ( while true; do - curl -sf http://localhost:8080/health >/dev/null 2>&1 || true + # Ping via public URL (routes through Sprite proxy, counts as active HTTP) + curl -sf "${SPRITE_PUBLIC_URL}/health" >/dev/null 2>&1 || true sleep 30 done ) &