fix(hetzner): clean up orphaned primary IPs before provisioning to avoid quota exceeded (#2935)

Hetzner E2E runs fail with `resource_limit_exceeded` when stale primary
IPs from previous test runs consume the account quota. This adds proactive
cleanup at two levels:

1. E2E shell driver: `_hetzner_cleanup_orphaned_ips()` deletes unattached
   primary IPs during pre-batch stale cleanup, freeing quota before any
   new servers are provisioned.

2. TypeScript CLI: `hetzner/main.ts` calls `cleanupOrphanedPrimaryIps()`
   before `createServer()` in headless/non-interactive mode, ensuring
   each agent provisioning attempt starts with a clean IP quota.

The existing reactive cleanup (retry after failure) in `hetzner.ts`
remains as a fallback.

Fixes #2933

Agent: code-health

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
A 2026-03-23 21:20:30 -07:00 committed by GitHub
parent 3b150eabd8
commit 50319e0d39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 73 additions and 2 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@openrouter/spawn",
"version": "0.25.24",
"version": "0.25.25",
"type": "module",
"bin": {
"spawn": "cli.js"

View file

@ -9,6 +9,7 @@ import { DOCKER_CONTAINER_NAME, DOCKER_REGISTRY, runOrchestration } from "../sha
import { logInfo, logStep, shellQuote } from "../shared/ui.js";
import { agents, resolveAgent } from "./agents.js";
import {
cleanupOrphanedPrimaryIps,
createServer as createHetznerServer,
downloadFile,
ensureHcloudToken,
@ -71,6 +72,16 @@ async function main() {
location = await promptLocation();
},
async createServer(name: string) {
// Proactively clean up orphaned Primary IPs before provisioning in headless
// mode (E2E batches). This prevents resource_limit_exceeded errors when
// previous test runs left behind unattached IPs that consume quota (#2933).
if (process.env.SPAWN_NON_INTERACTIVE === "1") {
const cleaned = await cleanupOrphanedPrimaryIps();
if (cleaned > 0) {
logInfo(`Pre-provisioning: cleaned ${cleaned} orphaned Primary IP(s)`);
}
}
// Check for a pre-built snapshot before provisioning
snapshotId = await findSpawnSnapshot(agentName);
if (snapshotId) {

View file

@ -220,11 +220,68 @@ _hetzner_teardown() {
untrack_app "${app}"
}
# ---------------------------------------------------------------------------
# _hetzner_cleanup_orphaned_ips
#
# Delete Hetzner Primary IPs not attached to any server. These accumulate
# from failed/interrupted provisioning runs and consume the account's
# primary_ip_limit quota, causing resource_limit_exceeded errors (#2933).
#
# Every page of the primary_ips listing is fetched (per_page is requested at
# 50) so that orphans beyond the first page are cleaned up as well. All
# orphans are collected before any DELETE is issued so that deletions cannot
# shift entries between pages mid-listing.
#
# Best-effort: always returns 0. A failed listing or a failed DELETE is
# logged and skipped rather than aborting the surrounding cleanup.
# ---------------------------------------------------------------------------
_hetzner_cleanup_orphaned_ips() {
  local page=1
  local fetched=0
  # Hard cap on listing requests so a malformed next_page can never loop forever.
  local max_pages=20
  local orphaned=""
  local response
  local page_orphaned
  local next_page

  while [ "${fetched}" -lt "${max_pages}" ]; do
    response=$(_hetzner_curl_auth -sf \
      "${_HETZNER_API}/primary_ips?per_page=50&page=${page}" 2>/dev/null || true)
    if [ -z "${response}" ]; then
      if [ "${fetched}" -eq 0 ]; then
        log_info "Could not list Hetzner primary IPs — skipping IP cleanup"
        return 0
      fi
      # A later page failed: still clean up what was already collected.
      log_warn "Could not list Hetzner primary IPs page ${page} — cleaning up partial results"
      break
    fi
    fetched=$((fetched + 1))

    # Emit "id:ip" for every primary IP with no assignee (null or 0).
    page_orphaned=$(printf '%s' "${response}" | jq -r '.primary_ips[] | select(.assignee_id == null or .assignee_id == 0) | "\(.id):\(.ip)"' 2>/dev/null || true)
    if [ -n "${page_orphaned}" ]; then
      orphaned="${orphaned}${orphaned:+ }${page_orphaned}"
    fi

    # next_page is null on the last page; anything non-numeric ends the walk.
    next_page=$(printf '%s' "${response}" | jq -r '.meta.pagination.next_page // empty' 2>/dev/null || true)
    case "${next_page}" in ''|*[!0-9]*) break ;; esac
    page="${next_page}"
  done

  if [ -z "${orphaned}" ]; then
    log_ok "No orphaned Hetzner Primary IPs found"
    return 0
  fi

  local cleaned=0
  local entry
  local ip_id
  local ip_addr
  local http_code
  # Entries are whitespace-separated "id:ip" pairs; neither part contains spaces.
  for entry in ${orphaned}; do
    ip_id=$(printf '%s' "${entry}" | cut -d: -f1)
    # -f2- keeps the whole address even when it contains colons (IPv6).
    ip_addr=$(printf '%s' "${entry}" | cut -d: -f2-)

    # Validate IP ID is numeric before using it in API URL
    case "${ip_id}" in ''|*[!0-9]*) log_warn "Skipping orphaned IP ${entry} — non-numeric ID"; continue ;; esac

    # Capture only the HTTP status; "000" stands in for a transport failure.
    http_code=$(_hetzner_curl_auth -s -o /dev/null -w '%{http_code}' \
      -X DELETE \
      "${_HETZNER_API}/primary_ips/${ip_id}" 2>/dev/null || printf '000')

    if [ "${http_code}" = "200" ] || [ "${http_code}" = "204" ]; then
      log_ok "Deleted orphaned Primary IP ${ip_addr} (id=${ip_id})"
      cleaned=$((cleaned + 1))
    elif [ "${http_code}" = "404" ]; then
      log_info "Primary IP ${ip_addr} (id=${ip_id}) already gone"
    else
      log_warn "Failed to delete Primary IP ${ip_addr} (id=${ip_id}, HTTP ${http_code})"
    fi
  done

  if [ "${cleaned}" -gt 0 ]; then
    log_ok "Cleaned ${cleaned} orphaned Hetzner Primary IP(s)"
  fi
}
# ---------------------------------------------------------------------------
# _hetzner_cleanup_stale
#
# List all Hetzner servers, find e2e-* instances older than 30 minutes,
# and destroy them.
# and destroy them. Also cleans up orphaned Primary IPs to prevent
# resource_limit_exceeded errors (#2933).
# ---------------------------------------------------------------------------
_hetzner_cleanup_stale() {
local now
@ -312,6 +369,9 @@ _hetzner_cleanup_stale() {
if [ "${skipped}" -gt 0 ]; then
log_info "Skipped ${skipped} recent Hetzner instance(s)"
fi
# Also clean up orphaned Primary IPs to free quota for new provisioning (#2933)
_hetzner_cleanup_orphaned_ips
}
# ---------------------------------------------------------------------------