mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-05-19 08:01:17 +00:00
fix(hetzner): clean up orphaned primary IPs before provisioning to avoid quota exceeded (#2935)
Hetzner E2E runs fail with `resource_limit_exceeded` when stale primary IPs left over from previous test runs consume the account quota. This commit adds proactive cleanup at two levels:

1. E2E shell driver: `_hetzner_cleanup_orphaned_ips()` deletes unattached primary IPs during the pre-batch stale cleanup, freeing quota before any new servers are provisioned.
2. TypeScript CLI: `hetzner/main.ts` calls `cleanupOrphanedPrimaryIps()` before `createServer()` in headless/non-interactive mode, ensuring each agent provisioning attempt starts with a clean IP quota.

The existing reactive cleanup (retry after failure) in `hetzner.ts` remains as a fallback.

Fixes #2933

Agent: code-health
Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3b150eabd8
commit
50319e0d39
3 changed files with 73 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@openrouter/spawn",
|
||||
"version": "0.25.24",
|
||||
"version": "0.25.25",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"spawn": "cli.js"
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { DOCKER_CONTAINER_NAME, DOCKER_REGISTRY, runOrchestration } from "../sha
|
|||
import { logInfo, logStep, shellQuote } from "../shared/ui.js";
|
||||
import { agents, resolveAgent } from "./agents.js";
|
||||
import {
|
||||
cleanupOrphanedPrimaryIps,
|
||||
createServer as createHetznerServer,
|
||||
downloadFile,
|
||||
ensureHcloudToken,
|
||||
|
|
@ -71,6 +72,16 @@ async function main() {
|
|||
location = await promptLocation();
|
||||
},
|
||||
async createServer(name: string) {
|
||||
// Proactively clean up orphaned Primary IPs before provisioning in headless
|
||||
// mode (E2E batches). This prevents resource_limit_exceeded errors when
|
||||
// previous test runs left behind unattached IPs that consume quota (#2933).
|
||||
if (process.env.SPAWN_NON_INTERACTIVE === "1") {
|
||||
const cleaned = await cleanupOrphanedPrimaryIps();
|
||||
if (cleaned > 0) {
|
||||
logInfo(`Pre-provisioning: cleaned ${cleaned} orphaned Primary IP(s)`);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for a pre-built snapshot before provisioning
|
||||
snapshotId = await findSpawnSnapshot(agentName);
|
||||
if (snapshotId) {
|
||||
|
|
|
|||
|
|
@ -220,11 +220,68 @@ _hetzner_teardown() {
|
|||
untrack_app "${app}"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _hetzner_cleanup_orphaned_ips
|
||||
#
|
||||
# Delete Hetzner Primary IPs not attached to any server. These accumulate
|
||||
# from failed/interrupted provisioning runs and consume the account's
|
||||
# primary_ip_limit quota, causing resource_limit_exceeded errors (#2933).
|
||||
# ---------------------------------------------------------------------------
|
||||
_hetzner_cleanup_orphaned_ips() {
  # Best-effort removal of Hetzner Primary IPs that are not assigned to any
  # server. Takes no arguments and always returns 0: listing or deletion
  # failures are logged and skipped rather than propagated.
  #
  # Only the first page of results (per_page=50) is examined; there is no
  # pagination loop.
  local response
  response=$(_hetzner_curl_auth -sf \
    "${_HETZNER_API}/primary_ips?per_page=50" 2>/dev/null || true)

  # An empty response means the list request failed (-f suppresses the body on
  # HTTP errors), so there is nothing to inspect.
  if [ -z "${response}" ]; then
    log_info "Could not list Hetzner primary IPs — skipping IP cleanup"
    return 0
  fi

  # One "id:ip" entry per line for every IP whose assignee_id is null or 0.
  # A jq failure (e.g. unexpected payload) yields an empty list.
  local orphaned
  orphaned=$(printf '%s' "${response}" | jq -r '.primary_ips[] | select(.assignee_id == null or .assignee_id == 0) | "\(.id):\(.ip)"' 2>/dev/null || true)

  if [ -z "${orphaned}" ]; then
    log_ok "No orphaned Hetzner Primary IPs found"
    return 0
  fi

  # Declare every loop-scoped name once, up front. `entry` in particular must
  # be local so the loop variable does not leak into the caller's scope.
  local cleaned=0
  local entry ip_id ip_addr http_code

  # Entries contain no whitespace, so plain word-splitting is sufficient. A
  # for-loop (rather than `while read`) also leaves the loop body's stdin
  # untouched for the commands it runs.
  for entry in ${orphaned}; do
    # Split on the FIRST colon only: the id precedes it, and everything after
    # it (which may itself contain colons, e.g. IPv6) is the address.
    ip_id="${entry%%:*}"
    ip_addr="${entry#*:}"

    # Validate IP ID is numeric before using it in API URL
    case "${ip_id}" in
      ''|*[!0-9]*)
        log_warn "Skipping orphaned IP ${entry} — non-numeric ID"
        continue
        ;;
    esac

    # Capture only the HTTP status. On a transport failure curl's -w already
    # emits "000", so the fallback is applied only when nothing was printed;
    # appending another "000" unconditionally would produce "000000".
    # NOTE(review): assumes _hetzner_curl_auth forwards these flags to curl.
    http_code=$(_hetzner_curl_auth -s -o /dev/null -w '%{http_code}' \
      -X DELETE \
      "${_HETZNER_API}/primary_ips/${ip_id}" 2>/dev/null) || true
    http_code="${http_code:-000}"

    case "${http_code}" in
      200|204)
        log_ok "Deleted orphaned Primary IP ${ip_addr} (id=${ip_id})"
        cleaned=$((cleaned + 1))
        ;;
      404)
        # Someone else removed it between the list and the delete.
        log_info "Primary IP ${ip_addr} (id=${ip_id}) already gone"
        ;;
      *)
        log_warn "Failed to delete Primary IP ${ip_addr} (id=${ip_id}, HTTP ${http_code})"
        ;;
    esac
  done

  if [ "${cleaned}" -gt 0 ]; then
    log_ok "Cleaned ${cleaned} orphaned Hetzner Primary IP(s)"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _hetzner_cleanup_stale
|
||||
#
|
||||
# List all Hetzner servers, find e2e-* instances older than 30 minutes,
|
||||
# and destroy them.
|
||||
# and destroy them. Also cleans up orphaned Primary IPs to prevent
|
||||
# resource_limit_exceeded errors (#2933).
|
||||
# ---------------------------------------------------------------------------
|
||||
_hetzner_cleanup_stale() {
|
||||
local now
|
||||
|
|
@ -312,6 +369,9 @@ _hetzner_cleanup_stale() {
|
|||
if [ "${skipped}" -gt 0 ]; then
|
||||
log_info "Skipped ${skipped} recent Hetzner instance(s)"
|
||||
fi
|
||||
|
||||
# Also clean up orphaned Primary IPs to free quota for new provisioning (#2933)
|
||||
_hetzner_cleanup_orphaned_ips
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue