mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-05-20 01:11:18 +00:00
fix: detect and recover from Hetzner primary_ip_limit exceeded error (#2905)
When parallel E2E runs exhaust Hetzner's Primary IP quota, the CLI now detects the `resource_limit_exceeded` / `primary_ip_limit` error, automatically cleans up orphaned Primary IPs (those not attached to any server), and retries once. If the cleanup doesn't free any quota, a clear message guides users to delete stale resources or request a quota increase.
Fixes #2902
Agent: code-health
Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d2f11bbf06
commit
5392ff2d7a
3 changed files with 358 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@openrouter/spawn",
|
||||
"version": "0.25.16",
|
||||
"version": "0.25.17",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"spawn": "cli.js"
|
||||
|
|
|
|||
|
|
@ -3,7 +3,13 @@ import { mockBunSpawn, mockClackPrompts } from "./test-helpers";
|
|||
|
||||
mockClackPrompts();
|
||||
|
||||
import { DEFAULT_LOCATION, DEFAULT_SERVER_TYPE, getConnectionInfo } from "../hetzner/hetzner";
|
||||
import {
|
||||
cleanupOrphanedPrimaryIps,
|
||||
DEFAULT_LOCATION,
|
||||
DEFAULT_SERVER_TYPE,
|
||||
getConnectionInfo,
|
||||
isResourceLimitError,
|
||||
} from "../hetzner/hetzner";
|
||||
|
||||
let origFetch: typeof global.fetch;
|
||||
let origEnv: NodeJS.ProcessEnv;
|
||||
|
|
@ -577,4 +583,279 @@ describe("hetzner/createServer", () => {
|
|||
await ensureHcloudToken();
|
||||
await expect(createServer("test-server", "cx23", "fsn1")).rejects.toThrow("No server IP");
|
||||
});
|
||||
|
||||
it("cleans up orphaned primary IPs on resource_limit_exceeded and retries", async () => {
  process.env.HCLOUD_TOKEN = "test-token";

  // Serialize a payload into a Response, optionally with a custom status.
  const asJson = (payload: unknown, init?: ResponseInit): Response => new Response(JSON.stringify(payload), init);

  // Payload returned by the retried (successful) create call.
  const serverResp = {
    server: {
      id: 99,
      public_net: {
        ipv4: {
          ip: "10.0.0.5",
        },
      },
    },
  };

  // Responses for the first five fetch calls, in the order they are consumed.
  const scripted: Array<() => Response> = [
    // 1: token validation
    () =>
      asJson({
        servers: [],
      }),
    // 2: SSH keys
    () =>
      asJson({
        ssh_keys: [],
      }),
    // 3: first create attempt — resource_limit_exceeded (HTTP 403)
    () =>
      asJson(
        {
          error: {
            code: "resource_limit_exceeded",
            message: "primary_ip_limit",
          },
        },
        {
          status: 403,
        },
      ),
    // 4: list primary IPs for cleanup (one unattached, one attached)
    () =>
      asJson({
        primary_ips: [
          {
            id: 100,
            ip: "1.2.3.4",
            assignee_id: 0,
          },
          {
            id: 200,
            ip: "5.6.7.8",
            assignee_id: 42,
          },
        ],
      }),
    // 5: delete orphaned IP 100
    () =>
      new Response("", {
        status: 204,
      }),
  ];

  let callCount = 0;
  global.fetch = mock(() => {
    callCount++;
    const next = scripted[callCount - 1];
    // Every call past the script is the retried create, which succeeds.
    return Promise.resolve(next ? next() : asJson(serverResp));
  });

  const { ensureHcloudToken, createServer } = await import("../hetzner/hetzner");
  await ensureHcloudToken();

  const conn = await createServer("test-retry", "cx23", "fsn1");
  expect(conn.ip).toBe("10.0.0.5");

  // Expected sequence: token(1), ssh_keys(2), create-fail(3), list-ips(4), delete-ip(5), create-ok(6)
  expect(callCount).toBeGreaterThanOrEqual(6);
});
|
||||
|
||||
it("throws with guidance when resource limit hit and no orphaned IPs to clean", async () => {
  process.env.HCLOUD_TOKEN = "test-token";

  // Serialize a payload into a Response, optionally with a custom status.
  const asJson = (payload: unknown, init?: ResponseInit): Response => new Response(JSON.stringify(payload), init);

  // Responses for the first three fetch calls, in the order they are consumed.
  const opening: Array<() => Response> = [
    // 1: first call returns an empty server list
    () =>
      asJson({
        servers: [],
      }),
    // 2: second call returns an empty SSH key list
    () =>
      asJson({
        ssh_keys: [],
      }),
    // 3: create fails with resource_limit_exceeded
    () =>
      asJson(
        {
          error: {
            code: "resource_limit_exceeded",
            message: "primary_ip_limit",
          },
        },
        {
          status: 403,
        },
      ),
  ];

  let callCount = 0;
  global.fetch = mock(() => {
    callCount++;
    const next = opening[callCount - 1];
    if (next) {
      return Promise.resolve(next());
    }
    // Every later call lists primary IPs — all attached (none orphaned)
    return Promise.resolve(
      asJson({
        primary_ips: [
          {
            id: 100,
            ip: "1.2.3.4",
            assignee_id: 42,
          },
        ],
      }),
    );
  });

  const { ensureHcloudToken, createServer } = await import("../hetzner/hetzner");
  await ensureHcloudToken();

  const attempt = createServer("test-noclean", "cx23", "fsn1");
  await expect(attempt).rejects.toThrow("resource_limit_exceeded");

  // Verify the guidance text was printed
  const printed = stderrSpy.mock.calls.map((call) => String(call[0])).join("");
  expect(printed).toContain("Primary IP limit");
  expect(printed).toContain("quota increase");
});
|
||||
});
|
||||
|
||||
// ─── isResourceLimitError ─────────────────────────────────────────────────
|
||||
|
||||
describe("hetzner/isResourceLimitError", () => {
  it("detects resource_limit_exceeded", () => {
    const matched = isResourceLimitError("resource_limit_exceeded");
    expect(matched).toBe(true);
  });

  it("detects primary_ip_limit", () => {
    const matched = isResourceLimitError("primary_ip_limit");
    expect(matched).toBe(true);
  });

  it("detects mixed-case and substring", () => {
    // The marker is embedded in a longer message and uses different casing.
    const matched = isResourceLimitError("Error: Resource_Limit_Exceeded for account");
    expect(matched).toBe(true);
  });

  it("returns false for unrelated errors", () => {
    for (const unrelated of ["server not found", "insufficient funds"]) {
      expect(isResourceLimitError(unrelated)).toBe(false);
    }
  });
});
|
||||
|
||||
// ─── cleanupOrphanedPrimaryIps ──────────────────────────────────────────────
|
||||
|
||||
describe("hetzner/cleanupOrphanedPrimaryIps", () => {
  // Wrap a payload in an already-resolved JSON Response, as the fetch mocks below return.
  const resolveJson = (payload: unknown): Promise<Response> => Promise.resolve(new Response(JSON.stringify(payload)));

  it("deletes only unattached primary IPs", async () => {
    process.env.HCLOUD_TOKEN = "test-token";

    // IDs 10 and 30 are unattached (assignee_id 0); ID 20 is attached to server 5.
    const listedIps = [
      {
        id: 10,
        ip: "1.1.1.1",
        assignee_id: 0,
      },
      {
        id: 20,
        ip: "2.2.2.2",
        assignee_id: 5,
      },
      {
        id: 30,
        ip: "3.3.3.3",
        assignee_id: 0,
      },
    ];
    const deletedIds: string[] = [];
    let callCount = 0;

    global.fetch = mock((url: string, opts?: RequestInit) => {
      callCount++;
      switch (callCount) {
        case 1:
          // Token validation
          return resolveJson({
            servers: [],
          });
        case 2:
          // List primary IPs
          return resolveJson({
            primary_ips: listedIps,
          });
        default:
          break;
      }

      // Anything that is not a DELETE gets an empty JSON object.
      if (opts?.method !== "DELETE") {
        return Promise.resolve(new Response("{}"));
      }

      // DELETE calls: record which primary IP id was targeted.
      const idMatch = String(url).match(/primary_ips\/(\d+)/);
      if (idMatch) {
        deletedIds.push(idMatch[1]);
      }
      return Promise.resolve(
        new Response("", {
          status: 204,
        }),
      );
    });

    const { ensureHcloudToken } = await import("../hetzner/hetzner");
    await ensureHcloudToken();

    const count = await cleanupOrphanedPrimaryIps();

    expect(count).toBe(2);
    expect(deletedIds).toContain("10");
    expect(deletedIds).toContain("30");
    expect(deletedIds).not.toContain("20");
  });

  it("returns 0 when no orphaned IPs exist", async () => {
    process.env.HCLOUD_TOKEN = "test-token";

    let callCount = 0;
    global.fetch = mock(() => {
      callCount++;
      const isFirstCall = callCount <= 1;
      if (isFirstCall) {
        return resolveJson({
          servers: [],
        });
      }
      // Every later call lists a single primary IP that is attached to server 5.
      return resolveJson({
        primary_ips: [
          {
            id: 10,
            ip: "1.1.1.1",
            assignee_id: 5,
          },
        ],
      });
    });

    const { ensureHcloudToken } = await import("../hetzner/hetzner");
    await ensureHcloudToken();

    const count = await cleanupOrphanedPrimaryIps();
    expect(count).toBe(0);
  });
});
|
||||
|
|
|
|||
|
|
@ -522,6 +522,41 @@ function isLocationUnavailableError(errMsg: string): boolean {
|
|||
return /resource_unavailable|location disabled|location.*unavailable/i.test(errMsg);
|
||||
}
|
||||
|
||||
/**
 * Report whether a Hetzner API error string signals that a resource limit was exceeded.
 *
 * The check is a case-insensitive substring match for either
 * `resource_limit_exceeded` or `primary_ip_limit`.
 *
 * @param errMsg - Error text (message or code) to inspect.
 * @returns `true` when either marker appears anywhere in `errMsg`.
 */
export function isResourceLimitError(errMsg: string): boolean {
  const limitPattern = /resource_limit_exceeded|primary_ip_limit/i;
  return limitPattern.test(errMsg);
}
|
||||
|
||||
/**
 * Delete Hetzner Primary IPs that are not attached to any server.
 *
 * Such IPs accumulate from failed/leaked server provisioning runs and count
 * toward the account's primary_ip_limit quota. Each candidate is deleted with
 * its own awaited DELETE request; a failed deletion is logged as a warning and
 * does not stop the remaining ones.
 *
 * @returns The number of Primary IPs that were successfully deleted.
 */
export async function cleanupOrphanedPrimaryIps(): Promise<number> {
  const primaryIps = await hetznerGetAll("/primary_ips", "primary_ips");
  let removedCount = 0;

  for (const entry of primaryIps) {
    // A non-numeric assignee_id (e.g. null) is treated like 0, i.e. unattached.
    const attachedTo = isNumber(entry.assignee_id) ? entry.assignee_id : 0;
    if (attachedTo !== 0) {
      continue;
    }

    // Entries without a usable numeric id cannot be addressed for deletion.
    const primaryIpId = isNumber(entry.id) ? entry.id : 0;
    if (primaryIpId === 0) {
      continue;
    }

    // Prefer the address for log output; fall back to the numeric id.
    const label = isString(entry.ip) ? entry.ip : `ID:${primaryIpId}`;
    const outcome = await asyncTryCatch(() => hetznerApi("DELETE", `/primary_ips/${primaryIpId}`));

    if (!outcome.ok) {
      logWarn(`Could not delete Primary IP ${label}: ${getErrorMessage(outcome.error)}`);
      continue;
    }

    logInfo(`Deleted orphaned Primary IP ${label}`);
    removedCount += 1;
  }

  return removedCount;
}
|
||||
|
||||
export async function createServer(
|
||||
name: string,
|
||||
serverType?: string,
|
||||
|
|
@ -549,6 +584,8 @@ export async function createServer(
|
|||
// Track locations that failed so the user isn't offered them again
|
||||
const failedLocations: string[] = [];
|
||||
const maxLocationRetries = 3;
|
||||
// Track whether we've already attempted a resource-limit cleanup+retry
|
||||
let resourceLimitRetried = false;
|
||||
|
||||
for (let attempt = 0; attempt <= maxLocationRetries; attempt++) {
|
||||
logStep(`Creating Hetzner server '${name}' (type: ${sType}, location: ${loc}, image: ${imageLabel})...`);
|
||||
|
|
@ -580,6 +617,25 @@ export async function createServer(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Resource limit (e.g. primary_ip_limit) — try cleaning up orphaned IPs, then retry once
|
||||
if (isResourceLimitError(errMsg) && !resourceLimitRetried) {
|
||||
resourceLimitRetried = true;
|
||||
logWarn("Hetzner resource limit exceeded (primary_ip_limit). Cleaning up orphaned Primary IPs...");
|
||||
const cleaned = await asyncTryCatch(() => cleanupOrphanedPrimaryIps());
|
||||
const count = cleaned.ok ? cleaned.data : 0;
|
||||
if (count > 0) {
|
||||
logInfo(`Cleaned up ${count} orphaned Primary IP(s). Retrying server creation...`);
|
||||
continue;
|
||||
}
|
||||
logError("No orphaned Primary IPs found to clean up.");
|
||||
logWarn("Your Hetzner account has reached its Primary IP limit.");
|
||||
logWarn("To fix this:");
|
||||
logWarn(" 1. Delete unused servers in the Hetzner Console");
|
||||
logWarn(" 2. Go to Networking > Primary IPs and delete unattached IPs");
|
||||
logWarn(" 3. Or request a quota increase at: https://console.hetzner.cloud/limits");
|
||||
throw createResult.error;
|
||||
}
|
||||
|
||||
throw createResult.error;
|
||||
}
|
||||
|
||||
|
|
@ -607,6 +663,25 @@ export async function createServer(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Resource limit (e.g. primary_ip_limit) — try cleaning up orphaned IPs, then retry once
|
||||
if ((isResourceLimitError(errMsg) || isResourceLimitError(errCode)) && !resourceLimitRetried) {
|
||||
resourceLimitRetried = true;
|
||||
logWarn("Hetzner resource limit exceeded (primary_ip_limit). Cleaning up orphaned Primary IPs...");
|
||||
const cleaned = await asyncTryCatch(() => cleanupOrphanedPrimaryIps());
|
||||
const count = cleaned.ok ? cleaned.data : 0;
|
||||
if (count > 0) {
|
||||
logInfo(`Cleaned up ${count} orphaned Primary IP(s). Retrying server creation...`);
|
||||
continue;
|
||||
}
|
||||
logError("No orphaned Primary IPs found to clean up.");
|
||||
logWarn("Your Hetzner account has reached its Primary IP limit.");
|
||||
logWarn("To fix this:");
|
||||
logWarn(" 1. Delete unused servers in the Hetzner Console");
|
||||
logWarn(" 2. Go to Networking > Primary IPs and delete unattached IPs");
|
||||
logWarn(" 3. Or request a quota increase at: https://console.hetzner.cloud/limits");
|
||||
throw new Error(`Server creation failed: ${errMsg}`);
|
||||
}
|
||||
|
||||
logError(`Failed to create Hetzner server: ${errMsg}`);
|
||||
|
||||
if (isBillingError(hetznerBilling, errMsg)) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue