feat(digitalocean): Packer nightly snapshot pipeline for fast boot (#2198)

* feat(digitalocean): Packer nightly snapshot pipeline for fast boot

Add pre-built Packer snapshots for DigitalOcean droplets. Instead of
10-20 min cloud-init + agent install on every boot, snapshot-based
droplets boot in ~2-3 min (SSH only, agent pre-installed).

- Packer HCL2 template with parametrized agent/tier builds
- Agent build matrix (packer/agents.json) for all 7 agents
- Tier scripts mirroring cloud-init.ts package tiers
- Nightly GitHub Actions workflow (4 AM UTC, max-parallel: 3)
- Automatic cleanup: keeps only latest snapshot per agent
- CLI: findSpawnSnapshot() looks up pre-built images via DO API
- CLI: waitForSshOnly() skips cloud-init when using snapshots
- CLI: createServer() accepts optional snapshotId, skips user_data
- CLI: main.ts routes to fast path when snapshot detected
- Tests for findSpawnSnapshot() (5 cases, all passing)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(packer): use var-file for install_commands to avoid shell quoting issues

The previous approach passed install_commands as `-var` inline, but
GitHub Actions expands `${{ }}` before shell evaluation — JSON arrays
with `|`, `&&`, and `"` characters break shell quoting.

Fix: generate a `.auto.pkrvars.json` file (auto-loaded by Packer)
using jq with --argjson for safe JSON handling. Also route all
`${{ inputs }}` and `${{ matrix }}` values through env vars to
prevent script injection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Ahmed Abushagur 2026-03-04 20:47:46 -08:00 committed by GitHub
parent 3242fa78f1
commit ed98a59318
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 546 additions and 10 deletions

View file

@ -0,0 +1,110 @@
import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test";
// We test findSpawnSnapshot by importing from the module.
// The function uses the module-level doToken + doApi, so we mock fetch.
const originalFetch = globalThis.fetch;
describe("findSpawnSnapshot", () => {
  // The token used by the module under test is module-private, so every case
  // drives findSpawnSnapshot purely through a stubbed globalThis.fetch.

  /**
   * Loads the function under test via dynamic import.
   * NOTE(review): repeated import() calls normally resolve to the same cached
   * module instance, so this presumably does not reset module-level state —
   * confirm whether real isolation between cases is needed.
   */
  const loadSubject = async () => {
    const mod = await import("../digitalocean/digitalocean");
    return mod.findSpawnSnapshot;
  };

  // Restore the real fetch on both sides of each case so a stub never leaks.
  const restoreFetch = (): void => {
    globalThis.fetch = originalFetch;
  };
  beforeEach(restoreFetch);
  afterEach(restoreFetch);

  it("returns latest snapshot ID when API returns multiple images", async () => {
    // Deliberately out of chronological order: the newest entry sits in the middle.
    const payload = {
      images: [
        { id: 111, created_at: "2026-03-01T00:00:00Z", name: "spawn-claude-20260301" },
        { id: 222, created_at: "2026-03-03T00:00:00Z", name: "spawn-claude-20260303" },
        { id: 333, created_at: "2026-03-02T00:00:00Z", name: "spawn-claude-20260302" },
      ],
    };
    globalThis.fetch = mock(() => Promise.resolve(new Response(JSON.stringify(payload))));

    const findSpawnSnapshot = await loadSubject();
    const snapshotId = await findSpawnSnapshot("claude");

    // ID 222 carries the most recent created_at (2026-03-03).
    expect(snapshotId).toBe("222");
  });

  // Every remaining scenario must degrade to null.
  const nullCases: Array<{ title: string; stub: () => Promise<Response> }> = [
    {
      title: "returns null when no images found",
      stub: () => Promise.resolve(new Response(JSON.stringify({ images: [] }))),
    },
    {
      title: "returns null on API error (graceful fallback)",
      stub: () => Promise.resolve(new Response("Unauthorized", { status: 401 })),
    },
    {
      title: "returns null when images have no valid ID",
      stub: () =>
        Promise.resolve(
          new Response(
            JSON.stringify({
              images: [{ id: "not-a-number", created_at: "2026-03-01T00:00:00Z" }],
            }),
          ),
        ),
    },
    {
      title: "returns null on network failure",
      stub: () => Promise.reject(new Error("Network error")),
    },
  ];

  for (const { title, stub } of nullCases) {
    it(title, async () => {
      globalThis.fetch = mock(stub);
      const findSpawnSnapshot = await loadSubject();
      const snapshotId = await findSpawnSnapshot("claude");
      expect(snapshotId).toBeNull();
    });
  }
});

View file

@ -750,6 +750,56 @@ export async function promptDoRegion(): Promise<string> {
return selectFromList(items, "DigitalOcean region", DEFAULT_DO_REGION);
}
// ─── Snapshot Lookup ─────────────────────────────────────────────────────────
/**
 * Find the latest pre-built Packer snapshot for an agent.
 *
 * Queries the account's private images tagged `spawn-<agentName>` and picks
 * the one with the greatest `created_at` value among entries that carry a
 * usable (positive numeric) `id`.
 *
 * @param agentName - Agent name used to build the `spawn-<agentName>` tag.
 * @returns The image ID rendered as a decimal string, or `null` when no
 *   usable image exists or the lookup fails for any reason (best-effort:
 *   errors are swallowed here and reported as `null`).
 */
export async function findSpawnSnapshot(agentName: string): Promise<string | null> {
  try {
    // Encode the tag so an unusual agent name cannot inject extra query parameters.
    const tag = encodeURIComponent(`spawn-${agentName}`);
    // NOTE(review): the trailing `1` is presumably a retry/attempt limit for doApi — confirm.
    const text = await doApi("GET", `/images?private=true&per_page=50&tag_name=${tag}`, undefined, 1);
    const data = parseJsonObj(text);
    const images = toObjectArray(data?.images);

    // Single pass: ignore entries without a valid ID up front so that one
    // malformed record cannot mask an older but perfectly usable snapshot.
    let latest: { id: number; createdAt: string } | null = null;
    for (const image of images) {
      const id = image.id;
      if (!isNumber(id) || id <= 0) {
        continue;
      }
      // Timestamps are compared as plain strings; this orders same-format
      // ISO-8601 values chronologically without any locale dependence.
      // A missing/non-string created_at sorts as the oldest possible value.
      const createdAt = isString(image.created_at) ? image.created_at : "";
      // Strict `>` keeps the first-seen entry on ties.
      if (latest === null || createdAt > latest.createdAt) {
        latest = { id, createdAt };
      }
    }

    if (latest === null) {
      return null;
    }
    logInfo(`Found pre-built snapshot for ${agentName} (ID: ${latest.id})`);
    return String(latest.id);
  } catch {
    // Deliberate best-effort: any API, parse, or network failure yields null.
    return null;
  }
}
// ─── SSH-Only Wait (for snapshot-based boots) ────────────────────────────────
/**
 * Block until the droplet accepts SSH connections, without any cloud-init wait.
 * Intended for droplets booted from a pre-built snapshot.
 *
 * @param ip - Optional host address; when omitted or empty, the module-level
 *   `doServerIp` is used instead.
 */
export async function waitForSshOnly(ip?: string): Promise<void> {
  // Resolve the target host before awaiting anything, as the original does.
  const host = ip || doServerIp;
  const sshKeys = await ensureSshKeys();

  await sharedWaitForSsh({
    host,
    user: "root",
    maxAttempts: 36,
    extraSshOpts: getSshKeyOpts(sshKeys),
  });

  logInfo("SSH available (snapshot boot — skipping cloud-init)");
}
// ─── Provisioning ────────────────────────────────────────────────────────────
function getCloudInitUserdata(tier: CloudInitTier = "full"): string {
@ -783,17 +833,21 @@ export async function createServer(
tier?: CloudInitTier,
dropletSize?: string,
region?: string,
snapshotId?: string,
): Promise<void> {
const size = dropletSize || process.env.DO_DROPLET_SIZE || "s-2vcpu-4gb";
const effectiveRegion = region || process.env.DO_REGION || "nyc3";
const image = "ubuntu-24-04-x64";
const image = snapshotId ? Number(snapshotId) : "ubuntu-24-04-x64";
if (!validateRegionName(effectiveRegion)) {
logError("Invalid DO_REGION");
throw new Error("Invalid region");
}
logStep(`Creating DigitalOcean droplet '${name}' (size: ${size}, region: ${effectiveRegion})...`);
const imageLabel = snapshotId ? `snapshot ${snapshotId}` : "ubuntu-24-04-x64";
logStep(
`Creating DigitalOcean droplet '${name}' (size: ${size}, region: ${effectiveRegion}, image: ${imageLabel})...`,
);
// Get all SSH key IDs
const keysText = await doApi("GET", "/account/keys");
@ -802,17 +856,20 @@ export async function createServer(
.map((k) => (isNumber(k.id) ? k.id : 0))
.filter((n) => n > 0);
const userdata = getCloudInitUserdata(tier);
const body = JSON.stringify({
const dropletBody: Record<string, unknown> = {
name,
region: effectiveRegion,
size,
image,
ssh_keys: sshKeyIds,
user_data: userdata,
backups: false,
monitoring: false,
});
};
// Only include cloud-init userdata when booting from a base image (not a snapshot)
if (!snapshotId) {
dropletBody.user_data = getCloudInitUserdata(tier);
}
const body = JSON.stringify(dropletBody);
const createText = await doApi("POST", "/droplets", body);
const createData = parseJsonObj(createText);

View file

@ -10,6 +10,8 @@ import {
createServer as createDroplet,
getServerName,
waitForCloudInit,
waitForSshOnly,
findSpawnSnapshot,
runServer,
uploadFile,
interactiveSession,
@ -18,7 +20,7 @@ import { agents, resolveAgent } from "./agents";
import { saveLaunchCmd } from "../history.js";
import { runOrchestration } from "../shared/orchestrate";
import type { CloudOrchestrator } from "../shared/orchestrate";
import { logStep } from "../shared/ui";
import { logInfo, logStep } from "../shared/ui";
async function main() {
const agentName = process.argv[2];
@ -32,6 +34,7 @@ async function main() {
let dropletSize = "";
let region = "";
let snapshotId: string | null = null;
const cloud: CloudOrchestrator = {
cloudName: "digitalocean",
@ -44,6 +47,8 @@ async function main() {
await promptSpawnName();
const usedBrowserAuth = await ensureDoToken();
await ensureSshKey();
// Look for a pre-built snapshot after auth (needs valid token)
snapshotId = await findSpawnSnapshot(agentName);
if (usedBrowserAuth) {
logStep("Next step: OpenRouter authentication (opening browser in 5s)...");
await new Promise((r) => setTimeout(r, 5000));
@ -54,17 +59,31 @@ async function main() {
region = await promptDoRegion();
},
async createServer(name: string) {
await createDroplet(name, agent.cloudInitTier, dropletSize, region);
await createDroplet(name, agent.cloudInitTier, dropletSize, region, snapshotId || undefined);
},
getServerName,
async waitForReady() {
await waitForCloudInit();
if (snapshotId) {
await waitForSshOnly();
} else {
await waitForCloudInit();
}
},
interactiveSession,
saveLaunchCmd,
};
await runOrchestration(cloud, agent, agentName);
// When using a snapshot, skip the agent install step (already pre-installed)
const effectiveAgent = snapshotId
? {
...agent,
install: async () => {
logInfo("Agent pre-installed (snapshot)");
},
}
: agent;
await runOrchestration(cloud, effectiveAgent, agentName);
}
main().catch((err) => {