diff --git a/.github/workflows/packer-snapshots.yml b/.github/workflows/packer-snapshots.yml
new file mode 100644
index 00000000..4d09c93b
--- /dev/null
+++ b/.github/workflows/packer-snapshots.yml
@@ -0,0 +1,106 @@
+name: Packer DO Snapshots
+
+on:
+  schedule:
+    # Nightly at 4 AM UTC (before tarball build at 5 AM)
+    - cron: "0 4 * * *"
+  workflow_dispatch:
+    inputs:
+      agent:
+        description: "Single agent to build (leave empty for all)"
+        required: false
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  matrix:
+    name: Generate matrix
+    runs-on: ubuntu-latest
+    outputs:
+      agents: ${{ steps.set.outputs.agents }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: set
+        run: |
+          SINGLE_AGENT="${SINGLE_AGENT_INPUT}"
+          if [ -n "$SINGLE_AGENT" ]; then
+            # Build the JSON with jq so quotes/backslashes in the input cannot corrupt the matrix
+            echo "agents=$(jq -cn --arg a "$SINGLE_AGENT" '[$a]')" >> "$GITHUB_OUTPUT"
+          else
+            AGENTS=$(jq -c 'keys' packer/agents.json)
+            echo "agents=${AGENTS}" >> "$GITHUB_OUTPUT"
+          fi
+        env:
+          SINGLE_AGENT_INPUT: ${{ inputs.agent }}
+
+  build:
+    name: "Build ${{ matrix.agent }}"
+    needs: matrix
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        agent: ${{ fromJson(needs.matrix.outputs.agents) }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Read agent config
+        id: config
+        run: |
+          TIER=$(jq -r --arg a "$AGENT_NAME" '.[$a].tier // "minimal"' packer/agents.json)
+          INSTALL=$(jq -c --arg a "$AGENT_NAME" '.[$a].install // []' packer/agents.json)
+          echo "tier=${TIER}" >> "$GITHUB_OUTPUT"
+          echo "install=${INSTALL}" >> "$GITHUB_OUTPUT"
+        env:
+          AGENT_NAME: ${{ matrix.agent }}
+
+      - name: Setup Packer
+        uses: hashicorp/setup-packer@main
+        with:
+          version: latest
+
+      - name: Init Packer plugins
+        run: packer init packer/digitalocean.pkr.hcl
+
+      - name: Generate variables file
+        run: |
+          jq -n \
+            --arg token "$DO_API_TOKEN" \
+            --arg agent "$AGENT_NAME" \
+            --arg tier "$TIER" \
+            --argjson install "$INSTALL_COMMANDS" \
+            '{
+              do_api_token: $token,
+              agent_name: $agent,
+              cloud_init_tier: $tier,
+              install_commands: $install
+            }' > packer/auto.pkrvars.json
+        env:
+          DO_API_TOKEN: ${{ secrets.DO_API_TOKEN }}
+          AGENT_NAME: ${{ matrix.agent }}
+          TIER: ${{ steps.config.outputs.tier }}
+          INSTALL_COMMANDS: ${{ steps.config.outputs.install }}
+
+      - name: Build snapshot
+        run: packer build -var-file=packer/auto.pkrvars.json packer/digitalocean.pkr.hcl
+        env:
+          PACKER_LOG: "1"
+
+      - name: Cleanup old snapshots
+        if: success()
+        run: |
+          # Keep only the latest snapshot per agent
+          SNAPSHOTS=$(curl -s -H "Authorization: Bearer ${DO_API_TOKEN}" \
+            "https://api.digitalocean.com/v2/images?private=true&per_page=100&tag_name=spawn-${AGENT_NAME}" \
+            | jq -r '.images | sort_by(.created_at) | reverse | .[1:] | .[].id')
+
+          for ID in $SNAPSHOTS; do
+            echo "Deleting old snapshot: ${ID}"
+            curl -s -X DELETE -H "Authorization: Bearer ${DO_API_TOKEN}" \
+              "https://api.digitalocean.com/v2/images/${ID}" || true
+          done
+        env:
+          DO_API_TOKEN: ${{ secrets.DO_API_TOKEN }}
+          AGENT_NAME: ${{ matrix.agent }}
diff --git a/packages/cli/src/__tests__/do-snapshot.test.ts b/packages/cli/src/__tests__/do-snapshot.test.ts
new file mode 100644
index 00000000..59ad1ad1
--- /dev/null
+++ b/packages/cli/src/__tests__/do-snapshot.test.ts
@@ -0,0 +1,114 @@
+/**
+ * do-snapshot.test.ts — Tests for findSpawnSnapshot().
+ *
+ * Verifies snapshot lookup: happy path, empty results, API errors,
+ * invalid IDs, and network failures all return correct values.
+ */
+
+import { afterAll, afterEach, describe, expect, it, mock } from "bun:test";
+
+// ── Mock oauth (prevent interactive prompts) ──────────────────────────────
+
+mock.module("../shared/oauth", () => ({
+  getOrPromptApiKey: mock(() => Promise.resolve("sk-test")),
+  getModelIdInteractive: mock(() => Promise.resolve("openrouter/auto")),
+}));
+
+// ── Import under test ─────────────────────────────────────────────────────
+
+const { findSpawnSnapshot } = await import("../digitalocean/digitalocean");
+
+describe("findSpawnSnapshot", () => {
+  const originalFetch = globalThis.fetch;
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  afterAll(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it("returns the latest snapshot ID sorted by created_at", async () => {
+    globalThis.fetch = mock(() =>
+      Promise.resolve(
+        new Response(
+          JSON.stringify({
+            images: [
+              {
+                id: 100,
+                created_at: "2026-01-01T00:00:00Z",
+              },
+              {
+                id: 200,
+                created_at: "2026-03-01T00:00:00Z",
+              },
+              {
+                id: 150,
+                created_at: "2026-02-01T00:00:00Z",
+              },
+            ],
+          }),
+        ),
+      ),
+    );
+
+    const result = await findSpawnSnapshot("claude");
+    expect(result).toBe("200");
+  });
+
+  it("returns null when no images are found", async () => {
+    globalThis.fetch = mock(() =>
+      Promise.resolve(
+        new Response(
+          JSON.stringify({
+            images: [],
+          }),
+        ),
+      ),
+    );
+
+    const result = await findSpawnSnapshot("claude");
+    expect(result).toBeNull();
+  });
+
+  it("returns null on API error response", async () => {
+    globalThis.fetch = mock(() =>
+      Promise.resolve(
+        new Response("Unauthorized", {
+          status: 401,
+        }),
+      ),
+    );
+
+    const result = await findSpawnSnapshot("claude");
+    expect(result).toBeNull();
+  });
+
+  it("returns null when snapshot ID is invalid (non-numeric)", async () => {
+    globalThis.fetch = mock(() =>
+      Promise.resolve(
+        new Response(
+          JSON.stringify({
+            images: [
+              {
+                id: "not-a-number",
+                created_at: "2026-01-01T00:00:00Z",
+              },
+            ],
+          }),
+        ),
+      ),
+    );
+
+    const result = await findSpawnSnapshot("claude");
+    expect(result).toBeNull();
+  });
+
+  it("returns null on network failure", async () => {
+    globalThis.fetch = mock(() => Promise.reject(new Error("Network unreachable")));
+
+    const result = await findSpawnSnapshot("claude");
+    expect(result).toBeNull();
+  });
+});
diff --git a/packages/cli/src/digitalocean/digitalocean.ts b/packages/cli/src/digitalocean/digitalocean.ts
index 4b6dd4cd..609c6852 100644
--- a/packages/cli/src/digitalocean/digitalocean.ts
+++ b/packages/cli/src/digitalocean/digitalocean.ts
@@ -789,6 +789,7 @@ export async function createServer(
   tier?: CloudInitTier,
   dropletSize?: string,
   region?: string,
+  snapshotId?: string,
 ): Promise {
   const size = dropletSize || process.env.DO_DROPLET_SIZE || "s-2vcpu-4gb";
   const effectiveRegion = region || process.env.DO_REGION || "nyc3";
@@ -798,9 +799,17 @@ export async function createServer(
     throw new Error("Invalid region");
   }
 
-  const image = "ubuntu-24-04-x64";
+  // Snapshots are passed to the API as a numeric image ID. Reject anything else
+  // here: Number() would yield NaN, which JSON.stringify serializes as null.
+  if (snapshotId && !/^[1-9]\d*$/.test(snapshotId)) {
+    throw new Error("Invalid snapshot ID");
+  }
+  const image = snapshotId ? Number(snapshotId) : "ubuntu-24-04-x64";
+  const imageLabel = snapshotId ? `snapshot:${snapshotId}` : "ubuntu-24-04-x64";
 
-  logStep(`Creating DigitalOcean droplet '${name}' (size: ${size}, region: ${effectiveRegion})...`);
+  logStep(
+    `Creating DigitalOcean droplet '${name}' (size: ${size}, region: ${effectiveRegion}, image: ${imageLabel})...`,
+  );
 
   // Get all SSH key IDs
   const keysText = await doApi("GET", "/account/keys");
@@ -809,16 +818,22 @@ export async function createServer(
     .map((k) => (isNumber(k.id) ? k.id : 0))
     .filter((n) => n > 0);
 
-  const body = JSON.stringify({
+  const dropletConfig: Record<string, unknown> = {
     name,
     region: effectiveRegion,
     size,
     image,
     ssh_keys: sshKeyIds,
-    user_data: getCloudInitUserdata(tier),
     backups: false,
     monitoring: false,
-  });
+  };
+
+  // Only include cloud-init userdata when NOT booting from a snapshot
+  if (!snapshotId) {
+    dropletConfig.user_data = getCloudInitUserdata(tier);
+  }
+
+  const body = JSON.stringify(dropletConfig);
 
   const createText = await doApi("POST", "/droplets", body);
   const createData = parseJsonObj(createText);
@@ -882,6 +897,71 @@ async function waitForDropletActive(dropletId: string, maxAttempts = 60): Promis
   logStepDone();
 }
 
+// ─── Snapshot Lookup ─────────────────────────────────────────────────────────
+
+/**
+ * Look up the newest pre-built snapshot tagged `spawn-<agentName>`.
+ *
+ * Best-effort by design: an empty result, any error thrown while querying or
+ * parsing, or a missing/non-numeric image ID all resolve to `null`, so callers
+ * can fall back to provisioning from the stock base image.
+ *
+ * @returns The snapshot's image ID as a string, or `null` when unavailable.
+ */
+export async function findSpawnSnapshot(agentName: string): Promise<string | null> {
+  try {
+    const text = await doApi(
+      "GET",
+      `/images?private=true&per_page=50&tag_name=spawn-${encodeURIComponent(agentName)}`,
+      undefined,
+      1,
+    );
+    const data = parseJsonObj(text);
+    const images = toObjectArray(data?.images);
+    if (images.length === 0) {
+      return null;
+    }
+
+    // Sort by created_at descending to get the latest snapshot
+    images.sort((a, b) => {
+      const aDate = isString(a.created_at) ? a.created_at : "";
+      const bDate = isString(b.created_at) ? b.created_at : "";
+      return bDate.localeCompare(aDate);
+    });
+
+    const latestId = images[0].id;
+    if (!isNumber(latestId) || latestId <= 0) {
+      return null;
+    }
+
+    logInfo(`Found pre-built snapshot for ${agentName} (ID: ${latestId})`);
+    return String(latestId);
+  } catch {
+    return null;
+  }
+}
+
+// ─── SSH-Only Wait (for snapshot boots) ──────────────────────────────────────
+
+/**
+ * Wait until SSH on the droplet is reachable, without waiting for cloud-init
+ * (snapshot boots are created without cloud-init userdata).
+ *
+ * @param ip - Droplet address; defaults to the server IP held in module state.
+ */
+export async function waitForSshOnly(ip?: string): Promise<void> {
+  const serverIp = ip || _state.serverIp;
+  const selectedKeys = await ensureSshKeys();
+  const keyOpts = getSshKeyOpts(selectedKeys);
+  await sharedWaitForSsh({
+    host: serverIp,
+    user: "root",
+    maxAttempts: 36,
+    extraSshOpts: keyOpts,
+  });
+  logInfo("SSH available (snapshot boot — skipping cloud-init)");
+}
+
 // ─── SSH Execution ───────────────────────────────────────────────────────────
 
 export async function waitForCloudInit(ip?: string, _maxAttempts = 60): Promise {
diff --git a/packages/cli/src/digitalocean/main.ts b/packages/cli/src/digitalocean/main.ts
index fd84b445..76ae530a 100644
--- a/packages/cli/src/digitalocean/main.ts
+++ b/packages/cli/src/digitalocean/main.ts
@@ -12,6 +12,7 @@ import {
   createServer as createDroplet,
   ensureDoToken,
   ensureSshKey,
+  findSpawnSnapshot,
   getServerName,
   interactiveSession,
   promptDoRegion,
@@ -20,6 +21,7 @@ import {
   runServer,
   uploadFile,
   waitForCloudInit,
+  waitForSshOnly,
 } from "./digitalocean";
 
 async function main() {
@@ -34,10 +36,12 @@ async function main() {
 
   let dropletSize = "";
   let region = "";
+  let snapshotId: string | null = null;
 
   const cloud: CloudOrchestrator = {
     cloudName: "digitalocean",
     cloudLabel: "DigitalOcean",
+    skipAgentInstall: false,
     runner: {
       runServer,
       uploadFile,
@@ -57,11 +61,19 @@ async function main() {
     },
     async createServer(name: string, spawnId?: string) {
       process.env.SPAWN_ID = spawnId || "";
-      await createDroplet(name, agent.cloudInitTier, dropletSize, region);
+      // Check for a pre-built snapshot before provisioning
+      snapshotId = await findSpawnSnapshot(agentName);
+      // Recompute on every call so a later call that finds no snapshot re-enables the install step
+      cloud.skipAgentInstall = Boolean(snapshotId);
+      await createDroplet(name, agent.cloudInitTier, dropletSize, region, snapshotId ?? undefined);
     },
     getServerName,
     async waitForReady() {
-      await waitForCloudInit();
+      if (snapshotId) {
+        await waitForSshOnly();
+      } else {
+        await waitForCloudInit();
+      }
     },
     interactiveSession,
     saveLaunchCmd: (cmd: string, sid?: string) => saveLaunchCmd(cmd, sid),
diff --git a/packages/cli/src/shared/orchestrate.ts b/packages/cli/src/shared/orchestrate.ts
index 0ebe7761..be7314cb 100644
--- a/packages/cli/src/shared/orchestrate.ts
+++ b/packages/cli/src/shared/orchestrate.ts
@@ -15,6 +15,8 @@ export interface CloudOrchestrator {
   cloudName: string;
   cloudLabel: string;
   runner: CloudRunner;
+  /** When true, skip tarball + agent install (e.g. booting from a pre-baked snapshot). */
+  skipAgentInstall?: boolean;
   authenticate(): Promise;
   promptSize(): Promise;
   createServer(name: string, spawnId?: string): Promise;
@@ -112,14 +114,18 @@ export async function runOrchestration(
 
   const envContent = generateEnvConfig(agent.envVars(apiKey));
 
-  // 8. Install agent (try tarball first on cloud VMs)
-  let installedFromTarball = false;
-  if (cloud.cloudName !== "local" && !agent.skipTarball) {
-    const tarball = options?.tryTarball ?? tryTarballInstall;
-    installedFromTarball = await tarball(cloud.runner, agentName);
-  }
-  if (!installedFromTarball) {
-    await agent.install();
-  }
+  // 8. Install agent (skip entirely for snapshot boots, try tarball first on cloud VMs)
+  if (cloud.skipAgentInstall) {
+    logInfo("Snapshot boot — skipping agent install");
+  } else {
+    let installedFromTarball = false;
+    if (cloud.cloudName !== "local" && !agent.skipTarball) {
+      const tarball = options?.tryTarball ?? tryTarballInstall;
+      installedFromTarball = await tarball(cloud.runner, agentName);
+    }
+    if (!installedFromTarball) {
+      await agent.install();
+    }
+  }
 
   // 9. Inject environment variables via .spawnrc
diff --git a/packer/digitalocean.pkr.hcl b/packer/digitalocean.pkr.hcl
new file mode 100644
index 00000000..5a884f6a
--- /dev/null
+++ b/packer/digitalocean.pkr.hcl
@@ -0,0 +1,101 @@
+packer {
+  required_plugins {
+    digitalocean = {
+      version = ">= 1.4.1"
+      source  = "github.com/digitalocean/digitalocean"
+    }
+  }
+}
+
+variable "do_api_token" {
+  type      = string
+  sensitive = true
+}
+
+variable "agent_name" {
+  type = string
+}
+
+variable "cloud_init_tier" {
+  type    = string
+  default = "minimal"
+}
+
+variable "install_commands" {
+  type    = list(string)
+  default = []
+}
+
+locals {
+  timestamp  = formatdate("YYYYMMDD-hhmm", timestamp())
+  image_name = "spawn-${var.agent_name}-${local.timestamp}"
+}
+
+source "digitalocean" "spawn" {
+  api_token    = var.do_api_token
+  image        = "ubuntu-24-04-x64"
+  region       = "nyc3"
+  size         = "s-2vcpu-4gb"
+  ssh_username = "root"
+
+  snapshot_name    = local.image_name
+  snapshot_regions = [
+    "nyc1", "nyc3", "sfo3", "tor1", "ams3",
+    "lon1", "fra1", "blr1", "sgp1", "syd1",
+  ]
+
+  # NOTE(review): per the plugin docs, `tags` labels the temporary build droplet and
+  # `snapshot_tags` labels the snapshot — the image lookups filter on tag_name=spawn-<agent>.
+  tags          = ["spawn", "spawn-${var.agent_name}"]
+  snapshot_tags = ["spawn", "spawn-${var.agent_name}"]
+}
+
+build {
+  sources = ["source.digitalocean.spawn"]
+
+  # Wait for cloud-init to finish (DO base images run it on first boot)
+  provisioner "shell" {
+    inline = [
+      "cloud-init status --wait || true",
+    ]
+  }
+
+  # Wait for any apt locks to be released (cloud-init may hold them)
+  provisioner "shell" {
+    inline = [
+      "for i in $(seq 1 30); do fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 || break; echo 'Waiting for apt lock...'; sleep 2; done",
+    ]
+  }
+
+  # Run the tier script (installs base packages: curl, git, node, bun, etc.)
+  provisioner "shell" {
+    script = "scripts/tier-${var.cloud_init_tier}.sh"
+  }
+
+  # Install the agent
+  provisioner "shell" {
+    inline           = var.install_commands
+    environment_vars = [
+      "HOME=/root",
+      "DEBIAN_FRONTEND=noninteractive",
+    ]
+  }
+
+  # Leave a marker so the CLI knows this is a pre-baked snapshot
+  provisioner "shell" {
+    inline = [
+      "echo 'spawn-${var.agent_name}' > /root/.spawn-snapshot",
+      "date -u '+%Y-%m-%dT%H:%M:%SZ' >> /root/.spawn-snapshot",
+      "touch /root/.cloud-init-complete",
+    ]
+  }
+
+  # Clean up to reduce snapshot size
+  provisioner "shell" {
+    inline = [
+      "apt-get clean",
+      "rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*",
+      "sync",
+    ]
+  }
+}