feat: restore Packer DO snapshot pipeline for fast agent boot (#2262)

Restores the nightly Packer snapshot build pipeline (reverted in #2205)
that pre-bakes agent images as DigitalOcean snapshots. When a snapshot
exists on the user's account, droplet boot skips cloud-init and tarball
install entirely — cutting provisioning from ~10min to ~2min.

- Add `packer/digitalocean.pkr.hcl` HCL2 template with multi-region
  distribution, apt-lock wait, and snapshot marker
- Add `.github/workflows/packer-snapshots.yml` nightly build with
  matrix strategy, auto-cleanup of old snapshots, and injection-safe
  env var handling
- Add `findSpawnSnapshot()` to query DO API for pre-built snapshots
- Add `waitForSshOnly()` for snapshot boots (skip cloud-init wait)
- Modify `createServer()` to accept optional `snapshotId` param
- Wire snapshot detection in DO `main.ts` orchestrator
- Add `skipAgentInstall` to `CloudOrchestrator` interface to skip
  tarball + install steps when booting from snapshot
- Add 5 unit tests for snapshot lookup (happy path, empty, error,
  invalid ID, network failure)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Ahmed Abushagur 2026-03-06 16:32:05 -08:00 committed by GitHub
parent 9e26d74ddb
commit cefcd56327
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 411 additions and 15 deletions

105
.github/workflows/packer-snapshots.yml vendored Normal file
View file

@ -0,0 +1,105 @@
# Workflow header: display name, triggers, and token permissions.
# Triggers are a nightly cron and a manual dispatch that can optionally be
# narrowed to one agent through the `agent` input.
name: Packer DO Snapshots

on:
  workflow_dispatch:
    inputs:
      agent:
        type: string
        required: false
        description: 'Single agent to build (leave empty for all)'
  schedule:
    # Nightly at 4 AM UTC (before tarball build at 5 AM)
    - cron: '0 4 * * *'

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
jobs:
  # Resolves the list of agents to build and exposes it as a JSON array in the
  # `agents` output: either the single agent named in the manual-dispatch
  # input, or every key of packer/agents.json.
  matrix:
    name: Generate matrix
    runs-on: ubuntu-latest
    outputs:
      agents: ${{ steps.set.outputs.agents }}
    steps:
      - uses: actions/checkout@v4
      - id: set
        run: |
          set -euo pipefail
          if [ -n "$SINGLE_AGENT_INPUT" ]; then
            # Reject names that are not keys of agents.json. Otherwise the
            # build job would fall back to its defaults (tier "minimal", no
            # install commands) and produce a snapshot for a non-existent agent.
            if ! jq -e --arg a "$SINGLE_AGENT_INPUT" 'has($a)' packer/agents.json > /dev/null; then
              echo "::error::Unknown agent '${SINGLE_AGENT_INPUT}' (not a key in packer/agents.json)"
              exit 1
            fi
            # Let jq build the array so quotes/backslashes in the input are
            # JSON-escaped instead of being spliced into the output verbatim.
            AGENTS=$(jq -cn --arg a "$SINGLE_AGENT_INPUT" '[$a]')
          else
            AGENTS=$(jq -c 'keys' packer/agents.json)
          fi
          echo "agents=${AGENTS}" >> "$GITHUB_OUTPUT"
        env:
          # Passed through the environment (not inlined into the script) so the
          # input text is never interpreted as shell code.
          SINGLE_AGENT_INPUT: ${{ inputs.agent }}

  # Builds one snapshot per agent from the matrix, then deletes all but the
  # newest image tagged `spawn-<agent>`.
  build:
    name: "Build ${{ matrix.agent }}"
    needs: matrix
    runs-on: ubuntu-latest
    strategy:
      # One agent failing must not cancel the other agents' builds.
      fail-fast: false
      matrix:
        agent: ${{ fromJson(needs.matrix.outputs.agents) }}
    steps:
      - uses: actions/checkout@v4

      - name: Read agent config
        id: config
        run: |
          set -euo pipefail
          # Missing fields fall back to tier "minimal" and an empty install list.
          TIER=$(jq -r --arg a "$AGENT_NAME" '.[$a].tier // "minimal"' packer/agents.json)
          INSTALL=$(jq -c --arg a "$AGENT_NAME" '.[$a].install // []' packer/agents.json)
          echo "tier=${TIER}" >> "$GITHUB_OUTPUT"
          echo "install=${INSTALL}" >> "$GITHUB_OUTPUT"
        env:
          AGENT_NAME: ${{ matrix.agent }}

      # NOTE(review): `@main` and `version: latest` are floating references;
      # consider pinning both to a release so nightly builds are reproducible.
      - name: Setup Packer
        uses: hashicorp/setup-packer@main
        with:
          version: latest

      - name: Init Packer plugins
        run: packer init packer/digitalocean.pkr.hcl

      - name: Generate variables file
        run: |
          # --argjson parses INSTALL_COMMANDS as JSON, so install_commands is
          # written as an array rather than a quoted string.
          # The resulting file contains the API token.
          jq -n \
            --arg token "$DO_API_TOKEN" \
            --arg agent "$AGENT_NAME" \
            --arg tier "$TIER" \
            --argjson install "$INSTALL_COMMANDS" \
            '{
              do_api_token: $token,
              agent_name: $agent,
              cloud_init_tier: $tier,
              install_commands: $install
            }' > packer/auto.pkrvars.json
        env:
          DO_API_TOKEN: ${{ secrets.DO_API_TOKEN }}
          AGENT_NAME: ${{ matrix.agent }}
          TIER: ${{ steps.config.outputs.tier }}
          INSTALL_COMMANDS: ${{ steps.config.outputs.install }}

      - name: Build snapshot
        run: packer build -var-file=packer/auto.pkrvars.json packer/digitalocean.pkr.hcl
        env:
          PACKER_LOG: "1"

      - name: Cleanup old snapshots
        if: success()
        run: |
          set -euo pipefail
          # Keep only the latest snapshot per agent.
          # -f makes curl exit non-zero on HTTP errors and pipefail propagates
          # that through the pipe, so a failed listing fails the step instead
          # of silently skipping cleanup.
          STALE_IDS=$(curl -fsS -H "Authorization: Bearer ${DO_API_TOKEN}" \
            "https://api.digitalocean.com/v2/images?private=true&per_page=100&tag_name=spawn-${AGENT_NAME}" \
            | jq -r '(.images // []) | sort_by(.created_at) | reverse | .[1:] | .[].id')
          for ID in $STALE_IDS; do
            echo "Deleting old snapshot: ${ID}"
            # Deletion stays best-effort (a failure does not fail the job), but
            # it is surfaced as a warning annotation rather than swallowed.
            curl -fsS -X DELETE -H "Authorization: Bearer ${DO_API_TOKEN}" \
              "https://api.digitalocean.com/v2/images/${ID}" \
              || echo "::warning::Failed to delete snapshot ${ID}"
          done
        env:
          DO_API_TOKEN: ${{ secrets.DO_API_TOKEN }}
          AGENT_NAME: ${{ matrix.agent }}