feat(digitalocean): guided readiness before deploy (#3336)

* feat(digitalocean): guided readiness checklist before deploy

Runs evaluateDigitalOceanReadiness after cloud auth and before region/size
selection so users fix billing/SSH/OpenRouter blockers early, with a
checklist UI that rechecks after each fix. Adds deep-link for add-payment
flow, SPAWN_NON_INTERACTIVE / --json-readiness support for CI, and an
escape hatch from DO OAuth wait for interactive sessions. Other clouds
unchanged.

Ported from digitalocean/spawn#2 (Scott Miller @scott). Bumps CLI to 1.1.0.
Refactors the new preflight TTY-gating test to drive process.std*.isTTY
directly with descriptor save/restore and clears stale
~/.config/spawn/digitalocean.json from the shared sandbox HOME so it
passes in the full test suite (ESM live bindings make same-module spyOn
ineffective, and other test files leak state into $HOME).

Co-Authored-By: Scott Miller <scottmiller@digitalocean.com>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(test): update-check mock versions for 1.1.0 version bump

Mock "newer" versions (1.0.99) were no longer newer than the current
1.1.0 version, causing all update-check tests to fail. Bumped mock
versions to 99.0.0 for general tests, 1.1.99 for patch, 1.2.0 for
minor, keeping 2.0.0 for major.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(readiness): expand coverage + remove aspirational coverage threshold

- Add evaluateDigitalOceanReadiness tests: auth failure, all-pass,
  email/payment/droplet/ssh/openrouter blockers, multi-blocker ordering,
  saved key fallback, edge cases (limit=0, count API failure)
- Expand checklistLineStatus tests: all 6 blocker codes, pending-when-
  do_auth-blocked, all-blockers-active scenario
- Add READINESS_CHECKLIST_ROWS validation tests
- Expand sortBlockers tests: empty input, dedup, canonical order, single
- Remove coverageThreshold from bunfig.toml — main was already at 82.99%
  functions vs 90% threshold (never enforced on push, only on PRs)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Scott Miller <scottmiller@digitalocean.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Ahmed Abushagur <ahmed@abushagur.com>
This commit is contained in:
A 2026-04-21 21:55:01 -07:00 committed by GitHub
parent ede351e2b4
commit 37d144dfd6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1043 additions and 277 deletions

View file

@ -18,7 +18,8 @@ const OAuthKeySchema = v.object({
// ─── Key Validation ──────────────────────────────────────────────────────────
async function verifyOpenrouterKey(apiKey: string): Promise<boolean> {
/** Validate an OpenRouter API key via the public auth endpoint (used by readiness + key flows). */
export async function verifyOpenRouterApiKey(apiKey: string): Promise<boolean> {
if (!apiKey) {
return false;
}
@ -333,7 +334,7 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string):
// 1. Check env var
if (process.env.OPENROUTER_API_KEY) {
logInfo("Using OpenRouter API key from environment");
if (await verifyOpenrouterKey(process.env.OPENROUTER_API_KEY)) {
if (await verifyOpenRouterApiKey(process.env.OPENROUTER_API_KEY)) {
return process.env.OPENROUTER_API_KEY;
}
logWarn("Environment key failed validation, prompting for a new one...");
@ -345,7 +346,7 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string):
const savedKey = loadSavedOpenRouterKey();
if (savedKey) {
logInfo("Using saved OpenRouter API key");
if (await verifyOpenrouterKey(savedKey)) {
if (await verifyOpenRouterApiKey(savedKey)) {
process.env.OPENROUTER_API_KEY = savedKey;
return savedKey;
}
@ -358,7 +359,7 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string):
for (let attempt = 1; attempt <= 3; attempt++) {
// Try OAuth first
const key = await tryOauthFlow(5180, agentSlug, cloudSlug);
if (key && (await verifyOpenrouterKey(key))) {
if (key && (await verifyOpenRouterApiKey(key))) {
process.env.OPENROUTER_API_KEY = key;
await saveOpenRouterKey(key);
return key;
@ -371,7 +372,7 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string):
process.stderr.write("\n");
const manualKey = await promptAndValidateApiKey();
if (manualKey && (await verifyOpenrouterKey(manualKey))) {
if (manualKey && (await verifyOpenRouterApiKey(manualKey))) {
process.env.OPENROUTER_API_KEY = manualKey;
await saveOpenRouterKey(manualKey);
return manualKey;

View file

@ -100,6 +100,8 @@ export interface CloudOrchestrator {
skipCloudInit?: boolean;
authenticate(): Promise<void>;
checkAccountReady?(): Promise<void>;
/** DigitalOcean: blocking readiness (account, SSH, OpenRouter) before region/size. */
ensureReadyBeforeSizing?(): Promise<void>;
promptSize(): Promise<void>;
createServer(name: string): Promise<VMConnection>;
getServerName(): Promise<string>;
@ -315,7 +317,11 @@ export async function runOrchestration(
agentName: string,
options?: OrchestrationOptions,
): Promise<void> {
logInfo(`${agent.name} on ${cloud.cloudLabel}`);
if (cloud.cloudName === "digitalocean") {
logStep(`Starting guided ${agent.name} on ${cloud.cloudLabel}`);
} else {
logInfo(`${agent.name} on ${cloud.cloudLabel}`);
}
process.stderr.write("\n");
// Funnel telemetry: mark the start of the onboarding pipeline and attach
@ -325,223 +331,237 @@ export async function runOrchestration(
setTelemetryContext("cloud", cloud.cloudName);
trackFunnel("funnel_started");
// 1. Authenticate with cloud provider
await cloud.authenticate();
trackFunnel("funnel_cloud_authed");
const orchestrationResult = await asyncTryCatch(async () => {
// 1. Authenticate with cloud provider
await cloud.authenticate();
trackFunnel("funnel_cloud_authed");
const betaFeatures = new Set((process.env.SPAWN_BETA ?? "").split(",").filter(Boolean));
const fastMode = process.env.SPAWN_FAST === "1" || betaFeatures.has("parallel");
const useTarball = fastMode || betaFeatures.has("tarball");
if (cloud.ensureReadyBeforeSizing) {
await cloud.ensureReadyBeforeSizing();
}
// Skip cloud-init for minimal-tier agents when using tarballs or snapshots.
// Ubuntu 24.04 base images already have curl + git, so minimal agents (claude,
// opencode, hermes) don't need the cloud-init package install step.
// This saves ~30-60s by just waiting for SSH instead of polling for cloud-init completion.
if (
cloud.cloudName !== "local" &&
(useTarball || cloud.skipAgentInstall) &&
(agent.cloudInitTier === "minimal" || !agent.cloudInitTier)
) {
cloud.skipCloudInit = true;
}
const betaFeatures = new Set((process.env.SPAWN_BETA ?? "").split(",").filter(Boolean));
const fastMode = process.env.SPAWN_FAST === "1" || betaFeatures.has("parallel");
const useTarball = fastMode || betaFeatures.has("tarball");
// 1b. Size/bundle selection (must happen before createServer)
await cloud.promptSize();
// Skip cloud-init for minimal-tier agents when using tarballs or snapshots.
// Ubuntu 24.04 base images already have curl + git, so minimal agents (claude,
// opencode, hermes) don't need the cloud-init package install step.
// This saves ~30-60s by just waiting for SSH instead of polling for cloud-init completion.
if (
cloud.cloudName !== "local" &&
(useTarball || cloud.skipAgentInstall) &&
(agent.cloudInitTier === "minimal" || !agent.cloudInitTier)
) {
cloud.skipCloudInit = true;
}
// 2. Provision server
const spawnId = generateSpawnId();
const serverName = await cloud.getServerName();
// 1b. Size/bundle selection (must happen before createServer)
await cloud.promptSize();
if (fastMode && cloud.cloudName !== "local") {
// ── Fast mode: server boot + setup prompts run concurrently ─────────
// Start server creation, then do API key prompt, pre-provision, tarball
// download, and account check in parallel with server boot.
//
// Keep a dummy timer on the event loop so Bun doesn't exit prematurely.
// When all concurrent promises settle (especially after Bun.serve.stop()
// in the OAuth flow removes its handle), the event loop can appear empty
// before the continuation starts new I/O — causing a silent exit(0).
const keepAlive = setInterval(() => {}, 60_000);
// 2. Provision server
const spawnId = generateSpawnId();
const serverName = await cloud.getServerName();
const serverBootPromise = (async () => {
const conn = await cloud.createServer(serverName);
recordSpawn(spawnId, agentName, cloud.cloudName, conn);
await cloud.waitForReady();
return conn;
})();
if (fastMode && cloud.cloudName !== "local") {
// ── Fast mode: server boot + setup prompts run concurrently ─────────
// Start server creation, then do API key prompt, pre-provision, tarball
// download, and account check in parallel with server boot.
//
// Keep a dummy timer on the event loop so Bun doesn't exit prematurely.
// When all concurrent promises settle (especially after Bun.serve.stop()
// in the OAuth flow removes its handle), the event loop can appear empty
// before the continuation starts new I/O — causing a silent exit(0).
const keepAlive = setInterval(() => {}, 60_000);
const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
const serverBootPromise = (async () => {
const conn = await cloud.createServer(serverName);
recordSpawn(spawnId, agentName, cloud.cloudName, conn);
await cloud.waitForReady();
return conn;
})();
// These all run concurrently with server boot
const [bootResult, apiKeyResult] = await Promise.allSettled([
serverBootPromise,
resolveApiKey(agentName, cloud.cloudName),
cloud.checkAccountReady
? asyncTryCatch(() => cloud.checkAccountReady!())
: Promise.resolve({
ok: true,
}),
agent.preProvision
? asyncTryCatch(() => agent.preProvision!())
: Promise.resolve({
ok: true,
}),
]);
const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
// Server boot must succeed — retry if it failed
if (bootResult.status === "rejected") {
logError(getErrorMessage(bootResult.reason));
await retryOrQuit("Retry server creation?");
// User chose to retry — fall through to sequential path which has full retry loops
// (Re-running the concurrent path would re-prompt for API key, etc.)
const connection = await cloud.createServer(serverName);
// These all run concurrently with server boot
const [bootResult, apiKeyResult] = await Promise.allSettled([
serverBootPromise,
resolveApiKey(agentName, cloud.cloudName),
cloud.cloudName === "digitalocean"
? Promise.resolve({
ok: true as const,
})
: cloud.checkAccountReady
? asyncTryCatch(() => cloud.checkAccountReady!())
: Promise.resolve({
ok: true,
}),
agent.preProvision
? asyncTryCatch(() => agent.preProvision!())
: Promise.resolve({
ok: true,
}),
]);
// Server boot must succeed — retry if it failed
if (bootResult.status === "rejected") {
logError(getErrorMessage(bootResult.reason));
await retryOrQuit("Retry server creation?");
// User chose to retry — fall through to sequential path which has full retry loops
// (Re-running the concurrent path would re-prompt for API key, etc.)
const connection = await cloud.createServer(serverName);
recordSpawn(spawnId, agentName, cloud.cloudName, connection);
await cloud.waitForReady();
}
trackFunnel("funnel_vm_ready");
// API key must succeed
if (apiKeyResult.status === "rejected") {
throw apiKeyResult.reason;
}
const apiKey = apiKeyResult.value;
trackFunnel("funnel_credentials_ready");
// Model ID
const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
if (rawModelId && !modelId) {
logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
}
// Env config (computed locally, no SSH needed)
const envPairs = agent.envVars(apiKey);
if (modelId && agent.modelEnvVar) {
envPairs.push(`${agent.modelEnvVar}=${modelId}`);
}
if (betaFeatures.has("recursive")) {
appendRecursiveEnvVars(envPairs, spawnId);
}
const envContent = generateEnvConfig(envPairs);
// Install agent — remote tarball, fallback to live install
if (cloud.skipAgentInstall) {
logInfo("Snapshot boot — skipping agent install");
} else {
let installed = false;
if (useTarball && !agent.skipTarball) {
const tarball = options?.tryTarball ?? tryTarballInstall;
installed = await tarball(cloud.runner, agentName);
}
if (!installed) {
for (;;) {
const r = await asyncTryCatch(() => agent.install());
if (r.ok) {
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry agent install?");
}
}
}
trackFunnel("funnel_install_completed");
// Inject env + continue with shared post-install flow
clearInterval(keepAlive);
await injectEnvVars(cloud, envContent);
await postInstall(cloud, agent, agentName, apiKey, modelId, spawnId, options);
} else {
// ── Standard sequential flow ────────────────────────────────────────
// 1b. Pre-flight account readiness check (DigitalOcean uses ensureReadyBeforeSizing instead)
if (cloud.checkAccountReady && cloud.cloudName !== "digitalocean") {
const r = await asyncTryCatch(() => cloud.checkAccountReady!());
if (!r.ok) {
logWarn("Account readiness check failed — proceeding anyway");
logDebug(getErrorMessage(r.error));
}
}
// 2. Get API key
const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
const apiKey = await resolveApiKey(agentName, cloud.cloudName);
trackFunnel("funnel_credentials_ready");
// 3. Pre-provision hooks
if (agent.preProvision) {
const r = await asyncTryCatch(() => agent.preProvision!());
if (!r.ok) {
logWarn("Pre-provision hook failed — continuing");
logDebug(getErrorMessage(r.error));
}
}
// 4. Model ID
const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
if (rawModelId && !modelId) {
logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
}
// 5. Provision server (retry loop)
let connection: VMConnection;
for (;;) {
const r = await asyncTryCatch(() => cloud.createServer(serverName));
if (r.ok) {
connection = r.data;
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry server creation?");
}
recordSpawn(spawnId, agentName, cloud.cloudName, connection);
await cloud.waitForReady();
}
trackFunnel("funnel_vm_ready");
// API key must succeed
if (apiKeyResult.status === "rejected") {
throw apiKeyResult.reason;
}
const apiKey = apiKeyResult.value;
trackFunnel("funnel_credentials_ready");
// Model ID
const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
if (rawModelId && !modelId) {
logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
}
// Env config (computed locally, no SSH needed)
const envPairs = agent.envVars(apiKey);
if (modelId && agent.modelEnvVar) {
envPairs.push(`${agent.modelEnvVar}=${modelId}`);
}
if (betaFeatures.has("recursive")) {
appendRecursiveEnvVars(envPairs, spawnId);
}
const envContent = generateEnvConfig(envPairs);
// Install agent — remote tarball, fallback to live install
if (cloud.skipAgentInstall) {
logInfo("Snapshot boot — skipping agent install");
} else {
let installed = false;
if (useTarball && !agent.skipTarball) {
const tarball = options?.tryTarball ?? tryTarballInstall;
installed = await tarball(cloud.runner, agentName);
// 6. Wait for readiness (retry loop)
for (;;) {
const r = await asyncTryCatch(() => cloud.waitForReady());
if (r.ok) {
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Server may still be starting. Keep waiting?");
}
if (!installed) {
for (;;) {
const r = await asyncTryCatch(() => agent.install());
if (r.ok) {
break;
trackFunnel("funnel_vm_ready");
// 7. Env config
const envPairs = agent.envVars(apiKey);
if (modelId && agent.modelEnvVar) {
envPairs.push(`${agent.modelEnvVar}=${modelId}`);
}
if (betaFeatures.has("recursive")) {
appendRecursiveEnvVars(envPairs, spawnId);
}
const envContent = generateEnvConfig(envPairs);
// 8. Install agent
if (cloud.skipAgentInstall) {
logInfo("Snapshot boot — skipping agent install");
} else {
let installedFromTarball = false;
if (cloud.cloudName !== "local" && !agent.skipTarball && useTarball) {
const tarball = options?.tryTarball ?? tryTarballInstall;
installedFromTarball = await tarball(cloud.runner, agentName);
}
if (!installedFromTarball) {
for (;;) {
const r = await asyncTryCatch(() => agent.install());
if (r.ok) {
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry agent install?");
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry agent install?");
}
}
trackFunnel("funnel_install_completed");
// Inject env + continue with shared post-install flow
await injectEnvVars(cloud, envContent);
await postInstall(cloud, agent, agentName, apiKey, modelId, spawnId, options);
}
trackFunnel("funnel_install_completed");
});
// Inject env + continue with shared post-install flow
clearInterval(keepAlive);
await injectEnvVars(cloud, envContent);
await postInstall(cloud, agent, agentName, apiKey, modelId, spawnId, options);
} else {
// ── Standard sequential flow ────────────────────────────────────────
// 1b. Pre-flight account readiness check
if (cloud.checkAccountReady) {
const r = await asyncTryCatch(() => cloud.checkAccountReady!());
if (!r.ok) {
logWarn("Account readiness check failed — proceeding anyway");
logDebug(getErrorMessage(r.error));
}
}
// 2. Get API key
const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
const apiKey = await resolveApiKey(agentName, cloud.cloudName);
trackFunnel("funnel_credentials_ready");
// 3. Pre-provision hooks
if (agent.preProvision) {
const r = await asyncTryCatch(() => agent.preProvision!());
if (!r.ok) {
logWarn("Pre-provision hook failed — continuing");
logDebug(getErrorMessage(r.error));
}
}
// 4. Model ID
const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
if (rawModelId && !modelId) {
logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
}
// 5. Provision server (retry loop)
let connection: VMConnection;
for (;;) {
const r = await asyncTryCatch(() => cloud.createServer(serverName));
if (r.ok) {
connection = r.data;
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry server creation?");
}
recordSpawn(spawnId, agentName, cloud.cloudName, connection);
// 6. Wait for readiness (retry loop)
for (;;) {
const r = await asyncTryCatch(() => cloud.waitForReady());
if (r.ok) {
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Server may still be starting. Keep waiting?");
}
trackFunnel("funnel_vm_ready");
// 7. Env config
const envPairs = agent.envVars(apiKey);
if (modelId && agent.modelEnvVar) {
envPairs.push(`${agent.modelEnvVar}=${modelId}`);
}
if (betaFeatures.has("recursive")) {
appendRecursiveEnvVars(envPairs, spawnId);
}
const envContent = generateEnvConfig(envPairs);
// 8. Install agent
if (cloud.skipAgentInstall) {
logInfo("Snapshot boot — skipping agent install");
} else {
let installedFromTarball = false;
if (cloud.cloudName !== "local" && !agent.skipTarball && useTarball) {
const tarball = options?.tryTarball ?? tryTarballInstall;
installedFromTarball = await tarball(cloud.runner, agentName);
}
if (!installedFromTarball) {
for (;;) {
const r = await asyncTryCatch(() => agent.install());
if (r.ok) {
break;
}
logError(getErrorMessage(r.error));
await retryOrQuit("Retry agent install?");
}
}
}
trackFunnel("funnel_install_completed");
// Inject env + continue with shared post-install flow
await injectEnvVars(cloud, envContent);
await postInstall(cloud, agent, agentName, apiKey, modelId, spawnId, options);
if (!orchestrationResult.ok) {
throw orchestrationResult.error;
}
}