diff --git a/.claude/skills/setup-agent-team/qa.sh b/.claude/skills/setup-agent-team/qa.sh index 59677737..c1765cc6 100644 --- a/.claude/skills/setup-agent-team/qa.sh +++ b/.claude/skills/setup-agent-team/qa.sh @@ -33,6 +33,11 @@ elif [[ "${SPAWN_REASON}" == "e2e" ]]; then WORKTREE_BASE="/tmp/spawn-worktrees/qa-e2e" TEAM_NAME="spawn-qa-e2e" CYCLE_TIMEOUT=1200 # 20 min for E2E tests + investigation +elif [[ "${SPAWN_REASON}" == "e2e-interactive" ]]; then + RUN_MODE="e2e-interactive" + WORKTREE_BASE="/tmp/spawn-worktrees/qa-e2e-interactive" + TEAM_NAME="spawn-qa-e2e-interactive" + CYCLE_TIMEOUT=1800 # 30 min for interactive AI-driven E2E (slower than headless) elif [[ "${SPAWN_REASON}" == "issues" ]] && [[ -n "${SPAWN_ISSUE}" ]]; then RUN_MODE="issue" ISSUE_NUM="${SPAWN_ISSUE}" @@ -203,7 +208,7 @@ if [[ "${RUN_MODE}" == "quality" ]]; then fi # --- Load cloud credentials (quality + fixtures + e2e modes) --- -if [[ "${RUN_MODE}" == "fixtures" ]] || [[ "${RUN_MODE}" == "quality" ]] || [[ "${RUN_MODE}" == "e2e" ]] || [[ "${RUN_MODE}" == "soak" ]]; then +if [[ "${RUN_MODE}" == "fixtures" ]] || [[ "${RUN_MODE}" == "quality" ]] || [[ "${RUN_MODE}" == "e2e" ]] || [[ "${RUN_MODE}" == "e2e-interactive" ]] || [[ "${RUN_MODE}" == "soak" ]]; then if [[ -f "${REPO_ROOT}/sh/shared/key-request.sh" ]]; then source "${REPO_ROOT}/sh/shared/key-request.sh" load_cloud_keys_from_config @@ -430,6 +435,43 @@ if [[ "${RUN_MODE}" == "soak" ]]; then log "Soak test failed (exit_code=${CLAUDE_EXIT})" fi +# --- Interactive E2E mode: run e2e.sh --interactive directly (no Claude Code needed) --- +elif [[ "${RUN_MODE}" == "e2e-interactive" ]]; then + log "Running interactive E2E test (AI-driven via Claude Haiku)..." + + # ANTHROPIC_API_KEY is needed for the AI driver (Claude Haiku deciding what to type). + # On QA VMs this is typically set in the environment or /etc/spawn-qa-auth.env. 
+ if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then + # Try loading from auth env file + if [[ -f /etc/spawn-qa-auth.env ]]; then + while IFS='=' read -r _ekey _eval || [[ -n "${_ekey}" ]]; do + _ekey="${_ekey#"${_ekey%%[! ]*}"}" + case "${_ekey}" in + ANTHROPIC_API_KEY) export ANTHROPIC_API_KEY="${_eval}" ;; + esac + done < /etc/spawn-qa-auth.env + fi + fi + + if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then + log "ERROR: ANTHROPIC_API_KEY not set — required for interactive E2E" + exit 1 + fi + + cd "${REPO_ROOT}" + # Run on hetzner (cheapest) with claude agent by default. + # Can be overridden via E2E_INTERACTIVE_CLOUD and E2E_INTERACTIVE_AGENT env vars. + _int_cloud="${E2E_INTERACTIVE_CLOUD:-hetzner}" + _int_agent="${E2E_INTERACTIVE_AGENT:-claude}" + bash sh/e2e/e2e.sh --cloud "${_int_cloud}" "${_int_agent}" --interactive 2>&1 | tee -a "${LOG_FILE}" + CLAUDE_EXIT=${PIPESTATUS[0]} # exit status of e2e.sh itself; a plain $? after the pipe would report tee's status + + if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then + log "Interactive E2E test passed" + else + log "Interactive E2E test failed (exit_code=${CLAUDE_EXIT})" + fi + # --- Quality mode: retry up to 3 times, then file issue --- elif [[ "${RUN_MODE}" == "quality" ]]; then MAX_ATTEMPTS=3 diff --git a/.claude/skills/setup-agent-team/trigger-server.ts b/.claude/skills/setup-agent-team/trigger-server.ts index 12c2c722..244fd685 100644 --- a/.claude/skills/setup-agent-team/trigger-server.ts +++ b/.claude/skills/setup-agent-team/trigger-server.ts @@ -100,6 +100,7 @@ const VALID_REASONS = new Set([ "hygiene", "fixtures", "e2e", + "e2e-interactive", "soak", ]); diff --git a/.github/workflows/qa.yml b/.github/workflows/qa.yml index 9092268d..c9464d3b 100644 --- a/.github/workflows/qa.yml +++ b/.github/workflows/qa.yml @@ -2,7 +2,8 @@ name: QA on: schedule: - cron: '0 */4 * * *' # Every 4 hours — quality sweep - - cron: '0 3 * * 1' # Every Monday 3am UTC — Telegram soak test (OpenClaw on DigitalOcean) + - cron: '30 1 * * 1' # Every Monday 1:30am 
UTC — Telegram soak test (offset from */4 to avoid dedup) + - cron: '0 6 * * *' # Daily 6am 
UTC — Interactive E2E (1 agent, 1 cloud) workflow_dispatch: inputs: reason: @@ -13,6 +14,7 @@ on: options: - schedule - e2e + - e2e-interactive - fixtures - soak jobs: @@ -25,8 +27,10 @@ jobs: SPRITE_URL: ${{ secrets.QA_SPRITE_URL }} TRIGGER_SECRET: ${{ secrets.QA_TRIGGER_SECRET }} run: | - if [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "0 3 * * 1" ]; then + if [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "30 1 * * 1" ]; then REASON="soak" + elif [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "0 6 * * *" ]; then + REASON="e2e-interactive" else REASON="${{ github.event.inputs.reason || 'schedule' }}" fi diff --git a/packages/cli/package.json b/packages/cli/package.json index a9ac4602..d7268043 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@openrouter/spawn", - "version": "0.24.2", + "version": "0.25.0", "type": "module", "bin": { "spawn": "cli.js" diff --git a/packages/cli/src/__tests__/do-payment-warning.test.ts b/packages/cli/src/__tests__/do-payment-warning.test.ts index a77f047b..6b382470 100644 --- a/packages/cli/src/__tests__/do-payment-warning.test.ts +++ b/packages/cli/src/__tests__/do-payment-warning.test.ts @@ -91,7 +91,7 @@ describe("ensureDoToken — payment method warning for first-time users", () => // Empty prompt responses → manual entry fails × 3 → throws mockPrompt.mockImplementation(() => Promise.resolve("")); - await expect(ensureDoToken()).rejects.toThrow("DigitalOcean authentication failed"); + await expect(ensureDoToken()).rejects.toThrow("User chose to exit"); expect(warnMessages.some((msg) => msg.includes("payment method"))).toBe(true); expect(warnMessages.some((msg) => msg.includes("cloud.digitalocean.com/account/billing"))).toBe(true); @@ -121,7 +121,7 @@ describe("ensureDoToken — payment method warning for first-time users", () => mockLoadApiToken.mockImplementation(() => null); 
mockPrompt.mockImplementation(() => Promise.resolve("")); - await expect(ensureDoToken()).rejects.toThrow("DigitalOcean authentication failed"); + await expect(ensureDoToken()).rejects.toThrow("User chose to exit"); const billingWarning = warnMessages.find((msg) => msg.includes("billing")); expect(billingWarning).toBeDefined(); diff --git a/packages/cli/src/__tests__/orchestrate.test.ts b/packages/cli/src/__tests__/orchestrate.test.ts index 95ae5b76..1f0e212a 100644 --- a/packages/cli/src/__tests__/orchestrate.test.ts +++ b/packages/cli/src/__tests__/orchestrate.test.ts @@ -697,6 +697,8 @@ describe("runOrchestration", () => { }); it("throws when createServer rejects", async () => { + const prevNonInteractive = process.env.SPAWN_NON_INTERACTIVE; + process.env.SPAWN_NON_INTERACTIVE = "1"; const cloud = createMockCloud({ cloudName: "hetzner", createServer: mock(() => Promise.reject(new Error("server boot failed"))), @@ -707,8 +709,9 @@ describe("runOrchestration", () => { expect(result.ok).toBe(false); if (!result.ok) { - expect(result.error.message).toBe("server boot failed"); + expect(result.error.message).toBe("Non-interactive mode: cannot retry"); } + if (prevNonInteractive === undefined) { delete process.env.SPAWN_NON_INTERACTIVE; } else { process.env.SPAWN_NON_INTERACTIVE = prevNonInteractive; } // assigning undefined would store the string "undefined" instead of unsetting stderrSpy.mockRestore(); exitSpy.mockRestore(); }); diff --git a/packages/cli/src/aws/aws.ts b/packages/cli/src/aws/aws.ts index 48465615..72371fde 100644 --- a/packages/cli/src/aws/aws.ts +++ b/packages/cli/src/aws/aws.ts @@ -33,6 +33,7 @@ import { logWarn, prompt, promptSpawnNameShared, + retryOrQuit, sanitizeTermValue, selectFromList, shellQuote, @@ -623,49 +624,57 @@ export async function authenticate(): Promise { } } - // 4. Interactive credential entry + // 4. 
Interactive credential entry (retry loop — never exits unless user says no) if (process.env.SPAWN_NON_INTERACTIVE === "1") { logError("AWS credentials not found. 
Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY."); throw new Error("No AWS credentials"); } - if (skipCache) { - logStep("Re-entering AWS credentials (--reauth):"); - } else { - logStep("Enter your AWS credentials:"); - } - const accessKey = await prompt("AWS Access Key ID: "); - if (!accessKey) { - throw new Error("No access key provided"); - } - const secretKey = await prompt("AWS Secret Access Key: "); - if (!secretKey) { - throw new Error("No secret key provided"); - } - - process.env.AWS_ACCESS_KEY_ID = accessKey; - process.env.AWS_SECRET_ACCESS_KEY = secretKey; - process.env.AWS_DEFAULT_REGION = region; - _state.accessKeyId = accessKey; - _state.secretAccessKey = secretKey; - - if (hasAwsCli()) { - const result = awsCliSync([ - "sts", - "get-caller-identity", - ]); - if (result.exitCode === 0) { - _state.lightsailMode = "cli"; - await saveCredsToConfig(accessKey, secretKey, region); - logInfo(`AWS CLI configured, using region: ${region}`); - return; + for (;;) { + if (skipCache) { + logStep("Re-entering AWS credentials (--reauth):"); + } else { + logStep("Enter your AWS credentials:"); + } + const accessKey = await prompt("AWS Access Key ID: "); + if (!accessKey) { + await retryOrQuit("AWS credentials invalid. Try again?"); + continue; + } + const secretKey = await prompt("AWS Secret Access Key: "); + if (!secretKey) { + await retryOrQuit("AWS credentials invalid. 
Try again?"); + continue; } - } - _state.lightsailMode = "rest"; - await saveCredsToConfig(accessKey, secretKey, region); - logInfo("Using Lightsail REST API directly"); - logInfo(`Using region: ${region}`); + process.env.AWS_ACCESS_KEY_ID = accessKey; + process.env.AWS_SECRET_ACCESS_KEY = secretKey; + process.env.AWS_DEFAULT_REGION = region; + _state.accessKeyId = accessKey; + _state.secretAccessKey = secretKey; + + if (hasAwsCli()) { + const result = awsCliSync([ + "sts", + "get-caller-identity", + ]); + if (result.exitCode === 0) { + _state.lightsailMode = "cli"; + await saveCredsToConfig(accessKey, secretKey, region); + logInfo(`AWS CLI configured, using region: ${region}`); + return; + } + logError("AWS credentials are invalid"); + await retryOrQuit("AWS credentials invalid. Try again?"); + continue; + } + + _state.lightsailMode = "rest"; + await saveCredsToConfig(accessKey, secretKey, region); + logInfo("Using Lightsail REST API directly"); + logInfo(`Using region: ${region}`); + return; + } } // ─── Region Prompt ────────────────────────────────────────────────────────── diff --git a/packages/cli/src/digitalocean/digitalocean.ts b/packages/cli/src/digitalocean/digitalocean.ts index 07bce3a2..71a58ff4 100644 --- a/packages/cli/src/digitalocean/digitalocean.ts +++ b/packages/cli/src/digitalocean/digitalocean.ts @@ -43,6 +43,7 @@ import { logWarn, openBrowser, prompt, + retryOrQuit, sanitizeTermValue, selectFromList, shellQuote, @@ -765,28 +766,30 @@ export async function ensureDoToken(): Promise { _state.token = ""; } - // 4. Manual entry (fallback) - logStep("DigitalOcean API Token Required"); - logWarn("Get a token from: https://cloud.digitalocean.com/account/api/tokens"); + // 4. 
Manual entry (retry loop — never exits unless user says no) + for (;;) { + logStep("DigitalOcean API Token Required"); + logWarn("Get a token from: https://cloud.digitalocean.com/account/api/tokens"); - for (let attempt = 1; attempt <= 3; attempt++) { - const token = await prompt("Enter your DigitalOcean API token: "); - if (!token) { - logError("Token cannot be empty"); - continue; + for (let attempt = 1; attempt <= 3; attempt++) { + const token = await prompt("Enter your DigitalOcean API token: "); + if (!token) { + logError("Token cannot be empty"); + continue; + } + _state.token = token.trim(); + if (await testDoToken()) { + await saveTokenToConfig(_state.token); + logInfo("DigitalOcean API token validated and saved"); + return false; + } + logError("Token is invalid"); + _state.token = ""; } - _state.token = token.trim(); - if (await testDoToken()) { - await saveTokenToConfig(_state.token); - logInfo("DigitalOcean API token validated and saved"); - return false; - } - logError("Token is invalid"); - _state.token = ""; + + logError("No valid token after 3 attempts"); + await retryOrQuit("Try DigitalOcean authentication again?"); } - - logError("No valid token after 3 attempts"); - throw new Error("DigitalOcean authentication failed"); } // ─── SSH Key Management ────────────────────────────────────────────────────── diff --git a/packages/cli/src/gcp/gcp.ts b/packages/cli/src/gcp/gcp.ts index 6e23064a..3adaad34 100644 --- a/packages/cli/src/gcp/gcp.ts +++ b/packages/cli/src/gcp/gcp.ts @@ -30,6 +30,7 @@ import { openBrowser, prompt, promptSpawnNameShared, + retryOrQuit, sanitizeTermValue, selectFromList, shellQuote, @@ -431,17 +432,20 @@ export async function authenticate(): Promise { return; } - logWarn("No active Google Cloud account -- launching gcloud auth login..."); - const exitCode = await gcloudInteractive([ - "auth", - "login", - ]); - if (exitCode !== 0) { + for (;;) { + logWarn("No active Google Cloud account -- launching gcloud auth login..."); + 
const exitCode = await gcloudInteractive([ + "auth", + "login", + ]); + if (exitCode === 0) { + logInfo("Authenticated with Google Cloud"); + return; + } logError("Authentication failed. You can also set credentials via:"); logError(" export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json"); - throw new Error("gcloud auth failed"); + await retryOrQuit("Try Google Cloud authentication again?"); } - logInfo("Authenticated with Google Cloud"); } // ─── Project Resolution ───────────────────────────────────────────────────── diff --git a/packages/cli/src/hetzner/hetzner.ts b/packages/cli/src/hetzner/hetzner.ts index 83803a52..18d3bdfc 100644 --- a/packages/cli/src/hetzner/hetzner.ts +++ b/packages/cli/src/hetzner/hetzner.ts @@ -33,6 +33,7 @@ import { logWarn, prompt, promptSpawnNameShared, + retryOrQuit, sanitizeTermValue, selectFromList, shellQuote, @@ -211,28 +212,30 @@ export async function ensureHcloudToken(): Promise { _state.hcloudToken = ""; } - // 3. Manual entry - logStep("Hetzner Cloud API Token Required"); - logWarn("Get a token from: https://console.hetzner.cloud/projects -> API Tokens"); + // 3. 
Manual entry (retry loop — never exits unless user says no) + for (;;) { + logStep("Hetzner Cloud API Token Required"); + logWarn("Get a token from: https://console.hetzner.cloud/projects -> API Tokens"); - for (let attempt = 1; attempt <= 3; attempt++) { - const token = await prompt("Enter your Hetzner Cloud API token: "); - if (!token) { - logError("Token cannot be empty"); - continue; + for (let attempt = 1; attempt <= 3; attempt++) { + const token = await prompt("Enter your Hetzner Cloud API token: "); + if (!token) { + logError("Token cannot be empty"); + continue; + } + _state.hcloudToken = token.trim(); + if (await testHcloudToken()) { + await saveTokenToConfig(_state.hcloudToken); + logInfo("Hetzner Cloud token validated and saved"); + return; + } + logError("Token is invalid"); + _state.hcloudToken = ""; } - _state.hcloudToken = token.trim(); - if (await testHcloudToken()) { - await saveTokenToConfig(_state.hcloudToken); - logInfo("Hetzner Cloud token validated and saved"); - return; - } - logError("Token is invalid"); - _state.hcloudToken = ""; + + logError("No valid token after 3 attempts"); + await retryOrQuit("Enter a new Hetzner token?"); } - - logError("No valid token after 3 attempts"); - throw new Error("Hetzner authentication failed"); } // ─── SSH Key Management ────────────────────────────────────────────────────── diff --git a/packages/cli/src/shared/oauth.ts b/packages/cli/src/shared/oauth.ts index 35f68f1f..35359f69 100644 --- a/packages/cli/src/shared/oauth.ts +++ b/packages/cli/src/shared/oauth.ts @@ -8,7 +8,7 @@ import { OAUTH_CODE_REGEX } from "./oauth-constants"; import { parseJsonObj, parseJsonWith } from "./parse"; import { getSpawnCloudConfigPath } from "./paths"; import { asyncTryCatchIf, isFileError, isNetworkError, tryCatch } from "./result.js"; -import { logDebug, logError, logInfo, logStep, logWarn, openBrowser, prompt } from "./ui"; +import { logDebug, logError, logInfo, logStep, logWarn, openBrowser, prompt, retryOrQuit } from 
"./ui"; // ─── Schemas ───────────────────────────────────────────────────────────────── @@ -353,30 +353,32 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string): } } - // 3. Try OAuth + manual fallback (3 attempts) - for (let attempt = 1; attempt <= 3; attempt++) { - // Try OAuth first - const key = await tryOauthFlow(5180, agentSlug, cloudSlug); - if (key && (await verifyOpenrouterKey(key))) { - process.env.OPENROUTER_API_KEY = key; - await saveOpenRouterKey(key); - return key; + // 3. Try OAuth + manual fallback (retry loop — never exits unless user says no) + for (;;) { + for (let attempt = 1; attempt <= 3; attempt++) { + // Try OAuth first + const key = await tryOauthFlow(5180, agentSlug, cloudSlug); + if (key && (await verifyOpenrouterKey(key))) { + process.env.OPENROUTER_API_KEY = key; + await saveOpenRouterKey(key); + return key; + } + + // OAuth failed — fall through to manual entry + process.stderr.write("\n"); + logWarn("Browser-based login was not completed."); + logInfo("Get your API key from: https://openrouter.ai/settings/keys"); + process.stderr.write("\n"); + + const manualKey = await promptAndValidateApiKey(); + if (manualKey && (await verifyOpenrouterKey(manualKey))) { + process.env.OPENROUTER_API_KEY = manualKey; + await saveOpenRouterKey(manualKey); + return manualKey; + } } - // OAuth failed — fall through to manual entry - process.stderr.write("\n"); - logWarn("Browser-based login was not completed."); - logInfo("Get your API key from: https://openrouter.ai/settings/keys"); - process.stderr.write("\n"); - - const manualKey = await promptAndValidateApiKey(); - if (manualKey && (await verifyOpenrouterKey(manualKey))) { - process.env.OPENROUTER_API_KEY = manualKey; - await saveOpenRouterKey(manualKey); - return manualKey; - } + logError("No valid API key after 3 attempts"); + await retryOrQuit("Try getting an API key again?"); } - - logError("No valid API key after 3 attempts"); - throw new Error("API key acquisition 
failed"); } diff --git a/packages/cli/src/shared/orchestrate.ts b/packages/cli/src/shared/orchestrate.ts index 645f76b2..03cb0469 100644 --- a/packages/cli/src/shared/orchestrate.ts +++ b/packages/cli/src/shared/orchestrate.ts @@ -21,12 +21,14 @@ import { startSshTunnel } from "./ssh"; import { ensureSshKeys, getSshKeyOpts } from "./ssh-keys"; import { logDebug, + logError, logInfo, logStep, logWarn, openBrowser, prepareStdinForHandoff, prompt, + retryOrQuit, shellQuote, validateModelId, withRetry, @@ -185,9 +187,27 @@ export async function runOrchestration( !cloud.skipAgentInstall && !agent.skipTarball ? downloadTarballLocally(agentName) : Promise.resolve(null), ]); - // Server boot must succeed + // Server boot must succeed — retry if it failed if (bootResult.status === "rejected") { - throw bootResult.reason; + logError(getErrorMessage(bootResult.reason)); + await retryOrQuit("Retry server creation?"); + // User chose to retry — fall through to sequential path which has full retry loops + // (Re-running the concurrent path would re-prompt for API key, etc.) + const connection = await cloud.createServer(serverName); + const spawnName2 = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined; + saveSpawnRecord({ + id: spawnId, + agent: agentName, + cloud: cloud.cloudName, + timestamp: new Date().toISOString(), + ...(spawnName2 + ? { + name: spawnName2, + } + : {}), + connection, + }); + await cloud.waitForReady(); } // API key must succeed @@ -225,7 +245,14 @@ export async function runOrchestration( installed = await tarball(cloud.runner, agentName); } if (!installed) { - await agent.install(); + for (;;) { + const r = await asyncTryCatch(() => agent.install()); + if (r.ok) { + break; + } + logError(getErrorMessage(r.error)); + await retryOrQuit("Retry agent install?"); + } } } @@ -264,8 +291,17 @@ export async function runOrchestration( logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`); } - // 5. 
Provision server - const connection = await cloud.createServer(serverName); + // 5. Provision server (retry loop) + let connection: VMConnection; + for (;;) { + const r = await asyncTryCatch(() => cloud.createServer(serverName)); + if (r.ok) { + connection = r.data; + break; + } + logError(getErrorMessage(r.error)); + await retryOrQuit("Retry server creation?"); + } const spawnName = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined; saveSpawnRecord({ id: spawnId, @@ -280,8 +316,15 @@ export async function runOrchestration( connection, }); - // 6. Wait for readiness - await cloud.waitForReady(); + // 6. Wait for readiness (retry loop) + for (;;) { + const r = await asyncTryCatch(() => cloud.waitForReady()); + if (r.ok) { + break; + } + logError(getErrorMessage(r.error)); + await retryOrQuit("Server may still be starting. Keep waiting?"); + } // 7. Env config const envPairs = agent.envVars(apiKey); @@ -300,7 +343,14 @@ export async function runOrchestration( installedFromTarball = await tarball(cloud.runner, agentName); } if (!installedFromTarball) { - await agent.install(); + for (;;) { + const r = await asyncTryCatch(() => agent.install()); + if (r.ok) { + break; + } + logError(getErrorMessage(r.error)); + await retryOrQuit("Retry agent install?"); + } } } @@ -376,9 +426,16 @@ async function postInstall( await setupAutoUpdate(cloud.runner, agentName, agent.updateCmd); } - // Pre-launch hooks + // Pre-launch hooks (retry loop) if (agent.preLaunch) { - await agent.preLaunch(); + for (;;) { + const r = await asyncTryCatch(() => agent.preLaunch!()); + if (r.ok) { + break; + } + logError(getErrorMessage(r.error)); + await retryOrQuit("Retry pre-launch setup?"); + } } // SSH tunnel for web dashboard diff --git a/packages/cli/src/shared/ui.ts b/packages/cli/src/shared/ui.ts index 06919cde..542e0f2c 100644 --- a/packages/cli/src/shared/ui.ts +++ b/packages/cli/src/shared/ui.ts @@ -196,6 +196,26 @@ export function openBrowser(url: string): void { } } +// 
─── Retry-or-quit ───────────────────────────────────────────────────── + +/** + * Prompt the user to retry or quit after a failure. + * - "y" or any other non-empty answer not starting with n/N → returns (caller retries) + * - "n" / "N" prefix, or an empty answer (bare Enter, Ctrl+C) → throws (caller exits) + * + * In non-interactive mode, always throws immediately. + */ +export async function retryOrQuit(message: string): Promise { + if (process.env.SPAWN_NON_INTERACTIVE === "1") { + throw new Error("Non-interactive mode: cannot retry"); + } + process.stderr.write("\n"); + const answer = await prompt(`${message} (y/N): `); + if (!answer || /^[Nn]/.test(answer)) { + throw new Error("User chose to exit"); + } +} + // ─── Result-based retry ──────────────────────────────────────────────── import type { Result } from "./result"; diff --git a/sh/e2e/e2e.sh b/sh/e2e/e2e.sh index e951abb0..a5eb0c8e 100755 --- a/sh/e2e/e2e.sh +++ b/sh/e2e/e2e.sh @@ -31,6 +31,7 @@ source "${SCRIPT_DIR}/lib/provision.sh" source "${SCRIPT_DIR}/lib/verify.sh" source "${SCRIPT_DIR}/lib/teardown.sh" source "${SCRIPT_DIR}/lib/soak.sh" +source "${SCRIPT_DIR}/lib/interactive.sh" # --------------------------------------------------------------------------- # All supported clouds (excluding local — no infra to provision) @@ -47,6 +48,7 @@ SKIP_CLEANUP=0 SKIP_INPUT_TEST="${SKIP_INPUT_TEST:-0}" SEQUENTIAL_MODE=0 SOAK_MODE=0 +INTERACTIVE_MODE=0 while [ $# -gt 0 ]; do case "$1" in @@ -108,6 +110,10 @@ while [ $# -gt 0 ]; do SOAK_MODE=1 shift ;; + --interactive) + INTERACTIVE_MODE=1 + shift + ;; --help|-h) printf "Usage: %s --cloud CLOUD [--cloud CLOUD2 ...] [agents...] 
[options]\n\n" "$0" printf "Clouds: %s\n" "${ALL_CLOUDS}" @@ -120,6 +126,7 @@ while [ $# -gt 0 ]; do printf " --skip-cleanup Skip stale e2e-* instance cleanup\n" printf " --skip-input-test Skip live input tests\n" printf " --soak Run Telegram soak test (OpenClaw on Sprite)\n" + printf " --interactive AI-driven interactive test (requires ANTHROPIC_API_KEY)\n" printf " --help Show this help\n" exit 0 ;; @@ -211,12 +218,22 @@ run_single_agent() { # Run core logic in a subshell so we can kill it on timeout ( local _inner_status="fail" - if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then - if verify_agent "${agent}" "${app_name}"; then + if [ "${INTERACTIVE_MODE}" -eq 1 ]; then + # AI-driven interactive mode: provision + verify in one step + if interactive_provision "${agent}" "${app_name}" "${LOG_DIR}"; then if run_input_test "${agent}" "${app_name}"; then _inner_status="pass" fi fi + else + # Standard headless mode + if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then + if verify_agent "${agent}" "${app_name}"; then + if run_input_test "${agent}" "${app_name}"; then + _inner_status="pass" + fi + fi + fi fi printf '%s' "${_inner_status}" > "${status_file}" ) & diff --git a/sh/e2e/interactive-harness.ts b/sh/e2e/interactive-harness.ts new file mode 100644 index 00000000..ac2a7f42 --- /dev/null +++ b/sh/e2e/interactive-harness.ts @@ -0,0 +1,372 @@ +#!/usr/bin/env bun +// sh/e2e/interactive-harness.ts — AI-driven interactive E2E test for spawn CLI +// +// Spawns spawn in a real PTY (via `script` command), feeds terminal output to +// Claude Haiku, and types responses like a human user would. +// +// Usage: bun run sh/e2e/interactive-harness.ts +// +// Required env: +// ANTHROPIC_API_KEY — For the AI driver (Claude Haiku) +// OPENROUTER_API_KEY — Injected into spawn for the agent +// Cloud credentials — HCLOUD_TOKEN, DO_API_TOKEN, AWS_ACCESS_KEY_ID, etc. 
+// +// Outputs JSON to stdout: { success: boolean, duration: number, transcript: string } + +const IDLE_MS = 2000; // Wait 2s of silence before asking AI +const SESSION_TIMEOUT_MS = 10 * 60 * 1000; // 10 minute overall timeout +const AI_MODEL = "claude-haiku-4-5-20251001"; + +// ─── Args & validation ────────────────────────────────────────────────── + +const [agent, cloud] = process.argv.slice(2); +if (!agent || !cloud) { + process.stderr.write("Usage: bun run interactive-harness.ts \n"); + process.exit(1); +} + +const apiKey = process.env.ANTHROPIC_API_KEY ?? ""; +if (!apiKey) { + process.stderr.write("ANTHROPIC_API_KEY is required for the AI driver\n"); + process.exit(1); +} + +if (!process.env.OPENROUTER_API_KEY) { + process.stderr.write("OPENROUTER_API_KEY is required for the spawned agent\n"); + process.exit(1); +} + +// ─── Credential map (only include what's set) ─────────────────────────── + +function buildCredentialHints(): string { + const creds: string[] = []; + + const orKey = process.env.OPENROUTER_API_KEY ?? ""; + if (orKey) creds.push(`OpenRouter API key: ${orKey}`); + + const hetzner = process.env.HCLOUD_TOKEN ?? ""; + if (hetzner) creds.push(`Hetzner token: ${hetzner}`); + + const doToken = process.env.DO_API_TOKEN ?? ""; + if (doToken) creds.push(`DigitalOcean token: ${doToken}`); + + const awsKey = process.env.AWS_ACCESS_KEY_ID ?? ""; + const awsSecret = process.env.AWS_SECRET_ACCESS_KEY ?? ""; + if (awsKey) creds.push(`AWS Access Key ID: ${awsKey}`); + if (awsSecret) creds.push(`AWS Secret Access Key: ${awsSecret}`); + + const gcpProject = process.env.GCP_PROJECT ?? 
""; + if (gcpProject) creds.push(`GCP Project ID: ${gcpProject}`); + + return creds.join("\n"); +} + +// ─── ANSI stripping ───────────────────────────────────────────────────── + +function stripAnsi(text: string): string { + return text + .replace(/\x1B\[[0-9;]*[A-Za-z]/g, "") // CSI sequences + .replace(/\x1B\][^\x07]*\x07/g, "") // OSC sequences + .replace(/\x1B\[\?[0-9;]*[hl]/g, "") // DEC private mode + .replace(/\x1B[()][A-Z0-9]/g, "") // Character set + .replace(/\r/g, ""); +} + +// ─── Credential redaction for logs ────────────────────────────────────── + +function redactSecrets(text: string): string { + let result = text; + const secrets = [ + process.env.OPENROUTER_API_KEY, + process.env.HCLOUD_TOKEN, + process.env.DO_API_TOKEN, + process.env.AWS_ACCESS_KEY_ID, + process.env.AWS_SECRET_ACCESS_KEY, + process.env.ANTHROPIC_API_KEY, + ]; + for (const s of secrets) { + if (s && s.length > 8) { + result = result.replaceAll(s, "[REDACTED]"); + } + } + return result; +} + +// ─── Claude API ───────────────────────────────────────────────────────── + +interface Message { + role: "user" | "assistant"; + content: string; +} + +async function askClaude( + systemPrompt: string, + messages: Message[], +): Promise { + const resp = await fetch("https://api.anthropic.com/v1/messages", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": apiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: AI_MODEL, + max_tokens: 256, + system: systemPrompt, + messages, + }), + signal: AbortSignal.timeout(30_000), + }); + + if (!resp.ok) { + const body = await resp.text(); + throw new Error(`Claude API ${resp.status}: ${body.slice(0, 200)}`); + } + + const data = await resp.json(); + // data.content is an array of content blocks + const blocks = Array.isArray(data?.content) ? data.content : []; + const textBlock = blocks.find( + (b: Record) => b.type === "text", + ); + return typeof textBlock?.text === "string" ? 
textBlock.text.trim() : ""; +} + +// ─── Input parsing ────────────────────────────────────────────────────── + +function parseInput(response: string): Uint8Array | null { + const trimmed = response.trim(); + + if (trimmed === "") return null; + if (trimmed === "") return null; + if (trimmed === "") return new Uint8Array([3]); // ETX + if (trimmed === "") return new Uint8Array([10]); // LF + if (trimmed === "") return new TextEncoder().encode("\x1B[A"); + if (trimmed === "") return new TextEncoder().encode("\x1B[B"); + + // Plain text → type it + Enter + return new TextEncoder().encode(trimmed + "\n"); +} + +// ─── System prompt ────────────────────────────────────────────────────── + +function buildSystemPrompt(): string { + return `You are an automated QA tester driving the "spawn" CLI through a terminal. +Your job is to respond to prompts exactly like a human user would. + +CREDENTIALS (paste these EXACTLY when asked): +${buildCredentialHints()} + +RULES: +1. When asked for a token/key/credential, paste the EXACT value from above +2. When asked to confirm (Y/n), respond with "y" +3. When asked for a name with a default shown in [brackets], press Enter to accept +4. When shown a selection menu (with arrows/highlights), press Enter to accept the default +5. If you see "Try again? (Y/n)" or similar retry prompts, respond with "y" +6. When you see "is ready" or "Starting agent", respond with +7. If something is clearly broken and unrecoverable, respond with +8. If the terminal is still loading/processing, respond with + +RESPONSE FORMAT — reply with ONLY one of these: +- The exact text to type (will be followed by Enter automatically) +- — press Enter (accept default) +- — arrow up +- — arrow down +- — send Ctrl+C +- — do nothing, wait for more output +- — test succeeded (agent is ready) +- — test failed (describe why) + +IMPORTANT: Reply with ONLY the action. 
No explanation, no markdown, no quotes.`; +} + +// ─── PTY via script command ───────────────────────────────────────────── + +function spawnPty(command: string): typeof Bun.spawn.prototype { + const env = { + ...process.env, + TERM: "xterm-256color", + COLUMNS: "120", + LINES: "40", + }; + + // macOS: script -q /dev/null bash -c "command" + // Linux: script -qc "command" /dev/null + const args = + process.platform === "darwin" + ? ["-q", "/dev/null", "bash", "-c", command] + : ["-qc", command, "/dev/null"]; + + return Bun.spawn(["script", ...args], { + stdin: "pipe", + stdout: "pipe", + stderr: "pipe", + env, + }); +} + +// ─── Main ─────────────────────────────────────────────────────────────── + +async function main(): Promise { + const startTime = Date.now(); + const systemPrompt = buildSystemPrompt(); + const messages: Message[] = []; + let transcript = ""; + let success = false; + let failReason = ""; + + // Resolve CLI entry point + const repoRoot = + process.env.SPAWN_CLI_DIR ?? 
+ new URL("../../", import.meta.url).pathname.replace(/\/$/, ""); + const cliEntry = `${repoRoot}/packages/cli/src/index.ts`; + const command = `bun run ${cliEntry} ${agent} ${cloud}`; + + process.stderr.write( + `[harness] Starting: spawn ${agent} ${cloud}\n`, + ); + process.stderr.write(`[harness] Timeout: ${SESSION_TIMEOUT_MS / 1000}s\n`); + + const proc = spawnPty(command); + let buffer = ""; + let lastDataTime = Date.now(); + let sessionDone = false; + + // Reader loop — accumulates PTY output + const readerDone = (async () => { + const reader = proc.stdout.getReader(); + const decoder = new TextDecoder(); + for (;;) { + const { done, value } = await reader.read(); + if (done) { + sessionDone = true; + break; + } + const text = decoder.decode(value, { stream: true }); + buffer += text; + transcript += text; + lastDataTime = Date.now(); + // Echo to stderr (redacted) so CI logs show progress + process.stderr.write(redactSecrets(text)); + } + })(); + + // AI driver loop + let turnCount = 0; + const maxTurns = 50; // Safety limit + + while (!sessionDone && turnCount < maxTurns) { + // Wait for output to settle + await Bun.sleep(500); + + // Check overall timeout + if (Date.now() - startTime > SESSION_TIMEOUT_MS) { + failReason = "Session timeout"; + break; + } + + // Wait until output has been idle for IDLE_MS + if (Date.now() - lastDataTime < IDLE_MS) continue; + if (buffer.length === 0) continue; + + const stripped = stripAnsi(buffer); + + // Check for success markers in output + if (/is ready|Starting agent|setup completed successfully/i.test(stripped)) { + success = true; + break; + } + + // Ask Claude what to type + turnCount++; + process.stderr.write( + `\n[harness] Turn ${turnCount}: asking AI (${stripped.length} chars of output)\n`, + ); + + messages.push({ + role: "user", + content: `Terminal output:\n${stripped}`, + }); + + let response: string; + const aiResult = await askClaude(systemPrompt, messages).catch( + (err: Error) => { + 
process.stderr.write(`[harness] AI error: ${err.message}\n`); + return ""; + }, + ); + response = aiResult; + + messages.push({ role: "assistant", content: response }); + process.stderr.write( + `[harness] AI response: ${redactSecrets(response)}\n`, + ); + + // Clear buffer for next round + buffer = ""; + + // Handle AI response + if (response === "") { + success = true; + break; + } + if (response.startsWith("") { + continue; + } + + const input = parseInput(response); + if (input) { + proc.stdin.write(input); + proc.stdin.flush(); + } + } + + if (turnCount >= maxTurns) { + failReason = "Exceeded max turns"; + } + + // Clean exit: send Ctrl+C then wait briefly + proc.stdin.write(new Uint8Array([3])); + proc.stdin.flush(); + await Bun.sleep(2000); + proc.kill(); + await readerDone.catch(() => {}); + + const duration = Math.round((Date.now() - startTime) / 1000); + + // Output result as JSON to stdout + const result = { + success, + duration, + turns: turnCount, + failReason: failReason || undefined, + transcript: redactSecrets(stripAnsi(transcript)).slice(-5000), // Last 5KB + }; + + process.stdout.write(JSON.stringify(result) + "\n"); + + if (success) { + process.stderr.write( + `\n[harness] SUCCESS in ${duration}s (${turnCount} turns)\n`, + ); + } else { + process.stderr.write( + `\n[harness] FAILED in ${duration}s: ${failReason || "unknown"}\n`, + ); + } + + process.exit(success ? 
#!/bin/bash
# e2e/lib/interactive.sh — AI-driven interactive provision & verification
#
# Instead of running spawn in headless mode (SPAWN_NON_INTERACTIVE=1), this
# runs spawn interactively with an AI agent (Claude Haiku) responding to
# prompts like a human user would. Tests the real user experience end-to-end.
#
# Requires: ANTHROPIC_API_KEY (for the AI driver), plus normal cloud creds.
set -eo pipefail

# ---------------------------------------------------------------------------
# _interactive_valid_name VALUE
#
# Returns 0 when VALUE is non-empty and consists solely of [A-Za-z0-9._-],
# 1 otherwise. The whole string is matched at once, so values containing
# newlines, spaces or shell metacharacters are rejected.
# ---------------------------------------------------------------------------
_interactive_valid_name() {
  # Pin the locale so the bracket ranges mean ASCII only.
  local LC_ALL=C
  case "${1:-}" in
    '' | *[!A-Za-z0-9._-]*) return 1 ;;
  esac
  return 0
}

# ---------------------------------------------------------------------------
# _interactive_dump_log_tail LOG_FILE COUNT HEADING
#
# Prints HEADING via log_info followed by the last COUNT lines of LOG_FILE,
# indented by four spaces. Does nothing when LOG_FILE does not exist.
# Always returns 0: this is best-effort diagnostics on a failure path.
# ---------------------------------------------------------------------------
_interactive_dump_log_tail() {
  local log_file="$1"
  local count="$2"
  local heading="$3"

  [ -f "${log_file}" ] || return 0
  log_info "${heading}"
  # sed (unlike a `while read` loop) also prints a final unterminated line.
  tail -n "${count}" "${log_file}" | sed 's/^/    /' || true
  return 0
}

# ---------------------------------------------------------------------------
# interactive_provision AGENT APP_NAME LOG_DIR
#
# Runs spawn interactively with AI driving the prompts. On success, the
# instance is provisioned AND the agent is installed — equivalent to
# provision_agent + verify_agent in the headless flow.
#
# Globals:
#   ACTIVE_CLOUD       (read) cloud name passed to the harness
#   ANTHROPIC_API_KEY  (read) must be non-empty
# Arguments:
#   AGENT     agent name; non-empty, only [A-Za-z0-9._-]
#   APP_NAME  instance name; non-empty, only [A-Za-z0-9._-]
#   LOG_DIR   directory for <APP_NAME>-interactive.json / .log (created
#             if missing)
# Returns 0 on success, 1 on failure.
# ---------------------------------------------------------------------------
interactive_provision() {
  local agent="${1:-}"
  local app_name="${2:-}"
  local log_dir="${3:-}"

  # Validate app_name (same rules as provision.sh)
  if ! _interactive_valid_name "${app_name}"; then
    log_err "Invalid app_name: must be non-empty and contain only [A-Za-z0-9._-]"
    return 1
  fi

  # The harness interpolates the agent and cloud names into a shell command
  # line, so they must not carry whitespace or shell metacharacters.
  if ! _interactive_valid_name "${agent}"; then
    log_err "Invalid agent: must be non-empty and contain only [A-Za-z0-9._-]"
    return 1
  fi
  if ! _interactive_valid_name "${ACTIVE_CLOUD:-}"; then
    log_err "Invalid ACTIVE_CLOUD: must be non-empty and contain only [A-Za-z0-9._-]"
    return 1
  fi

  # Require AI driver key
  if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
    log_err "ANTHROPIC_API_KEY required for interactive mode"
    return 1
  fi

  # The result and log files are redirect targets, so the directory must exist.
  if [ -z "${log_dir}" ]; then
    log_err "Invalid log_dir: must be non-empty"
    return 1
  fi
  if ! mkdir -p -- "${log_dir}"; then
    log_err "Cannot create log directory: ${log_dir}"
    return 1
  fi

  # Resolve harness script (one directory above this library file)
  local harness_script
  harness_script="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/interactive-harness.ts"
  if [ ! -f "${harness_script}" ]; then
    log_err "Interactive harness not found: ${harness_script}"
    return 1
  fi

  local result_file="${log_dir}/${app_name}-interactive.json"
  local log_file="${log_dir}/${app_name}-interactive.log"

  log_step "Interactive provision: ${agent} on ${ACTIVE_CLOUD}"
  log_info "AI driver: Claude Haiku via Anthropic API"

  # Map ACTIVE_CLOUD to the cloud name spawn expects
  local spawn_cloud="${ACTIVE_CLOUD}"

  local harness_start
  harness_start=$(date +%s)

  # Run the harness — it outputs JSON to stdout, logs to stderr.
  # The harness inherits the current env (cloud creds are already loaded by
  # the cloud driver); only the spawn-specific name is added for this call.
  local harness_exit=0
  SPAWN_NAME_KEBAB="${app_name}" \
    bun run "${harness_script}" "${agent}" "${spawn_cloud}" \
    > "${result_file}" 2> "${log_file}" || harness_exit=$?

  local harness_end
  harness_end=$(date +%s)
  local harness_duration=$((harness_end - harness_start))

  # No JSON at all: the harness died before reporting a result.
  if [ ! -s "${result_file}" ]; then
    log_err "Interactive harness produced no output (exit code: ${harness_exit})"
    _interactive_dump_log_tail "${log_file}" 20 "Harness stderr:"
    return 1
  fi

  # Parse result; unparseable JSON is treated as a failure.
  local harness_success
  harness_success=$(jq -r '.success // false' "${result_file}" 2>/dev/null || printf 'false')
  local harness_turns
  harness_turns=$(jq -r '.turns // 0' "${result_file}" 2>/dev/null || printf '0')
  local harness_reason
  harness_reason=$(jq -r '.failReason // ""' "${result_file}" 2>/dev/null || printf '')

  if [ "${harness_success}" != "true" ]; then
    if [ -z "${harness_reason}" ]; then
      harness_reason="unknown (harness exit code: ${harness_exit})"
    fi
    log_err "Interactive provision failed (${harness_duration}s): ${harness_reason}"
    _interactive_dump_log_tail "${log_file}" 50 "Last 50 lines of harness log:"
    return 1
  fi

  log_ok "Interactive provision succeeded (${harness_duration}s, ${harness_turns} AI turns)"

  # Now verify the instance exists via cloud driver so teardown works.
  # A miss is only a warning: the harness already reported success.
  if cloud_provision_verify "${app_name}" "${log_dir}"; then
    log_ok "Cloud driver confirmed instance exists"
  else
    log_warn "Instance not found via cloud driver — spawn may have used a different name"
  fi
  return 0
}