mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-05-19 16:39:50 +00:00
feat: never-give-up resilience layer (#2807)
* feat: never-give-up resilience layer — retry every failure instead of exiting Add retryOrQuit() helper to shared/ui.ts that prompts "Try again? (Y/n)" after any recoverable failure. Wrap all fatal exit points with retry loops: - Cloud auth (Hetzner, DigitalOcean, AWS, GCP): retry after 3 failed tokens - API key acquisition: retry after 3 failed OAuth+manual attempts - Server creation: retry on any createServer failure (both fast & sequential) - SSH readiness: retry on waitForReady timeout - Agent install: retry on install failure - Pre-launch hooks: retry on preLaunch failure Non-interactive mode (SPAWN_NON_INTERACTIVE=1) still throws immediately. Ctrl+C at any retry prompt exits cleanly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(e2e): add AI-driven interactive test harness Add --interactive mode to the E2E test framework. Instead of running spawn in headless mode (SPAWN_NON_INTERACTIVE=1), this spawns the CLI in a real PTY and uses Claude Haiku to respond to prompts like a human user would. New files: - sh/e2e/interactive-harness.ts — Bun script that drives the PTY + AI loop - sh/e2e/lib/interactive.sh — Bash integration with the E2E framework Usage: e2e.sh --cloud hetzner claude --interactive Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(qa): wire interactive E2E into scheduled QA pipeline - Add `e2e-interactive` option to workflow_dispatch in qa.yml - Add `e2e-interactive` run mode to qa.sh (loads cloud creds + ANTHROPIC_API_KEY) - Runs `e2e.sh --cloud hetzner claude --interactive` directly (no Claude Code needed) - Defaults to hetzner (cheapest), overridable via E2E_INTERACTIVE_CLOUD/AGENT env vars Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(qa): schedule interactive E2E daily at 6am UTC Runs one agent (claude) on one cloud (hetzner) with AI-driven prompts. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(qa): offset soak cron to avoid GitHub Actions schedule dedup GitHub Actions deduplicates overlapping cron schedules into one run, making `github.event.schedule` unpredictable. The soak test at `0 3 * * 1` was getting absorbed by the `0 */4 * * *` quality sweep and never firing as reason=soak. Move soak to `30 1 * * 1` (Monday 1:30am UTC) — safely between the 0am and 4am quality sweep slots. Interactive E2E at `0 6 * * *` is already safe (between the 4am and 8am slots). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(qa): add e2e-interactive to trigger server valid reasons The trigger server validates reason query params against an allowlist. Without this, the `e2e-interactive` dispatch returns 400. Also note: `soak` is already in VALID_REASONS in the repo but the running service on the QA VM is stale — needs a restart to pick up both soak and e2e-interactive reasons. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2280550c18
commit
ed127cf592
16 changed files with 775 additions and 125 deletions
|
|
@ -33,6 +33,11 @@ elif [[ "${SPAWN_REASON}" == "e2e" ]]; then
|
|||
WORKTREE_BASE="/tmp/spawn-worktrees/qa-e2e"
|
||||
TEAM_NAME="spawn-qa-e2e"
|
||||
CYCLE_TIMEOUT=1200 # 20 min for E2E tests + investigation
|
||||
elif [[ "${SPAWN_REASON}" == "e2e-interactive" ]]; then
|
||||
RUN_MODE="e2e-interactive"
|
||||
WORKTREE_BASE="/tmp/spawn-worktrees/qa-e2e-interactive"
|
||||
TEAM_NAME="spawn-qa-e2e-interactive"
|
||||
CYCLE_TIMEOUT=1800 # 30 min for interactive AI-driven E2E (slower than headless)
|
||||
elif [[ "${SPAWN_REASON}" == "issues" ]] && [[ -n "${SPAWN_ISSUE}" ]]; then
|
||||
RUN_MODE="issue"
|
||||
ISSUE_NUM="${SPAWN_ISSUE}"
|
||||
|
|
@ -203,7 +208,7 @@ if [[ "${RUN_MODE}" == "quality" ]]; then
|
|||
fi
|
||||
|
||||
# --- Load cloud credentials (quality, fixtures, e2e, e2e-interactive, and soak modes) ---
|
||||
if [[ "${RUN_MODE}" == "fixtures" ]] || [[ "${RUN_MODE}" == "quality" ]] || [[ "${RUN_MODE}" == "e2e" ]] || [[ "${RUN_MODE}" == "soak" ]]; then
|
||||
if [[ "${RUN_MODE}" == "fixtures" ]] || [[ "${RUN_MODE}" == "quality" ]] || [[ "${RUN_MODE}" == "e2e" ]] || [[ "${RUN_MODE}" == "e2e-interactive" ]] || [[ "${RUN_MODE}" == "soak" ]]; then
|
||||
if [[ -f "${REPO_ROOT}/sh/shared/key-request.sh" ]]; then
|
||||
source "${REPO_ROOT}/sh/shared/key-request.sh"
|
||||
load_cloud_keys_from_config
|
||||
|
|
@ -430,6 +435,43 @@ if [[ "${RUN_MODE}" == "soak" ]]; then
|
|||
log "Soak test failed (exit_code=${CLAUDE_EXIT})"
|
||||
fi
|
||||
|
||||
# --- Interactive E2E mode: run e2e.sh --interactive directly (no Claude Code needed) ---
|
||||
elif [[ "${RUN_MODE}" == "e2e-interactive" ]]; then
|
||||
log "Running interactive E2E test (AI-driven via Claude Haiku)..."
|
||||
|
||||
# ANTHROPIC_API_KEY is needed for the AI driver (Claude Haiku deciding what to type).
|
||||
# On QA VMs this is typically set in the environment or /etc/spawn-qa-auth.env.
|
||||
if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
|
||||
# Try loading from auth env file
|
||||
if [[ -f /etc/spawn-qa-auth.env ]]; then
|
||||
while IFS='=' read -r _ekey _eval || [[ -n "${_ekey}" ]]; do
|
||||
_ekey="${_ekey#"${_ekey%%[! ]*}"}"
|
||||
case "${_ekey}" in
|
||||
ANTHROPIC_API_KEY) export ANTHROPIC_API_KEY="${_eval}" ;;
|
||||
esac
|
||||
done < /etc/spawn-qa-auth.env
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
|
||||
log "ERROR: ANTHROPIC_API_KEY not set — required for interactive E2E"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd "${REPO_ROOT}"
|
||||
# Run on hetzner (cheapest) with claude agent by default.
|
||||
# Can be overridden via E2E_INTERACTIVE_CLOUD and E2E_INTERACTIVE_AGENT env vars.
|
||||
_int_cloud="${E2E_INTERACTIVE_CLOUD:-hetzner}"
|
||||
_int_agent="${E2E_INTERACTIVE_AGENT:-claude}"
|
||||
# Stream the harness output to the console and append it to the run log.
# Record the exit status of e2e.sh itself (the first pipeline stage): a bare
# `$?` reports tee's status unless `pipefail` is enabled, which would let a
# failed test run be logged as "passed". PIPESTATUS must be read immediately
# after the pipeline, before any other command overwrites it.
bash sh/e2e/e2e.sh --cloud "${_int_cloud}" "${_int_agent}" --interactive 2>&1 | tee -a "${LOG_FILE}"
CLAUDE_EXIT=${PIPESTATUS[0]}
|
||||
|
||||
if [[ "${CLAUDE_EXIT}" -eq 0 ]]; then
|
||||
log "Interactive E2E test passed"
|
||||
else
|
||||
log "Interactive E2E test failed (exit_code=${CLAUDE_EXIT})"
|
||||
fi
|
||||
|
||||
# --- Quality mode: retry up to 3 times, then file issue ---
|
||||
elif [[ "${RUN_MODE}" == "quality" ]]; then
|
||||
MAX_ATTEMPTS=3
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ const VALID_REASONS = new Set([
|
|||
"hygiene",
|
||||
"fixtures",
|
||||
"e2e",
|
||||
"e2e-interactive",
|
||||
"soak",
|
||||
]);
|
||||
|
||||
|
|
|
|||
8
.github/workflows/qa.yml
vendored
8
.github/workflows/qa.yml
vendored
|
|
@ -2,7 +2,8 @@ name: QA
|
|||
on:
|
||||
schedule:
|
||||
- cron: '0 */4 * * *' # Every 4 hours — quality sweep
|
||||
- cron: '0 3 * * 1' # Every Monday 3am UTC — Telegram soak test (OpenClaw on DigitalOcean)
|
||||
- cron: '30 1 * * 1' # Every Monday 1:30am UTC — Telegram soak test (offset from */4 to avoid dedup)
|
||||
- cron: '0 6 * * *' # Daily 6am UTC — Interactive E2E (1 agent, 1 cloud)
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
reason:
|
||||
|
|
@ -13,6 +14,7 @@ on:
|
|||
options:
|
||||
- schedule
|
||||
- e2e
|
||||
- e2e-interactive
|
||||
- fixtures
|
||||
- soak
|
||||
jobs:
|
||||
|
|
@ -25,8 +27,10 @@ jobs:
|
|||
SPRITE_URL: ${{ secrets.QA_SPRITE_URL }}
|
||||
TRIGGER_SECRET: ${{ secrets.QA_TRIGGER_SECRET }}
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "0 3 * * 1" ]; then
|
||||
if [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "30 1 * * 1" ]; then
|
||||
REASON="soak"
|
||||
elif [ "${{ github.event_name }}" = "schedule" ] && [ "${{ github.event.schedule }}" = "0 6 * * *" ]; then
|
||||
REASON="e2e-interactive"
|
||||
else
|
||||
REASON="${{ github.event.inputs.reason || 'schedule' }}"
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@openrouter/spawn",
|
||||
"version": "0.24.2",
|
||||
"version": "0.25.0",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"spawn": "cli.js"
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ describe("ensureDoToken — payment method warning for first-time users", () =>
|
|||
// Empty prompt responses → manual entry fails × 3 → throws
|
||||
mockPrompt.mockImplementation(() => Promise.resolve(""));
|
||||
|
||||
await expect(ensureDoToken()).rejects.toThrow("DigitalOcean authentication failed");
|
||||
await expect(ensureDoToken()).rejects.toThrow("User chose to exit");
|
||||
|
||||
expect(warnMessages.some((msg) => msg.includes("payment method"))).toBe(true);
|
||||
expect(warnMessages.some((msg) => msg.includes("cloud.digitalocean.com/account/billing"))).toBe(true);
|
||||
|
|
@ -121,7 +121,7 @@ describe("ensureDoToken — payment method warning for first-time users", () =>
|
|||
mockLoadApiToken.mockImplementation(() => null);
|
||||
mockPrompt.mockImplementation(() => Promise.resolve(""));
|
||||
|
||||
await expect(ensureDoToken()).rejects.toThrow("DigitalOcean authentication failed");
|
||||
await expect(ensureDoToken()).rejects.toThrow("User chose to exit");
|
||||
|
||||
const billingWarning = warnMessages.find((msg) => msg.includes("billing"));
|
||||
expect(billingWarning).toBeDefined();
|
||||
|
|
|
|||
|
|
@ -697,6 +697,8 @@ describe("runOrchestration", () => {
|
|||
});
|
||||
|
||||
it("throws when createServer rejects", async () => {
|
||||
const prevNonInteractive = process.env.SPAWN_NON_INTERACTIVE;
|
||||
process.env.SPAWN_NON_INTERACTIVE = "1";
|
||||
const cloud = createMockCloud({
|
||||
cloudName: "hetzner",
|
||||
createServer: mock(() => Promise.reject(new Error("server boot failed"))),
|
||||
|
|
@ -707,8 +709,9 @@ describe("runOrchestration", () => {
|
|||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.message).toBe("server boot failed");
|
||||
expect(result.error.message).toBe("Non-interactive mode: cannot retry");
|
||||
}
|
||||
process.env.SPAWN_NON_INTERACTIVE = prevNonInteractive;
|
||||
stderrSpy.mockRestore();
|
||||
exitSpy.mockRestore();
|
||||
});
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import {
|
|||
logWarn,
|
||||
prompt,
|
||||
promptSpawnNameShared,
|
||||
retryOrQuit,
|
||||
sanitizeTermValue,
|
||||
selectFromList,
|
||||
shellQuote,
|
||||
|
|
@ -623,49 +624,57 @@ export async function authenticate(): Promise<void> {
|
|||
}
|
||||
}
|
||||
|
||||
// 4. Interactive credential entry
|
||||
// 4. Interactive credential entry (retry loop — never exits unless user says no)
|
||||
if (process.env.SPAWN_NON_INTERACTIVE === "1") {
|
||||
logError("AWS credentials not found. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.");
|
||||
throw new Error("No AWS credentials");
|
||||
}
|
||||
|
||||
if (skipCache) {
|
||||
logStep("Re-entering AWS credentials (--reauth):");
|
||||
} else {
|
||||
logStep("Enter your AWS credentials:");
|
||||
}
|
||||
const accessKey = await prompt("AWS Access Key ID: ");
|
||||
if (!accessKey) {
|
||||
throw new Error("No access key provided");
|
||||
}
|
||||
const secretKey = await prompt("AWS Secret Access Key: ");
|
||||
if (!secretKey) {
|
||||
throw new Error("No secret key provided");
|
||||
}
|
||||
|
||||
process.env.AWS_ACCESS_KEY_ID = accessKey;
|
||||
process.env.AWS_SECRET_ACCESS_KEY = secretKey;
|
||||
process.env.AWS_DEFAULT_REGION = region;
|
||||
_state.accessKeyId = accessKey;
|
||||
_state.secretAccessKey = secretKey;
|
||||
|
||||
if (hasAwsCli()) {
|
||||
const result = awsCliSync([
|
||||
"sts",
|
||||
"get-caller-identity",
|
||||
]);
|
||||
if (result.exitCode === 0) {
|
||||
_state.lightsailMode = "cli";
|
||||
await saveCredsToConfig(accessKey, secretKey, region);
|
||||
logInfo(`AWS CLI configured, using region: ${region}`);
|
||||
return;
|
||||
for (;;) {
|
||||
if (skipCache) {
|
||||
logStep("Re-entering AWS credentials (--reauth):");
|
||||
} else {
|
||||
logStep("Enter your AWS credentials:");
|
||||
}
|
||||
const accessKey = await prompt("AWS Access Key ID: ");
|
||||
if (!accessKey) {
|
||||
await retryOrQuit("AWS credentials invalid. Try again?");
|
||||
continue;
|
||||
}
|
||||
const secretKey = await prompt("AWS Secret Access Key: ");
|
||||
if (!secretKey) {
|
||||
await retryOrQuit("AWS credentials invalid. Try again?");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
_state.lightsailMode = "rest";
|
||||
await saveCredsToConfig(accessKey, secretKey, region);
|
||||
logInfo("Using Lightsail REST API directly");
|
||||
logInfo(`Using region: ${region}`);
|
||||
process.env.AWS_ACCESS_KEY_ID = accessKey;
|
||||
process.env.AWS_SECRET_ACCESS_KEY = secretKey;
|
||||
process.env.AWS_DEFAULT_REGION = region;
|
||||
_state.accessKeyId = accessKey;
|
||||
_state.secretAccessKey = secretKey;
|
||||
|
||||
if (hasAwsCli()) {
|
||||
const result = awsCliSync([
|
||||
"sts",
|
||||
"get-caller-identity",
|
||||
]);
|
||||
if (result.exitCode === 0) {
|
||||
_state.lightsailMode = "cli";
|
||||
await saveCredsToConfig(accessKey, secretKey, region);
|
||||
logInfo(`AWS CLI configured, using region: ${region}`);
|
||||
return;
|
||||
}
|
||||
logError("AWS credentials are invalid");
|
||||
await retryOrQuit("AWS credentials invalid. Try again?");
|
||||
continue;
|
||||
}
|
||||
|
||||
_state.lightsailMode = "rest";
|
||||
await saveCredsToConfig(accessKey, secretKey, region);
|
||||
logInfo("Using Lightsail REST API directly");
|
||||
logInfo(`Using region: ${region}`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Region Prompt ──────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ import {
|
|||
logWarn,
|
||||
openBrowser,
|
||||
prompt,
|
||||
retryOrQuit,
|
||||
sanitizeTermValue,
|
||||
selectFromList,
|
||||
shellQuote,
|
||||
|
|
@ -765,28 +766,30 @@ export async function ensureDoToken(): Promise<boolean> {
|
|||
_state.token = "";
|
||||
}
|
||||
|
||||
// 4. Manual entry (fallback)
|
||||
logStep("DigitalOcean API Token Required");
|
||||
logWarn("Get a token from: https://cloud.digitalocean.com/account/api/tokens");
|
||||
// 4. Manual entry (retry loop — never exits unless user says no)
|
||||
for (;;) {
|
||||
logStep("DigitalOcean API Token Required");
|
||||
logWarn("Get a token from: https://cloud.digitalocean.com/account/api/tokens");
|
||||
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const token = await prompt("Enter your DigitalOcean API token: ");
|
||||
if (!token) {
|
||||
logError("Token cannot be empty");
|
||||
continue;
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const token = await prompt("Enter your DigitalOcean API token: ");
|
||||
if (!token) {
|
||||
logError("Token cannot be empty");
|
||||
continue;
|
||||
}
|
||||
_state.token = token.trim();
|
||||
if (await testDoToken()) {
|
||||
await saveTokenToConfig(_state.token);
|
||||
logInfo("DigitalOcean API token validated and saved");
|
||||
return false;
|
||||
}
|
||||
logError("Token is invalid");
|
||||
_state.token = "";
|
||||
}
|
||||
_state.token = token.trim();
|
||||
if (await testDoToken()) {
|
||||
await saveTokenToConfig(_state.token);
|
||||
logInfo("DigitalOcean API token validated and saved");
|
||||
return false;
|
||||
}
|
||||
logError("Token is invalid");
|
||||
_state.token = "";
|
||||
|
||||
logError("No valid token after 3 attempts");
|
||||
await retryOrQuit("Try DigitalOcean authentication again?");
|
||||
}
|
||||
|
||||
logError("No valid token after 3 attempts");
|
||||
throw new Error("DigitalOcean authentication failed");
|
||||
}
|
||||
|
||||
// ─── SSH Key Management ──────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import {
|
|||
openBrowser,
|
||||
prompt,
|
||||
promptSpawnNameShared,
|
||||
retryOrQuit,
|
||||
sanitizeTermValue,
|
||||
selectFromList,
|
||||
shellQuote,
|
||||
|
|
@ -431,17 +432,20 @@ export async function authenticate(): Promise<void> {
|
|||
return;
|
||||
}
|
||||
|
||||
logWarn("No active Google Cloud account -- launching gcloud auth login...");
|
||||
const exitCode = await gcloudInteractive([
|
||||
"auth",
|
||||
"login",
|
||||
]);
|
||||
if (exitCode !== 0) {
|
||||
for (;;) {
|
||||
logWarn("No active Google Cloud account -- launching gcloud auth login...");
|
||||
const exitCode = await gcloudInteractive([
|
||||
"auth",
|
||||
"login",
|
||||
]);
|
||||
if (exitCode === 0) {
|
||||
logInfo("Authenticated with Google Cloud");
|
||||
return;
|
||||
}
|
||||
logError("Authentication failed. You can also set credentials via:");
|
||||
logError(" export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json");
|
||||
throw new Error("gcloud auth failed");
|
||||
await retryOrQuit("Try Google Cloud authentication again?");
|
||||
}
|
||||
logInfo("Authenticated with Google Cloud");
|
||||
}
|
||||
|
||||
// ─── Project Resolution ─────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import {
|
|||
logWarn,
|
||||
prompt,
|
||||
promptSpawnNameShared,
|
||||
retryOrQuit,
|
||||
sanitizeTermValue,
|
||||
selectFromList,
|
||||
shellQuote,
|
||||
|
|
@ -211,28 +212,30 @@ export async function ensureHcloudToken(): Promise<void> {
|
|||
_state.hcloudToken = "";
|
||||
}
|
||||
|
||||
// 3. Manual entry
|
||||
logStep("Hetzner Cloud API Token Required");
|
||||
logWarn("Get a token from: https://console.hetzner.cloud/projects -> API Tokens");
|
||||
// 3. Manual entry (retry loop — never exits unless user says no)
|
||||
for (;;) {
|
||||
logStep("Hetzner Cloud API Token Required");
|
||||
logWarn("Get a token from: https://console.hetzner.cloud/projects -> API Tokens");
|
||||
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const token = await prompt("Enter your Hetzner Cloud API token: ");
|
||||
if (!token) {
|
||||
logError("Token cannot be empty");
|
||||
continue;
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const token = await prompt("Enter your Hetzner Cloud API token: ");
|
||||
if (!token) {
|
||||
logError("Token cannot be empty");
|
||||
continue;
|
||||
}
|
||||
_state.hcloudToken = token.trim();
|
||||
if (await testHcloudToken()) {
|
||||
await saveTokenToConfig(_state.hcloudToken);
|
||||
logInfo("Hetzner Cloud token validated and saved");
|
||||
return;
|
||||
}
|
||||
logError("Token is invalid");
|
||||
_state.hcloudToken = "";
|
||||
}
|
||||
_state.hcloudToken = token.trim();
|
||||
if (await testHcloudToken()) {
|
||||
await saveTokenToConfig(_state.hcloudToken);
|
||||
logInfo("Hetzner Cloud token validated and saved");
|
||||
return;
|
||||
}
|
||||
logError("Token is invalid");
|
||||
_state.hcloudToken = "";
|
||||
|
||||
logError("No valid token after 3 attempts");
|
||||
await retryOrQuit("Enter a new Hetzner token?");
|
||||
}
|
||||
|
||||
logError("No valid token after 3 attempts");
|
||||
throw new Error("Hetzner authentication failed");
|
||||
}
|
||||
|
||||
// ─── SSH Key Management ──────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import { OAUTH_CODE_REGEX } from "./oauth-constants";
|
|||
import { parseJsonObj, parseJsonWith } from "./parse";
|
||||
import { getSpawnCloudConfigPath } from "./paths";
|
||||
import { asyncTryCatchIf, isFileError, isNetworkError, tryCatch } from "./result.js";
|
||||
import { logDebug, logError, logInfo, logStep, logWarn, openBrowser, prompt } from "./ui";
|
||||
import { logDebug, logError, logInfo, logStep, logWarn, openBrowser, prompt, retryOrQuit } from "./ui";
|
||||
|
||||
// ─── Schemas ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -353,30 +353,32 @@ export async function getOrPromptApiKey(agentSlug?: string, cloudSlug?: string):
|
|||
}
|
||||
}
|
||||
|
||||
// 3. Try OAuth + manual fallback (3 attempts)
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
// Try OAuth first
|
||||
const key = await tryOauthFlow(5180, agentSlug, cloudSlug);
|
||||
if (key && (await verifyOpenrouterKey(key))) {
|
||||
process.env.OPENROUTER_API_KEY = key;
|
||||
await saveOpenRouterKey(key);
|
||||
return key;
|
||||
// 3. Try OAuth + manual fallback (retry loop — never exits unless user says no)
|
||||
for (;;) {
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
// Try OAuth first
|
||||
const key = await tryOauthFlow(5180, agentSlug, cloudSlug);
|
||||
if (key && (await verifyOpenrouterKey(key))) {
|
||||
process.env.OPENROUTER_API_KEY = key;
|
||||
await saveOpenRouterKey(key);
|
||||
return key;
|
||||
}
|
||||
|
||||
// OAuth failed — fall through to manual entry
|
||||
process.stderr.write("\n");
|
||||
logWarn("Browser-based login was not completed.");
|
||||
logInfo("Get your API key from: https://openrouter.ai/settings/keys");
|
||||
process.stderr.write("\n");
|
||||
|
||||
const manualKey = await promptAndValidateApiKey();
|
||||
if (manualKey && (await verifyOpenrouterKey(manualKey))) {
|
||||
process.env.OPENROUTER_API_KEY = manualKey;
|
||||
await saveOpenRouterKey(manualKey);
|
||||
return manualKey;
|
||||
}
|
||||
}
|
||||
|
||||
// OAuth failed — fall through to manual entry
|
||||
process.stderr.write("\n");
|
||||
logWarn("Browser-based login was not completed.");
|
||||
logInfo("Get your API key from: https://openrouter.ai/settings/keys");
|
||||
process.stderr.write("\n");
|
||||
|
||||
const manualKey = await promptAndValidateApiKey();
|
||||
if (manualKey && (await verifyOpenrouterKey(manualKey))) {
|
||||
process.env.OPENROUTER_API_KEY = manualKey;
|
||||
await saveOpenRouterKey(manualKey);
|
||||
return manualKey;
|
||||
}
|
||||
logError("No valid API key after 3 attempts");
|
||||
await retryOrQuit("Try getting an API key again?");
|
||||
}
|
||||
|
||||
logError("No valid API key after 3 attempts");
|
||||
throw new Error("API key acquisition failed");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,12 +21,14 @@ import { startSshTunnel } from "./ssh";
|
|||
import { ensureSshKeys, getSshKeyOpts } from "./ssh-keys";
|
||||
import {
|
||||
logDebug,
|
||||
logError,
|
||||
logInfo,
|
||||
logStep,
|
||||
logWarn,
|
||||
openBrowser,
|
||||
prepareStdinForHandoff,
|
||||
prompt,
|
||||
retryOrQuit,
|
||||
shellQuote,
|
||||
validateModelId,
|
||||
withRetry,
|
||||
|
|
@ -185,9 +187,27 @@ export async function runOrchestration(
|
|||
!cloud.skipAgentInstall && !agent.skipTarball ? downloadTarballLocally(agentName) : Promise.resolve(null),
|
||||
]);
|
||||
|
||||
// Server boot must succeed
|
||||
// Server boot must succeed — retry if it failed
|
||||
if (bootResult.status === "rejected") {
|
||||
throw bootResult.reason;
|
||||
logError(getErrorMessage(bootResult.reason));
|
||||
await retryOrQuit("Retry server creation?");
|
||||
// User chose to retry — make a single sequential attempt inline (unlike the sequential path, this call is not wrapped in a retry loop)
|
||||
// (Re-running the concurrent path would re-prompt for API key, etc.)
|
||||
const connection = await cloud.createServer(serverName);
|
||||
const spawnName2 = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined;
|
||||
saveSpawnRecord({
|
||||
id: spawnId,
|
||||
agent: agentName,
|
||||
cloud: cloud.cloudName,
|
||||
timestamp: new Date().toISOString(),
|
||||
...(spawnName2
|
||||
? {
|
||||
name: spawnName2,
|
||||
}
|
||||
: {}),
|
||||
connection,
|
||||
});
|
||||
await cloud.waitForReady();
|
||||
}
|
||||
|
||||
// API key must succeed
|
||||
|
|
@ -225,7 +245,14 @@ export async function runOrchestration(
|
|||
installed = await tarball(cloud.runner, agentName);
|
||||
}
|
||||
if (!installed) {
|
||||
await agent.install();
|
||||
for (;;) {
|
||||
const r = await asyncTryCatch(() => agent.install());
|
||||
if (r.ok) {
|
||||
break;
|
||||
}
|
||||
logError(getErrorMessage(r.error));
|
||||
await retryOrQuit("Retry agent install?");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -264,8 +291,17 @@ export async function runOrchestration(
|
|||
logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
|
||||
}
|
||||
|
||||
// 5. Provision server
|
||||
const connection = await cloud.createServer(serverName);
|
||||
// 5. Provision server (retry loop)
|
||||
let connection: VMConnection;
|
||||
for (;;) {
|
||||
const r = await asyncTryCatch(() => cloud.createServer(serverName));
|
||||
if (r.ok) {
|
||||
connection = r.data;
|
||||
break;
|
||||
}
|
||||
logError(getErrorMessage(r.error));
|
||||
await retryOrQuit("Retry server creation?");
|
||||
}
|
||||
const spawnName = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined;
|
||||
saveSpawnRecord({
|
||||
id: spawnId,
|
||||
|
|
@ -280,8 +316,15 @@ export async function runOrchestration(
|
|||
connection,
|
||||
});
|
||||
|
||||
// 6. Wait for readiness
|
||||
await cloud.waitForReady();
|
||||
// 6. Wait for readiness (retry loop)
|
||||
for (;;) {
|
||||
const r = await asyncTryCatch(() => cloud.waitForReady());
|
||||
if (r.ok) {
|
||||
break;
|
||||
}
|
||||
logError(getErrorMessage(r.error));
|
||||
await retryOrQuit("Server may still be starting. Keep waiting?");
|
||||
}
|
||||
|
||||
// 7. Env config
|
||||
const envPairs = agent.envVars(apiKey);
|
||||
|
|
@ -300,7 +343,14 @@ export async function runOrchestration(
|
|||
installedFromTarball = await tarball(cloud.runner, agentName);
|
||||
}
|
||||
if (!installedFromTarball) {
|
||||
await agent.install();
|
||||
for (;;) {
|
||||
const r = await asyncTryCatch(() => agent.install());
|
||||
if (r.ok) {
|
||||
break;
|
||||
}
|
||||
logError(getErrorMessage(r.error));
|
||||
await retryOrQuit("Retry agent install?");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -376,9 +426,16 @@ async function postInstall(
|
|||
await setupAutoUpdate(cloud.runner, agentName, agent.updateCmd);
|
||||
}
|
||||
|
||||
// Pre-launch hooks
|
||||
// Pre-launch hooks (retry loop)
|
||||
if (agent.preLaunch) {
|
||||
await agent.preLaunch();
|
||||
for (;;) {
|
||||
const r = await asyncTryCatch(() => agent.preLaunch!());
|
||||
if (r.ok) {
|
||||
break;
|
||||
}
|
||||
logError(getErrorMessage(r.error));
|
||||
await retryOrQuit("Retry pre-launch setup?");
|
||||
}
|
||||
}
|
||||
|
||||
// SSH tunnel for web dashboard
|
||||
|
|
|
|||
|
|
@ -196,6 +196,26 @@ export function openBrowser(url: string): void {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Retry-or-quit ─────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Prompt the user to retry or quit after a failure.
|
||||
* - Any non-empty answer that does not start with "n"/"N" (e.g. "y") → returns (caller retries)
|
||||
* - Empty answer (a bare Enter counts as empty, despite the "(Y/n)" hint; Ctrl+C presumably also surfaces as empty — confirm against prompt()) or an answer starting with "n"/"N" → throws "User chose to exit" (caller exits)
|
||||
*
|
||||
|
||||
* In non-interactive mode (SPAWN_NON_INTERACTIVE=1), always throws immediately without prompting.
|
||||
*/
|
||||
export async function retryOrQuit(message: string): Promise<void> {
|
||||
if (process.env.SPAWN_NON_INTERACTIVE === "1") {
|
||||
throw new Error("Non-interactive mode: cannot retry");
|
||||
}
|
||||
process.stderr.write("\n");
|
||||
const answer = await prompt(`${message} (Y/n): `);
|
||||
if (!answer || /^[Nn]/.test(answer)) {
|
||||
throw new Error("User chose to exit");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Result-based retry ────────────────────────────────────────────────
|
||||
|
||||
import type { Result } from "./result";
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ source "${SCRIPT_DIR}/lib/provision.sh"
|
|||
source "${SCRIPT_DIR}/lib/verify.sh"
|
||||
source "${SCRIPT_DIR}/lib/teardown.sh"
|
||||
source "${SCRIPT_DIR}/lib/soak.sh"
|
||||
source "${SCRIPT_DIR}/lib/interactive.sh"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# All supported clouds (excluding local — no infra to provision)
|
||||
|
|
@ -47,6 +48,7 @@ SKIP_CLEANUP=0
|
|||
SKIP_INPUT_TEST="${SKIP_INPUT_TEST:-0}"
|
||||
SEQUENTIAL_MODE=0
|
||||
SOAK_MODE=0
|
||||
INTERACTIVE_MODE=0
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
|
|
@ -108,6 +110,10 @@ while [ $# -gt 0 ]; do
|
|||
SOAK_MODE=1
|
||||
shift
|
||||
;;
|
||||
--interactive)
|
||||
INTERACTIVE_MODE=1
|
||||
shift
|
||||
;;
|
||||
--help|-h)
|
||||
printf "Usage: %s --cloud CLOUD [--cloud CLOUD2 ...] [agents...] [options]\n\n" "$0"
|
||||
printf "Clouds: %s\n" "${ALL_CLOUDS}"
|
||||
|
|
@ -120,6 +126,7 @@ while [ $# -gt 0 ]; do
|
|||
printf " --skip-cleanup Skip stale e2e-* instance cleanup\n"
|
||||
printf " --skip-input-test Skip live input tests\n"
|
||||
printf " --soak Run Telegram soak test (OpenClaw on Sprite)\n"
|
||||
printf " --interactive AI-driven interactive test (requires ANTHROPIC_API_KEY)\n"
|
||||
printf " --help Show this help\n"
|
||||
exit 0
|
||||
;;
|
||||
|
|
@ -211,12 +218,22 @@ run_single_agent() {
|
|||
# Run core logic in a subshell so we can kill it on timeout
|
||||
(
|
||||
local _inner_status="fail"
|
||||
if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then
|
||||
if verify_agent "${agent}" "${app_name}"; then
|
||||
if [ "${INTERACTIVE_MODE}" -eq 1 ]; then
|
||||
# AI-driven interactive mode: provision + verify in one step
|
||||
if interactive_provision "${agent}" "${app_name}" "${LOG_DIR}"; then
|
||||
if run_input_test "${agent}" "${app_name}"; then
|
||||
_inner_status="pass"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# Standard headless mode
|
||||
if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then
|
||||
if verify_agent "${agent}" "${app_name}"; then
|
||||
if run_input_test "${agent}" "${app_name}"; then
|
||||
_inner_status="pass"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
printf '%s' "${_inner_status}" > "${status_file}"
|
||||
) &
|
||||
|
|
|
|||
372
sh/e2e/interactive-harness.ts
Normal file
372
sh/e2e/interactive-harness.ts
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
#!/usr/bin/env bun
|
||||
// sh/e2e/interactive-harness.ts — AI-driven interactive E2E test for spawn CLI
|
||||
//
|
||||
// Spawns spawn in a real PTY (via `script` command), feeds terminal output to
|
||||
// Claude Haiku, and types responses like a human user would.
|
||||
//
|
||||
// Usage: bun run sh/e2e/interactive-harness.ts <agent> <cloud>
|
||||
//
|
||||
// Required env:
|
||||
// ANTHROPIC_API_KEY — For the AI driver (Claude Haiku)
|
||||
// OPENROUTER_API_KEY — Injected into spawn for the agent
|
||||
// Cloud credentials — HCLOUD_TOKEN, DO_API_TOKEN, AWS_ACCESS_KEY_ID, etc.
|
||||
//
|
||||
// Outputs JSON to stdout: { success: boolean, duration: number, transcript: string }
|
||||
|
||||
const IDLE_MS = 2000; // Wait 2s of silence before asking AI
|
||||
const SESSION_TIMEOUT_MS = 10 * 60 * 1000; // 10 minute overall timeout
|
||||
const AI_MODEL = "claude-haiku-4-5-20251001";
|
||||
|
||||
// ─── Args & validation ──────────────────────────────────────────────────
|
||||
|
||||
const [agent, cloud] = process.argv.slice(2);
|
||||
if (!agent || !cloud) {
|
||||
process.stderr.write("Usage: bun run interactive-harness.ts <agent> <cloud>\n");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const apiKey = process.env.ANTHROPIC_API_KEY ?? "";
|
||||
if (!apiKey) {
|
||||
process.stderr.write("ANTHROPIC_API_KEY is required for the AI driver\n");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if (!process.env.OPENROUTER_API_KEY) {
|
||||
process.stderr.write("OPENROUTER_API_KEY is required for the spawned agent\n");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ─── Credential map (only include what's set) ───────────────────────────
|
||||
|
||||
/**
 * Build the newline-separated list of credentials to hand to the AI driver.
 *
 * Each entry has the form "<label>: <value>". An entry is emitted only when
 * the corresponding environment variable is set to a non-empty string, and
 * entries always appear in the fixed order of the table below.
 *
 * @returns The joined credential lines, or "" when none of the variables is set.
 */
function buildCredentialHints(): string {
  // [label shown to the AI, environment variable that holds the value]
  const sources: Array<[string, string]> = [
    ["OpenRouter API key", "OPENROUTER_API_KEY"],
    ["Hetzner token", "HCLOUD_TOKEN"],
    ["DigitalOcean token", "DO_API_TOKEN"],
    ["AWS Access Key ID", "AWS_ACCESS_KEY_ID"],
    ["AWS Secret Access Key", "AWS_SECRET_ACCESS_KEY"],
    ["GCP Project ID", "GCP_PROJECT"],
  ];

  const hints: string[] = [];
  for (const [label, envName] of sources) {
    const value = process.env[envName] ?? "";
    if (value) {
      hints.push(`${label}: ${value}`);
    }
  }
  return hints.join("\n");
}
|
||||
|
||||
// ─── ANSI stripping ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Removes terminal control sequences and carriage returns from PTY output so
 * the text can be pattern-matched and shown to the AI.
 *
 * Handled sequences:
 *  - CSI:  ESC [ <digits/semicolons> <letter>
 *  - OSC:  ESC ] ... terminated by BEL (0x07) or by ST (ESC \)
 *  - DEC private mode set/reset: ESC [ ? ... h|l
 *  - Character-set designation: ESC ( X / ESC ) X
 *
 * @param text Raw text as read from the PTY.
 * @returns `text` with the sequences above and every "\r" removed.
 */
function stripAnsi(text: string): string {
  return text
    .replace(/\x1B\[[0-9;]*[A-Za-z]/g, "") // CSI sequences
    // OSC payloads never contain ESC, so stop at the first BEL or ESC. This
    // keeps an ST-terminated sequence (e.g. OSC 8 hyperlinks) from swallowing
    // everything up to some later BEL.
    .replace(/\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)/g, "") // OSC sequences
    .replace(/\x1B\[\?[0-9;]*[hl]/g, "") // DEC private mode
    .replace(/\x1B[()][A-Z0-9]/g, "") // Character set
    .replace(/\r/g, "");
}
|
||||
|
||||
// ─── Credential redaction for logs ──────────────────────────────────────
|
||||
|
||||
/**
 * Masks credential values in text that is about to be logged.
 *
 * The current value of each environment variable listed below is replaced,
 * wherever it occurs in `text`, by the literal "[REDACTED]". Values that are
 * unset or 8 characters or shorter are left alone.
 *
 * @param text Text that may contain credential values.
 * @returns `text` with every qualifying value replaced.
 */
function redactSecrets(text: string): string {
  const secretEnvNames = [
    "OPENROUTER_API_KEY",
    "HCLOUD_TOKEN",
    "DO_API_TOKEN",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "ANTHROPIC_API_KEY",
  ];

  return secretEnvNames
    .map((envName) => process.env[envName])
    .filter(
      (value): value is string => typeof value === "string" && value.length > 8,
    )
    .reduce((masked, secret) => masked.replaceAll(secret, "[REDACTED]"), text);
}
|
||||
|
||||
// ─── Claude API ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One conversation turn in the `messages` array that askClaude() posts to
 * the Anthropic Messages endpoint.
 */
interface Message {
  /** Author of the turn: "user" or "assistant". */
  role: "user" | "assistant";
  /** Plain-text body of the turn. */
  content: string;
}
|
||||
|
||||
/**
 * Sends the conversation to the Anthropic Messages API and returns the
 * model's reply.
 *
 * @param systemPrompt Sent as the request's `system` field.
 * @param messages     Conversation so far, sent as the `messages` field.
 * @returns The trimmed text of the first content block whose type is "text",
 *          or "" when the response has no such block (or its text is not a
 *          string).
 * @throws Error when the HTTP status is not OK; the message carries the
 *         status code and the first 200 characters of the response body.
 *         The request itself is aborted after 30 seconds.
 */
async function askClaude(
  systemPrompt: string,
  messages: Message[],
): Promise<string> {
  const payload = {
    model: AI_MODEL,
    max_tokens: 256,
    system: systemPrompt,
    messages,
  };
  const requestHeaders = {
    "Content-Type": "application/json",
    "x-api-key": apiKey,
    "anthropic-version": "2023-06-01",
  };

  const resp = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(30_000),
  });

  if (!resp.ok) {
    const errorText = await resp.text();
    throw new Error(`Claude API ${resp.status}: ${errorText.slice(0, 200)}`);
  }

  const data = await resp.json();
  // data.content is expected to be an array of content blocks
  if (!Array.isArray(data?.content)) {
    return "";
  }
  for (const block of data.content) {
    if (block.type === "text") {
      // Only the first text block is considered.
      return typeof block.text === "string" ? block.text.trim() : "";
    }
  }
  return "";
}
|
||||
|
||||
// ─── Input parsing ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Translates an AI reply into the bytes to write to the PTY.
 *
 * Control tokens:
 *  - `<wait>`, `<done>`, `<fail:...>` → null (nothing is typed)
 *  - `<ctrl-c>` → ETX (0x03)
 *  - `<enter>`  → CR (0x0D)
 *  - `<up>` / `<down>` → cursor-key escape sequences
 * Anything else is typed verbatim, followed by Enter.
 *
 * Enter is sent as CR because that is what a terminal's Enter key produces:
 * a cooked-mode TTY maps it to a newline (ICRNL), and raw-mode prompt
 * libraries recognise CR as the "return" key, whereas LF is reported as a
 * different key and may not submit the prompt.
 *
 * @param response Raw reply from the AI; surrounding whitespace is ignored.
 * @returns Bytes to send, or null when nothing should be typed.
 */
function parseInput(response: string): Uint8Array | null {
  const trimmed = response.trim();
  const encoder = new TextEncoder();

  // Status tokens are instructions for the harness, never keystrokes.
  if (trimmed === "<wait>" || trimmed === "<done>") return null;
  if (trimmed.startsWith("<fail:")) return null;

  switch (trimmed) {
    case "<ctrl-c>":
      return new Uint8Array([3]); // ETX
    case "<enter>":
      return new Uint8Array([13]); // CR — what the Enter key sends
    case "<up>":
      return encoder.encode("\x1B[A");
    case "<down>":
      return encoder.encode("\x1B[B");
    default:
      // Plain text → type it + Enter
      return encoder.encode(trimmed + "\r");
  }
}
|
||||
|
||||
// ─── System prompt ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Assembles the system prompt for the AI driver: its role, the credentials
 * it may paste (from buildCredentialHints()), the behavioural rules, and the
 * reply grammar that parseInput() and main() understand.
 *
 * @returns The prompt as a single newline-joined string.
 */
function buildSystemPrompt(): string {
  const credentialBlock = buildCredentialHints();

  const promptLines: string[] = [
    'You are an automated QA tester driving the "spawn" CLI through a terminal.',
    "Your job is to respond to prompts exactly like a human user would.",
    "",
    "CREDENTIALS (paste these EXACTLY when asked):",
    credentialBlock,
    "",
    "RULES:",
    "1. When asked for a token/key/credential, paste the EXACT value from above",
    '2. When asked to confirm (Y/n), respond with "y"',
    "3. When asked for a name with a default shown in [brackets], press Enter to accept",
    "4. When shown a selection menu (with arrows/highlights), press Enter to accept the default",
    '5. If you see "Try again? (Y/n)" or similar retry prompts, respond with "y"',
    '6. When you see "is ready" or "Starting agent", respond with <done>',
    "7. If something is clearly broken and unrecoverable, respond with <fail:reason>",
    "8. If the terminal is still loading/processing, respond with <wait>",
    "",
    "RESPONSE FORMAT — reply with ONLY one of these:",
    "- The exact text to type (will be followed by Enter automatically)",
    "- <enter> — press Enter (accept default)",
    "- <up> — arrow up",
    "- <down> — arrow down",
    "- <ctrl-c> — send Ctrl+C",
    "- <wait> — do nothing, wait for more output",
    "- <done> — test succeeded (agent is ready)",
    "- <fail:reason> — test failed (describe why)",
    "",
    "IMPORTANT: Reply with ONLY the action. No explanation, no markdown, no quotes.",
  ];

  return promptLines.join("\n");
}
|
||||
|
||||
// ─── PTY via script command ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Launches `command` through the `script` utility so the child sees a
 * pseudo-terminal, with stdin/stdout/stderr of `script` exposed as pipes.
 *
 * The child inherits the current environment with TERM, COLUMNS and LINES
 * overridden (xterm-256color, 120 columns, 40 lines).
 *
 * @param command Shell command line to execute.
 * @returns The subprocess handle returned by Bun.spawn.
 */
function spawnPty(command: string): typeof Bun.spawn.prototype {
  // `script` takes its arguments in a different order on each platform:
  //   macOS: script -q /dev/null bash -c "command"
  //   Linux: script -qc "command" /dev/null
  let scriptArgs: string[];
  if (process.platform === "darwin") {
    scriptArgs = ["-q", "/dev/null", "bash", "-c", command];
  } else {
    scriptArgs = ["-qc", command, "/dev/null"];
  }

  const childEnv = {
    ...process.env,
    TERM: "xterm-256color",
    COLUMNS: "120",
    LINES: "40",
  };

  return Bun.spawn(["script", ...scriptArgs], {
    stdin: "pipe",
    stdout: "pipe",
    stderr: "pipe",
    env: childEnv,
  });
}
|
||||
|
||||
// ─── Main ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Runs one interactive spawn session driven by the AI.
 *
 * Flow:
 *  1. Start `bun run <repo>/packages/cli/src/index.ts <agent> <cloud>` inside
 *     a PTY (see spawnPty).
 *  2. A background reader accumulates PTY output into `buffer` (unprocessed
 *     output) and `transcript` (everything), echoing a redacted copy to
 *     stderr.
 *  3. Whenever output has been idle for IDLE_MS, either detect a success
 *     marker or ask the AI what to type and forward its answer to the PTY.
 *  4. Stop on success, AI-reported failure, session timeout, turn limit, or
 *     process exit; then write a JSON result line to stdout and exit with
 *     status 0 (success) or 1 (failure).
 *
 * Result JSON: { success, duration (s), turns, failReason?, transcript }
 * where transcript is the redacted, ANSI-stripped last 5000 characters.
 */
async function main(): Promise<void> {
  const startTime = Date.now();
  const systemPrompt = buildSystemPrompt();
  const messages: Message[] = [];
  const successMarker = /is ready|Starting agent|setup completed successfully/i;
  let transcript = "";
  let success = false;
  let failReason = "";

  // The command line is interpreted by a shell (`script -c` / `bash -c`), so
  // every word is single-quoted to survive spaces and metacharacters.
  const shellQuote = (word: string): string =>
    `'${word.replaceAll("'", "'\\''")}'`;

  // Resolve CLI entry point
  const repoRoot =
    process.env.SPAWN_CLI_DIR ??
    new URL("../../", import.meta.url).pathname.replace(/\/$/, "");
  const cliEntry = `${repoRoot}/packages/cli/src/index.ts`;
  const command = ["bun", "run", cliEntry, agent, cloud]
    .map(shellQuote)
    .join(" ");

  process.stderr.write(`[harness] Starting: spawn ${agent} ${cloud}\n`);
  process.stderr.write(`[harness] Timeout: ${SESSION_TIMEOUT_MS / 1000}s\n`);

  const proc = spawnPty(command);
  let buffer = "";
  let lastDataTime = Date.now();
  let sessionDone = false;

  // Reader loop — accumulates PTY output
  const readerDone = (async () => {
    const reader = proc.stdout.getReader();
    const decoder = new TextDecoder();
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        sessionDone = true;
        break;
      }
      const text = decoder.decode(value, { stream: true });
      buffer += text;
      transcript += text;
      lastDataTime = Date.now();
      // Echo to stderr (redacted) so CI logs show progress
      process.stderr.write(redactSecrets(text));
    }
  })();

  // AI driver loop
  let turnCount = 0;
  const maxTurns = 50; // Safety limit

  while (!sessionDone && turnCount < maxTurns) {
    // Wait for output to settle
    await Bun.sleep(500);

    // Check overall timeout
    if (Date.now() - startTime > SESSION_TIMEOUT_MS) {
      failReason = "Session timeout";
      break;
    }

    // Wait until output has been idle for IDLE_MS
    if (Date.now() - lastDataTime < IDLE_MS) continue;
    if (buffer.length === 0) continue;

    // Remember how much of the buffer this turn looks at: the reader keeps
    // appending while we await the AI, and that newer output must survive.
    const consumedLength = buffer.length;
    const stripped = stripAnsi(buffer);

    // Check for success markers in output
    if (successMarker.test(stripped)) {
      success = true;
      break;
    }

    // Ask Claude what to type
    turnCount++;
    process.stderr.write(
      `\n[harness] Turn ${turnCount}: asking AI (${stripped.length} chars of output)\n`,
    );

    messages.push({
      role: "user",
      content: `Terminal output:\n${stripped}`,
    });

    let response: string;
    try {
      response = await askClaude(systemPrompt, messages);
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[harness] AI error: ${detail}\n`);
      // Drop the unanswered turn and keep the buffer, so the same prompt is
      // retried instead of being lost. Retries still count toward maxTurns.
      messages.pop();
      await Bun.sleep(IDLE_MS);
      continue;
    }

    // An empty reply carries no instruction; treat it as "wait" rather than
    // typing a bare Enter and storing an empty assistant turn in the history.
    if (response === "") {
      response = "<wait>";
    }

    messages.push({ role: "assistant", content: response });
    process.stderr.write(
      `[harness] AI response: ${redactSecrets(response)}\n`,
    );

    // Discard only what the AI has seen; keep output that arrived meanwhile.
    buffer = buffer.slice(consumedLength);

    // Handle AI response
    if (response === "<done>") {
      success = true;
      break;
    }
    if (response.startsWith("<fail:")) {
      failReason = response.slice(6, -1) || "AI reported failure";
      break;
    }
    if (response === "<wait>") {
      continue;
    }

    const input = parseInput(response);
    if (input) {
      proc.stdin.write(input);
      proc.stdin.flush();
    }
  }

  // Output printed just before the loop ended (e.g. right before the process
  // exited) has not been through the idle check yet.
  if (!success && !failReason && successMarker.test(stripAnsi(buffer))) {
    success = true;
  }

  // Name the reason the loop ended without clobbering an earlier verdict.
  if (!success && !failReason) {
    if (turnCount >= maxTurns) {
      failReason = "Exceeded max turns";
    } else if (sessionDone) {
      failReason = "Process exited before a success marker was seen";
    }
  }

  // Clean exit: send Ctrl+C then wait briefly
  if (!sessionDone) {
    try {
      proc.stdin.write(new Uint8Array([3]));
      proc.stdin.flush();
    } catch {
      // The child may have closed its stdin already; nothing left to interrupt.
    }
    await Bun.sleep(2000);
  }
  proc.kill();
  await readerDone.catch(() => {});

  const duration = Math.round((Date.now() - startTime) / 1000);

  // Output result as JSON to stdout
  const result = {
    success,
    duration,
    turns: turnCount,
    failReason: failReason || undefined,
    transcript: redactSecrets(stripAnsi(transcript)).slice(-5000), // Last 5KB
  };

  process.stdout.write(JSON.stringify(result) + "\n");

  if (success) {
    process.stderr.write(
      `\n[harness] SUCCESS in ${duration}s (${turnCount} turns)\n`,
    );
  } else {
    process.stderr.write(
      `\n[harness] FAILED in ${duration}s: ${failReason || "unknown"}\n`,
    );
  }

  process.exit(success ? 0 : 1);
}
|
||||
|
||||
// Entry point. Any error escaping main() is reported on stderr, mirrored as a
// failure result on stdout, and turned into exit status 1.
main().catch((err) => {
  const fatalResult = {
    success: false,
    duration: 0,
    turns: 0,
    failReason: String(err),
  };
  process.stderr.write(`[harness] Fatal: ${err}\n`);
  process.stdout.write(`${JSON.stringify(fatalResult)}\n`);
  process.exit(1);
});
|
||||
113
sh/e2e/lib/interactive.sh
Normal file
113
sh/e2e/lib/interactive.sh
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
#!/bin/bash
|
||||
# e2e/lib/interactive.sh — AI-driven interactive provision & verification
|
||||
#
|
||||
# Instead of running spawn in headless mode (SPAWN_NON_INTERACTIVE=1), this
|
||||
# runs spawn interactively with an AI agent (Claude Haiku) responding to
|
||||
# prompts like a human user would. Tests the real user experience end-to-end.
|
||||
#
|
||||
# Requires: ANTHROPIC_API_KEY (for the AI driver), plus normal cloud creds.
|
||||
# Exit on any unhandled command failure (-e) and make a pipeline fail when
# any of its stages fails (pipefail). -u is not enabled here.
# NOTE(review): if this file is sourced rather than executed, these options
# also take effect in the calling shell — confirm that is intended.
set -eo pipefail
|
||||
|
||||
# ---------------------------------------------------------------------------
# interactive_provision AGENT APP_NAME LOG_DIR
#
# Runs spawn interactively with AI driving the prompts. On success, the
# instance is provisioned AND the agent is installed — equivalent to
# provision_agent + verify_agent in the headless flow.
#
# Globals:   ACTIVE_CLOUD (read), ANTHROPIC_API_KEY (read)
# Arguments: $1 - agent name handed to the harness
#            $2 - app name; passed to the harness as SPAWN_NAME_KEBAB
#            $3 - directory receiving <app>-interactive.json / .log
# Uses:      log_step, log_info, log_ok, log_warn, log_err,
#            cloud_provision_verify (defined elsewhere), bun, jq
#
# Returns 0 on success, 1 on failure.
# ---------------------------------------------------------------------------
interactive_provision() {
  local agent="${1:-}"
  local app_name="${2:-}"
  local log_dir="${3:-}"

  # Validate app_name (same rules as provision.sh). A case pattern checks the
  # whole string, so a value with an embedded newline cannot slip through the
  # way it can with a per-line grep match.
  case "${app_name}" in
    '' | *[!A-Za-z0-9._-]*)
      log_err "Invalid app_name: must be non-empty and contain only [A-Za-z0-9._-]"
      return 1
      ;;
  esac

  if [ -z "${agent}" ]; then
    log_err "Invalid agent: must be non-empty"
    return 1
  fi

  if [ -z "${log_dir}" ]; then
    log_err "Invalid log_dir: must be non-empty"
    return 1
  fi

  # Require AI driver key
  if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
    log_err "ANTHROPIC_API_KEY required for interactive mode"
    return 1
  fi

  # The harness needs a cloud name as its second argument.
  if [ -z "${ACTIVE_CLOUD:-}" ]; then
    log_err "ACTIVE_CLOUD must be set for interactive mode"
    return 1
  fi

  # Resolve harness script (one directory above this file)
  local harness_dir
  if ! harness_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"; then
    log_err "Cannot resolve the directory containing the interactive harness"
    return 1
  fi
  local harness_script="${harness_dir}/interactive-harness.ts"
  if [ ! -f "${harness_script}" ]; then
    log_err "Interactive harness not found: ${harness_script}"
    return 1
  fi

  if ! mkdir -p -- "${log_dir}"; then
    log_err "Cannot create log directory: ${log_dir}"
    return 1
  fi

  local result_file="${log_dir}/${app_name}-interactive.json"
  local log_file="${log_dir}/${app_name}-interactive.log"

  log_step "Interactive provision: ${agent} on ${ACTIVE_CLOUD}"
  log_info "AI driver: Claude Haiku via Anthropic API"

  local harness_start
  harness_start=$(date +%s)

  # Run the harness — it outputs JSON to stdout, logs to stderr.
  # The harness inherits the current env, which already has cloud creds
  # loaded by the cloud driver; only the spawn-specific name is added here,
  # as a single quoted word so it is never word-split.
  local harness_exit=0
  env "SPAWN_NAME_KEBAB=${app_name}" \
    bun run "${harness_script}" "${agent}" "${ACTIVE_CLOUD}" \
    > "${result_file}" 2> "${log_file}" || harness_exit=$?

  local harness_end
  harness_end=$(date +%s)
  local harness_duration=$((harness_end - harness_start))

  # No result at all: the harness died before printing its JSON line.
  if [ ! -s "${result_file}" ]; then
    log_err "Interactive harness produced no output (exit code: ${harness_exit})"
    _interactive_dump_log_tail "${log_file}" 20 "Harness stderr:"
    return 1
  fi

  # Parse result; a missing or unparsable field falls back to a failure value.
  local harness_success
  harness_success=$(jq -r '.success // false' "${result_file}" 2>/dev/null || printf 'false')
  local harness_turns
  harness_turns=$(jq -r '.turns // 0' "${result_file}" 2>/dev/null || printf '0')
  local harness_reason
  harness_reason=$(jq -r '.failReason // ""' "${result_file}" 2>/dev/null || printf '')

  if [ "${harness_success}" != "true" ]; then
    if [ -z "${harness_reason}" ]; then
      harness_reason="unknown (harness exit code: ${harness_exit})"
    fi
    log_err "Interactive provision failed (${harness_duration}s): ${harness_reason}"
    # Dump last 50 lines of harness log for debugging
    _interactive_dump_log_tail "${log_file}" 50 "Last 50 lines of harness log:"
    return 1
  fi

  log_ok "Interactive provision succeeded (${harness_duration}s, ${harness_turns} AI turns)"

  # Now verify the instance exists via cloud driver so teardown works.
  # A miss is only a warning: the provision itself already succeeded.
  if cloud_provision_verify "${app_name}" "${log_dir}"; then
    log_ok "Cloud driver confirmed instance exists"
  else
    log_warn "Instance not found via cloud driver — spawn may have used a different name"
  fi
  return 0
}

# ---------------------------------------------------------------------------
# _interactive_dump_log_tail FILE COUNT HEADING
#
# Prints HEADING via log_info followed by the last COUNT lines of FILE, each
# indented. Does nothing when FILE does not exist. Always returns 0.
# ---------------------------------------------------------------------------
_interactive_dump_log_tail() {
  local file="$1"
  local count="$2"
  local heading="$3"
  local line

  [ -f "${file}" ] || return 0

  log_info "${heading}"
  tail -n "${count}" "${file}" | while IFS= read -r line || [ -n "${line}" ]; do
    printf ' %s\n' "${line}"
  done
  return 0
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue