fix(e2e): fix input test prompt delivery and agent flags (#2536)

Three root-cause bugs in input test functions:

1. Stdin pass-through broken: cloud_exec uses "printf '...' | base64 -d | bash" on the remote, meaning bash reads the script from its own stdin — not the outer process's stdin. "PROMPT=$(base64 -d)" inside the script therefore read from the already-consumed pipe, always producing an empty prompt. Fix: embed the base64-encoded prompt directly in the remote command string. Base64 output is [A-Za-z0-9+/=] only — safe to embed in single-quoted strings.

2. Zeroclaw flag wrong: "zeroclaw agent -p" passed the prompt as --provider (not --prompt). The correct flag for non-interactive single-message mode is "-m"/"--message".

3. Codex model stale: "openai/gpt-5-codex" does not exist on OpenRouter. Updated to "openai/gpt-5.1-codex", which is available.

Co-authored-by: spawn-qa-bot <qa@openrouter.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 09:31:15 +00:00 · 2026-03-12 10:50:06 -07:00 · 2026-03-12 10:50:06 -07:00 · 91b66f4b40
commit 91b66f4b40
parent dfd08ad48c
2 changed files with 24 additions and 13 deletions
--- a/packages/cli/src/shared/agent-setup.ts
+++ b/packages/cli/src/shared/agent-setup.ts
@ -275,7 +275,7 @@ export async function offerGithubAuth(runner: CloudRunner): Promise<void> {

 async function setupCodexConfig(runner: CloudRunner, _apiKey: string): Promise<void> {
  logStep("Configuring Codex CLI for OpenRouter...");
-  const config = `model = "openai/gpt-5-codex"
+  const config = `model = "openai/gpt-5.1-codex"
 model_provider = "openrouter"

 [model_providers.openrouter]
--- a/sh/e2e/lib/verify.sh
+++ b/sh/e2e/lib/verify.sh
@ -24,16 +24,22 @@ input_test_claude() {
  local app="$1"

  log_step "Running input test for claude..."
-  # Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
-  # -w 0 is GNU coreutils (Linux); falls back to plain base64 (macOS/BSD).
+  # Base64-encode the prompt and embed it directly in the remote command.
+  # Base64 output is [A-Za-z0-9+/=] only — safe to embed in single quotes.
+  # We cannot pipe the prompt via stdin because cloud_exec uses
+  # "printf '...' | base64 -d | bash", which means bash's stdin is the
+  # decoded script — not the outer process stdin. Embedding the prompt
+  # in the command avoids this stdin pass-through limitation.
  local encoded_prompt
  encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')

  local output
-  output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
+  # claude -p (--print) reads the prompt from stdin.
+  output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
    rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
-    PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} claude -p \"\$PROMPT\"" 2>&1) || true
+    PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
+    printf '%s' \"\$PROMPT\" | timeout ${INPUT_TEST_TIMEOUT} claude -p" 2>&1) || true

  if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
    log_ok "claude input test — marker found in response"
@ -50,15 +56,16 @@ input_test_codex() {
  local app="$1"

  log_step "Running input test for codex..."
-  # Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
+  # Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
  local encoded_prompt
  encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')

  local output
-  output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
+  output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
    rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
-    PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} codex exec \"\$PROMPT\"" 2>&1) || true
+    PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
+    timeout ${INPUT_TEST_TIMEOUT} codex exec --full-auto \"\$PROMPT\"" 2>&1) || true

  if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
    log_ok "codex input test — marker found in response"
@ -142,10 +149,12 @@ input_test_openclaw() {
    fi

    local output
-    output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
+    # Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
+    output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
      export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
      rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
-      PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true
+      PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
+      timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true

    if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
      log_ok "openclaw input test — marker found in response"
@ -170,14 +179,16 @@ input_test_zeroclaw() {
  local app="$1"

  log_step "Running input test for zeroclaw..."
-  # Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
+  # Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
+  # Use -m/--message for non-interactive single-message mode (not -p which is --provider).
  local encoded_prompt
  encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')

  local output
-  output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
+  output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
    rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
-    PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -p \"\$PROMPT\"" 2>&1) || true
+    PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
+    timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -m \"\$PROMPT\"" 2>&1) || true

  if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
    log_ok "zeroclaw input test — marker found in response"