fix(e2e): fix input test prompt delivery and agent flags (#2536)

Three root-cause bugs in input test functions:

1. Stdin pass-through broken: cloud_exec runs "printf '...' | base64 -d | bash"
   on the remote, so the remote bash's stdin is the pipe carrying the decoded
   script — not the caller's stdin. "PROMPT=$(base64 -d)" inside the script
   was therefore reading from that already-consumed pipe instead of the piped
   prompt, always producing an empty prompt.
   Fix: embed the base64-encoded prompt directly in the remote command string.
   Base64 output is [A-Za-z0-9+/=] only — safe to embed in single-quoted strings.

2. Zeroclaw flag wrong: in "zeroclaw agent -p", -p is the short form of
   --provider (not --prompt), so the prompt was being passed as the provider.
   The correct flag for non-interactive single-message mode is "-m"/"--message".

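3. Codex model stale: "openai/gpt-5-codex" does not exist on OpenRouter.
   Updated to "openai/gpt-5.1-codex" which is available.

Also: "codex exec" now runs with --full-auto, and the claude test pipes the
decoded prompt to "claude -p" on stdin instead of passing it as an argument.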

Co-authored-by: spawn-qa-bot <qa@openrouter.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
A 2026-03-12 10:50:06 -07:00 committed by GitHub
parent dfd08ad48c
commit 91b66f4b40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 24 additions and 13 deletions

View file

@ -275,7 +275,7 @@ export async function offerGithubAuth(runner: CloudRunner): Promise<void> {
async function setupCodexConfig(runner: CloudRunner, _apiKey: string): Promise<void> {
logStep("Configuring Codex CLI for OpenRouter...");
const config = `model = "openai/gpt-5-codex"
const config = `model = "openai/gpt-5.1-codex"
model_provider = "openrouter"
[model_providers.openrouter]

View file

@ -24,16 +24,22 @@ input_test_claude() {
local app="$1"
log_step "Running input test for claude..."
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
# -w 0 is GNU coreutils (Linux); falls back to plain base64 (macOS/BSD).
# Base64-encode the prompt and embed it directly in the remote command.
# Base64 output is [A-Za-z0-9+/=] only — safe to embed in single quotes.
# We cannot pipe the prompt via stdin because cloud_exec uses
# "printf '...' | base64 -d | bash", which means bash's stdin is the
# decoded script — not the outer process stdin. Embedding the prompt
# in the command avoids this stdin pass-through limitation.
local encoded_prompt
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
local output
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
# claude -p (--print) reads the prompt from stdin.
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
export PATH=\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} claude -p \"\$PROMPT\"" 2>&1) || true
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
printf '%s' \"\$PROMPT\" | timeout ${INPUT_TEST_TIMEOUT} claude -p" 2>&1) || true
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
log_ok "claude input test — marker found in response"
@ -50,15 +56,16 @@ input_test_codex() {
local app="$1"
log_step "Running input test for codex..."
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
local encoded_prompt
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
local output
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
export PATH=\$HOME/.npm-global/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} codex exec \"\$PROMPT\"" 2>&1) || true
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
timeout ${INPUT_TEST_TIMEOUT} codex exec --full-auto \"\$PROMPT\"" 2>&1) || true
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
log_ok "codex input test — marker found in response"
@ -142,10 +149,12 @@ input_test_openclaw() {
fi
local output
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
log_ok "openclaw input test — marker found in response"
@ -170,14 +179,16 @@ input_test_zeroclaw() {
local app="$1"
log_step "Running input test for zeroclaw..."
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
# Use -m/--message for non-interactive single-message mode (not -p which is --provider).
local encoded_prompt
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
local output
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -p \"\$PROMPT\"" 2>&1) || true
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -m \"\$PROMPT\"" 2>&1) || true
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
log_ok "zeroclaw input test — marker found in response"