mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-28 03:49:31 +00:00
fix(e2e): fix input test prompt delivery and agent flags (#2536)
Three root-cause bugs in input test functions:

1. Stdin pass-through broken: cloud_exec uses "printf '...' | base64 -d | bash" on the remote, meaning bash reads the script from its own stdin — not the outer process's stdin. "PROMPT=$(base64 -d)" inside the script was reading from the already-consumed pipe, always producing an empty prompt. Fix: embed the base64-encoded prompt directly in the remote command string. Base64 output is [A-Za-z0-9+/=] only — safe to embed in single-quoted strings.

2. Zeroclaw flag wrong: "zeroclaw agent -p" was passing the prompt as --provider (not --prompt). The correct flag for non-interactive single-message mode is "-m"/"--message".

3. Codex model stale: "openai/gpt-5-codex" does not exist on OpenRouter. Updated to "openai/gpt-5.1-codex" which is available.

Co-authored-by: spawn-qa-bot <qa@openrouter.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
dfd08ad48c
commit
91b66f4b40
2 changed files with 24 additions and 13 deletions
|
|
@ -275,7 +275,7 @@ export async function offerGithubAuth(runner: CloudRunner): Promise<void> {
|
|||
|
||||
async function setupCodexConfig(runner: CloudRunner, _apiKey: string): Promise<void> {
|
||||
logStep("Configuring Codex CLI for OpenRouter...");
|
||||
const config = `model = "openai/gpt-5-codex"
|
||||
const config = `model = "openai/gpt-5.1-codex"
|
||||
model_provider = "openrouter"
|
||||
|
||||
[model_providers.openrouter]
|
||||
|
|
|
|||
|
|
@ -24,16 +24,22 @@ input_test_claude() {
|
|||
local app="$1"
|
||||
|
||||
log_step "Running input test for claude..."
|
||||
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
|
||||
# -w 0 is GNU coreutils (Linux); falls back to plain base64 (macOS/BSD).
|
||||
# Base64-encode the prompt and embed it directly in the remote command.
|
||||
# Base64 output is [A-Za-z0-9+/=] only — safe to embed in single quotes.
|
||||
# We cannot pipe the prompt via stdin because cloud_exec uses
|
||||
# "printf '...' | base64 -d | bash", which means bash's stdin is the
|
||||
# decoded script — not the outer process stdin. Embedding the prompt
|
||||
# in the command avoids this stdin pass-through limitation.
|
||||
local encoded_prompt
|
||||
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
|
||||
|
||||
local output
|
||||
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
# claude -p (--print) reads the prompt from stdin.
|
||||
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
export PATH=\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
|
||||
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
|
||||
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} claude -p \"\$PROMPT\"" 2>&1) || true
|
||||
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
|
||||
printf '%s' \"\$PROMPT\" | timeout ${INPUT_TEST_TIMEOUT} claude -p" 2>&1) || true
|
||||
|
||||
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
|
||||
log_ok "claude input test — marker found in response"
|
||||
|
|
@ -50,15 +56,16 @@ input_test_codex() {
|
|||
local app="$1"
|
||||
|
||||
log_step "Running input test for codex..."
|
||||
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
|
||||
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
|
||||
local encoded_prompt
|
||||
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
|
||||
|
||||
local output
|
||||
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
export PATH=\$HOME/.npm-global/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH; \
|
||||
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
|
||||
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} codex exec \"\$PROMPT\"" 2>&1) || true
|
||||
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
|
||||
timeout ${INPUT_TEST_TIMEOUT} codex exec --full-auto \"\$PROMPT\"" 2>&1) || true
|
||||
|
||||
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
|
||||
log_ok "codex input test — marker found in response"
|
||||
|
|
@ -142,10 +149,12 @@ input_test_openclaw() {
|
|||
fi
|
||||
|
||||
local output
|
||||
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
|
||||
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
|
||||
export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
|
||||
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
|
||||
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true
|
||||
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
|
||||
timeout ${INPUT_TEST_TIMEOUT} openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60" 2>&1) || true
|
||||
|
||||
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
|
||||
log_ok "openclaw input test — marker found in response"
|
||||
|
|
@ -170,14 +179,16 @@ input_test_zeroclaw() {
|
|||
local app="$1"
|
||||
|
||||
log_step "Running input test for zeroclaw..."
|
||||
# Base64-encode prompt, then pipe via stdin to avoid interpolating into the command string.
|
||||
# Embed the prompt in the command (see input_test_claude comment for why stdin won't work).
|
||||
# Use -m/--message for non-interactive single-message mode (not -p which is --provider).
|
||||
local encoded_prompt
|
||||
encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64 | tr -d '\n')
|
||||
|
||||
local output
|
||||
output=$(printf '%s' "${encoded_prompt}" | cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
|
||||
output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.cargo/env 2>/dev/null; \
|
||||
rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
|
||||
PROMPT=\$(base64 -d); timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -p \"\$PROMPT\"" 2>&1) || true
|
||||
PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); \
|
||||
timeout ${INPUT_TEST_TIMEOUT} zeroclaw agent -m \"\$PROMPT\"" 2>&1) || true
|
||||
|
||||
if printf '%s' "${output}" | grep -qx "${INPUT_TEST_MARKER}"; then
|
||||
log_ok "zeroclaw input test — marker found in response"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue