ci: enable agentic provider live tests (claude-code, codex, gemini-cli) (#7088)
Some checks failed
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Canary / Prepare Version (push) Waiting to run
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check OpenAPI Schema is Up-to-Date (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Deploy Documentation / deploy (push) Has been cancelled
Publish Ask AI Bot Docker Image / docker (push) Has been cancelled

Signed-off-by: Adrian Cole <adrian@tetrate.io>
This commit is contained in:
Adrian Cole 2026-02-10 11:01:28 +08:00 committed by GitHub
parent 4572d42dfe
commit 3a304c6af3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 4 deletions

View file

@ -95,11 +95,21 @@ jobs:
- name: Make Binary Executable
run: chmod +x target/debug/goose
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '22'
- name: Install agentic providers
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
- name: Run Smoke Tests with Provider Script
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
@ -171,11 +181,21 @@ jobs:
- name: Make Binary Executable
run: chmod +x target/debug/goose
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '22'
- name: Install agentic providers
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
- name: Run Provider Tests (Code Execution Mode)
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

View file

@ -25,6 +25,10 @@ ALLOWED_FAILURES=(
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
)
# Providers listed here run tools internally and hand back plain text, so
# their output never contains the usual tool-call log patterns
# (e.g. "shell | developer").
AGENTIC_PROVIDERS=(
  "claude-code"
  "codex"
  "gemini-cli"
  "cursor-agent"
)
# Export KEY=VALUE pairs from a dotenv-style file into the environment.
#
# Each line is read verbatim and exported as one assignment, so values that
# contain spaces or glob characters are preserved. The previous
# `export $(grep -v '^#' .env | xargs)` form relied on unquoted word
# splitting, which truncated such values and glob-expanded them.
#
# Arguments: $1 - path of the file to load (defaults to .env)
# Returns:   0 when the file is missing; otherwise the status of the last
#            command run while processing the file.
load_dotenv() {
  local env_file="${1:-.env}"
  local line key value

  [ -f "$env_file" ] || return 0

  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Drop leading whitespace.
    line="${line#"${line%%[![:space:]]*}"}"
    case "$line" in
      '' | '#'*) continue ;;                  # blank line or comment
      'export '*) line="${line#export }" ;;   # tolerate "export KEY=VALUE"
    esac
    case "$line" in
      *=*) ;;
      *) continue ;;                          # not an assignment
    esac
    key="${line%%=*}"
    value="${line#*=}"
    # Strip one pair of matching surrounding quotes (xargs used to do this).
    case "$value" in
      \"*\")
        value="${value#\"}"
        value="${value%\"}"
        ;;
      \'*\')
        value="${value#\'}"
        value="${value%\'}"
        ;;
    esac
    export "$key=$value"
  done < "$env_file"
}
load_dotenv .env
@ -40,6 +44,13 @@ fi
SCRIPT_DIR=$(pwd)
# Fixture with a known marker string, written below the current directory.
# It must not live in /tmp because some agents cannot work outside the PWD.
TEST_CONTENT="test-content-abc123"
TEST_FILE="./target/test-content.txt"
mkdir -p "${TEST_FILE%/*}"
printf '%s\n' "$TEST_CONTENT" > "$TEST_FILE"
# Format: "provider -> model1|model2|model3"
# Base providers that are always tested (with appropriate env vars)
PROVIDERS=(
@ -224,6 +235,16 @@ should_skip_provider() {
return 1
}
# Report whether a provider is one of the agentic providers.
# Globals:   AGENTIC_PROVIDERS (read)
# Arguments: $1 - provider name to look up
# Returns:   0 if $1 exactly matches an entry of AGENTIC_PROVIDERS, 1 otherwise
is_agentic_provider() {
  local provider="$1"
  # Keep the loop variable local so calling this helper never overwrites a
  # global named "agentic" elsewhere in the script.
  local agentic
  for agentic in "${AGENTIC_PROVIDERS[@]}"; do
    if [ "$agentic" = "$provider" ]; then
      return 0
    fi
  done
  return 1
}
# Create temp directory for results and remove it when the script exits.
RESULTS_DIR=$(mktemp -d)
# Delete the results directory. The path is expanded when the handler runs
# and is quoted, so a temp path containing spaces or glob characters cannot
# make `rm -rf` act on unintended paths. The previous form,
# trap "rm -rf $RESULTS_DIR", spliced the path unquoted into the command.
cleanup_results_dir() {
  if [ -n "${RESULTS_DIR:-}" ]; then
    rm -rf -- "$RESULTS_DIR"
  fi
}
trap cleanup_results_dir EXIT
@ -241,17 +262,34 @@ run_test() {
local output_file="$4"
local testdir=$(mktemp -d)
echo "hello" > "$testdir/hello.txt"
# Agentic providers use a file-read prompt with known content marker;
# regular providers use the shell prompt that produces tool-call logs.
local prompt
if is_agentic_provider "$provider"; then
cp "$TEST_FILE" "$testdir/test-content.txt"
prompt="read ./test-content.txt and output its contents exactly"
else
echo "hello" > "$testdir/hello.txt"
prompt="Immediately use the shell tool to run 'ls'. Do not ask for confirmation."
fi
# Run the test and capture output
(
export GOOSE_PROVIDER="$provider"
export GOOSE_MODEL="$model"
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1
) > "$output_file" 2>&1
# Check result
if grep -qE "$SUCCESS_PATTERN" "$output_file"; then
# Check result: agentic providers return text containing the test content
# instead of producing tool-call log patterns
if is_agentic_provider "$provider"; then
if grep -qi "$TEST_CONTENT" "$output_file"; then
echo "success" > "$result_file"
else
echo "failure" > "$result_file"
fi
elif grep -qE "$SUCCESS_PATTERN" "$output_file"; then
echo "success" > "$result_file"
else
echo "failure" > "$result_file"
@ -273,6 +311,12 @@ for provider_config in "${PROVIDERS[@]}"; do
continue
fi
# Agentic providers don't use goose's code_execution system
if [ "$CODE_EXEC_MODE" = true ] && is_agentic_provider "$PROVIDER"; then
echo "⊘ Skipping agentic provider in code_exec mode: ${PROVIDER}"
continue
fi
IFS='|' read -ra MODELS <<< "$MODELS_STR"
for MODEL in "${MODELS[@]}"; do
JOBS+=("$PROVIDER|$MODEL|$job_index")