mirror of
https://github.com/block/goose.git
synced 2026-04-28 03:29:36 +00:00
ci: enable agentic provider live tests (claude-code, codex, gemini-cli) (#7088)
Some checks failed
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Canary / Prepare Version (push) Waiting to run
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check OpenAPI Schema is Up-to-Date (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Deploy Documentation / deploy (push) Has been cancelled
Publish Ask AI Bot Docker Image / docker (push) Has been cancelled
Some checks failed
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Canary / Prepare Version (push) Waiting to run
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check OpenAPI Schema is Up-to-Date (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Deploy Documentation / deploy (push) Has been cancelled
Publish Ask AI Bot Docker Image / docker (push) Has been cancelled
Signed-off-by: Adrian Cole <adrian@tetrate.io>
This commit is contained in:
parent
4572d42dfe
commit
3a304c6af3
2 changed files with 68 additions and 4 deletions
20
.github/workflows/pr-smoke-test.yml
vendored
20
.github/workflows/pr-smoke-test.yml
vendored
|
|
@ -95,11 +95,21 @@ jobs:
|
|||
- name: Make Binary Executable
|
||||
run: chmod +x target/debug/goose
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Install agentic providers
|
||||
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
|
||||
|
||||
- name: Run Smoke Tests with Provider Script
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
|
||||
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
|
|
@ -171,11 +181,21 @@ jobs:
|
|||
- name: Make Binary Executable
|
||||
run: chmod +x target/debug/goose
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Install agentic providers
|
||||
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
|
||||
|
||||
- name: Run Provider Tests (Code Execution Mode)
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
|
||||
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ ALLOWED_FAILURES=(
|
|||
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
|
||||
)
|
||||
|
||||
# Agentic providers handle tools internally and return text results.
|
||||
# They can't produce the normal tool-call log patterns (e.g. "shell | developer").
|
||||
AGENTIC_PROVIDERS=("claude-code" "codex" "gemini-cli" "cursor-agent")
|
||||
|
||||
if [ -f .env ]; then
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
fi
|
||||
|
|
@ -40,6 +44,13 @@ fi
|
|||
|
||||
SCRIPT_DIR=$(pwd)
|
||||
|
||||
# Create a test file with known content in the current directory
|
||||
# It is kept under ./target rather than /tmp because some agents cannot work outside the PWD
|
||||
mkdir -p target
|
||||
TEST_CONTENT="test-content-abc123"
|
||||
TEST_FILE="./target/test-content.txt"
|
||||
echo "$TEST_CONTENT" > "$TEST_FILE"
|
||||
|
||||
# Format: "provider -> model1|model2|model3"
|
||||
# Base providers that are always tested (with appropriate env vars)
|
||||
PROVIDERS=(
|
||||
|
|
@ -224,6 +235,16 @@ should_skip_provider() {
|
|||
return 1
|
||||
}
|
||||
|
||||
# Report whether the given provider name is listed in AGENTIC_PROVIDERS.
# Globals:   AGENTIC_PROVIDERS (read)
# Arguments: $1 - provider name to look up
# Returns:   0 if $1 exactly equals an entry of AGENTIC_PROVIDERS, 1 otherwise
is_agentic_provider() {
  local provider="$1"
  # Declare the loop variable local so a call never creates or overwrites a
  # global variable named "agentic" in the calling script.
  local agentic
  for agentic in "${AGENTIC_PROVIDERS[@]}"; do
    if [ "$agentic" = "$provider" ]; then
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
# Create temp directory for results. Stop immediately if it cannot be created,
# so later steps never build result paths from an empty RESULTS_DIR.
RESULTS_DIR=$(mktemp -d) || {
  echo "Failed to create temporary results directory" >&2
  exit 1
}
# The trap body is single-quoted so the path is expanded (and quoted) when the
# trap fires; cleanup then works even if the path contains spaces or glob
# characters. "--" stops rm from treating the path as an option.
trap 'rm -rf -- "$RESULTS_DIR"' EXIT
|
||||
|
|
@ -241,17 +262,34 @@ run_test() {
|
|||
local output_file="$4"
|
||||
|
||||
local testdir=$(mktemp -d)
|
||||
echo "hello" > "$testdir/hello.txt"
|
||||
|
||||
# Agentic providers use a file-read prompt with known content marker;
|
||||
# regular providers use the shell prompt that produces tool-call logs.
|
||||
local prompt
|
||||
if is_agentic_provider "$provider"; then
|
||||
cp "$TEST_FILE" "$testdir/test-content.txt"
|
||||
prompt="read ./test-content.txt and output its contents exactly"
|
||||
else
|
||||
echo "hello" > "$testdir/hello.txt"
|
||||
prompt="Immediately use the shell tool to run 'ls'. Do not ask for confirmation."
|
||||
fi
|
||||
|
||||
# Run the test and capture output
|
||||
(
|
||||
export GOOSE_PROVIDER="$provider"
|
||||
export GOOSE_MODEL="$model"
|
||||
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1
|
||||
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1
|
||||
) > "$output_file" 2>&1
|
||||
|
||||
# Check result
|
||||
if grep -qE "$SUCCESS_PATTERN" "$output_file"; then
|
||||
# Check result: agentic providers return text containing the test content
|
||||
# instead of producing tool-call log patterns
|
||||
if is_agentic_provider "$provider"; then
|
||||
if grep -qi "$TEST_CONTENT" "$output_file"; then
|
||||
echo "success" > "$result_file"
|
||||
else
|
||||
echo "failure" > "$result_file"
|
||||
fi
|
||||
elif grep -qE "$SUCCESS_PATTERN" "$output_file"; then
|
||||
echo "success" > "$result_file"
|
||||
else
|
||||
echo "failure" > "$result_file"
|
||||
|
|
@ -273,6 +311,12 @@ for provider_config in "${PROVIDERS[@]}"; do
|
|||
continue
|
||||
fi
|
||||
|
||||
# Agentic providers don't use goose's code_execution system
|
||||
if [ "$CODE_EXEC_MODE" = true ] && is_agentic_provider "$PROVIDER"; then
|
||||
echo "⊘ Skipping agentic provider in code_exec mode: ${PROVIDER}"
|
||||
continue
|
||||
fi
|
||||
|
||||
IFS='|' read -ra MODELS <<< "$MODELS_STR"
|
||||
for MODEL in "${MODELS[@]}"; do
|
||||
JOBS+=("$PROVIDER|$MODEL|$job_index")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue