chore: cover code mode with end to end provider tests (#6183)

This commit is contained in:
Michael Neale 2025-12-19 15:02:06 +11:00 committed by GitHub
parent 879fdf7a82
commit d4814042e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 72 additions and 4 deletions

View file

@ -161,3 +161,40 @@ jobs:
SKIP_BUILD: 1
run: |
bash scripts/test_compaction.sh
# Smoke-test job: runs scripts/test_providers.sh with --code-exec against the
# binary fetched from the `goose-binary` artifact.
smoke-tests-code-exec:
  name: Smoke Tests (Code Execution)
  runs-on: ubuntu-latest
  # Presumably build-binary is the job that uploads `goose-binary` — confirm.
  needs: build-binary
  steps:
    - name: Checkout Code
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # pin@v4
      with:
        # Manually supplied branch input wins; otherwise the triggering ref.
        ref: ${{ github.event.inputs.branch || github.ref }}

    # NOTE(review): referenced by tag, unlike the SHA-pinned checkout above;
    # consider pinning to a commit for consistency.
    - name: Download Binary
      uses: actions/download-artifact@v4
      with:
        name: goose-binary
        path: target/release

    - name: Make Binary Executable
      run: chmod +x target/release/goose

    - name: Run Provider Tests (Code Execution Mode)
      env:
        # Provider credentials forwarded from repository secrets.
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
        OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
        TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }}
        # HOME is redirected to a scratch directory; the run script below
        # creates the goose config and session directories underneath it.
        HOME: /tmp/goose-home
        # Presumably disables the OS keyring and skips rebuilding the binary
        # (semantics are defined by goose / the test script) — confirm.
        GOOSE_DISABLE_KEYRING: 1
        SKIP_BUILD: 1
      run: |
        mkdir -p "$HOME/.local/share/goose/sessions"
        mkdir -p "$HOME/.config/goose"
        bash scripts/test_providers.sh --code-exec

View file

@ -1,4 +1,18 @@
#!/bin/bash
# Test providers with optional code_execution mode
# Usage:
#   ./test_providers.sh              # Normal mode (direct tool calls)
#   ./test_providers.sh --code-exec  # Code execution mode (JS batching)
#
# CODE_EXEC_MODE ends up "true" when --code-exec is passed, "false" otherwise.
CODE_EXEC_MODE=false
for arg in "$@"; do
    case "$arg" in
        --code-exec)
            CODE_EXEC_MODE=true
            ;;
        *)
            # Report typos such as --code_exec on stderr rather than silently
            # falling back to normal mode; the run itself still proceeds.
            echo "Warning: ignoring unrecognized argument: $arg" >&2
            ;;
    esac
done
# Load KEY=VALUE pairs from a .env file in the current directory, if present.
if [ -f .env ]; then
    # Drop lines that start with '#'; xargs joins the rest onto a single line.
    env_assignments=$(grep -v '^#' .env | xargs)
    # A bare `export` with no operands prints every exported variable, so an
    # empty or comment-only .env must not reach the export below.
    if [ -n "$env_assignments" ]; then
        # Deliberately unquoted: each whitespace-separated KEY=VALUE word is
        # passed to export as its own operand.
        # shellcheck disable=SC2086
        export $env_assignments
    fi
fi
@ -37,6 +51,23 @@ else
PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
fi
# Pick per-mode settings: the builtin list handed to goose, the extended regex
# that marks a successful tool call in its output, and the pass/fail wording.
case "$CODE_EXEC_MODE" in
    true)
        echo "Mode: code_execution (JS batching)"
        SUCCESS_MSG="code_execution tool called"
        FAILURE_MSG="no code_execution tools called"
        BUILTINS="developer,code_execution"
        # Either "execute_code | code_execution" or "read_module | code_execution"
        # counts; \| is a literal pipe, the bare | separates the alternatives.
        SUCCESS_PATTERN="(execute_code \| code_execution)|(read_module \| code_execution)"
        ;;
    *)
        echo "Mode: normal (direct tool calls)"
        SUCCESS_MSG="developer tool called"
        FAILURE_MSG="no developer tools called"
        BUILTINS="developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager"
        # Matches the literal text "shell | developer".
        SUCCESS_PATTERN="shell \| developer"
        ;;
esac
echo ""
RESULTS=()
for provider_config in "${PROVIDERS[@]}"; do
@ -52,13 +83,13 @@ for provider_config in "${PROVIDERS[@]}"; do
echo "Model: ${MODEL}"
echo ""
TMPFILE=$(mktemp)
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager 2>&1) | tee "$TMPFILE"
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin "$BUILTINS" 2>&1) | tee "$TMPFILE"
echo ""
if grep -q "shell | developer" "$TMPFILE"; then
echo "✓ SUCCESS: Test passed - developer tool called"
if grep -qE "$SUCCESS_PATTERN" "$TMPFILE"; then
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
RESULTS+=("${PROVIDER}: ${MODEL}")
else
echo "✗ FAILED: Test failed - no developer tools called"
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("${PROVIDER}: ${MODEL}")
fi
rm "$TMPFILE"