mirror of
https://github.com/block/goose.git
synced 2026-04-28 03:29:36 +00:00
chore: cover code mode with end to end provider tests (#6183)
This commit is contained in:
parent
879fdf7a82
commit
d4814042e6
2 changed files with 72 additions and 4 deletions
37
.github/workflows/pr-smoke-test.yml
vendored
37
.github/workflows/pr-smoke-test.yml
vendored
|
|
@ -161,3 +161,40 @@ jobs:
|
|||
SKIP_BUILD: 1
|
||||
run: |
|
||||
bash scripts/test_compaction.sh
|
||||
|
||||
# Smoke test for code-execution mode: downloads the `goose-binary` artifact
# into target/release and runs `scripts/test_providers.sh --code-exec`.
# Waits for the `build-binary` job (presumably the producer of that artifact —
# confirm against the rest of this workflow).
smoke-tests-code-exec:
  name: Smoke Tests (Code Execution)
  runs-on: ubuntu-latest
  needs: build-binary
  steps:
    - name: Checkout Code
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v4
      with:
        # Manually supplied branch input when present, otherwise the triggering ref.
        ref: ${{ github.event.inputs.branch || github.ref }}

    - name: Download Binary
      # NOTE(review): referenced by tag (@v4) while the checkout action above is
      # pinned to a commit SHA — consider pinning this one the same way.
      uses: actions/download-artifact@v4
      with:
        name: goose-binary
        path: target/release

    - name: Make Binary Executable
      # The downloaded file needs its execute bit set before the script can run it.
      run: chmod +x target/release/goose

    - name: Run Provider Tests (Code Execution Mode)
      env:
        # Provider credentials, taken from repository secrets.
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
        OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
        TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }}
        # HOME is redirected so the directories created below live under /tmp/goose-home.
        HOME: /tmp/goose-home
        # Presumably stops goose from using the system keyring on the runner — TODO confirm.
        GOOSE_DISABLE_KEYRING: 1
        # Presumably tells the test script to use the downloaded binary instead of
        # building one — TODO confirm against scripts/test_providers.sh.
        SKIP_BUILD: 1
      run: |
        mkdir -p "$HOME/.local/share/goose/sessions" "$HOME/.config/goose"
        bash scripts/test_providers.sh --code-exec
|
||||
|
|
|
|||
|
|
@ -1,4 +1,18 @@
|
|||
#!/bin/bash
|
||||
# Test providers with optional code_execution mode
|
||||
# Usage:
|
||||
# ./test_providers.sh # Normal mode (direct tool calls)
|
||||
# ./test_providers.sh --code-exec # Code execution mode (JS batching)
|
||||
|
||||
#######################################
# Parse the script's command-line flags.
# Globals:   CODE_EXEC_MODE (written) - "true" when --code-exec was passed,
#            otherwise "false"
# Arguments: the script's positional parameters
# Returns:   0 always; unrecognized arguments are ignored
#######################################
parse_mode_flags() {
  local arg
  CODE_EXEC_MODE=false
  for arg in "$@"; do
    case "$arg" in
      --code-exec)
        CODE_EXEC_MODE=true
        ;;
      *)
        # Anything else is deliberately ignored so existing invocations keep working.
        ;;
    esac
  done
}

parse_mode_flags "$@"
|
||||
|
||||
#######################################
# Export KEY=VALUE pairs from a dotenv-style file into the environment.
#
# Each line is handled on its own, so values are never word-split or
# glob-expanded, and a file with no assignments exports nothing (a bare
# `export` would instead print every exported variable, secrets included).
#
# Arguments: $1 - path to the file (default: .env); a missing file is a no-op
# Outputs:   a warning on stderr for lines whose name is not a valid identifier
#            (only the name is printed, never the value)
# Returns:   0 always
#######################################
load_env_file() {
  local env_file=${1:-.env} line name value
  [ -f "$env_file" ] || return 0

  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    line=${line#"${line%%[![:space:]]*}"}  # strip leading whitespace
    line=${line%"${line##*[![:space:]]}"}  # strip trailing whitespace (incl. CR)

    [ -n "$line" ] || continue             # blank line
    [[ $line == '#'* ]] && continue        # comment line
    [[ $line == *=* ]] || continue         # not an assignment; skipped

    name=${line%%=*}
    value=${line#*=}

    if [[ ! $name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      printf 'warning: %s: skipping invalid variable name: %s\n' \
        "$env_file" "$name" >&2
      continue
    fi

    # Drop one pair of matching surrounding quotes: FOO="bar baz" -> bar baz
    if [[ ${#value} -ge 2 && ( $value == \"*\" || $value == \'*\' ) ]]; then
      value=${value:1:${#value}-2}
    fi

    export "$name=$value"
  done < "$env_file"
  return 0
}

load_env_file .env
|
||||
|
|
@ -37,6 +51,23 @@ else
|
|||
PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
|
||||
fi
|
||||
|
||||
# Pick the builtin extensions, the grep -E pattern that marks a successful
# run, and the pass/fail messages for the selected mode.
case "$CODE_EXEC_MODE" in
  true)
    echo "Mode: code_execution (JS batching)"
    BUILTINS="developer,code_execution"
    # Either "execute_code | code_execution" or "read_module | code_execution"
    # in the output counts as success; \| is a literal pipe under grep -E.
    SUCCESS_PATTERN="(execute_code \| code_execution)|(read_module \| code_execution)"
    SUCCESS_MSG="code_execution tool called"
    FAILURE_MSG="no code_execution tools called"
    ;;
  *)
    echo "Mode: normal (direct tool calls)"
    BUILTINS="developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager"
    SUCCESS_PATTERN="shell \| developer"
    SUCCESS_MSG="developer tool called"
    FAILURE_MSG="no developer tools called"
    ;;
esac
echo ""

# One "✓ provider: model" / "✗ provider: model" entry is appended per test run.
RESULTS=()
|
||||
|
||||
for provider_config in "${PROVIDERS[@]}"; do
|
||||
|
|
@ -52,13 +83,13 @@ for provider_config in "${PROVIDERS[@]}"; do
|
|||
echo "Model: ${MODEL}"
|
||||
echo ""
|
||||
TMPFILE=$(mktemp)
|
||||
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager 2>&1) | tee "$TMPFILE"
|
||||
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin "$BUILTINS" 2>&1) | tee "$TMPFILE"
|
||||
echo ""
|
||||
if grep -q "shell | developer" "$TMPFILE"; then
|
||||
echo "✓ SUCCESS: Test passed - developer tool called"
|
||||
if grep -qE "$SUCCESS_PATTERN" "$TMPFILE"; then
|
||||
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
|
||||
RESULTS+=("✓ ${PROVIDER}: ${MODEL}")
|
||||
else
|
||||
echo "✗ FAILED: Test failed - no developer tools called"
|
||||
echo "✗ FAILED: Test failed - $FAILURE_MSG"
|
||||
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
|
||||
fi
|
||||
rm "$TMPFILE"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue