goose/evals/open-model-gym/config.yaml
Michael Neale 629108d0fc
fix: detect low balance and prompt for top up (#7166)
Signed-off-by: raj-subhankar <subhankar.rj@gmail.com>
Co-authored-by: Douwe Osinga <douwe@squareup.com>
Co-authored-by: raj-subhankar <subhankar.rj@gmail.com>
2026-02-19 02:20:16 +00:00

85 lines
2.3 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# =============================================================================
# Models - the LLMs to test
# =============================================================================
# Each entry has a `name`, the `provider` serving it, and the provider's
# `model` identifier.
models:
  - name: opus
    provider: anthropic
    model: claude-opus-4-5-20251101

  - name: glm-4.7-flash
    provider: ollama
    model: glm-4.7-flash:latest

  # too slow on 64g:
  # - name: frob/qwen3-coder-next:latest
  #   provider: ollama
  #   model: frob/qwen3-coder-next:latest

  - name: kimi-k2.5
    provider: ollama
    model: kimi-k2.5:cloud

  - name: gpt-oss-120b
    provider: ollama
    model: gpt-oss:120b-cloud

  - name: gpt-oss-20b
    provider: ollama
    model: gpt-oss:20b

  # NOTE(review): unlike the other entries, this name carries the ":latest"
  # tag - confirm that is intentional before relying on it elsewhere.
  - name: qwen3-coder:latest
    provider: ollama
    model: qwen3-coder:latest

  # good but too slow on 64G
  # - name: nemotron-3-nano
  #   provider: ollama
  #   model: nemotron-3-nano:latest

# =============================================================================
# Runners - agent frameworks with their specific configurations
# =============================================================================
# Each runner has its own binary, extensions/config, and isolated config
# directory. `stdio` lists the commands for stdio servers attached to the
# runner (here, the MCP harness).
runners:
  # - name: goose
  #   type: goose
  #   bin: goose
  #   extensions: [developer]
  #   stdio:
  #     - node mcp-harness/dist/index.js

  - name: goose-full
    type: goose
    bin: goose
    extensions: [developer, todo, skills, code_execution, extensionmanager]
    stdio:
      - node mcp-harness/dist/index.js

  - name: opencode
    type: opencode
    bin: opencode
    stdio:
      - node mcp-harness/dist/index.js

  - name: pi
    type: pi
    bin: pi
    # Pi takes provider/model from the test matrix, not config
    # MCP support via pi-mcp-adapter: `pi install npm:pi-mcp-adapter`
    stdio:
      - node mcp-harness/dist/index.js

# =============================================================================
# Test Matrix
# =============================================================================
# scenarios × models × runners
# - Omit 'models' to run against ALL models
# - Omit 'runners' to run against ALL runners
matrix:
  # Single-turn scenarios: all models × all runners
  - scenario: everyday-app-automation
  - scenario: file-editing

  # Multi-turn: restricted to goose-full (opencode doesn't support session
  # continuation).
  # NOTE(review): this comment previously read "goose and pi only", but `pi`
  # is not in the list below - add it to `runners` if it should run here.
  - scenario: multi-turn-edit
    runners: [goose-full]