From c6755d325978f16e28fd8184d5c23d229354d344 Mon Sep 17 00:00:00 2001 From: Jack Amadeo Date: Fri, 24 Apr 2026 13:31:27 -0400 Subject: [PATCH] Port provider tests to typescript (#8237) Signed-off-by: Douwe Osinga Co-authored-by: Douwe Osinga --- .github/workflows/pr-smoke-test.yml | 22 +- RELEASE_CHECKLIST.md | 2 +- scripts/test_providers.sh | 71 ---- scripts/test_providers_code_exec.sh | 45 -- scripts/test_providers_lib.sh | 240 ----------- ui/desktop/package.json | 3 + .../tests/integration/test_providers.test.ts | 86 ++++ .../test_providers_code_exec.test.ts | 50 +++ .../tests/integration/test_providers_lib.ts | 389 ++++++++++++++++++ ui/desktop/vitest.config.ts | 10 +- 10 files changed, 549 insertions(+), 369 deletions(-) delete mode 100755 scripts/test_providers.sh delete mode 100755 scripts/test_providers_code_exec.sh delete mode 100755 scripts/test_providers_lib.sh create mode 100644 ui/desktop/tests/integration/test_providers.test.ts create mode 100644 ui/desktop/tests/integration/test_providers_code_exec.test.ts create mode 100644 ui/desktop/tests/integration/test_providers_lib.ts diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index 3f7e836d43..f49d983dae 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -110,7 +110,11 @@ jobs: - name: Install agentic providers run: npm install -g @anthropic-ai/claude-code @zed-industries/claude-agent-acp @zed-industries/codex-acp - - name: Run Smoke Tests with Provider Script + - name: Install Node.js Dependencies + run: source ../../bin/activate-hermit && pnpm install --frozen-lockfile + working-directory: ui/desktop + + - name: Run Smoke Tests (Normal Mode) env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -127,12 +131,10 @@ jobs: SKIP_BUILD: 1 SKIP_PROVIDERS: ${{ vars.SKIP_PROVIDERS || '' }} run: | - # Ensure the HOME directory structure exists mkdir -p 
$HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - - # Run the provider test script (binary already built and downloaded) - bash scripts/test_providers.sh + source ../../bin/activate-hermit && pnpm run test:integration:providers + working-directory: ui/desktop - name: Set up Python uses: actions/setup-python@v5 @@ -188,6 +190,10 @@ jobs: - name: Make Binary Executable run: chmod +x target/debug/goose + - name: Install Node.js Dependencies + run: source ../../bin/activate-hermit && pnpm install --frozen-lockfile + working-directory: ui/desktop + - name: Run Provider Tests (Code Execution Mode) env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -205,7 +211,8 @@ jobs: run: | mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.config/goose - bash scripts/test_providers_code_exec.sh + source ../../bin/activate-hermit && pnpm run test:integration:providers-code-exec + working-directory: ui/desktop compaction-tests: name: Compaction Tests @@ -277,7 +284,8 @@ jobs: GOOSE_PROVIDER: anthropic GOOSE_MODEL: claude-sonnet-4-5-20250929 SHELL: /bin/bash + SKIP_BUILD: 1 run: | echo 'export PATH=/some/fake/path:$PATH' >> $HOME/.bash_profile - source ../../bin/activate-hermit && pnpm run test:integration:debug + source ../../bin/activate-hermit && pnpm run test:integration:goosed working-directory: ui/desktop diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index 9a10dc6b57..e031d00c0c 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -17,7 +17,7 @@ Make a copy of this document for each version and check off as steps are verifie ### Provider Testing -- [ ] Run `./scripts/test_providers.sh` locally from the release branch and verify all providers/models work +- [ ] Run `cd ui/desktop && pnpm run test:integration:providers` locally from the release branch and verify all providers/models work - [ ] Launch goose, click reset providers, choose databricks and a model ### Starting Conversations diff --git a/scripts/test_providers.sh 
b/scripts/test_providers.sh deleted file mode 100755 index b6c28b8d44..0000000000 --- a/scripts/test_providers.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -LIB_DIR="$(cd "$(dirname "$0")" && pwd)" -source "$LIB_DIR/test_providers_lib.sh" - -echo "Mode: normal (direct tool calls)" -echo "" - -GOOSE_BIN=$(build_goose) -BUILTINS="developer" - -mkdir -p target -TEST_CONTENT="test-content-abc123" -TEST_FILE="./target/test-content.txt" -echo "$TEST_CONTENT" > "$TEST_FILE" - -run_test() { - local provider="$1" model="$2" result_file="$3" output_file="$4" - local testdir=$(mktemp -d) - - local prompt - if is_agentic_provider "$provider"; then - cp "$TEST_FILE" "$testdir/test-content.txt" - prompt="read ./test-content.txt and output its contents exactly" - else - # Write two files with unique random tokens. Validation checks that the shell - # tool was used and that both tokens appear in the output, proving the model - # actually read the files (random tokens can't be guessed or hallucinated). - local token_a="smoke-alpha-$RANDOM" - local token_b="smoke-bravo-$RANDOM" - echo "$token_a" > "$testdir/part-a.txt" - echo "$token_b" > "$testdir/part-b.txt" - # Store tokens so validation can check them - echo "$token_a" > "$testdir/.token_a" - echo "$token_b" > "$testdir/.token_b" - prompt="Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else." - fi - - ( - export GOOSE_PROVIDER="$provider" - export GOOSE_MODEL="$model" - cd "$testdir" && "$GOOSE_BIN" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1 - ) > "$output_file" 2>&1 - - if is_agentic_provider "$provider"; then - if grep -qi "$TEST_CONTENT" "$output_file"; then - echo "success|test content found by model" > "$result_file" - else - echo "failure|test content not found by model" > "$result_file" - fi - else - local token_a token_b - token_a=$(cat "$testdir/.token_a") - token_b=$(cat "$testdir/.token_b") - if ! 
grep -qE "(shell \| developer)|(▸.*shell)" "$output_file"; then - echo "failure|model did not use shell tool" > "$result_file" - elif ! grep -q "$token_a" "$output_file"; then - echo "failure|model did not return contents of part-a.txt ($token_a)" > "$result_file" - elif ! grep -q "$token_b" "$output_file"; then - echo "failure|model did not return contents of part-b.txt ($token_b)" > "$result_file" - else - echo "success|model read and returned both file contents" > "$result_file" - fi - fi - - rm -rf "$testdir" -} - -build_test_cases -run_test_cases run_test -report_results diff --git a/scripts/test_providers_code_exec.sh b/scripts/test_providers_code_exec.sh deleted file mode 100755 index c9d720d202..0000000000 --- a/scripts/test_providers_code_exec.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Provider smoke tests - code execution mode (JS batching) - -LIB_DIR="$(cd "$(dirname "$0")" && pwd)" -source "$LIB_DIR/test_providers_lib.sh" - -echo "Mode: code_execution (JS batching)" -echo "" - -# --- Setup --- - -GOOSE_BIN=$(build_goose) -BUILTINS="memory,code_execution" - -# --- Test case --- - -run_test() { - local provider="$1" model="$2" result_file="$3" output_file="$4" - local testdir=$(mktemp -d) - - local prompt="Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'." - - # Run goose - ( - export GOOSE_PROVIDER="$provider" - export GOOSE_MODEL="$model" - cd "$testdir" && "$GOOSE_BIN" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1 - ) > "$output_file" 2>&1 - - # Matches: "execute_typescript | code_execution", "get_function_details | code_execution", - # "tool call | execute", "tool calls | execute" (old format) - # "▸ execute N tool call" (new format with tool_graph) - # "▸ execute_typescript" (plain tool name in output) - if grep -qE "(execute_typescript \| code_execution)|(get_function_details \| code_execution)|(tool calls? 
\| execute)|(▸.*execute.*tool call)|(▸ execute_typescript)" "$output_file"; then - echo "success|code_execution tool called" > "$result_file" - else - echo "failure|no code_execution tool calls found" > "$result_file" - fi - - rm -rf "$testdir" -} - -build_test_cases --skip-agentic -run_test_cases run_test -report_results diff --git a/scripts/test_providers_lib.sh b/scripts/test_providers_lib.sh deleted file mode 100755 index cc0eff6631..0000000000 --- a/scripts/test_providers_lib.sh +++ /dev/null @@ -1,240 +0,0 @@ -#!/bin/bash - -PROVIDER_CONFIG=" -openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b -xai -> grok-3 -openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5 -anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-5-20251101 -google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview -tetrate -> claude-sonnet-4-20250514 -databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o -azure_openai -> ${AZURE_OPENAI_DEPLOYMENT_NAME} -aws_bedrock -> us.anthropic.claude-sonnet-4-5-20250929-v1:0 -gcp_vertex_ai -> gemini-2.5-pro -snowflake -> claude-sonnet-4-5 -venice -> llama-3.3-70b -litellm -> gpt-4o-mini -sagemaker_tgi -> sagemaker-tgi-endpoint -github_copilot -> gpt-4.1 -chatgpt_codex -> gpt-5.4 -claude-code -> default -cursor-agent -> auto -ollama -> qwen3 -" - -# Flaky models allowed to fail without blocking PRs. -ALLOWED_FAILURES=( - "google:gemini-2.5-flash" - "google:gemini-3-pro-preview" - "openrouter:nvidia/nemotron-3-nano-30b-a3b" - "openrouter:qwen/qwen3-coder:exacto" - "openai:gpt-3.5-turbo" -) - -AGENTIC_PROVIDERS=("claude-code" "cursor-agent") - -if [ -f .env ]; then - export $(grep -v '^#' .env | xargs) -fi - -build_goose() { - if [ -z "$SKIP_BUILD" ]; then - echo "Building goose..." >&2 - cargo build --bin goose >&2 - echo "" >&2 - else - echo "Skipping build (SKIP_BUILD is set)..." 
>&2 - echo "" >&2 - fi - - echo "$(pwd)/target/debug/goose" -} - -has_env() { [ -n "${!1}" ]; } -has_cmd() { command -v "$1" &>/dev/null; } -has_file() { [ -f "$1" ]; } - -is_provider_available() { - case "$1" in - openrouter) has_env OPENROUTER_API_KEY ;; - xai) has_env XAI_API_KEY ;; - openai) has_env OPENAI_API_KEY ;; - anthropic) has_env ANTHROPIC_API_KEY ;; - google) has_env GOOGLE_API_KEY ;; - tetrate) has_env TETRATE_API_KEY ;; - databricks) has_env DATABRICKS_HOST && has_env DATABRICKS_TOKEN ;; - azure_openai) has_env AZURE_OPENAI_ENDPOINT && has_env AZURE_OPENAI_DEPLOYMENT_NAME ;; - aws_bedrock) has_env AWS_REGION && { has_env AWS_PROFILE || has_env AWS_ACCESS_KEY_ID; } ;; - gcp_vertex_ai) has_env GCP_PROJECT_ID ;; - snowflake) has_env SNOWFLAKE_HOST && has_env SNOWFLAKE_TOKEN ;; - venice) has_env VENICE_API_KEY ;; - litellm) has_env LITELLM_API_KEY ;; - sagemaker_tgi) has_env SAGEMAKER_ENDPOINT_NAME && has_env AWS_REGION ;; - github_copilot) has_env GITHUB_COPILOT_TOKEN || has_file "$HOME/.config/goose/github_copilot_token.json" ;; - chatgpt_codex) has_env CHATGPT_CODEX_TOKEN || has_file "$HOME/.config/goose/chatgpt_codex/tokens.json" ;; - ollama) has_env OLLAMA_HOST || has_cmd ollama ;; - claude-code) has_cmd claude ;; - cursor-agent) has_cmd cursor-agent ;; - *) return 0 ;; - esac -} - -is_allowed_failure() { - local key="${1}:${2}" - for allowed in "${ALLOWED_FAILURES[@]}"; do - [ "$allowed" = "$key" ] && return 0 - done - return 1 -} - -should_skip_provider() { - [ -z "$SKIP_PROVIDERS" ] && return 1 - IFS=',' read -ra SKIP_LIST <<< "$SKIP_PROVIDERS" - for skip in "${SKIP_LIST[@]}"; do - skip=$(echo "$skip" | xargs) - [ "$skip" = "$1" ] && return 0 - done - return 1 -} - -is_agentic_provider() { - for agentic in "${AGENTIC_PROVIDERS[@]}"; do - [ "$agentic" = "$1" ] && return 0 - done - return 1 -} - -# build_test_cases [--skip-agentic] -build_test_cases() { - local skip_agentic=false - [ "$1" = "--skip-agentic" ] && skip_agentic=true - - local 
providers=() - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - local provider="${line%% -> *}" - if is_provider_available "$provider"; then - providers+=("$line") - echo "✓ Including $provider" - else - echo "⚠️ Skipping $provider (prerequisites not met)" - fi - done <<< "$PROVIDER_CONFIG" - echo "" - - TEST_CASES=() - local job_index=0 - for provider_config in "${providers[@]}"; do - local provider="${provider_config%% -> *}" - local models_str="${provider_config#* -> }" - - if should_skip_provider "$provider"; then - echo "⊘ Skipping provider: ${provider} (SKIP_PROVIDERS)" - continue - fi - - if [ "$skip_agentic" = true ] && is_agentic_provider "$provider"; then - echo "⊘ Skipping agentic provider: ${provider}" - continue - fi - - IFS='|' read -ra models <<< "$models_str" - for model in "${models[@]}"; do - TEST_CASES+=("$provider|$model|$job_index") - ((job_index++)) - done - done -} - -# run_test_cases -run_test_cases() { - local test_fn="$1" - - RESULTS_DIR=$(mktemp -d) - trap 'if [ -n "${RESULTS_DIR:-}" ]; then rm -rf -- "$RESULTS_DIR"; fi; if [ -n "${CLEANUP_DIR:-}" ]; then rm -rf -- "$CLEANUP_DIR"; fi' EXIT - MAX_PARALLEL=${MAX_PARALLEL:-$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 8)} - echo "Running ${#TEST_CASES[@]} tests (max $MAX_PARALLEL parallel)" - echo "" - - local running=0 - for ((i=0; i<${#TEST_CASES[@]}; i++)); do - IFS='|' read -r provider model idx <<< "${TEST_CASES[$i]}" - - if [ $i -eq 0 ]; then - # First test runs sequentially to catch early failures - "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx" - else - "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx" & - ((running++)) - if [ $running -ge $MAX_PARALLEL ]; then - wait -n 2>/dev/null || wait - ((running--)) - fi - fi - done - wait -} - -report_results() { - echo "" - echo "=== Test Results ===" - echo "" - - RESULTS=() - HARD_FAILURES=() - - for job in 
"${TEST_CASES[@]}"; do - IFS='|' read -r provider model idx <<< "$job" - - echo "Provider: $provider" - echo "Model: $model" - echo "" - cat "$RESULTS_DIR/output_$idx" - echo "" - - local result_line="" - [ -f "$RESULTS_DIR/result_$idx" ] && result_line=$(cat "$RESULTS_DIR/result_$idx") - local status="${result_line%%|*}" - local msg="${result_line#*|}" - - if [ "$status" = "success" ]; then - echo "✓ SUCCESS: $msg" - RESULTS+=("✓ ${provider}: ${model}") - else - if is_allowed_failure "$provider" "$model"; then - echo "⚠ FLAKY: $msg" - RESULTS+=("⚠ ${provider}: ${model} (flaky)") - else - echo "✗ FAILED: $msg" - RESULTS+=("✗ ${provider}: ${model}") - HARD_FAILURES+=("${provider}: ${model}") - fi - fi - echo "---" - done - - echo "" - echo "=== Test Summary ===" - for result in "${RESULTS[@]}"; do - echo "$result" - done - - if [ ${#HARD_FAILURES[@]} -gt 0 ]; then - echo "" - echo "Hard failures (${#HARD_FAILURES[@]}):" - for failure in "${HARD_FAILURES[@]}"; do - echo " - $failure" - done - echo "" - echo "Some tests failed!" - exit 1 - else - if echo "${RESULTS[@]}" | grep -q "⚠"; then - echo "" - echo "All required tests passed! (some flaky tests failed but are allowed)" - else - echo "" - echo "All tests passed!" 
- fi - fi -} diff --git a/ui/desktop/package.json b/ui/desktop/package.json index 643698f97b..0988f399e7 100644 --- a/ui/desktop/package.json +++ b/ui/desktop/package.json @@ -35,6 +35,9 @@ "test:ui": "vitest --ui", "test:coverage": "vitest run --coverage", "test:integration": "vitest run --config vitest.integration.config.ts", + "test:integration:goosed": "vitest run --config vitest.integration.config.ts tests/integration/goosed.test.ts", + "test:integration:providers": "vitest run --config vitest.integration.config.ts tests/integration/test_providers.test.ts", + "test:integration:providers-code-exec": "vitest run --config vitest.integration.config.ts tests/integration/test_providers_code_exec.test.ts", "test:integration:watch": "vitest --config vitest.integration.config.ts", "test:integration:debug": "DEBUG=1 vitest run --config vitest.integration.config.ts", "i18n:extract": "formatjs extract 'src/**/*.{ts,tsx}' --out-file src/i18n/messages/en.json --flatten && pnpm run i18n:compile", diff --git a/ui/desktop/tests/integration/test_providers.test.ts b/ui/desktop/tests/integration/test_providers.test.ts new file mode 100644 index 0000000000..44feb2a7c0 --- /dev/null +++ b/ui/desktop/tests/integration/test_providers.test.ts @@ -0,0 +1,86 @@ +/** + * Provider smoke tests — normal mode (direct tool calls). + * + * Each available provider/model pair gets its own test that spawns `goose run` + * with the developer builtin, asks the model to read files via the shell tool, + * and validates the output. 
const BUILTINS = 'developer';
const TEST_CONTENT = 'test-content-abc123';

// Prompts and the tool-usage marker are fixed, so they live at module scope.
const SHELL_PROMPT =
  'Use the shell tool to cat ./part-a.txt and ./part-b.txt, then reply with ONLY the contents of both files, one per line, nothing else.';
const AGENTIC_PROMPT = 'read ./test-content.txt and output its contents exactly';
const SHELL_TOOL_PATTERN = /(shell \| developer)|(▸.*shell)/;

let gooseBin: string;
let testFile: string;

/**
 * Create a fresh temp directory, hand it to `body`, and remove it afterwards
 * whether `body` resolves or throws.
 */
async function withScratchDir(body: (dir: string) => Promise<void>): Promise<void> {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-test-'));
  try {
    await body(dir);
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}

/** Random token in the same 0..32767 range the bash `$RANDOM` version produced. */
function smokeToken(label: string): string {
  return `smoke-${label}-${Math.floor(Math.random() * 32768)}`;
}

// Build (or locate) the goose binary once, and stage the fixture file that the
// agentic test copies into its scratch directory.
beforeAll(() => {
  gooseBin = buildGoose();

  const targetDir = path.resolve(process.cwd(), '..', '..', 'target');
  fs.mkdirSync(targetDir, { recursive: true });
  testFile = path.join(targetDir, 'test-content.txt');
  fs.writeFileSync(testFile, TEST_CONTENT + '\n');
});

const { testAgentic, testNonAgentic } = providerTest(discoverTestCases());

// Non-agentic providers: two files hold random tokens; the run passes only if
// the shell tool shows up in the transcript and both tokens are echoed back.
testNonAgentic('reads files via shell tool', (tc) =>
  withScratchDir(async (testdir) => {
    const tokenA = smokeToken('alpha');
    const tokenB = smokeToken('bravo');
    fs.writeFileSync(path.join(testdir, 'part-a.txt'), tokenA + '\n');
    fs.writeFileSync(path.join(testdir, 'part-b.txt'), tokenB + '\n');

    const output = await runGoose(gooseBin, testdir, SHELL_PROMPT, BUILTINS, {
      GOOSE_PROVIDER: tc.provider,
      GOOSE_MODEL: tc.model,
    });

    const usedShellTool = SHELL_TOOL_PATTERN.test(output);
    expect(usedShellTool, `Expected model to use shell tool\n\nFull output:\n${output}`).toBe(
      true
    );
    expect(
      output,
      `Expected output to contain token from part-a.txt (${tokenA})\n\nFull output:\n${output}`
    ).toContain(tokenA);
    expect(
      output,
      `Expected output to contain token from part-b.txt (${tokenB})\n\nFull output:\n${output}`
    ).toContain(tokenB);
  })
);

// Agentic providers: copy the shared fixture in and check (case-insensitively)
// that its content appears in the transcript.
testAgentic('reads file contents', (tc) =>
  withScratchDir(async (testdir) => {
    fs.copyFileSync(testFile, path.join(testdir, 'test-content.txt'));

    const output = await runGoose(gooseBin, testdir, AGENTIC_PROMPT, BUILTINS, {
      GOOSE_PROVIDER: tc.provider,
      GOOSE_MODEL: tc.model,
    });

    expect(
      output.toLowerCase(),
      `Expected model output to contain "${TEST_CONTENT}"\n\nFull output:\n${output}`
    ).toContain(TEST_CONTENT.toLowerCase());
  })
);
const BUILTINS = 'memory,code_execution';

const MEMORY_PROMPT =
  "Store a memory with category 'test' and data 'hello world', then retrieve all memories from category 'test'.";

// Matches: "execute_typescript | code_execution", "get_function_details | code_execution",
//          "tool call | execute", "tool calls | execute" (old format)
//          "▸ execute N tool call" (new format with tool_graph)
//          "▸ execute_typescript" (plain tool name in output)
const CODE_EXEC_PATTERN =
  /(execute_typescript \| code_execution)|(get_function_details \| code_execution)|(tool calls? \| execute)|(▸.*execute.*tool call)|(▸ execute_typescript)/;

let gooseBin: string;

// Build (or locate) the goose binary once for every case in this file.
beforeAll(() => {
  gooseBin = buildGoose();
});

// Agentic providers are excluded from discovery in this mode.
const { testAll } = providerTest(discoverTestCases({ skipAgentic: true }));

// Each provider/model runs the memory prompt in a throwaway directory; the
// run passes when the transcript shows a code_execution tool invocation.
testAll('invokes code_execution tool', async (tc) => {
  const scratch = fs.mkdtempSync(path.join(os.tmpdir(), 'goose-codeexec-'));
  try {
    const transcript = await runGoose(gooseBin, scratch, MEMORY_PROMPT, BUILTINS, {
      GOOSE_PROVIDER: tc.provider,
      GOOSE_MODEL: tc.model,
    });

    const toolWasCalled = CODE_EXEC_PATTERN.test(transcript);
    expect(
      toolWasCalled,
      `Expected code_execution tool to be called\n\nFull output:\n${transcript}`
    ).toBe(true);
  } finally {
    fs.rmSync(scratch, { recursive: true, force: true });
  }
});
// ---------------------------------------------------------------------------
// Provider configuration
// ---------------------------------------------------------------------------

/** A model is either a bare model name or a name explicitly marked flaky. */
type ModelEntry = string | { name: string; flaky: true };

/** One provider row: its models, whether it is agentic, and an availability probe. */
interface ProviderConfig {
  provider: string;
  models: ModelEntry[];
  agentic?: boolean;
  /** Evaluated lazily so it sees variables loaded from `.env`. */
  available: () => boolean;
}

/** Return the model name regardless of which `ModelEntry` form was used. */
function modelName(entry: ModelEntry): string {
  return typeof entry === 'string' ? entry : entry.name;
}

/** True only for the object form of `ModelEntry`, which carries `flaky: true`. */
function modelFlaky(entry: ModelEntry): boolean {
  return typeof entry !== 'string' && entry.flaky;
}

/** True when the environment variable is set to a non-empty string. */
function hasEnv(name: string): boolean {
  return !!process.env[name];
}

/**
 * True when `command -v <name>` succeeds in the default shell.
 *
 * `name` is interpolated into a shell command line, so it must be a trusted
 * literal (every caller in this file passes one).
 */
function hasCmd(name: string): boolean {
  try {
    execSync(`command -v ${name}`, { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
}

/** True when something exists at path `p`. */
function hasFile(p: string): boolean {
  return fs.existsSync(p);
}

/**
 * Build the provider table. Called after `.env` has been loaded because the
 * azure_openai model name is read from the environment at construction time.
 */
function getProviders(): ProviderConfig[] {
  return [
    {
      provider: 'openrouter',
      models: [
        'google/gemini-2.5-pro',
        'anthropic/claude-sonnet-4.5',
        { name: 'qwen/qwen3-coder:exacto', flaky: true },
        'z-ai/glm-4.6:exacto',
        { name: 'nvidia/nemotron-3-nano-30b-a3b', flaky: true },
      ],
      available: () => hasEnv('OPENROUTER_API_KEY'),
    },
    {
      provider: 'xai',
      models: ['grok-3'],
      available: () => hasEnv('XAI_API_KEY'),
    },
    {
      provider: 'openai',
      models: ['gpt-4o', 'gpt-4o-mini', { name: 'gpt-3.5-turbo', flaky: true }, 'gpt-5'],
      available: () => hasEnv('OPENAI_API_KEY'),
    },
    {
      provider: 'anthropic',
      models: ['claude-sonnet-4-5-20250929', 'claude-opus-4-5-20251101'],
      available: () => hasEnv('ANTHROPIC_API_KEY'),
    },
    {
      provider: 'google',
      models: [
        'gemini-2.5-pro',
        { name: 'gemini-2.5-flash', flaky: true },
        { name: 'gemini-3-pro-preview', flaky: true },
        'gemini-3-flash-preview',
      ],
      available: () => hasEnv('GOOGLE_API_KEY'),
    },
    {
      provider: 'tetrate',
      models: ['claude-sonnet-4-20250514'],
      available: () => hasEnv('TETRATE_API_KEY'),
    },
    {
      provider: 'databricks',
      models: ['databricks-claude-sonnet-4', 'gemini-2-5-flash', 'gpt-4o'],
      available: () => hasEnv('DATABRICKS_HOST') && hasEnv('DATABRICKS_TOKEN'),
    },
    {
      provider: 'azure_openai',
      // Empty string when the deployment name is unset; `available` is false then.
      models: [process.env.AZURE_OPENAI_DEPLOYMENT_NAME ?? ''],
      available: () => hasEnv('AZURE_OPENAI_ENDPOINT') && hasEnv('AZURE_OPENAI_DEPLOYMENT_NAME'),
    },
    {
      provider: 'aws_bedrock',
      models: ['us.anthropic.claude-sonnet-4-5-20250929-v1:0'],
      available: () =>
        hasEnv('AWS_REGION') && (hasEnv('AWS_PROFILE') || hasEnv('AWS_ACCESS_KEY_ID')),
    },
    {
      provider: 'gcp_vertex_ai',
      models: ['gemini-2.5-pro'],
      available: () => hasEnv('GCP_PROJECT_ID'),
    },
    {
      provider: 'snowflake',
      models: ['claude-sonnet-4-5'],
      available: () => hasEnv('SNOWFLAKE_HOST') && hasEnv('SNOWFLAKE_TOKEN'),
    },
    {
      provider: 'venice',
      models: ['llama-3.3-70b'],
      available: () => hasEnv('VENICE_API_KEY'),
    },
    {
      provider: 'litellm',
      models: ['gpt-4o-mini'],
      available: () => hasEnv('LITELLM_API_KEY'),
    },
    {
      provider: 'sagemaker_tgi',
      models: ['sagemaker-tgi-endpoint'],
      available: () => hasEnv('SAGEMAKER_ENDPOINT_NAME') && hasEnv('AWS_REGION'),
    },
    {
      provider: 'github_copilot',
      models: ['gpt-4.1'],
      available: () =>
        hasEnv('GITHUB_COPILOT_TOKEN') ||
        hasFile(path.join(os.homedir(), '.config/goose/github_copilot_token.json')),
    },
    {
      provider: 'chatgpt_codex',
      models: ['gpt-5.4'],
      available: () =>
        hasEnv('CHATGPT_CODEX_TOKEN') ||
        hasFile(path.join(os.homedir(), '.config/goose/chatgpt_codex/tokens.json')),
    },
    {
      provider: 'claude-code',
      models: ['default'],
      agentic: true,
      available: () => hasCmd('claude'),
    },
    {
      provider: 'cursor-agent',
      models: ['auto'],
      agentic: true,
      available: () => hasCmd('cursor-agent'),
    },
    {
      provider: 'ollama',
      models: ['qwen3'],
      available: () => hasEnv('OLLAMA_HOST') || hasCmd('ollama'),
    },
  ];
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Remove one pair of matching single or double quotes wrapping `s`, if present. */
function stripQuotes(s: string): string {
  if (
    s.length >= 2 &&
    ((s.startsWith('"') && s.endsWith('"')) || (s.startsWith("'") && s.endsWith("'")))
  ) {
    return s.slice(1, -1);
  }
  return s;
}

/**
 * Parse dotenv-style text into `[key, value]` pairs, in file order.
 *
 * Blank lines, `#` comments, lines without `=`, and lines with an empty key
 * are skipped. A leading `export ` is accepted, whitespace around the key and
 * value is dropped, and one pair of wrapping quotes is removed from the value.
 */
function parseDotenv(content: string): Array<[string, string]> {
  const entries: Array<[string, string]> = [];
  for (const rawLine of content.split('\n')) {
    // trim() also removes the '\r' left behind by CRLF line endings.
    const line = rawLine.trim().replace(/^export\s+/, '');
    if (!line || line.startsWith('#')) continue;
    const eqIdx = line.indexOf('=');
    if (eqIdx === -1) continue;
    const key = line.slice(0, eqIdx).trim();
    if (!key) continue;
    entries.push([key, stripQuotes(line.slice(eqIdx + 1).trim())]);
  }
  return entries;
}

/**
 * Load `.env` from four directories above this file (the repository root when
 * this file lives at ui/desktop/tests/integration) into `process.env`.
 * Variables already present in the environment are never overwritten.
 */
function loadDotenv(): void {
  const repoRoot = path.resolve(__dirname, '..', '..', '..', '..');
  const envPath = path.join(repoRoot, '.env');
  if (!fs.existsSync(envPath)) return;
  for (const [key, value] of parseDotenv(fs.readFileSync(envPath, 'utf-8'))) {
    if (!(key in process.env)) {
      process.env[key] = value;
    }
  }
}

/** True when `provider` appears in the comma-separated `SKIP_PROVIDERS` variable. */
function shouldSkipProvider(provider: string): boolean {
  const skip = process.env.SKIP_PROVIDERS;
  if (!skip) return false;
  return skip
    .split(',')
    .map((s) => s.trim())
    .includes(provider);
}

// ---------------------------------------------------------------------------
// Build goose binary
// ---------------------------------------------------------------------------

/**
 * Build the goose CLI with cargo unless `SKIP_BUILD` is set, then return the
 * expected path of the debug binary.
 *
 * The path is resolved two directories above the current working directory,
 * so callers are expected to run from ui/desktop (as the CI jobs do).
 *
 * @returns Absolute path to `target/debug/goose`.
 * @throws If `cargo build` exits non-zero (propagated from `execSync`).
 */
export function buildGoose(): string {
  if (!process.env.SKIP_BUILD) {
    console.error('Building goose...');
    execSync('cargo build --bin goose', { stdio: 'inherit' });
    console.error('');
  } else {
    console.error('Skipping build (SKIP_BUILD is set)...');
    console.error('');
  }
  return path.resolve(process.cwd(), '..', '..', 'target/debug/goose');
}
// ---------------------------------------------------------------------------
// Test case discovery
// ---------------------------------------------------------------------------

/** One provider/model pair plus the flags that decide how it is registered. */
export interface TestCase {
  provider: string;
  model: string;
  /** False when the case is skipped; `skippedReason` then says why. */
  available: boolean;
  flaky: boolean;
  agentic: boolean;
  skippedReason?: string;
}

/**
 * Expand the provider table into one `TestCase` per model.
 *
 * `.env` is loaded first so availability probes can see its variables. A
 * provider is skipped for the first reason that applies, in this order:
 * prerequisites missing, listed in `SKIP_PROVIDERS`, agentic while
 * `skipAgentic` is set. Skipped cases are still returned (with
 * `available: false`) so they show up as skipped tests.
 *
 * @param options.skipAgentic Mark agentic providers as skipped. Defaults to false.
 */
export function discoverTestCases(options?: { skipAgentic?: boolean }): TestCase[] {
  loadDotenv();
  const skipAgentic = options?.skipAgentic ?? false;
  const testCases: TestCase[] = [];

  for (const pc of getProviders()) {
    const agentic = pc.agentic ?? false;

    // The skip decision is per provider; every model of the provider shares it.
    let skippedReason: string | undefined;
    if (!pc.available()) {
      skippedReason = 'prerequisites not met';
    } else if (shouldSkipProvider(pc.provider)) {
      skippedReason = 'SKIP_PROVIDERS';
    } else if (skipAgentic && agentic) {
      skippedReason = 'agentic provider skipped in this mode';
    }

    for (const entry of pc.models) {
      testCases.push({
        provider: pc.provider,
        model: modelName(entry),
        available: skippedReason === undefined,
        flaky: modelFlaky(entry),
        agentic,
        // Runnable cases carry no `skippedReason` key at all.
        ...(skippedReason === undefined ? {} : { skippedReason }),
      });
    }
  }

  return testCases;
}

// ---------------------------------------------------------------------------
// Test registration helpers
// ---------------------------------------------------------------------------

/** Body of a provider test; receives the case it is running for. */
type ProviderTestFn = (tc: TestCase) => Promise<void>;

/**
 * Register `fn` once per case under `label`, in three groups: runnable cases
 * fail normally, flaky cases log a warning instead of failing, and
 * unavailable cases are registered as skipped with their reason in the title.
 */
function registerTests(label: string, cases: TestCase[], fn: ProviderTestFn): void {
  const available = cases.filter((tc) => tc.available && !tc.flaky);
  const flaky = cases.filter((tc) => tc.available && tc.flaky);
  const skipped = cases.filter((tc) => !tc.available);

  if (available.length > 0) {
    test.each(available)(`${label} — $provider / $model`, async (tc) => {
      await fn(tc);
    });
  }

  if (flaky.length > 0) {
    test.each(flaky)(`${label} — $provider / $model (flaky)`, async (tc) => {
      try {
        await fn(tc);
      } catch (err) {
        // Deliberate: flaky models may fail without failing the run.
        console.warn(`Flaky test ${tc.provider}/${tc.model} failed (allowed): ${err}`);
      }
    });
  }

  if (skipped.length > 0) {
    test.skip.each(skipped)(`${label} — $provider / $model — $skippedReason`, () => {});
  }
}

/**
 * Build decorator-style test registrars from a set of discovered test cases.
 *
 * Usage:
 *   const { testAll, testAgentic, testNonAgentic } = providerTest(cases);
 *
 *   testAll('reads a file', async (tc) => { ... });
 *   testAgentic('delegates work', async (tc) => { ... });
 *   testNonAgentic('uses shell tool', async (tc) => { ... });
 */
export function providerTest(cases: TestCase[]) {
  const agentic = cases.filter((tc) => tc.agentic);
  const nonAgentic = cases.filter((tc) => !tc.agentic);

  return {
    testAll: (label: string, fn: ProviderTestFn) => registerTests(label, cases, fn),
    testAgentic: (label: string, fn: ProviderTestFn) => registerTests(label, agentic, fn),
    testNonAgentic: (label: string, fn: ProviderTestFn) => registerTests(label, nonAgentic, fn),
  };
}

// ---------------------------------------------------------------------------
// Utility: run goose binary and capture output
// ---------------------------------------------------------------------------

/**
 * Run `<gooseBin> run --text <prompt> --with-builtin <builtins>` in `cwd` and
 * resolve with stdout and stderr interleaved in arrival order.
 *
 * The promise never rejects and the exit code is not inspected: a spawn
 * failure resolves with `spawn error: <message>`, so callers judge success
 * from the transcript alone.
 *
 * @param env Extra variables layered over the current process environment.
 */
export function runGoose(
  gooseBin: string,
  cwd: string,
  prompt: string,
  builtins: string,
  env: Record<string, string>
): Promise<string> {
  return new Promise<string>((resolve) => {
    const child: ChildProcess = spawn(
      gooseBin,
      ['run', '--text', prompt, '--with-builtin', builtins],
      {
        cwd,
        env: { ...process.env, ...env },
        stdio: ['ignore', 'pipe', 'pipe'],
      }
    );

    let output = '';
    child.stdout?.on('data', (d) => {
      output += String(d);
    });
    child.stderr?.on('data', (d) => {
      output += String(d);
    });

    child.on('close', () => {
      resolve(output);
    });

    // Only the first resolve() takes effect, so a later 'close' is harmless.
    child.on('error', (err) => {
      resolve(`spawn error: ${err.message}`);
    });
  });
}