#!/bin/bash
# Compaction smoke test script
# Tests manual (trigger prompt), auto (threshold-based) and error-driven
# (out-of-context error served by a local proxy) compaction.
#
# The goose binary (target/debug/goose) and the error proxy
# (scripts/provider-error-proxy) are resolved relative to the current working
# directory, so run this from the directory that contains both.
#
# Environment variable overrides:
#   COMPACTION_PROVIDER - Override the provider for tests 1 & 2 (default: use system default)
#   COMPACTION_MODEL    - Override the model for tests 1 & 2 (default: use system default)
#   SKIP_BUILD          - Skip cargo build if set
#
# Exit status: 0 when every test passed, 1 otherwise.

if [ -f .env ]; then
  # xargs flattens the non-comment lines into KEY=VALUE words. Skip the export
  # when nothing is left: a bare `export` would print every exported variable.
  env_assignments=$(grep -v '^#' .env | xargs)
  if [ -n "$env_assignments" ]; then
    # Word splitting is intentional: one word per assignment.
    # shellcheck disable=SC2086
    export $env_assignments
  fi
  unset env_assignments
fi

if [ -z "$SKIP_BUILD" ]; then
  echo "Building goose..."
  # Abort on a failed build instead of testing a stale or missing binary.
  if ! cargo build --bin goose; then
    echo "✗ FAILED: cargo build failed" >&2
    exit 1
  fi
  echo ""
else
  echo "Skipping build (SKIP_BUILD is set)..."
  echo ""
fi

# NOTE: despite its name this is the current working directory, not the
# directory the script file lives in.
SCRIPT_DIR=$(pwd)
GOOSE_BIN="$SCRIPT_DIR/target/debug/goose"

if [ ! -x "$GOOSE_BIN" ]; then
  echo "✗ FAILED: goose binary not found at $GOOSE_BIN" >&2
  exit 1
fi

# Apply provider/model overrides if set
if [ -n "$COMPACTION_PROVIDER" ]; then
  echo "Using override provider: $COMPACTION_PROVIDER"
  export GOOSE_PROVIDER="$COMPACTION_PROVIDER"
fi

if [ -n "$COMPACTION_MODEL" ]; then
  echo "Using override model: $COMPACTION_MODEL"
  export GOOSE_MODEL="$COMPACTION_MODEL"
fi

if [ -n "$COMPACTION_PROVIDER" ] || [ -n "$COMPACTION_MODEL" ]; then
  echo ""
fi

# State shared between the tests and the exit-time cleanup.
RESULTS=()
TESTDIR=""
OUTPUT=""
PROXY_LOG=""
PROXY_SETUP_LOG=""
PROXY_PID=""
PROXY_PORT=""

#######################################
# Validate the compaction structure of an exported session.
# Globals:   GOOSE_BIN (read)
# Arguments: $1 - session id to export
#            $2 - human-readable test name used in the success message
# Outputs:   progress and diagnostics on stdout
# Returns:   0 when the structure is valid (or jq is unavailable, with a
#            warning), 1 when the export fails or the structure is invalid
#######################################
validate_compaction() {
  local session_id=$1
  local test_name=$2
  # Declared separately from the assignments below: `local var=$(cmd)` yields
  # the status of `local` (always 0) and would hide a failing command.
  local session_json message_count
  local has_summary has_hidden_originals has_preserved_user

  echo "Validating compaction structure for session: $session_id"

  # Export the session to JSON (stderr is captured too so it can be shown on
  # failure).
  if ! session_json=$("$GOOSE_BIN" session export --format json --session-id "$session_id" 2>&1); then
    echo "✗ FAILED: Could not export session JSON"
    echo " Error: $session_json"
    return 1
  fi

  if ! command -v jq &> /dev/null; then
    echo "⚠ WARNING: jq not available, cannot validate compaction structure"
    return 0
  fi

  # Check basic structure
  if ! printf '%s\n' "$session_json" | jq -e '.conversation' > /dev/null 2>&1; then
    echo "✗ FAILED: Session JSON missing 'conversation' field"
    return 1
  fi

  message_count=$(printf '%s\n' "$session_json" | jq '.conversation | length' 2>/dev/null)
  echo " Session has $message_count messages"

  # Look for a summary message (assistant role with userVisible=false, agentVisible=true)
  has_summary=$(printf '%s\n' "$session_json" | jq '[.conversation[] | select(.role == "assistant" and .metadata.userVisible == false and .metadata.agentVisible == true)] | length > 0' 2>/dev/null)
  if [ "$has_summary" != "true" ]; then
    echo "✗ FAILED: No summary message found (expected assistant message with userVisible=false, agentVisible=true)"
    return 1
  fi
  echo "✓ Found summary message with correct visibility flags"

  # Check for original messages with userVisible=true, agentVisible=false
  has_hidden_originals=$(printf '%s\n' "$session_json" | jq '[.conversation[] | select(.metadata.userVisible == true and .metadata.agentVisible == false)] | length > 0' 2>/dev/null)
  if [ "$has_hidden_originals" != "true" ]; then
    echo "⚠ WARNING: No original messages found with userVisible=true, agentVisible=false"
    echo " This might be OK if all messages were compacted"
  else
    echo "✓ Found original messages hidden from agent (userVisible=true, agentVisible=false)"
  fi

  # For auto-compaction, check for the preserved user message (userVisible=true, agentVisible=true)
  has_preserved_user=$(printf '%s\n' "$session_json" | jq '[.conversation[] | select(.role == "user" and .metadata.userVisible == true and .metadata.agentVisible == true)] | length > 0' 2>/dev/null)
  if [ "$has_preserved_user" == "true" ]; then
    echo "✓ Found preserved user message (userVisible=true, agentVisible=true)"
  fi

  echo "✓ SUCCESS: Compaction structure is valid for $test_name"
  return 0
}

#######################################
# Print the id of the first session reported by `goose session list`.
# Globals:   GOOSE_BIN (read)
# Outputs:   the session id on stdout
# Returns:   0 when an id was found, 1 when the list is empty/unreadable
#######################################
latest_session_id() {
  local id
  id=$("$GOOSE_BIN" session list --format json 2>/dev/null | jq -r '.[0].id' 2>/dev/null)
  if [ -z "$id" ] || [ "$id" = "null" ]; then
    return 1
  fi
  printf '%s\n' "$id"
}

#######################################
# Check whether something accepts TCP connections on localhost.
# Arguments: $1 - port number
# Returns:   0 when the connection succeeds, non-zero otherwise
#######################################
port_is_open() {
  local port=$1
  if command -v timeout &> /dev/null; then
    timeout 1 bash -c "echo -n > /dev/tcp/localhost/$port" 2>/dev/null
  else
    # `timeout` is not installed everywhere; probe directly without it.
    (echo -n > "/dev/tcp/localhost/$port") 2>/dev/null
  fi
}

#######################################
# Stop the background error proxy, if one was started.
# Globals:   PROXY_PID (read, cleared), PROXY_PORT (read)
# Returns:   0 always; every step is best-effort
#######################################
stop_proxy() {
  if [ -z "$PROXY_PID" ]; then
    return 0
  fi
  # Kill the entire process group to ensure UV and Python processes are
  # terminated. This fails harmlessly when the job has no group of its own.
  kill -- "-$PROXY_PID" 2>/dev/null || true
  kill "$PROXY_PID" 2>/dev/null || true
  # Also explicitly kill any remaining UV processes on this port
  pkill -f "uv run.*--port $PROXY_PORT" 2>/dev/null || true
  wait "$PROXY_PID" 2>/dev/null || true
  PROXY_PID=""
}

#######################################
# Remove the temp files and directory of the current test.
# Globals:   OUTPUT, PROXY_LOG, PROXY_SETUP_LOG, TESTDIR (read, cleared)
#######################################
remove_test_artifacts() {
  local path
  for path in "$OUTPUT" "$PROXY_LOG" "$PROXY_SETUP_LOG"; do
    if [ -n "$path" ]; then
      rm -f -- "$path"
    fi
  done
  if [ -n "$TESTDIR" ]; then
    rm -rf -- "$TESTDIR"
  fi
  OUTPUT=""
  PROXY_LOG=""
  PROXY_SETUP_LOG=""
  TESTDIR=""
}

# Exit-time safety net so an interrupted run leaves neither temp files nor a
# running proxy behind.
cleanup() {
  stop_proxy
  remove_test_artifacts
}
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "=================================================="
echo "COMPACTION SMOKE TESTS"
echo "=================================================="
echo ""

# ==================================================
# TEST 1: Manual Compaction
# ==================================================
echo "---------------------------------------------------"
echo "TEST 1: Manual Compaction via trigger prompt"
echo "---------------------------------------------------"

TESTDIR=$(mktemp -d)
echo "hello world" > "$TESTDIR/hello.txt"
echo "Test directory: $TESTDIR"
echo ""

OUTPUT=$(mktemp)

echo "Step 1: Creating session with initial messages..."
(cd "$TESTDIR" && "$GOOSE_BIN" run --with-builtin developer --text "list files and read hello.txt" 2>&1) | tee "$OUTPUT"

if ! command -v jq &> /dev/null; then
  echo "✗ FAILED: jq is required for this test"
  RESULTS+=("✗ Manual Compaction (jq required)")
elif ! SESSION_ID=$(latest_session_id); then
  echo "✗ FAILED: Could not create session"
  RESULTS+=("✗ Manual Compaction (no session)")
else
  echo ""
  echo "Session created: $SESSION_ID"
  echo "Step 2: Sending manual compaction trigger..."

  # Send the manual compact trigger prompt
  (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "Please compact this conversation" 2>&1) | tee -a "$OUTPUT"

  echo ""
  echo "Checking for compaction evidence..."

  if grep -qi "compacting\|compacted\|compaction" "$OUTPUT"; then
    echo "✓ SUCCESS: Manual compaction was triggered"
    if validate_compaction "$SESSION_ID" "manual compaction"; then
      RESULTS+=("✓ Manual Compaction")
    else
      RESULTS+=("✗ Manual Compaction (structure validation failed)")
    fi
  else
    echo "✗ FAILED: Manual compaction was not triggered"
    RESULTS+=("✗ Manual Compaction")
  fi
fi

remove_test_artifacts

echo ""
echo ""

# ==================================================
# TEST 2: Auto Compaction
# ==================================================
echo "---------------------------------------------------"
echo "TEST 2: Auto Compaction via threshold (0.005)"
echo "---------------------------------------------------"

TESTDIR=$(mktemp -d)
echo "test content" > "$TESTDIR/test.txt"
echo "Test directory: $TESTDIR"
echo ""

# Set auto-compact threshold very low (.5%) to trigger it quickly
export GOOSE_AUTO_COMPACT_THRESHOLD=0.005

OUTPUT=$(mktemp)

LONG_RESPONSE_PROMPT="Count from 1 to 200, one number per line."

echo "Step 1: Creating session with first message (generating tokens for threshold)..."
(cd "$TESTDIR" && "$GOOSE_BIN" run --text "$LONG_RESPONSE_PROMPT" 2>&1) | tee "$OUTPUT"

if ! command -v jq &> /dev/null; then
  echo "✗ FAILED: jq is required for this test"
  RESULTS+=("✗ Auto Compaction (jq required)")
elif ! SESSION_ID=$(latest_session_id); then
  echo "✗ FAILED: Could not create session"
  RESULTS+=("✗ Auto Compaction (no session)")
else
  echo ""
  echo "Session created: $SESSION_ID"
  echo "Step 2: Sending second message (should trigger auto-compact)..."

  # Send second message - auto-compaction should trigger before processing this
  (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "hi again" 2>&1) | tee -a "$OUTPUT"

  echo ""
  echo "Checking for auto-compaction evidence..."

  if grep -qi "auto.*compact\|exceeded.*auto.*compact.*threshold" "$OUTPUT"; then
    echo "✓ SUCCESS: Auto compaction was triggered"
    if validate_compaction "$SESSION_ID" "auto compaction"; then
      RESULTS+=("✓ Auto Compaction")
    else
      RESULTS+=("✗ Auto Compaction (structure validation failed)")
    fi
  else
    echo "✗ FAILED: Auto compaction was not triggered"
    echo " Expected to see auto-compact messages with threshold of 0.005"
    RESULTS+=("✗ Auto Compaction")
  fi
fi

# Unset the env variable
unset GOOSE_AUTO_COMPACT_THRESHOLD

remove_test_artifacts

echo ""
echo ""

# ==================================================
# TEST 3: Out-of-Context Error Compaction
# ==================================================
echo "---------------------------------------------------"
echo "TEST 3: Compaction via out-of-context error (proxy)"
echo "---------------------------------------------------"

TESTDIR=$(mktemp -d)
echo "test content" > "$TESTDIR/test.txt"
echo "Test directory: $TESTDIR"
echo ""

# Use a random port to avoid conflicts
PROXY_PORT=$((9000 + RANDOM % 1000))
PROXY_DIR="$SCRIPT_DIR/scripts/provider-error-proxy"

OUTPUT=$(mktemp)
PROXY_LOG=$(mktemp)
PROXY_SETUP_LOG=$(mktemp)

# Pre-install proxy dependencies (so first run doesn't take forever)
echo "Installing proxy dependencies..."
export UV_INDEX_URL="https://pypi.org/simple"

# pipefail (scoped to the subshell) makes the pipeline report the status of
# `uv sync` rather than that of `tee`, which practically always succeeds.
if ! (cd "$PROXY_DIR" && set -o pipefail && uv sync 2>&1 | tee "$PROXY_SETUP_LOG"); then
  echo "✗ FAILED: Could not install proxy dependencies"
  echo "Setup log:"
  cat "$PROXY_SETUP_LOG"
  RESULTS+=("✗ Out-of-Context Error (dependency install failed)")
else
  echo "✓ Dependencies installed"

  # Start the error proxy in context-length error mode (3 errors)
  echo "Starting error proxy on port $PROXY_PORT with context-length error mode..."
  (cd "$PROXY_DIR" && UV_INDEX_URL="https://pypi.org/simple" uv run proxy.py --port "$PROXY_PORT" --mode "c 3" --no-stdin > "$PROXY_LOG" 2>&1) &
  PROXY_PID=$!

  # Wait for proxy to be ready (check if port is listening); 60 attempts with
  # a 0.5s pause between them.
  echo "Waiting for proxy to be ready..."
  PROXY_READY=false
  for _ in {1..60}; do
    if ! kill -0 "$PROXY_PID" 2>/dev/null; then
      echo "✗ FAILED: Error proxy process died"
      break
    fi
    if port_is_open "$PROXY_PORT"; then
      PROXY_READY=true
      echo "✓ Proxy is ready on port $PROXY_PORT"
      break
    fi
    sleep 0.5
  done

  # Check if proxy is running and ready
  if [ "$PROXY_READY" != "true" ]; then
    echo "✗ FAILED: Error proxy failed to become ready"
    echo "Proxy log:"
    cat "$PROXY_LOG"
    # Same teardown as the success path so no uv child is left running.
    stop_proxy
    RESULTS+=("✗ Out-of-Context Test Error (proxy failed)")
  else
    # Configure provider to use proxy and skip backoff
    export ANTHROPIC_HOST="http://localhost:$PROXY_PORT"
    export GOOSE_PROVIDER_SKIP_BACKOFF=true
    export GOOSE_PROVIDER=anthropic
    export GOOSE_MODEL=claude-haiku-4-5

    echo "Step 1: Creating session (should trigger context-length error and compaction)..."
    (cd "$TESTDIR" && "$GOOSE_BIN" run --text "hello world" 2>&1) | tee "$OUTPUT"

    if ! command -v jq &> /dev/null; then
      # Without jq the session lookup below would fail with a misleading
      # "Could not create session".
      echo "✗ FAILED: jq is required for this test"
      RESULTS+=("✗ Out-of-Context Test Error (jq required)")
    elif ! SESSION_ID=$(latest_session_id); then
      echo "✗ FAILED: Could not create session"
      RESULTS+=("✗ Out-of-Context Test Error (no session)")
    else
      echo ""
      echo "Session created: $SESSION_ID"
      echo "Checking for compaction evidence..."

      # Check for compaction in the output
      if grep -qi "context.*length\|compacting\|compacted\|compaction" "$OUTPUT"; then
        echo "✓ SUCCESS: Out-of-context Test error triggered compaction"
        if validate_compaction "$SESSION_ID" "out-of-context error compaction"; then
          RESULTS+=("✓ Out-of-Context Test Error")
        else
          RESULTS+=("✗ Out-of-Context Test Error (structure validation failed)")
        fi
      else
        echo "✗ FAILED: No evidence of compaction after context-length error"
        echo " Output:"
        cat "$OUTPUT"
        RESULTS+=("✗ Out-of-Context Test Error")
      fi
    fi

    # Clean up
    echo ""
    echo "Stopping error proxy..."
    stop_proxy

    unset ANTHROPIC_HOST
    unset GOOSE_PROVIDER_SKIP_BACKOFF
    unset GOOSE_PROVIDER
    unset GOOSE_MODEL
    unset UV_INDEX_URL
  fi
fi

remove_test_artifacts

echo ""
echo ""

# ==================================================
# Summary
# ==================================================
echo "=================================================="
echo "TEST SUMMARY"
echo "=================================================="

# Print every result and count the failed ones (entries marked with ✗).
FAILURE_COUNT=0
for result in "${RESULTS[@]}"; do
  echo "$result"
  case "$result" in
    *"✗"*) FAILURE_COUNT=$((FAILURE_COUNT + 1)) ;;
  esac
done

if [ "$FAILURE_COUNT" -gt 0 ]; then
  echo ""
  echo "❌ $FAILURE_COUNT test(s) failed!"
  exit 1
else
  echo ""
  echo "✅ All tests passed!"
fi