# goose/.github/workflows/test-finder.yml

name: Daily Test Coverage Finder

# Triggers: manual dispatch only. The daily cron schedule is kept below for
# reference but is commented out (paused).
on:
  # schedule:
  #   # Run daily at 2 AM UTC - PAUSED
  #   - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        # When set, the "Create Pull Request" step is skipped; the analysis
        # and test-generation steps still run.
        type: boolean
        description: "Dry run (no PR creation)"
        default: false
        required: false

# Scopes granted to the workflow's GITHUB_TOKEN, which the
# "Create Pull Request" step passes to the PR-creation action.
permissions:
  pull-requests: write
  contents: write

jobs:
  find-untested-code:
    runs-on: ubuntu-latest
    # All steps execute inside the goose container image, as root
    # (presumably so the apt-get install step works - confirm).
    container:
      image: ghcr.io/block/goose:latest
      options: "--user root"
      env:
        # Writable home; later steps create goose's config and session
        # directories beneath it.
        HOME: /tmp/goose-home
        # Provider/model come from repository variables, with fallbacks.
        GOOSE_PROVIDER: ${{ vars.GOOSE_PROVIDER || 'openai' }}
        GOOSE_MODEL: ${{ vars.GOOSE_MODEL || 'gpt-5' }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:
      # Check out the repository. The action is pinned to a full commit SHA
      # (the trailing comment records the tag it corresponds to).
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v4
        with:
          # Fetch full history rather than a shallow clone; a later step
          # compares HEAD against origin's refs and diffs against HEAD~1.
          fetch-depth: 0

      # Installs the command-line tools used by later steps: jq (session
      # metrics parsing), ripgrep (named in the goose prompt) and a C
      # toolchain via build-essential.
      - name: Install build and analysis tools
        run: |
          # Refresh the package index before installing.
          apt-get update
          apt-get install -y \
            jq \
            ripgrep \
            build-essential

      # Runs goose with a prompt asking it to add ONE passing test, then
      # inspects the result.
      #
      # Step outputs:
      #   patch_created - "true" when a non-empty patch file or new local
      #                   commits were found, otherwise "false"
      #   function_name - name of the tested function (only set when
      #                   patch_created is "true"; never empty)
      - name: Find untested code and create working test
        id: find_untested
        run: |
          PATCH_FILE=/tmp/test_addition.patch
          FUNC_FILE=/tmp/function_tested.txt

          # Ensure the HOME directory structure exists
          mkdir -p "$HOME/.local/share/goose/sessions"
          mkdir -p "$HOME/.config/goose"

          # Create analysis and test creation script
          cat << 'EOF' > /tmp/create_working_test.txt
          Your task is to find ONE untested function in the Rust codebase and create a working test for it.

          Requirements:
          1. The function MUST be in the crates/ directory
          2. It MUST have actual logic (not just a simple getter/setter)
          3. It MUST not already have a test
          4. Prefer functions with complexity but that are still testable in isolation
          5. Focus on the goose crate first, then goose-cli, then others

          Process:
          0. Immediately write these requirements in your TODO list tool and periodically check against them
          1. Find a suitable untested function (use your `analyze` tool and ripgrep)
          2. Write a comprehensive unit test for it
          3. Apply your changes to the codebase
          4. Run the test with: cargo test -p <crate_name> <test_name>
          5. If the test fails:
             - Read and understand the error message
             - Fix the test code
             - Apply the fix and run the test again
             - Repeat up to 3 times until the test passes
          6. Once the test passes, run `cargo fmt` to format all the code properly
          7. After formatting, save the final changes as a git diff to /tmp/test_addition.patch
          8. If successful, write the name of the function you tested to /tmp/function_tested.txt (just the function name, e.g., "check_tool_call" or "MyStruct::my_method")
          9. Only create the patch file if the test actually passes

          Important:
          - Only add ONE test for ONE function
          - The test MUST compile and pass before creating the patch
          - Keep changes minimal and focused
          - Include a descriptive test name that explains what is being tested
          EOF

          goose run -i /tmp/create_working_test.txt --with-builtin developer

          # Strip CR/LF and trailing whitespace from a name read on stdin.
          clean_name() {
            tr -d '\n\r' | sed 's/[[:space:]]*$//'
          }

          # Read diff text on stdin and print the first test function name
          # matching the grep pattern in $1, with the "test_" prefix and the
          # parameter list removed. Prints nothing when there is no match.
          extract_test_name() {
            grep "$1" | head -1 | sed 's/.*fn test_//' | sed 's/(.*//' | clean_name
          }

          # Debug: Check what files were created
          echo "Checking for patch file..."
          if [ -f "$PATCH_FILE" ]; then
            echo "Patch file exists"
            echo "Patch size: $(wc -c < "$PATCH_FILE") bytes"
            echo "First few lines of patch:"
            head -5 "$PATCH_FILE" || true
          else
            echo "No patch file found at $PATCH_FILE"
          fi

          # Check for new commits that Goose might have made
          COMMITS_AHEAD=$(git rev-list HEAD --not --remotes=origin --count 2>/dev/null || echo "0")
          echo "Commits ahead of origin: $COMMITS_AHEAD"

          # -s is true only for an existing, non-empty file.
          HAS_PATCH=false
          if [ -s "$PATCH_FILE" ]; then
            HAS_PATCH=true
          fi

          # Check if we have changes to create a PR from
          # Either: 1) A non-empty patch file exists, OR 2) There are new commits
          if [ "$HAS_PATCH" = "true" ] || [ "$COMMITS_AHEAD" -gt 0 ]; then
            echo "Changes detected (patch file or new commits)"
            if [ "$HAS_PATCH" = "true" ]; then
              echo "Attempting to apply patch..."
              # Apply the patch for the PR. Failure is tolerated because goose
              # was asked to apply its changes to the working tree already.
              git apply "$PATCH_FILE" 2>/dev/null || echo "Patch already applied or changes already present"
            fi
            echo "patch_created=true" >> "$GITHUB_OUTPUT"
            echo "Test creation successful - patch file created"

            # Resolve the function name. Each source is tried only when the
            # previous one produced nothing. (A single `a | sed || b | sed`
            # chain cannot do this: a pipeline's status is that of its last
            # command, so the fallbacks would never run.)
            FUNC_NAME=""
            # 1) Name written by goose
            if [ -s "$FUNC_FILE" ]; then
              FUNC_NAME=$(head -1 "$FUNC_FILE" | clean_name)
            fi
            # 2) Test name in staged changes
            if [ -z "$FUNC_NAME" ]; then
              FUNC_NAME=$(git diff --cached 2>/dev/null | extract_test_name '^+.*fn test_')
            fi
            # 3) Test name in the diff against the previous commit
            if [ -z "$FUNC_NAME" ]; then
              FUNC_NAME=$(git diff HEAD~1 2>/dev/null | extract_test_name '^+.*fn test_')
            fi
            # 4) Test name in the patch file
            if [ -z "$FUNC_NAME" ] && [ "$HAS_PATCH" = "true" ]; then
              FUNC_NAME=$(extract_test_name 'fn test_' < "$PATCH_FILE")
            fi
            # 5) Generic placeholder so the PR title is never left dangling
            if [ -z "$FUNC_NAME" ]; then
              FUNC_NAME="function"
            fi
            echo "function_name=${FUNC_NAME}" >> "$GITHUB_OUTPUT"
          else
            echo "patch_created=false" >> "$GITHUB_OUTPUT"
            echo "No patch file created - either no suitable function found or test failed"
          fi

      # Publishes token-usage numbers from the newest goose session file as
      # step outputs. When the sessions directory or a .jsonl session file is
      # missing, the step succeeds without setting any output.
      - name: Extract token metrics
        id: metrics
        run: |
          SESSION_DIR="$HOME/.local/share/goose/sessions"

          # Nothing to report without a sessions directory.
          [ -d "$SESSION_DIR" ] || exit 0

          # Most recently modified session file, if any.
          LATEST_SESSION=$(ls -t "$SESSION_DIR"/*.jsonl 2>/dev/null | head -1)
          [ -f "$LATEST_SESSION" ] || exit 0
          echo "Found session file: $LATEST_SESSION"

          # Print field $1 from the first line of the session file, or 0 when
          # the field is absent or null.
          session_metric() {
            head -1 "$LATEST_SESSION" | jq -r ".$1 // 0"
          }

          # Current context size metrics
          TOKENS=$(session_metric total_tokens)
          INPUT_TOKENS=$(session_metric input_tokens)
          OUTPUT_TOKENS=$(session_metric output_tokens)
          {
            echo "total_tokens=${TOKENS}"
            echo "input_tokens=${INPUT_TOKENS}"
            echo "output_tokens=${OUTPUT_TOKENS}"
          } >> $GITHUB_OUTPUT

          # Accumulated API usage metrics
          ACC_TOKENS=$(session_metric accumulated_total_tokens)
          ACC_INPUT_TOKENS=$(session_metric accumulated_input_tokens)
          ACC_OUTPUT_TOKENS=$(session_metric accumulated_output_tokens)
          {
            echo "accumulated_total_tokens=${ACC_TOKENS}"
            echo "accumulated_input_tokens=${ACC_INPUT_TOKENS}"
            echo "accumulated_output_tokens=${ACC_OUTPUT_TOKENS}"
          } >> $GITHUB_OUTPUT

          echo "Token usage - Total: ${TOKENS}, Input: ${INPUT_TOKENS}, Output: ${OUTPUT_TOKENS}"
          echo "Accumulated usage - Total: ${ACC_TOKENS}, Input: ${ACC_INPUT_TOKENS}, Output: ${ACC_OUTPUT_TOKENS}"

      # Opens a draft PR containing the generated test. Skipped when no
      # changes were detected or when the workflow was dispatched as a dry run.
      #
      # Every step output interpolated below has an expression fallback
      # ("function" / "n/a"): the metrics step sets no outputs when it finds no
      # session file, and an empty value would otherwise render as a dangling
      # title or "**Total**:  tokens" in the PR body.
      - name: Create Pull Request
        if: steps.find_untested.outputs.patch_created == 'true' && github.event.inputs.dry_run != 'true'
        uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # pin@v7.0.8
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "test: add test for ${{ steps.find_untested.outputs.function_name || 'function' }}"
          title: "test: add test coverage for ${{ steps.find_untested.outputs.function_name || 'function' }}"
          draft: true
          body: |
            ## 🤖 Automated Test Addition

            This PR was automatically generated by goose to improve test coverage.

            ### What changed?
            Added a unit test for a previously untested function.

            ### Why?
            Part of our daily automated test coverage improvement initiative. goose analyzes the codebase to find untested but important functions and creates focused unit tests for them.

            ### Review checklist
            - [ ] Test is meaningful and actually tests the function
            - [ ] Test name is descriptive
            - [ ] Test passes locally
            - [ ] No unnecessary changes included

            ### Metrics
            #### Current Context Size
            - **Total**: ${{ steps.metrics.outputs.total_tokens || 'n/a' }} tokens
            - **Input**: ${{ steps.metrics.outputs.input_tokens || 'n/a' }} tokens
            - **Output**: ${{ steps.metrics.outputs.output_tokens || 'n/a' }} tokens
            #### API Usage (Billable)
            - **Total**: ${{ steps.metrics.outputs.accumulated_total_tokens || 'n/a' }} tokens
            - **Input**: ${{ steps.metrics.outputs.accumulated_input_tokens || 'n/a' }} tokens
            - **Output**: ${{ steps.metrics.outputs.accumulated_output_tokens || 'n/a' }} tokens

            ---
            *Generated by the Daily Test Coverage Finder workflow*
          # One branch per workflow run; removed by the action once it is no
          # longer needed.
          branch: goose/test-coverage-${{ github.run_number }}
          delete-branch: true
          labels: |
            goose-generated
            test
            automated

      # Always runs, even after a failed step, and prints a one-line outcome.
      # Step outputs are passed through env rather than interpolated into the
      # script text.
      - name: Summary
        if: always()
        env:
          FUNCTION_NAME: ${{ steps.find_untested.outputs.function_name }}
          PATCH_CREATED: ${{ steps.find_untested.outputs.patch_created }}
        run: |
          # Anything other than an explicit "true" counts as "nothing found".
          if [ "$PATCH_CREATED" != "true" ]; then
            echo "ℹ️ No suitable untested code found today"
            exit 0
          fi

          echo "✅ Successfully found untested code and created a test"
          echo "📝 Function tested: $FUNCTION_NAME"