mirror of
https://github.com/block/goose.git
synced 2026-04-29 03:59:36 +00:00
209 lines
9.7 KiB
YAML
209 lines
9.7 KiB
YAML
# Manually triggered workflow that looks for untested code and can open a PR
# with a new test.
name: Daily Test Coverage Finder

on:
  # The nightly trigger is PAUSED. Restore the two commented lines below to run
  # every day at 02:00 UTC again.
  # schedule:
  #   - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      # When true, the pull-request step is skipped.
      dry_run:
        type: boolean
        default: false
        required: false
        description: "Dry run (no PR creation)"

# Write access is requested for repository contents and pull requests.
permissions:
  contents: write
  pull-requests: write
|
||
|
||
jobs:
  find-untested-code:
    runs-on: ubuntu-latest
    # Every step executes inside the goose container image, as root.
    container:
      image: ghcr.io/block/goose:latest
      options: --user root
    env:
      # Later steps create and read goose's config and session directories
      # under this HOME.
      HOME: /tmp/goose-home
      # Provider and model come from repository variables when they are set,
      # otherwise the defaults on the right-hand side are used.
      GOOSE_PROVIDER: ${{ vars.GOOSE_PROVIDER || 'openai' }}
      GOOSE_MODEL: ${{ vars.GOOSE_MODEL || 'gpt-5' }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:
|
||
# Check out the repository; the action is pinned to a commit SHA.
- name: Checkout code
  uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v4
  with:
    # 0 requests the complete history instead of a shallow clone.
    fetch-depth: 0
|
||
|
||
# jq is used by the metrics step and ripgrep is named in the goose prompt;
# build-essential is presumably needed for cargo builds (confirm).
- name: Install build and analysis tools
  run: |
    apt-get update && apt-get install -y jq ripgrep build-essential
|
||
|
||
# Runs goose with a prompt asking for ONE new, passing test, then publishes:
#   patch_created - 'true' when a non-empty patch file or new local commits exist
#   function_name - best-effort name of the tested function (used in the PR title)
- name: Find untested code and create working test
  id: find_untested
  run: |
    # Ensure the HOME directory structure exists
    mkdir -p "$HOME/.local/share/goose/sessions" "$HOME/.config/goose"

    # Prompt handed to goose. The quoted 'EOF' delimiter stops the shell from
    # expanding the backticks and quotes inside the text.
    cat << 'EOF' > /tmp/create_working_test.txt
    Your task is to find ONE untested function in the Rust codebase and create a working test for it.

    Requirements:
    1. The function MUST be in the crates/ directory
    2. It MUST have actual logic (not just a simple getter/setter)
    3. It MUST not already have a test
    4. Prefer functions with complexity but that are still testable in isolation
    5. Focus on the goose crate first, then goose-cli, then others

    Process:
    0. Immediately write these requirements in your TODO list tool and periodically check against them
    1. Find a suitable untested function (use your `analyze` tool and ripgrep)
    2. Write a comprehensive unit test for it
    3. Apply your changes to the codebase
    4. Run the test with: cargo test -p <crate_name> <test_name>
    5. If the test fails:
       - Read and understand the error message
       - Fix the test code
       - Apply the fix and run the test again
       - Repeat up to 3 times until the test passes
    6. Once the test passes, run `cargo fmt` to format all the code properly
    7. After formatting, save the final changes as a git diff to /tmp/test_addition.patch
    8. If successful, write the name of the function you tested to /tmp/function_tested.txt (just the function name, e.g., "check_tool_call" or "MyStruct::my_method")
    9. Only create the patch file if the test actually passes

    Important:
    - Only add ONE test for ONE function
    - The test MUST compile and pass before creating the patch
    - Keep changes minimal and focused
    - Include a descriptive test name that explains what is being tested
    EOF

    goose run -i /tmp/create_working_test.txt --with-builtin developer

    PATCH_FILE=/tmp/test_addition.patch
    NAME_FILE=/tmp/function_tested.txt

    # Reads a diff on stdin and prints what follows "fn test_" on the first
    # added line that declares such a function (prints nothing if none).
    first_added_test_name() {
      grep '^+.*fn test_' | head -n 1 | sed -e 's/.*fn test_//' -e 's/(.*//'
    }

    # Strips line breaks and trailing whitespace from stdin.
    trim_name() {
      tr -d '\n\r' | sed 's/[[:space:]]*$//'
    }

    # Debug: Check what files were created
    echo "Checking for patch file..."
    if [ -f "$PATCH_FILE" ]; then
      echo "Patch file exists"
      echo "Patch size: $(wc -c < "$PATCH_FILE") bytes"
      echo "First few lines of patch:"
      head -5 "$PATCH_FILE" || true
    else
      echo "No patch file found at $PATCH_FILE"
    fi

    # Check for new commits that Goose might have made
    COMMITS_AHEAD=$(git rev-list HEAD --not --remotes=origin --count 2>/dev/null || echo "0")
    echo "Commits ahead of origin: $COMMITS_AHEAD"

    # We have something to open a PR from when either:
    #   1) a non-empty patch file exists, OR
    #   2) there are new local commits
    if [ -s "$PATCH_FILE" ] || [ "$COMMITS_AHEAD" -gt 0 ]; then
      echo "Changes detected (patch file or new commits)"

      # Only try to apply a patch that actually exists. Failure is tolerated
      # because goose normally leaves the same changes in the working tree.
      if [ -s "$PATCH_FILE" ]; then
        echo "Attempting to apply patch..."
        git apply "$PATCH_FILE" 2>/dev/null || echo "Patch already applied or changes already present"
      fi
      echo "patch_created=true" >> "$GITHUB_OUTPUT"
      echo "Test creation successful - patch file created"

      # Resolve the function name. Each source is tried only while the name is
      # still empty; chaining the pipelines with `||` does not work because
      # every pipeline ends in sed, which succeeds even with no match.
      FUNC_NAME=""
      if [ -f "$NAME_FILE" ]; then
        FUNC_NAME=$(head -n 1 "$NAME_FILE" | trim_name)
      fi
      if [ -z "$FUNC_NAME" ]; then
        # Staged changes
        FUNC_NAME=$(git diff --cached 2>/dev/null | first_added_test_name || true)
      fi
      if [ -z "$FUNC_NAME" ]; then
        # Last commit plus any uncommitted changes
        FUNC_NAME=$(git diff HEAD~1 2>/dev/null | first_added_test_name || true)
      fi
      if [ -z "$FUNC_NAME" ] && [ -f "$PATCH_FILE" ]; then
        FUNC_NAME=$(first_added_test_name < "$PATCH_FILE" || true)
      fi

      # Clean up the function name and fall back to a generic placeholder so
      # the PR title is never left dangling.
      FUNC_NAME=$(echo "$FUNC_NAME" | trim_name)
      if [ -z "$FUNC_NAME" ]; then
        FUNC_NAME="function"
      fi
      echo "function_name=${FUNC_NAME}" >> "$GITHUB_OUTPUT"
    else
      echo "patch_created=false" >> "$GITHUB_OUTPUT"
      echo "No patch file created - either no suitable function found or test failed"
    fi
|
||
|
||
# Publishes token counts read from the first line of the newest goose session
# file. Metrics are informational only: every output defaults to 0, and a
# missing or unparsable session file never fails the step.
- name: Extract token metrics
  id: metrics
  run: |
    # Find the most recently modified session file in the goose sessions directory
    SESSION_DIR="$HOME/.local/share/goose/sessions"
    SESSION_FILE=""
    if [ -d "$SESSION_DIR" ]; then
      SESSION_FILE=$(ls -t "$SESSION_DIR"/*.jsonl 2>/dev/null | head -n 1 || true)
    fi

    if [ -n "$SESSION_FILE" ] && [ -f "$SESSION_FILE" ]; then
      echo "Found session file: $SESSION_FILE"
    else
      echo "No session file found in $SESSION_DIR - reporting 0 for all metrics"
      SESSION_FILE=""
    fi

    # Prints field $1 from the first line of the session file, or 0 when the
    # file is missing, the field is absent/null, or jq cannot parse the line.
    read_metric() {
      metric_value=""
      if [ -n "$SESSION_FILE" ]; then
        metric_value=$(head -n 1 "$SESSION_FILE" | jq -r --arg key "$1" '.[$key] // 0' 2>/dev/null || true)
      fi
      echo "${metric_value:-0}"
    }

    # Current context size metrics
    TOKENS=$(read_metric total_tokens)
    INPUT_TOKENS=$(read_metric input_tokens)
    OUTPUT_TOKENS=$(read_metric output_tokens)

    # Accumulated API usage metrics
    ACC_TOKENS=$(read_metric accumulated_total_tokens)
    ACC_INPUT_TOKENS=$(read_metric accumulated_input_tokens)
    ACC_OUTPUT_TOKENS=$(read_metric accumulated_output_tokens)

    # Always write every output so the PR body never shows a blank value
    {
      echo "total_tokens=${TOKENS}"
      echo "input_tokens=${INPUT_TOKENS}"
      echo "output_tokens=${OUTPUT_TOKENS}"
      echo "accumulated_total_tokens=${ACC_TOKENS}"
      echo "accumulated_input_tokens=${ACC_INPUT_TOKENS}"
      echo "accumulated_output_tokens=${ACC_OUTPUT_TOKENS}"
    } >> "$GITHUB_OUTPUT"

    echo "Token usage - Total: ${TOKENS}, Input: ${INPUT_TOKENS}, Output: ${OUTPUT_TOKENS}"
    echo "Accumulated usage - Total: ${ACC_TOKENS}, Input: ${ACC_INPUT_TOKENS}, Output: ${ACC_OUTPUT_TOKENS}"
|
||
|
||
# Opens a draft PR containing the generated test. Runs only when the
# find_untested step reported changes and the run is not a dry run.
- name: Create Pull Request
  if: steps.find_untested.outputs.patch_created == 'true' && github.event.inputs.dry_run != 'true'
  uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # pin@v7.0.8
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    # One branch per workflow run
    branch: goose/test-coverage-${{ github.run_number }}
    delete-branch: true
    draft: true
    labels: |
      goose-generated
      test
      automated
    commit-message: "test: add test for ${{ steps.find_untested.outputs.function_name }}"
    title: "test: add test coverage for ${{ steps.find_untested.outputs.function_name }}"
    # Markdown description; the token figures come from the metrics step
    body: |
      ## 🤖 Automated Test Addition

      This PR was automatically generated by goose to improve test coverage.

      ### What changed?
      Added a unit test for a previously untested function.

      ### Why?
      Part of our daily automated test coverage improvement initiative. goose analyzes the codebase to find untested but important functions and creates focused unit tests for them.

      ### Review checklist
      - [ ] Test is meaningful and actually tests the function
      - [ ] Test name is descriptive
      - [ ] Test passes locally
      - [ ] No unnecessary changes included

      ### Metrics
      #### Current Context Size
      - **Total**: ${{ steps.metrics.outputs.total_tokens }} tokens
      - **Input**: ${{ steps.metrics.outputs.input_tokens }} tokens
      - **Output**: ${{ steps.metrics.outputs.output_tokens }} tokens
      #### API Usage (Billable)
      - **Total**: ${{ steps.metrics.outputs.accumulated_total_tokens }} tokens
      - **Input**: ${{ steps.metrics.outputs.accumulated_input_tokens }} tokens
      - **Output**: ${{ steps.metrics.outputs.accumulated_output_tokens }} tokens

      ---
      *Generated by the Daily Test Coverage Finder workflow*
|
||
|
||
# Always runs, even after a failed step, and prints a one-line outcome.
# Step outputs are passed through env rather than interpolated into the script.
- name: Summary
  if: always()
  env:
    FUNCTION_NAME: ${{ steps.find_untested.outputs.function_name }}
    PATCH_CREATED: ${{ steps.find_untested.outputs.patch_created }}
  run: |
    case "$PATCH_CREATED" in
      true)
        echo "✅ Successfully found untested code and created a test"
        echo "📝 Function tested: $FUNCTION_NAME"
        ;;
      *)
        echo "ℹ️ No suitable untested code found today"
        ;;
    esac
|